Task
Perform K-means clustering on the points and visualize the resulting clusters.
Dataset
Any point data
Result
Geoprocessing workflow


Generated code:
# This script performs spatial clustering of a point dataset using K-means and visualizes the resulting clusters.
# Steps:
# 1) Reproject (skipped because input is already in a projected CRS in meters: EPSG:3857).
# 2) Add Geometry Attributes: store X and Y coordinates using 'qgis:exportaddgeometrycolumns'.
# 3) K-Means Clustering: run 'native:kmeansclustering' with a specified K to create 'CLUSTER_ID' and cluster size field.
# 4) Calculate Cluster Centroids: use 'native:meancoordinates' grouped by 'CLUSTER_ID'.
# 5) Visualize Clusters: apply unique value symbology by 'CLUSTER_ID', symbolize cluster centroids, and create convex hulls per cluster (optional).
# Notes:
# - All outputs are saved to the specified output directory with unique filenames to avoid overwrite conflicts.
# - Each processing step is followed by a short waiting period to ensure file creation before subsequent steps.
# - Uses only required PyQGIS and processing tools; geopandas/numpy are not necessary here.
-
import os
-
import time
-
import random
-
import processing
-
from qgis.core import (
-
QgsVectorLayer,
-
QgsProject,
-
QgsSymbol,
-
QgsRendererCategory,
-
QgsCategorizedSymbolRenderer,
-
QgsSingleSymbolRenderer
-
)
-
from PyQt5.QtGui import QColor
-
# Defaults reproduce the values that were previously hard-coded in the function.
_DEFAULT_INPUT_PATH = r"C:\Users\17036\Downloads\test\Hazardous_Waste_Sites\HW_Sites.shp"
_DEFAULT_OUTPUT_DIR = (
    r"C:\Users\17036\AppData\Roaming\QGIS\QGIS3\profiles\default\python\plugins"
    r"\SpatialAnalysisAgent-master\Default_workspace"
)

# Field names are kept within the 10-character shapefile (DBF) limit.
_CLUSTER_FIELD = "CLUSTER_ID"
_SIZE_FIELD = "CLS_SIZE"

# Pause after each processing step, as described in the script header notes.
_STEP_WAIT_SECONDS = 1


def _unique_path(output_dir, base_name, ext=".shp"):
    """Return a path in *output_dir* that does not exist yet.

    The first candidate is ``base_name + ext``; if it is taken, ``_1``,
    ``_2``, ... are appended to the base name until a free path is found.
    """
    candidate = os.path.join(output_dir, base_name + ext)
    suffix = 0
    while os.path.exists(candidate):
        suffix += 1
        candidate = os.path.join(output_dir, f"{base_name}_{suffix}{ext}")
    return candidate


def _run_step(algorithm_id, params, layer_name):
    """Run a processing algorithm, load its OUTPUT and add it to the project."""
    result = processing.run(algorithm_id, params)
    time.sleep(_STEP_WAIT_SECONDS)
    layer = QgsVectorLayer(result["OUTPUT"], layer_name, "ogr")
    if not layer.isValid():
        raise RuntimeError(
            f"Output of '{algorithm_id}' could not be loaded: {result['OUTPUT']}"
        )
    QgsProject.instance().addMapLayer(layer)
    return layer


def _cluster_sort_key(value):
    """Order numeric cluster ids first; anything else (e.g. NULL) goes last."""
    if isinstance(value, (int, float)):
        return (0, value, "")
    return (1, 0, str(value))


def _apply_single_symbol(layer, color, size=None):
    """Render *layer* with one default symbol in *color* (and *size* if it applies)."""
    symbol = QgsSymbol.defaultSymbol(layer.geometryType())
    symbol.setColor(color)
    # Only marker symbols expose setSize(); fill symbols do not.
    if size is not None and hasattr(symbol, "setSize"):
        symbol.setSize(size)
    layer.setRenderer(QgsSingleSymbolRenderer(symbol))
    layer.triggerRepaint()


def perform_spatial_clustering(
    input_path=_DEFAULT_INPUT_PATH,
    output_dir=_DEFAULT_OUTPUT_DIR,
    k_clusters=5,
    base_name="HW_Sites",
):
    """Cluster a point layer with K-means and style the results in QGIS.

    Workflow:
      1. Reprojection is skipped; the input is assumed to already use a
         projected CRS (the script header states EPSG:3857). This is not
         checked here.
      2. X/Y coordinate columns are added with 'qgis:exportaddgeometrycolumns'.
      3. 'native:kmeansclustering' writes the 'CLUSTER_ID' and 'CLS_SIZE' fields.
      4. 'native:meancoordinates' computes one centroid per cluster.
      5. 'qgis:minimumboundinggeometry' (convex hull type) builds one hull per
         cluster.
    Each output is written to a unique shapefile in *output_dir*, loaded, and
    added to the current project. The clusters are then drawn with one random
    colour per cluster id, the centroids in black, and the hulls in
    semi-transparent yellow.

    Args:
        input_path: Path of the point dataset to cluster.
        output_dir: Directory that receives the output shapefiles; it is
            created if it does not exist.
        k_clusters: Number of clusters (K); must be at least 1.
        base_name: Prefix used for output file names and layer names.

    Raises:
        ValueError: If *k_clusters* is smaller than 1.
        RuntimeError: If the input layer or any step output cannot be loaded.
    """
    if k_clusters < 1:
        raise ValueError(f"k_clusters must be >= 1, got {k_clusters}")

    input_layer = QgsVectorLayer(input_path, base_name, "ogr")
    if not input_layer.isValid():
        raise RuntimeError(f"Input layer could not be loaded: {input_path}")

    os.makedirs(output_dir, exist_ok=True)

    # Step 1: Reproject (skipped) - see the CRS assumption in the docstring.

    # Step 2: Add geometry attributes (X and Y)
    geom_name = f"{base_name}_with_geom"
    geom_layer = _run_step(
        "qgis:exportaddgeometrycolumns",
        {
            "INPUT": input_path,
            "CALC_METHOD": 0,  # Layer CRS
            "OUTPUT": _unique_path(output_dir, geom_name),
        },
        geom_name,
    )

    # Step 3: K-means clustering on point geometry
    clusters_name = f"{base_name}_clusters"
    clusters_layer = _run_step(
        "native:kmeansclustering",
        {
            "INPUT": geom_layer,
            "CLUSTERS": k_clusters,
            "FIELD_NAME": _CLUSTER_FIELD,
            "SIZE_FIELD_NAME": _SIZE_FIELD,
            "OUTPUT": _unique_path(output_dir, clusters_name),
        },
        clusters_name,
    )

    # Step 4: Mean coordinates (centroids) grouped by cluster id
    centroids_name = f"{base_name}_cluster_centroids"
    centroids_layer = _run_step(
        "native:meancoordinates",
        {
            "INPUT": clusters_layer,
            "WEIGHT": None,
            "UID": _CLUSTER_FIELD,  # group by cluster id
            "OUTPUT": _unique_path(output_dir, centroids_name),
        },
        centroids_name,
    )

    # Step 5 (optional): One convex hull per cluster. 'native:convexhull'
    # works feature by feature and has no grouping field, so it cannot build
    # per-cluster hulls from points; minimum bounding geometry with the
    # convex-hull type and a grouping field does.
    hulls_name = f"{base_name}_cluster_hulls"
    hulls_layer = _run_step(
        "qgis:minimumboundinggeometry",
        {
            "INPUT": clusters_layer,
            "FIELD": _CLUSTER_FIELD,
            "TYPE": 3,  # Convex hull
            "OUTPUT": _unique_path(output_dir, hulls_name),
        },
        hulls_name,
    )

    # Visualization: unique-value symbology for clusters (by CLUSTER_ID)
    cluster_ids = {feature[_CLUSTER_FIELD] for feature in clusters_layer.getFeatures()}
    categories = []
    for cluster_id in sorted(cluster_ids, key=_cluster_sort_key):
        symbol = QgsSymbol.defaultSymbol(clusters_layer.geometryType())
        # Channel values are capped at 230 so colours stay clear of white.
        symbol.setColor(
            QColor(
                random.randint(0, 230),
                random.randint(0, 230),
                random.randint(0, 230),
                220,
            )
        )
        if hasattr(symbol, "setSize"):
            symbol.setSize(3.2)
        categories.append(QgsRendererCategory(cluster_id, symbol, str(cluster_id)))
    clusters_layer.setRenderer(QgsCategorizedSymbolRenderer(_CLUSTER_FIELD, categories))
    clusters_layer.triggerRepaint()

    # Visualization: centroids (single black symbol)
    _apply_single_symbol(centroids_layer, QColor(0, 0, 0), size=5.0)

    # Visualization: cluster hulls (light transparent yellow fill)
    _apply_single_symbol(hulls_layer, QColor(255, 215, 0, 50))
-
# Run the clustering workflow as soon as the script is executed.
perform_spatial_clustering()
