Select Page

Task

Perform K-means clustering on the points and visualize the resulting clusters.

Dataset

Any point data

Result

Geoprocessing workflow

Generated code: 

# This script performs spatial clustering of a point dataset using K-means and visualizes the resulting clusters.
# Steps:
# 1) Reproject (skipped because input is already in a projected CRS in meters: EPSG:3857).
# 2) Add Geometry Attributes: store X and Y coordinates using 'qgis:exportaddgeometrycolumns'.
# 3) K-Means Clustering: run 'native:kmeansclustering' with a specified K to create 'CLUSTER_ID' and a cluster size field.
# 4) Calculate Cluster Centroids: use 'native:meancoordinates' grouped by 'CLUSTER_ID'.
# 5) Visualize Clusters: apply unique value symbology by 'CLUSTER_ID', symbolize cluster centroids, and create convex hulls per cluster (optional).
# Notes:
# - All outputs are saved to the specified output directory with unique filenames to avoid overwrite conflicts.
# - Each processing step is followed by a short waiting period to ensure file creation before subsequent steps.
# - Uses only required PyQGIS and processing tools; geopandas/numpy are not necessary here.
import os
import random
import time

import processing
from PyQt5.QtGui import QColor
from qgis.core import (
    NULL,
    QgsCategorizedSymbolRenderer,
    QgsProject,
    QgsRendererCategory,
    QgsSingleSymbolRenderer,
    QgsSymbol,
    QgsVectorLayer,
)
  25. def perform_spatial_clustering():
  26.     # Inputs
  27.     input_path = r“C:\Users\17036\Downloads\test\Hazardous_Waste_Sites\HW_Sites.shp”
  28.     output_dir = r“C:\Users\17036\AppData\Roaming\QGIS\QGIS3\profiles\default\python\plugins\SpatialAnalysisAgent-master\Default_workspace”
  29.     k_clusters = 5  # Specify number of clusters
  30.     # Helper to generate a unique shapefile path in output directory
  31.     def unique_path(base_name, ext=“.shp”):
  32.         base = os.path.join(output_dir, base_name + ext)
  33.         if not os.path.exists(base):
  34.             return base
  35.         i = 1
  36.         while True:
  37.             candidate = os.path.join(output_dir, f“{base_name}_{i}{ext}”)
  38.             if not os.path.exists(candidate):
  39.                 return candidate
  40.             i += 1
  41.     # Load input point layer
  42.     input_layer = QgsVectorLayer(input_path, “HW_Sites”, “ogr”)
  43.     # Step 1: Reproject (skipped) – Input CRS is EPSG:3857 (meters), appropriate for K-means Euclidean distance
  44.     reprojected_layer = input_layer
  45.     reprojected_path = input_path
  46.     # Step 2: Add geometry attributes (X and Y)
  47.     geom_out_path = unique_path(“HW_Sites_with_geom”)
  48.     add_geom_params = {
  49.         ‘INPUT’: reprojected_path,
  50.         ‘CALC_METHOD’: 0,  # Layer CRS
  51.         ‘OUTPUT’: geom_out_path
  52.     }
  53.     add_geom_result = processing.run(‘qgis:exportaddgeometrycolumns’, add_geom_params)
  54.     time.sleep(1)
  55.     geom_layer = QgsVectorLayer(add_geom_result[‘OUTPUT’], ‘HW_Sites_with_geom’, ‘ogr’)
  56.     QgsProject.instance().addMapLayer(geom_layer)
  57.     # Step 3: K-means clustering on point geometry
  58.     clusters_out_path = unique_path(“HW_Sites_clusters”)
  59.     kmeans_params = {
  60.         ‘INPUT’: geom_layer,
  61.         ‘CLUSTERS’: k_clusters,
  62.         ‘OUTPUT’: clusters_out_path,
  63.         ‘FIELD_NAME’: ‘CLUSTER_ID’,  # 10 char limit (shapefile)
  64.         ‘SIZE_FIELD_NAME’: ‘CLS_SIZE’  # <= 10 chars
  65.     }
  66.     kmeans_result = processing.run(‘native:kmeansclustering’, kmeans_params)
  67.     time.sleep(1)
  68.     clusters_layer = QgsVectorLayer(kmeans_result[‘OUTPUT’], ‘HW_Sites_clusters’, ‘ogr’)
  69.     QgsProject.instance().addMapLayer(clusters_layer)
  70.     # Step 4: Calculate mean coordinates (centroids) grouped by cluster_id
  71.     fields = clusters_layer.fields()  # Access fields before using any field from the layer
  72.     centroids_out_path = unique_path(“HW_Sites_cluster_centroids”)
  73.     meancoords_params = {
  74.         ‘INPUT’: clusters_layer,
  75.         ‘WEIGHT’: None,
  76.         ‘UID’: ‘CLUSTER_ID’,  # group by cluster id
  77.         ‘OUTPUT’: centroids_out_path
  78.     }
  79.     meancoords_result = processing.run(‘native:meancoordinates’, meancoords_params)
  80.     time.sleep(1)
  81.     centroids_layer = QgsVectorLayer(meancoords_result[‘OUTPUT’], ‘HW_Sites_cluster_centroids’, ‘ogr’)
  82.     QgsProject.instance().addMapLayer(centroids_layer)
  83.     # Step 5 (optional): Convex hull per cluster to depict spatial extents
  84.     hulls_out_path = unique_path(“HW_Sites_cluster_hulls”)
  85.     hull_params = {
  86.         ‘INPUT’: clusters_layer,
  87.         ‘FIELD’: ‘CLUSTER_ID’,
  88.         ‘OUTPUT’: hulls_out_path
  89.     }
  90.     hull_result = processing.run(‘native:convexhull’, hull_params)
  91.     time.sleep(1)
  92.     hulls_layer = QgsVectorLayer(hull_result[‘OUTPUT’], ‘HW_Sites_cluster_hulls’, ‘ogr’)
  93.     QgsProject.instance().addMapLayer(hulls_layer)
  94.     # Visualization: Unique value symbology for clusters (by CLUSTER_ID)
  95.     cluster_field = ‘CLUSTER_ID’
  96.     categories = []
  97.     unique_vals = set(f[cluster_field] for f in clusters_layer.getFeatures())
  98.     for v in sorted(unique_vals):
  99.         sym = QgsSymbol.defaultSymbol(clusters_layer.geometryType())
  100.         sym.setColor(QColor(random.randint(0, 230), random.randint(0, 230), random.randint(0, 230), 220))
  101.         try:
  102.             sym.setSize(3.2)
  103.         except:
  104.             pass
  105.         cat = QgsRendererCategory(v, sym, str(v))
  106.         categories.append(cat)
  107.     cat_renderer = QgsCategorizedSymbolRenderer(cluster_field, categories)
  108.     clusters_layer.setRenderer(cat_renderer)
  109.     clusters_layer.triggerRepaint()
  110.     # Visualization: Centroids (single symbol)
  111.     cent_sym = QgsSymbol.defaultSymbol(centroids_layer.geometryType())
  112.     cent_sym.setColor(QColor(0, 0, 0))
  113.     try:
  114.         cent_sym.setSize(5.0)
  115.     except:
  116.         pass
  117.     centroids_layer.setRenderer(QgsSingleSymbolRenderer(cent_sym))
  118.     centroids_layer.triggerRepaint()
  119.     # Visualization: Cluster hulls (semi-transparent fill)
  120.     hull_sym = QgsSymbol.defaultSymbol(hulls_layer.geometryType())
  121.     hull_sym.setColor(QColor(255, 215, 0, 50))  # light transparent yellow
  122.     hulls_layer.setRenderer(QgsSingleSymbolRenderer(hull_sym))
  123.     hulls_layer.triggerRepaint()
  124. perform_spatial_clustering()