PostGIS  2.5.7dev-r@@SVN_REVISION@@

◆ lwgeom_cluster_2d_kmeans()

int* lwgeom_cluster_2d_kmeans ( const LWGEOM **  geoms,
uint32_t  ngeoms,
uint32_t  k 
)

Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster each geometry is in.

Parameters
geomsthe input array of LWGEOM pointers
ngeomsthe number of elements in the array
kthe number of clusters to calculate

Definition at line 240 of file lwkmeans.c.

241 {
242  uint32_t i;
243  uint32_t num_centroids = 0;
244  uint32_t num_non_empty = 0;
245  LWGEOM** centroids;
246  POINT2D* centers_raw;
247  const POINT2D* cp;
248  int result = LW_FALSE;
249 
250  /* An array of objects to be analyzed.
251  * All NULL values will be returned in the KMEANS_NULL_CLUSTER. */
252  POINT2D** objs;
253 
254  /* An array of centers for the algorithm. */
255  POINT2D** centers;
256 
257  /* Array to fill in with cluster numbers. */
258  int* clusters;
259 
260  assert(k > 0);
261  assert(n > 0);
262  assert(geoms);
263 
264  if (n < k)
265  {
266  lwerror("%s: number of geometries is less than the number of clusters requested, not all clusters will get data", __func__);
267  k = n;
268  }
269 
270  /* We'll hold the temporary centroid objects here */
271  centroids = lwalloc(sizeof(LWGEOM*) * n);
272  memset(centroids, 0, sizeof(LWGEOM*) * n);
273 
274  /* The vector of cluster means. We have to allocate a chunk of memory for these because we'll be mutating them
275  * in the kmeans algorithm */
276  centers_raw = lwalloc(sizeof(POINT2D) * k);
277  memset(centers_raw, 0, sizeof(POINT2D) * k);
278 
279  /* K-means configuration setup */
280  objs = lwalloc(sizeof(POINT2D*) * n);
281  clusters = lwalloc(sizeof(int) * n);
282  centers = lwalloc(sizeof(POINT2D*) * k);
283 
284  /* Clean the memory */
285  memset(objs, 0, sizeof(POINT2D*) * n);
286  memset(clusters, 0, sizeof(int) * n);
287  memset(centers, 0, sizeof(POINT2D*) * k);
288 
289  /* Prepare the list of object pointers for K-means */
290  for (i = 0; i < n; i++)
291  {
292  const LWGEOM* geom = geoms[i];
293  LWPOINT* lwpoint;
294 
295  /* Null/empty geometries get a NULL pointer, set earlier with memset */
296  if ((!geom) || lwgeom_is_empty(geom)) continue;
297 
298  /* If the input is a point, use its coordinates */
299  /* If its not a point, convert it to one via centroid */
300  if (lwgeom_get_type(geom) != POINTTYPE)
301  {
303  if ((!centroid)) continue;
305  {
307  continue;
308  }
309  centroids[num_centroids++] = centroid;
310  lwpoint = lwgeom_as_lwpoint(centroid);
311  }
312  else
313  lwpoint = lwgeom_as_lwpoint(geom);
314 
315  /* Store a pointer to the POINT2D we are interested in */
316  cp = getPoint2d_cp(lwpoint->point, 0);
317  objs[i] = (POINT2D*)cp;
318  num_non_empty++;
319  }
320 
321  if (num_non_empty < k)
322  {
323  lwnotice("%s: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data", __func__);
324  k = num_non_empty;
325  }
326 
327  if (k > 1)
328  {
329  kmeans_init(objs, clusters, n, centers, centers_raw, k);
330  result = kmeans(objs, clusters, n, centers, k);
331  }
332  else
333  {
334  /* k=0: everythong is unclusterable
335  * k=1: mark up NULL and non-NULL */
336  for (i = 0; i < n; i++)
337  {
338  if (k == 0 || !objs[i])
339  clusters[i] = KMEANS_NULL_CLUSTER;
340  else
341  clusters[i] = 0;
342  }
343  result = LW_TRUE;
344  }
345 
346  /* Before error handling, might as well clean up all the inputs */
347  lwfree(objs);
348  lwfree(centers);
349  lwfree(centers_raw);
350  lwfree(centroids);
351 
352  /* Good result */
353  if (result) return clusters;
354 
355  /* Bad result, not going to need the answer */
356  lwfree(clusters);
357  return NULL;
358 }
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
#define LW_FALSE
Definition: liblwgeom.h:77
void lwgeom_free(LWGEOM *geom)
Definition: lwgeom.c:1144
uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition: lwgeom.c:923
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition: liblwgeom.h:85
LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
Definition: lwgeom.c:161
void lwfree(void *mem)
Definition: lwutil.c:244
int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members)
Definition: lwgeom.c:1393
void * lwalloc(size_t size)
Definition: lwutil.c:229
#define LW_TRUE
Return types for functions with status returns.
Definition: liblwgeom.h:76
const POINT2D * getPoint2d_cp(const POINTARRAY *pa, uint32_t n)
Returns a POINT2D pointer into the POINTARRAY serialized_ptlist, suitable for reading from.
Definition: lwgeom_api.c:374
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
Definition: lwutil.c:190
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
Definition: lwutil.c:177
static void kmeans_init(POINT2D **objs, int *clusters, uint32_t n, POINT2D **centers, POINT2D *centers_raw, uint32_t k)
Definition: lwkmeans.c:132
#define KMEANS_NULL_CLUSTER
Definition: lwkmeans.c:17
static int kmeans(POINT2D **objs, int *clusters, uint32_t n, POINT2D **centers, uint32_t k)
Definition: lwkmeans.c:97
Datum centroid(PG_FUNCTION_ARGS)
POINTARRAY * point
Definition: liblwgeom.h:414
unsigned int uint32_t
Definition: uthash.h:78

References centroid(), getPoint2d_cp(), kmeans(), kmeans_init(), KMEANS_NULL_CLUSTER, LW_FALSE, LW_TRUE, lwalloc(), lwerror(), lwfree(), lwgeom_as_lwpoint(), lwgeom_centroid(), lwgeom_free(), lwgeom_get_type(), lwgeom_is_empty(), lwnotice(), LWPOINT::point, and POINTTYPE.

Referenced by ST_ClusterKMeans(), and test_kmeans().

Here is the call graph for this function:
Here is the caller graph for this function: