PostGIS  3.4.0dev-r@@SVN_REVISION@@

◆ lwgeom_cluster_kmeans()

int* lwgeom_cluster_kmeans ( const LWGEOM **  geoms,
uint32_t  n,
uint32_t  k,
double  max_radius 
)

Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster each geometry is in.

Parameters
geomsthe input array of LWGEOM pointers
ngeomsthe number of elements in the array
kthe number of clusters to calculate
max_radiusmaxmimum radius of cluster before it's split

Definition at line 321 of file lwkmeans.c.

322 {
323  uint32_t num_non_empty = 0;
324 
325  assert(k > 0);
326  assert(n > 0);
327  assert(max_radius >= 0);
328  assert(geoms);
329 
330  if (n < k)
331  {
332  lwerror(
333  "%s: number of geometries is less than the number of clusters requested, not all clusters will get data",
334  __func__);
335  k = n;
336  }
337 
338  /* An array of objects to be analyzed. */
339  POINT4D *objs_dense = lwalloc(sizeof(POINT4D) * n);
340 
341  /* Array to mark unclusterable objects. Will be returned as KMEANS_NULL_CLUSTER. */
342  uint8_t *geom_valid = lwalloc(sizeof(uint8_t) * n);
343  memset(geom_valid, 0, sizeof(uint8_t) * n);
344 
345  /* Array to fill in with cluster numbers. */
346  int *clusters = lwalloc(sizeof(int) * n);
347  for (uint32_t i = 0; i < n; i++)
348  clusters[i] = KMEANS_NULL_CLUSTER;
349 
350  /* An array of clusters centers for the algorithm. */
351  POINT4D *centers = lwalloc(sizeof(POINT4D) * n);
352  memset(centers, 0, sizeof(POINT4D) * n);
353 
354  /* An array of clusters radii for the algorithm. */
355  double *radii = lwalloc(sizeof(double) * n);
356  memset(radii, 0, sizeof(double) * n);
357 
358  /* Prepare the list of object pointers for K-means */
359  for (uint32_t i = 0; i < n; i++)
360  {
361  const LWGEOM *geom = geoms[i];
362  /* Unset M values will be 1 */
363  POINT4D out = {0, 0, 0, 1};
364 
365  /* Null/empty geometries get geom_valid=LW_FALSE set earlier with memset */
366  if ((!geom) || lwgeom_is_empty(geom))
367  continue;
368 
369  /* If the input is a point, use its coordinates */
370  if (lwgeom_get_type(geom) == POINTTYPE)
371  {
372  out.x = lwpoint_get_x(lwgeom_as_lwpoint(geom));
373  out.y = lwpoint_get_y(lwgeom_as_lwpoint(geom));
374  if (lwgeom_has_z(geom))
375  out.z = lwpoint_get_z(lwgeom_as_lwpoint(geom));
376  if (lwgeom_has_m(geom))
377  {
378  out.m = lwpoint_get_m(lwgeom_as_lwpoint(geom));
379  if (out.m <= 0)
380  lwerror("%s has an input point geometry with weight in M less or equal to 0",
381  __func__);
382  }
383  }
384  else if (!lwgeom_has_z(geom))
385  {
386  /* For 2D, we can take a centroid */
388  if (!centroid)
389  continue;
391  {
393  continue;
394  }
398  }
399  else
400  {
401  /* For 3D non-point, we can have a box center */
402  const GBOX *box = lwgeom_get_bbox(geom);
403  if (!gbox_is_valid(box))
404  continue;
405  out.x = (box->xmax + box->xmin) / 2;
406  out.y = (box->ymax + box->ymin) / 2;
407  out.z = (box->zmax + box->zmin) / 2;
408  }
409  geom_valid[i] = LW_TRUE;
410  objs_dense[num_non_empty++] = out;
411  }
412 
413  if (num_non_empty < k)
414  {
415  lwnotice(
416  "%s: number of non-empty geometries (%d) is less than the number of clusters (%d) requested, not all clusters will get data",
417  __func__,
418  num_non_empty,
419  k);
420  k = num_non_empty;
421  }
422 
423  uint8_t converged = LW_TRUE;
424 
425  if (num_non_empty > 0)
426  {
427  uint32_t *clusters_dense = lwalloc(sizeof(uint32_t) * num_non_empty);
428  memset(clusters_dense, 0, sizeof(uint32_t) * num_non_empty);
429  uint32_t output_cluster_count = kmeans(objs_dense, clusters_dense, num_non_empty, centers, radii, k, max_radius);
430 
431  uint32_t d = 0;
432  for (uint32_t i = 0; i < n; i++)
433  if (geom_valid[i])
434  clusters[i] = (int)clusters_dense[d++];
435 
436  converged = output_cluster_count > 0;
437  lwfree(clusters_dense);
438  }
439 
440  /* Before error handling, might as well clean up all the inputs */
441  lwfree(objs_dense);
442  lwfree(centers);
443  lwfree(geom_valid);
444  lwfree(radii);
445 
446  /* Good result */
447  if (converged)
448  return clusters;
449 
450  /* Bad result, not going to need the answer */
451  lwfree(clusters);
452  return NULL;
453 }
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition: gbox.c:197
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
double lwpoint_get_m(const LWPOINT *point)
Definition: lwpoint.c:107
void lwgeom_free(LWGEOM *geom)
Definition: lwgeom.c:1155
double lwpoint_get_x(const LWPOINT *point)
Definition: lwpoint.c:63
int lwgeom_has_z(const LWGEOM *geom)
Return LW_TRUE if geometry has Z ordinates.
Definition: lwgeom.c:934
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition: liblwgeom.h:102
void lwfree(void *mem)
Definition: lwutil.c:242
const GBOX * lwgeom_get_bbox(const LWGEOM *lwgeom)
Get a non-empty geometry bounding box, computing and caching it if not already there.
Definition: lwgeom.c:743
void * lwalloc(size_t size)
Definition: lwutil.c:227
double lwpoint_get_z(const LWPOINT *point)
Definition: lwpoint.c:89
#define LW_TRUE
Return types for functions with status returns.
Definition: liblwgeom.h:93
int lwgeom_has_m(const LWGEOM *geom)
Return LW_TRUE if geometry has M ordinates.
Definition: lwgeom.c:941
double lwpoint_get_y(const LWPOINT *point)
Definition: lwpoint.c:76
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
Definition: lwutil.c:190
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
Definition: lwutil.c:177
static uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition: lwinline.h:145
static int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members)
Definition: lwinline.h:203
static LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
Definition: lwinline.h:131
static uint32_t kmeans(POINT4D *objs, uint32_t *clusters, uint32_t n, POINT4D *centers, double *radii, uint32_t min_k, double max_radius)
Definition: lwkmeans.c:276
#define KMEANS_NULL_CLUSTER
Definition: lwkmeans.c:14
Datum centroid(PG_FUNCTION_ARGS)
double ymax
Definition: liblwgeom.h:357
double zmax
Definition: liblwgeom.h:359
double xmax
Definition: liblwgeom.h:355
double zmin
Definition: liblwgeom.h:358
double ymin
Definition: liblwgeom.h:356
double xmin
Definition: liblwgeom.h:354
double m
Definition: liblwgeom.h:414
double x
Definition: liblwgeom.h:414
double z
Definition: liblwgeom.h:414
double y
Definition: liblwgeom.h:414

References centroid(), gbox_is_valid(), kmeans(), KMEANS_NULL_CLUSTER, LW_TRUE, lwalloc(), lwerror(), lwfree(), lwgeom_as_lwpoint(), lwgeom_centroid(), lwgeom_free(), lwgeom_get_bbox(), lwgeom_get_type(), lwgeom_has_m(), lwgeom_has_z(), lwgeom_is_empty(), lwnotice(), lwpoint_get_m(), lwpoint_get_x(), lwpoint_get_y(), lwpoint_get_z(), POINT4D::m, POINTTYPE, POINT4D::x, GBOX::xmax, GBOX::xmin, POINT4D::y, GBOX::ymax, GBOX::ymin, POINT4D::z, GBOX::zmax, and GBOX::zmin.

Referenced by ST_ClusterKMeans(), and test_kmeans().

Here is the call graph for this function:
Here is the caller graph for this function: