PostGIS 3.7.0dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches

◆ lwgeom_cluster_kmeans()

int * lwgeom_cluster_kmeans ( const LWGEOM **  geoms,
uint32_t  n,
uint32_t  k,
double  max_radius 
)

Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster each geometry is in.

Parameters
geomsthe input array of LWGEOM pointers
ngeomsthe number of elements in the array
kthe number of clusters to calculate
max_radiusmaximum radius of cluster before it's split

Definition at line 321 of file lwkmeans.c.

322{
323 uint32_t num_non_empty = 0;
324
325 assert(k > 0);
326 assert(n > 0);
327 assert(max_radius >= 0);
328 assert(geoms);
329
330 if (n < k)
331 {
332 lwerror(
333 "%s: number of geometries is less than the number of clusters requested, not all clusters will get data",
334 __func__);
335 k = n;
336 }
337
338 /* An array of objects to be analyzed. */
339 POINT4D *objs_dense = lwalloc(sizeof(POINT4D) * n);
340
341 /* Array to mark unclusterable objects. Will be returned as KMEANS_NULL_CLUSTER. */
342 uint8_t *geom_valid = lwalloc(sizeof(uint8_t) * n);
343 memset(geom_valid, 0, sizeof(uint8_t) * n);
344
345 /* Array to fill in with cluster numbers. */
346 int *clusters = lwalloc(sizeof(int) * n);
347 for (uint32_t i = 0; i < n; i++)
348 clusters[i] = KMEANS_NULL_CLUSTER;
349
350 /* An array of clusters centers for the algorithm. */
351 POINT4D *centers = lwalloc(sizeof(POINT4D) * n);
352 memset(centers, 0, sizeof(POINT4D) * n);
353
354 /* An array of clusters radii for the algorithm. */
355 double *radii = lwalloc(sizeof(double) * n);
356 memset(radii, 0, sizeof(double) * n);
357
358 /* Prepare the list of object pointers for K-means */
359 for (uint32_t i = 0; i < n; i++)
360 {
361 const LWGEOM *geom = geoms[i];
362 /* Unset M values will be 1 */
363 POINT4D out = {0, 0, 0, 1};
364
365 /* Null/empty geometries get geom_valid=LW_FALSE set earlier with memset */
366 if ((!geom) || lwgeom_is_empty(geom))
367 continue;
368
369 /* If the input is a point, use its coordinates */
370 if (lwgeom_get_type(geom) == POINTTYPE)
371 {
372 out.x = lwpoint_get_x(lwgeom_as_lwpoint(geom));
373 out.y = lwpoint_get_y(lwgeom_as_lwpoint(geom));
374 if (lwgeom_has_z(geom))
375 out.z = lwpoint_get_z(lwgeom_as_lwpoint(geom));
376 if (lwgeom_has_m(geom))
377 {
378 out.m = lwpoint_get_m(lwgeom_as_lwpoint(geom));
379 if (out.m <= 0)
380 lwerror("%s has an input point geometry with weight in M less or equal to 0",
381 __func__);
382 }
383 }
384 else if (!lwgeom_has_z(geom))
385 {
386 /* For 2D, we can take a centroid */
388 if (!centroid)
389 continue;
391 {
393 continue;
394 }
398 }
399 else
400 {
401 /* For 3D non-point, we can have a box center */
402 const GBOX *box = lwgeom_get_bbox(geom);
403 if (!gbox_is_valid(box))
404 continue;
405 out.x = (box->xmax + box->xmin) / 2;
406 out.y = (box->ymax + box->ymin) / 2;
407 out.z = (box->zmax + box->zmin) / 2;
408 }
409 geom_valid[i] = LW_TRUE;
410 objs_dense[num_non_empty++] = out;
411 }
412
413 if (num_non_empty < k)
414 {
415 lwnotice(
416 "%s: number of non-empty geometries (%d) is less than the number of clusters (%d) requested, not all clusters will get data",
417 __func__,
418 num_non_empty,
419 k);
420 k = num_non_empty;
421 }
422
423 uint8_t converged = LW_TRUE;
424
425 if (num_non_empty > 0)
426 {
427 uint32_t *clusters_dense = lwalloc(sizeof(uint32_t) * num_non_empty);
428 memset(clusters_dense, 0, sizeof(uint32_t) * num_non_empty);
429 uint32_t output_cluster_count = kmeans(objs_dense, clusters_dense, num_non_empty, centers, radii, k, max_radius);
430
431 uint32_t d = 0;
432 for (uint32_t i = 0; i < n; i++)
433 if (geom_valid[i])
434 clusters[i] = (int)clusters_dense[d++];
435
436 converged = output_cluster_count > 0;
437 lwfree(clusters_dense);
438 }
439
440 /* Before error handling, might as well clean up all the inputs */
441 lwfree(objs_dense);
442 lwfree(centers);
443 lwfree(geom_valid);
444 lwfree(radii);
445
446 /* Good result */
447 if (converged)
448 return clusters;
449
450 /* Bad result, not going to need the answer */
451 lwfree(clusters);
452 return NULL;
453}
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition gbox.c:197
double lwpoint_get_m(const LWPOINT *point)
Definition lwpoint.c:107
void lwgeom_free(LWGEOM *geom)
Definition lwgeom.c:1246
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
double lwpoint_get_x(const LWPOINT *point)
Definition lwpoint.c:63
int lwgeom_has_z(const LWGEOM *geom)
Return LW_TRUE if geometry has Z ordinates.
Definition lwgeom.c:962
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition liblwgeom.h:102
void * lwalloc(size_t size)
Definition lwutil.c:227
void lwfree(void *mem)
Definition lwutil.c:248
double lwpoint_get_z(const LWPOINT *point)
Definition lwpoint.c:89
#define LW_TRUE
Return types for functions with status returns.
Definition liblwgeom.h:93
const GBOX * lwgeom_get_bbox(const LWGEOM *lwgeom)
Get a non-empty geometry bounding box, computing and caching it if not already there.
Definition lwgeom.c:771
int lwgeom_has_m(const LWGEOM *geom)
Return LW_TRUE if geometry has M ordinates.
Definition lwgeom.c:969
double lwpoint_get_y(const LWPOINT *point)
Definition lwpoint.c:76
void lwnotice(const char *fmt,...) __attribute__((format(printf
Write a notice out to the notice handler.
void void lwerror(const char *fmt,...) __attribute__((format(printf
Write a notice out to the error handler.
static uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition lwinline.h:141
static LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
Definition lwinline.h:127
static int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members)
Definition lwinline.h:199
static uint32_t kmeans(POINT4D *objs, uint32_t *clusters, uint32_t n, POINT4D *centers, double *radii, uint32_t min_k, double max_radius)
Definition lwkmeans.c:276
#define KMEANS_NULL_CLUSTER
Definition lwkmeans.c:14
Datum centroid(PG_FUNCTION_ARGS)
double ymax
Definition liblwgeom.h:357
double zmax
Definition liblwgeom.h:359
double xmax
Definition liblwgeom.h:355
double zmin
Definition liblwgeom.h:358
double ymin
Definition liblwgeom.h:356
double xmin
Definition liblwgeom.h:354
double m
Definition liblwgeom.h:414
double x
Definition liblwgeom.h:414
double z
Definition liblwgeom.h:414
double y
Definition liblwgeom.h:414

References centroid(), gbox_is_valid(), kmeans(), KMEANS_NULL_CLUSTER, LW_TRUE, lwalloc(), lwerror(), lwfree(), lwgeom_as_lwpoint(), lwgeom_centroid(), lwgeom_free(), lwgeom_get_bbox(), lwgeom_get_type(), lwgeom_has_m(), lwgeom_has_z(), lwgeom_is_empty(), lwnotice(), lwpoint_get_m(), lwpoint_get_x(), lwpoint_get_y(), lwpoint_get_z(), POINT4D::m, POINTTYPE, POINT4D::x, GBOX::xmax, GBOX::xmin, POINT4D::y, GBOX::ymax, GBOX::ymin, POINT4D::z, GBOX::zmax, and GBOX::zmin.

Referenced by ST_ClusterKMeans(), and test_kmeans().

Here is the call graph for this function:
Here is the caller graph for this function: