PostGIS  3.1.6dev-r@@SVN_REVISION@@

◆ lwgeom_cluster_kmeans()

int* lwgeom_cluster_kmeans ( const LWGEOM **  geoms,
uint32_t  ngeoms,
uint32_t  k 
)

Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster each geometry is in.

Parameters
geomsthe input array of LWGEOM pointers
ngeomsthe number of elements in the array
kthe number of clusters to calculate

Definition at line 206 of file lwkmeans.c.

207 {
208  uint32_t num_non_empty = 0;
209  uint8_t converged = LW_FALSE;
210 
211  assert(k > 0);
212  assert(n > 0);
213  assert(geoms);
214 
215  if (n < k)
216  {
217  lwerror(
218  "%s: number of geometries is less than the number of clusters requested, not all clusters will get data",
219  __func__);
220  k = n;
221  }
222 
223  /* An array of objects to be analyzed. */
224  POINT4D *objs = lwalloc(sizeof(POINT4D) * n);
225 
226  /* Array to mark unclusterable objects. Will be returned as KMEANS_NULL_CLUSTER. */
227  uint8_t *geom_valid = lwalloc(sizeof(uint8_t) * n);
228  memset(geom_valid, 0, sizeof(uint8_t) * n);
229 
230  /* Array to fill in with cluster numbers. */
231  int *clusters = lwalloc(sizeof(int) * n);
232  memset(clusters, 0, sizeof(int) * n);
233 
234  /* An array of clusters centers for the algorithm. */
235  POINT4D *centers = lwalloc(sizeof(POINT4D) * k);
236  memset(centers, 0, sizeof(POINT4D) * k);
237 
238  /* Prepare the list of object pointers for K-means */
239  for (uint32_t i = 0; i < n; i++)
240  {
241  const LWGEOM *geom = geoms[i];
242  /* Unset M values will be 1 */
243  POINT4D out = {0, 0, 0, 1};
244 
245  /* Null/empty geometries get geom_valid=LW_FALSE set earlier with memset */
246  if ((!geom) || lwgeom_is_empty(geom))
247  continue;
248 
249  /* If the input is a point, use its coordinates */
250  if (lwgeom_get_type(geom) == POINTTYPE)
251  {
252  out.x = lwpoint_get_x(lwgeom_as_lwpoint(geom));
253  out.y = lwpoint_get_y(lwgeom_as_lwpoint(geom));
254  if (lwgeom_has_z(geom))
255  out.z = lwpoint_get_z(lwgeom_as_lwpoint(geom));
256  if (lwgeom_has_m(geom))
257  {
258  out.m = lwpoint_get_m(lwgeom_as_lwpoint(geom));
259  if (out.m <= 0)
260  lwerror("%s has an input point geometry with weight in M less or equal to 0",
261  __func__);
262  }
263  }
264  else if (!lwgeom_has_z(geom))
265  {
266  /* For 2D, we can take a centroid*/
268  if (!centroid)
269  continue;
271  {
273  continue;
274  }
278  }
279  else
280  {
281  /* For 3D non-point, we can have a box center */
282  const GBOX *box = lwgeom_get_bbox(geom);
283  if (!gbox_is_valid(box))
284  continue;
285  out.x = (box->xmax + box->xmin) / 2;
286  out.y = (box->ymax + box->ymin) / 2;
287  out.z = (box->zmax + box->zmin) / 2;
288  }
289  geom_valid[i] = LW_TRUE;
290  objs[num_non_empty++] = out;
291  }
292 
293  if (num_non_empty < k)
294  {
295  lwnotice(
296  "%s: number of non-empty geometries (%d) is less than the number of clusters (%d) requested, not all clusters will get data",
297  __func__,
298  num_non_empty,
299  k);
300  k = num_non_empty;
301  }
302 
303  if (k > 1)
304  {
305  int *clusters_dense = lwalloc(sizeof(int) * num_non_empty);
306  memset(clusters_dense, 0, sizeof(int) * num_non_empty);
307 
308  kmeans_init(objs, num_non_empty, centers, k);
309  converged = kmeans(objs, clusters_dense, num_non_empty, centers, k);
310 
311  if (converged)
312  {
313  uint32_t d = 0;
314  for (uint32_t i = 0; i < n; i++)
315  if (geom_valid[i])
316  clusters[i] = clusters_dense[d++];
317  else
318  clusters[i] = KMEANS_NULL_CLUSTER;
319  }
320  lwfree(clusters_dense);
321  }
322  else if (k == 0)
323  {
324  /* k=0: everything is unclusterable */
325  for (uint32_t i = 0; i < n; i++)
326  clusters[i] = KMEANS_NULL_CLUSTER;
327  converged = LW_TRUE;
328  }
329  else
330  {
331  /* k=1: mark up NULL and non-NULL */
332  for (uint32_t i = 0; i < n; i++)
333  {
334  if (!geom_valid[i])
335  clusters[i] = KMEANS_NULL_CLUSTER;
336  else
337  clusters[i] = 0;
338  }
339  converged = LW_TRUE;
340  }
341 
342  /* Before error handling, might as well clean up all the inputs */
343  lwfree(objs);
344  lwfree(centers);
345  lwfree(geom_valid);
346 
347  /* Good result */
348  if (converged)
349  return clusters;
350 
351  /* Bad result, not going to need the answer */
352  lwfree(clusters);
353  return NULL;
354 }
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition: gbox.c:197
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
#define LW_FALSE
Definition: liblwgeom.h:108
double lwpoint_get_m(const LWPOINT *point)
Definition: lwpoint.c:107
void lwgeom_free(LWGEOM *geom)
Definition: lwgeom.c:1138
double lwpoint_get_x(const LWPOINT *point)
Definition: lwpoint.c:63
int lwgeom_has_z(const LWGEOM *geom)
Return LW_TRUE if geometry has Z ordinates.
Definition: lwgeom.c:917
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition: liblwgeom.h:116
void lwfree(void *mem)
Definition: lwutil.c:242
const GBOX * lwgeom_get_bbox(const LWGEOM *lwgeom)
Get a non-empty geometry bounding box, computing and caching it if not already there.
Definition: lwgeom.c:726
void * lwalloc(size_t size)
Definition: lwutil.c:227
double lwpoint_get_z(const LWPOINT *point)
Definition: lwpoint.c:89
#define LW_TRUE
Return types for functions with status returns.
Definition: liblwgeom.h:107
int lwgeom_has_m(const LWGEOM *geom)
Return LW_TRUE if geometry has M ordinates.
Definition: lwgeom.c:924
double lwpoint_get_y(const LWPOINT *point)
Definition: lwpoint.c:76
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
Definition: lwutil.c:190
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
Definition: lwutil.c:177
static uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition: lwinline.h:145
static int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members)
Definition: lwinline.h:203
static LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
Definition: lwinline.h:131
static uint8_t kmeans(POINT4D *objs, int *clusters, uint32_t n, POINT4D *centers, uint32_t k)
Definition: lwkmeans.c:90
static void kmeans_init(POINT4D *objs, uint32_t n, POINT4D *centers, uint32_t k)
Definition: lwkmeans.c:108
#define KMEANS_NULL_CLUSTER
Definition: lwkmeans.c:14
Datum centroid(PG_FUNCTION_ARGS)
double ymax
Definition: liblwgeom.h:371
double zmax
Definition: liblwgeom.h:373
double xmax
Definition: liblwgeom.h:369
double zmin
Definition: liblwgeom.h:372
double ymin
Definition: liblwgeom.h:370
double xmin
Definition: liblwgeom.h:368
double m
Definition: liblwgeom.h:428
double x
Definition: liblwgeom.h:428
double z
Definition: liblwgeom.h:428
double y
Definition: liblwgeom.h:428

References centroid(), gbox_is_valid(), kmeans(), kmeans_init(), KMEANS_NULL_CLUSTER, LW_FALSE, LW_TRUE, lwalloc(), lwerror(), lwfree(), lwgeom_as_lwpoint(), lwgeom_centroid(), lwgeom_free(), lwgeom_get_bbox(), lwgeom_get_type(), lwgeom_has_m(), lwgeom_has_z(), lwgeom_is_empty(), lwnotice(), lwpoint_get_m(), lwpoint_get_x(), lwpoint_get_y(), lwpoint_get_z(), POINT4D::m, POINTTYPE, POINT4D::x, GBOX::xmax, GBOX::xmin, POINT4D::y, GBOX::ymax, GBOX::ymin, POINT4D::z, GBOX::zmax, and GBOX::zmin.

Referenced by ST_ClusterKMeans(), and test_kmeans().

Here is the call graph for this function:
Here is the caller graph for this function: