PostGIS  3.0.6dev-r@@SVN_REVISION@@

◆ lwgeom_cluster_2d_kmeans()

int* lwgeom_cluster_2d_kmeans ( const LWGEOM **  geoms,
uint32_t  ngeoms,
uint32_t  k 
)

Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster each geometry is in.

Parameters
geomsthe input array of LWGEOM pointers
ngeomsthe number of elements in the array
kthe number of clusters to calculate

Definition at line 244 of file lwkmeans.c.

245 {
246  uint32_t i;
247  uint32_t num_centroids = 0;
248  uint32_t num_non_empty = 0;
249  LWGEOM** centroids;
250  POINT2D* centers_raw;
251  const POINT2D* cp;
252  int result = LW_FALSE;
253 
254  /* An array of objects to be analyzed.
255  * All NULL values will be returned in the KMEANS_NULL_CLUSTER. */
256  POINT2D** objs;
257 
258  /* An array of centers for the algorithm. */
259  POINT2D** centers;
260 
261  /* Array to fill in with cluster numbers. */
262  int* clusters;
263 
264  assert(k > 0);
265  assert(n > 0);
266  assert(geoms);
267 
268  if (n < k)
269  {
270  lwerror("%s: number of geometries is less than the number of clusters requested, not all clusters will get data", __func__);
271  k = n;
272  }
273 
274  /* We'll hold the temporary centroid objects here */
275  centroids = lwalloc(sizeof(LWGEOM*) * n);
276  memset(centroids, 0, sizeof(LWGEOM*) * n);
277 
278  /* The vector of cluster means. We have to allocate a chunk of memory for these because we'll be mutating them
279  * in the kmeans algorithm */
280  centers_raw = lwalloc(sizeof(POINT2D) * k);
281  memset(centers_raw, 0, sizeof(POINT2D) * k);
282 
283  /* K-means configuration setup */
284  objs = lwalloc(sizeof(POINT2D*) * n);
285  clusters = lwalloc(sizeof(int) * n);
286  centers = lwalloc(sizeof(POINT2D*) * k);
287 
288  /* Clean the memory */
289  memset(objs, 0, sizeof(POINT2D*) * n);
290  memset(clusters, 0, sizeof(int) * n);
291  memset(centers, 0, sizeof(POINT2D*) * k);
292 
293  /* Prepare the list of object pointers for K-means */
294  for (i = 0; i < n; i++)
295  {
296  const LWGEOM* geom = geoms[i];
297  LWPOINT* lwpoint;
298 
299  /* Null/empty geometries get a NULL pointer, set earlier with memset */
300  if ((!geom) || lwgeom_is_empty(geom)) continue;
301 
302  /* If the input is a point, use its coordinates */
303  /* If its not a point, convert it to one via centroid */
304  if (lwgeom_get_type(geom) != POINTTYPE)
305  {
307  if ((!centroid)) continue;
309  {
311  continue;
312  }
313  centroids[num_centroids++] = centroid;
314  lwpoint = lwgeom_as_lwpoint(centroid);
315  }
316  else
317  lwpoint = lwgeom_as_lwpoint(geom);
318 
319  /* Store a pointer to the POINT2D we are interested in */
320  cp = getPoint2d_cp(lwpoint->point, 0);
321  objs[i] = (POINT2D*)cp;
322  num_non_empty++;
323  }
324 
325  if (num_non_empty < k)
326  {
327  lwnotice("%s: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data", __func__);
328  k = num_non_empty;
329  }
330 
331  if (k > 1)
332  {
333  kmeans_init(objs, n, centers, centers_raw, k);
334  result = kmeans(objs, clusters, n, centers, k);
335  }
336  else
337  {
338  /* k=0: everything is unclusterable
339  * k=1: mark up NULL and non-NULL */
340  for (i = 0; i < n; i++)
341  {
342  if (k == 0 || !objs[i])
343  clusters[i] = KMEANS_NULL_CLUSTER;
344  else
345  clusters[i] = 0;
346  }
347  result = LW_TRUE;
348  }
349 
350  /* Before error handling, might as well clean up all the inputs */
351  lwfree(objs);
352  lwfree(centers);
353  lwfree(centers_raw);
354  lwfree(centroids);
355 
356  /* Good result */
357  if (result) return clusters;
358 
359  /* Bad result, not going to need the answer */
360  lwfree(clusters);
361  return NULL;
362 }
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
#define LW_FALSE
Definition: liblwgeom.h:108
void lwgeom_free(LWGEOM *geom)
Definition: lwgeom.c:1138
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition: liblwgeom.h:116
void lwfree(void *mem)
Definition: lwutil.c:242
void * lwalloc(size_t size)
Definition: lwutil.c:227
#define LW_TRUE
Return types for functions with status returns.
Definition: liblwgeom.h:107
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
Definition: lwutil.c:190
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
Definition: lwutil.c:177
static const POINT2D * getPoint2d_cp(const POINTARRAY *pa, uint32_t n)
Returns a POINT2D pointer into the POINTARRAY serialized_ptlist, suitable for reading from.
Definition: lwinline.h:91
static uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition: lwinline.h:135
static int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members)
Definition: lwinline.h:193
static LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
Definition: lwinline.h:121
static void kmeans_init(POINT2D **objs, uint32_t n, POINT2D **centers, POINT2D *centers_raw, uint32_t k)
Definition: lwkmeans.c:129
#define KMEANS_NULL_CLUSTER
Definition: lwkmeans.c:14
static int kmeans(POINT2D **objs, int *clusters, uint32_t n, POINT2D **centers, uint32_t k)
Definition: lwkmeans.c:94
Datum centroid(PG_FUNCTION_ARGS)
POINTARRAY * point
Definition: liblwgeom.h:457

References centroid(), getPoint2d_cp(), kmeans(), kmeans_init(), KMEANS_NULL_CLUSTER, LW_FALSE, LW_TRUE, lwalloc(), lwerror(), lwfree(), lwgeom_as_lwpoint(), lwgeom_centroid(), lwgeom_free(), lwgeom_get_type(), lwgeom_is_empty(), lwnotice(), LWPOINT::point, and POINTTYPE.

Referenced by ST_ClusterKMeans(), and test_kmeans().

Here is the call graph for this function:
Here is the caller graph for this function: