PostGIS  2.5.0rc1-r@@SVN_REVISION@@

◆ lwgeom_cluster_2d_kmeans()

int* lwgeom_cluster_2d_kmeans ( const LWGEOM **  geoms,
uint32_t  ngeoms,
uint32_t  k 
)

Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster each geometry is in.

Parameters
geomsthe input array of LWGEOM pointers
ngeomsthe number of elements in the array
kthe number of clusters to calculate

Definition at line 237 of file lwkmeans.c.

References centroid(), getPoint2d_cp(), kmeans(), kmeans_init(), KMEANS_NULL_CLUSTER, LW_FALSE, LW_TRUE, lwalloc(), lwerror(), lwfree(), lwgeom_as_lwpoint(), lwgeom_centroid(), lwgeom_free(), lwgeom_get_type(), lwgeom_is_empty(), lwnotice(), LWPOINT::point, and POINTTYPE.

Referenced by ST_ClusterKMeans(), and test_kmeans().

238 {
239  uint32_t i;
240  uint32_t num_centroids = 0;
241  uint32_t num_non_empty = 0;
242  LWGEOM** centroids;
243  POINT2D* centers_raw;
244  const POINT2D* cp;
245  int result = LW_FALSE;
246 
247  /* An array of objects to be analyzed.
248  * All NULL values will be returned in the KMEANS_NULL_CLUSTER. */
249  POINT2D** objs;
250 
251  /* An array of centers for the algorithm. */
252  POINT2D** centers;
253 
254  /* Array to fill in with cluster numbers. */
255  int* clusters;
256 
257  assert(k > 0);
258  assert(n > 0);
259  assert(geoms);
260 
261  if (n < k)
262  {
263  lwerror("%s: number of geometries is less than the number of clusters requested, not all clusters will get data", __func__);
264  k = n;
265  }
266 
267  /* We'll hold the temporary centroid objects here */
268  centroids = lwalloc(sizeof(LWGEOM*) * n);
269  memset(centroids, 0, sizeof(LWGEOM*) * n);
270 
271  /* The vector of cluster means. We have to allocate a chunk of memory for these because we'll be mutating them
272  * in the kmeans algorithm */
273  centers_raw = lwalloc(sizeof(POINT2D) * k);
274  memset(centers_raw, 0, sizeof(POINT2D) * k);
275 
276  /* K-means configuration setup */
277  objs = lwalloc(sizeof(POINT2D*) * n);
278  clusters = lwalloc(sizeof(int) * n);
279  centers = lwalloc(sizeof(POINT2D*) * k);
280 
281  /* Clean the memory */
282  memset(objs, 0, sizeof(POINT2D*) * n);
283  memset(clusters, 0, sizeof(int) * n);
284  memset(centers, 0, sizeof(POINT2D*) * k);
285 
286  /* Prepare the list of object pointers for K-means */
287  for (i = 0; i < n; i++)
288  {
289  const LWGEOM* geom = geoms[i];
290  LWPOINT* lwpoint;
291 
292  /* Null/empty geometries get a NULL pointer, set earlier with memset */
293  if ((!geom) || lwgeom_is_empty(geom)) continue;
294 
295  /* If the input is a point, use its coordinates */
296  /* If its not a point, convert it to one via centroid */
297  if (lwgeom_get_type(geom) != POINTTYPE)
298  {
300  if ((!centroid)) continue;
301  if (lwgeom_is_empty(centroid))
302  {
303  lwgeom_free(centroid);
304  continue;
305  }
306  centroids[num_centroids++] = centroid;
307  lwpoint = lwgeom_as_lwpoint(centroid);
308  }
309  else
310  lwpoint = lwgeom_as_lwpoint(geom);
311 
312  /* Store a pointer to the POINT2D we are interested in */
313  cp = getPoint2d_cp(lwpoint->point, 0);
314  objs[i] = (POINT2D*)cp;
315  num_non_empty++;
316  }
317 
318  if (num_non_empty < k)
319  {
320  lwnotice("%s: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data", __func__);
321  k = num_non_empty;
322  }
323 
324  if (k > 1)
325  {
326  kmeans_init(objs, clusters, n, centers, centers_raw, k);
327  result = kmeans(objs, clusters, n, centers, k);
328  }
329  else
330  {
331  /* k=0: everythong is unclusterable
332  * k=1: mark up NULL and non-NULL */
333  for (i = 0; i < n; i++)
334  {
335  if (k == 0 || !objs[i])
336  clusters[i] = KMEANS_NULL_CLUSTER;
337  else
338  clusters[i] = 0;
339  }
340  result = LW_TRUE;
341  }
342 
343  /* Before error handling, might as well clean up all the inputs */
344  lwfree(objs);
345  lwfree(centers);
346  lwfree(centers_raw);
347  lwfree(centroids);
348 
349  /* Good result */
350  if (result) return clusters;
351 
352  /* Bad result, not going to need the answer */
353  lwfree(clusters);
354  return NULL;
355 }
#define KMEANS_NULL_CLUSTER
Definition: lwkmeans.c:17
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
Definition: lwutil.c:177
void lwfree(void *mem)
Definition: lwutil.c:244
uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition: lwgeom.c:923
void lwgeom_free(LWGEOM *geom)
Definition: lwgeom.c:1144
Datum centroid(PG_FUNCTION_ARGS)
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
static int kmeans(POINT2D **objs, int *clusters, uint32_t n, POINT2D **centers, uint32_t k)
Definition: lwkmeans.c:94
LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
Definition: lwgeom.c:161
static void kmeans_init(POINT2D **objs, int *clusters, uint32_t n, POINT2D **centers, POINT2D *centers_raw, uint32_t k)
Definition: lwkmeans.c:129
POINTARRAY * point
Definition: liblwgeom.h:413
unsigned int uint32_t
Definition: uthash.h:78
#define LW_FALSE
Definition: liblwgeom.h:76
#define LW_TRUE
Return types for functions with status returns.
Definition: liblwgeom.h:75
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition: liblwgeom.h:84
void * lwalloc(size_t size)
Definition: lwutil.c:229
int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members) ...
Definition: lwgeom.c:1393
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
Definition: lwutil.c:190
const POINT2D * getPoint2d_cp(const POINTARRAY *pa, uint32_t n)
Returns a POINT2D pointer into the POINTARRAY serialized_ptlist, suitable for reading from...
Definition: lwgeom_api.c:364
Here is the call graph for this function:
Here is the caller graph for this function: