PostGIS  2.5.0dev-r@@SVN_REVISION@@

◆ lwgeom_cluster_2d_kmeans()

int* lwgeom_cluster_2d_kmeans ( const LWGEOM **  geoms,
uint32_t  ngeoms,
uint32_t  k 
)

Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster each geometry is in.

Parameters
geomsthe input array of LWGEOM pointers
ngeomsthe number of elements in the array
kthe number of clusters to calculate

Definition at line 242 of file lwkmeans.c.

References centroid(), getPoint2d_cp(), kmeans(), kmeans_init(), LW_FALSE, lwalloc(), lwerror(), lwfree(), lwgeom_as_lwpoint(), lwgeom_centroid(), lwgeom_free(), lwgeom_get_type(), lwgeom_is_empty(), lwnotice(), LWPOINT::point, and POINTTYPE.

Referenced by ST_ClusterKMeans(), and test_kmeans().

243 {
244  uint32_t i;
245  uint32_t num_centroids = 0;
246  uint32_t num_non_empty = 0;
247  LWGEOM** centroids;
248  POINT2D* centers_raw;
249  const POINT2D* cp;
250  int result = LW_FALSE;
251 
252  /* An array of objects to be analyzed.
253  * All NULL values will be returned in the KMEANS_NULL_CLUSTER. */
254  POINT2D** objs;
255 
256  /* An array of centers for the algorithm. */
257  POINT2D** centers;
258 
259  /* Array to fill in with cluster numbers. */
260  int* clusters;
261 
262  assert(k > 0);
263  assert(n > 0);
264  assert(geoms);
265 
266  if (n < k)
267  {
268  lwerror("%s: number of geometries is less than the number of clusters requested, not all clusters will get data", __func__);
269  k = n;
270  }
271 
272  /* We'll hold the temporary centroid objects here */
273  centroids = lwalloc(sizeof(LWGEOM*) * n);
274  memset(centroids, 0, sizeof(LWGEOM*) * n);
275 
276  /* The vector of cluster means. We have to allocate a chunk of memory for these because we'll be mutating them
277  * in the kmeans algorithm */
278  centers_raw = lwalloc(sizeof(POINT2D) * k);
279  memset(centers_raw, 0, sizeof(POINT2D) * k);
280 
281  /* K-means configuration setup */
282  objs = lwalloc(sizeof(POINT2D*) * n);
283  clusters = lwalloc(sizeof(int) * n);
284  centers = lwalloc(sizeof(POINT2D*) * k);
285 
286  /* Clean the memory */
287  memset(objs, 0, sizeof(POINT2D*) * n);
288  memset(clusters, 0, sizeof(int) * n);
289  memset(centers, 0, sizeof(POINT2D*) * k);
290 
291  /* Prepare the list of object pointers for K-means */
292  for (i = 0; i < n; i++)
293  {
294  const LWGEOM* geom = geoms[i];
295  LWPOINT* lwpoint;
296 
297  /* Null/empty geometries get a NULL pointer, set earlier with memset */
298  if ((!geom) || lwgeom_is_empty(geom)) continue;
299 
300  /* If the input is a point, use its coordinates */
301  /* If its not a point, convert it to one via centroid */
302  if (lwgeom_get_type(geom) != POINTTYPE)
303  {
305  if ((!centroid)) continue;
306  if (lwgeom_is_empty(centroid))
307  {
308  lwgeom_free(centroid);
309  continue;
310  }
311  centroids[num_centroids++] = centroid;
312  lwpoint = lwgeom_as_lwpoint(centroid);
313  }
314  else
315  lwpoint = lwgeom_as_lwpoint(geom);
316 
317  /* Store a pointer to the POINT2D we are interested in */
318  cp = getPoint2d_cp(lwpoint->point, 0);
319  objs[i] = (POINT2D*)cp;
320  num_non_empty++;
321  }
322 
323  if (num_non_empty < k)
324  {
325  lwnotice("%s: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data", __func__);
326  k = num_non_empty;
327  }
328 
329  kmeans_init(objs, clusters, n, centers, centers_raw, k);
330 
331  result = kmeans(objs, clusters, n, centers, k);
332 
333  /* Before error handling, might as well clean up all the inputs */
334  lwfree(objs);
335  lwfree(centers);
336  lwfree(centers_raw);
337  lwfree(centroids);
338 
339  /* Good result */
340  if (result) return clusters;
341 
342  /* Bad result, not going to need the answer */
343  lwfree(clusters);
344  return NULL;
345 }
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
Definition: lwutil.c:177
void lwfree(void *mem)
Definition: lwutil.c:244
uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition: lwgeom.c:923
void lwgeom_free(LWGEOM *geom)
Definition: lwgeom.c:1144
Datum centroid(PG_FUNCTION_ARGS)
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
static int kmeans(POINT2D **objs, int *clusters, uint32_t n, POINT2D **centers, uint32_t k)
Definition: lwkmeans.c:94
LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
Definition: lwgeom.c:161
static void kmeans_init(POINT2D **objs, int *clusters, uint32_t n, POINT2D **centers, POINT2D *centers_raw, uint32_t k)
Definition: lwkmeans.c:129
POINTARRAY * point
Definition: liblwgeom.h:413
unsigned int uint32_t
Definition: uthash.h:78
#define LW_FALSE
Definition: liblwgeom.h:76
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition: liblwgeom.h:84
void * lwalloc(size_t size)
Definition: lwutil.c:229
int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members) ...
Definition: lwgeom.c:1393
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
Definition: lwutil.c:190
const POINT2D * getPoint2d_cp(const POINTARRAY *pa, uint32_t n)
Returns a POINT2D pointer into the POINTARRAY serialized_ptlist, suitable for reading from...
Definition: lwgeom_api.c:364
Here is the call graph for this function:
Here is the caller graph for this function: