PostGIS  2.4.9dev-r@@SVN_REVISION@@
lwkmeans.c
Go to the documentation of this file.
1 #include <float.h>
2 #include <math.h>
3 
4 #include "kmeans.h"
5 #include "liblwgeom_internal.h"
6 
7 
8 static double lwkmeans_pt_distance(const Pointer a, const Pointer b)
9 {
10  POINT2D *pa = (POINT2D*)a;
11  POINT2D *pb = (POINT2D*)b;
12 
13  double dx = (pa->x - pb->x);
14  double dy = (pa->y - pb->y);
15 
16  return dx*dx + dy*dy;
17 }
18 
19 static int lwkmeans_pt_closest(const Pointer * objs, size_t num_objs, const Pointer a)
20 {
21  int i;
22  double d;
23  double d_closest = FLT_MAX;
24  int closest = -1;
25 
26  assert(num_objs > 0);
27 
28  for (i = 0; i < num_objs; i++)
29  {
30  /* Skip nulls/empties */
31  if (!objs[i])
32  continue;
33 
34  d = lwkmeans_pt_distance(objs[i], a);
35  if (d < d_closest)
36  {
37  d_closest = d;
38  closest = i;
39  }
40  }
41 
42  return closest;
43 }
44 
45 static void lwkmeans_pt_centroid(const Pointer * objs, const int * clusters, size_t num_objs, int cluster, Pointer centroid)
46 {
47  int i;
48  int num_cluster = 0;
49  POINT2D sum;
50  POINT2D **pts = (POINT2D**)objs;
51  POINT2D *center = (POINT2D*)centroid;
52 
53  sum.x = sum.y = 0.0;
54 
55  if (num_objs <= 0) return;
56 
57  for (i = 0; i < num_objs; i++)
58  {
59  /* Skip points that are not of interest */
60  if (clusters[i] != cluster) continue;
61 
62  sum.x += pts[i]->x;
63  sum.y += pts[i]->y;
64  num_cluster++;
65  }
66  if (num_cluster)
67  {
68  sum.x /= num_cluster;
69  sum.y /= num_cluster;
70  *center = sum;
71  }
72  return;
73 }
74 
75 
76 int *
77 lwgeom_cluster_2d_kmeans(const LWGEOM **geoms, int ngeoms, int k)
78 {
79  int i;
80  int num_centroids = 0;
81  LWGEOM **centroids;
82  POINT2D *centers_raw;
83  const POINT2D *cp;
84  POINT2D min = { DBL_MAX, DBL_MAX };
85  POINT2D max = { -DBL_MAX, -DBL_MAX };
86  double dx, dy;
87  kmeans_config config;
88  kmeans_result result;
89  int *seen;
90  int sidx = 0;
91 
92  assert(k>0);
93  assert(ngeoms>0);
94  assert(geoms);
95 
96  /* Initialize our static structs */
97  memset(&config, 0, sizeof(kmeans_config));
98  memset(&result, 0, sizeof(kmeans_result));
99 
100  if (ngeoms<k)
101  {
102  lwerror("%s: number of geometries is less than the number of clusters requested", __func__);
103  }
104 
105  /* We'll hold the temporary centroid objects here */
106  centroids = lwalloc(sizeof(LWGEOM*) * ngeoms);
107  memset(centroids, 0, sizeof(LWGEOM*) * ngeoms);
108 
109  /* The vector of cluster means. We have to allocate a */
110  /* chunk of memory for these because we'll be mutating them */
111  /* in the kmeans algorithm */
112  centers_raw = lwalloc(sizeof(POINT2D) * k);
113  memset(centers_raw, 0, sizeof(POINT2D) * k);
114 
115  /* K-means configuration setup */
116  config.objs = lwalloc(sizeof(Pointer) * ngeoms);
117  config.num_objs = ngeoms;
118  config.clusters = lwalloc(sizeof(int) * ngeoms);
119  config.centers = lwalloc(sizeof(Pointer) * k);
120  config.k = k;
121  config.max_iterations = 0;
124 
125  /* Clean the memory */
126  memset(config.objs, 0, sizeof(Pointer) * ngeoms);
127  memset(config.clusters, 0, sizeof(int) * ngeoms);
128  memset(config.centers, 0, sizeof(Pointer) * k);
129 
130  /* Prepare the list of object pointers for K-means */
131  for (i = 0; i < ngeoms; i++)
132  {
133  const LWGEOM *geom = geoms[i];
134  LWPOINT *lwpoint;
135 
136  /* Null/empty geometries get a NULL pointer */
137  if ((!geom) || lwgeom_is_empty(geom))
138  {
139  config.objs[i] = NULL;
140  continue;
141  }
142 
143  /* If the input is a point, use its coordinates */
144  /* If its not a point, convert it to one via centroid */
145  if (lwgeom_get_type(geom) != POINTTYPE)
146  {
148  if ((!centroid) || lwgeom_is_empty(centroid))
149  {
150  config.objs[i] = NULL;
151  continue;
152  }
153  centroids[num_centroids++] = centroid;
154  lwpoint = lwgeom_as_lwpoint(centroid);
155  }
156  else
157  {
158  lwpoint = lwgeom_as_lwpoint(geom);
159  }
160 
161  /* Store a pointer to the POINT2D we are interested in */
162  cp = getPoint2d_cp(lwpoint->point, 0);
163  config.objs[i] = (Pointer)cp;
164 
165  /* Since we're already here, let's calculate the extrema of the set */
166  if (cp->x < min.x) min.x = cp->x;
167  if (cp->y < min.y) min.y = cp->y;
168  if (cp->x > max.x) max.x = cp->x;
169  if (cp->y > max.y) max.y = cp->y;
170  }
171 
172  /*
173  * We map a uniform assignment of points in the area covered by the set
174  * onto actual points in the set
175  */
176  dx = (max.x - min.x)/k;
177  dy = (max.y - min.y)/k;
178  seen = lwalloc(sizeof(int)*config.k);
179  memset(seen, 0, sizeof(int)*config.k);
180  for (i = 0; i < k; i++)
181  {
182  int closest;
183  POINT2D p;
184  int j;
185 
186  /* Calculate a point in the range */
187  p.x = min.x + dx * (i + 0.5);
188  p.y = min.y + dy * (i + 0.5);
189 
190  /* Find the data point closest to the calculated point */
191  closest = lwkmeans_pt_closest(config.objs, config.num_objs, &p);
192 
193  /* If something is terrible wrong w/ data, cannot find a closest */
194  if (closest < 0)
195  lwerror("unable to calculate cluster seed points, too many NULLs or empties?");
196 
197  /* Ensure we aren't already using that point as a seed */
198  j = 0;
199  while(j < sidx)
200  {
201  if (seen[j] == closest)
202  {
203  int k, t;
204  for (k = 1; k < config.num_objs; k++)
205  {
206  t = (closest + k) % config.num_objs;
207  if (config.objs[t])
208  {
209  closest = t;
210  break;
211  }
212  }
213  j = 0;
214  }
215  else
216  {
217  j++;
218  }
219  }
220  seen[sidx++] = closest;
221 
222  /* Copy the point coordinates into the initial centers array */
223  /* This is ugly, but the centers array is an array of */
224  /* pointers to points, not an array of points */
225  centers_raw[i] = *((POINT2D*)config.objs[closest]);
226  config.centers[i] = &(centers_raw[i]);
227  }
228 
229  result = kmeans(&config);
230 
231  /* Before error handling, might as well clean up all the inputs */
232  lwfree(config.objs);
233  lwfree(config.centers);
234  lwfree(centers_raw);
235  lwfree(centroids);
236  lwfree(seen);
237 
238  /* Good result */
239  if (result == KMEANS_OK)
240  return config.clusters;
241 
242  /* Bad result, not going to need the answer */
243  lwfree(config.clusters);
244  if (result == KMEANS_EXCEEDED_MAX_ITERATIONS)
245  {
246  lwerror("%s did not converge after %d iterations", __func__, config.max_iterations);
247  return NULL;
248  }
249 
250  /* Unknown error */
251  return NULL;
252 }
253 
int * clusters
Definition: kmeans.h:120
void lwfree(void *mem)
Definition: lwutil.c:244
void * Pointer
Definition: kmeans.h:59
uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition: lwgeom.c:878
kmeans_result kmeans(kmeans_config *config)
Definition: kmeans.c:249
kmeans_result
Definition: kmeans.h:61
Datum centroid(PG_FUNCTION_ARGS)
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
Definition: lwgeom.c:129
POINTARRAY * point
Definition: liblwgeom.h:411
static double lwkmeans_pt_distance(const Pointer a, const Pointer b)
Definition: lwkmeans.c:8
double x
Definition: liblwgeom.h:328
int * lwgeom_cluster_2d_kmeans(const LWGEOM **geoms, int ngeoms, int k)
Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster ...
Definition: lwkmeans.c:77
const POINT2D * getPoint2d_cp(const POINTARRAY *pa, int n)
Returns a POINT2D pointer into the POINTARRAY serialized_ptlist, suitable for reading from...
Definition: lwgeom_api.c:373
static void lwkmeans_pt_centroid(const Pointer *objs, const int *clusters, size_t num_objs, int cluster, Pointer centroid)
Definition: lwkmeans.c:45
Pointer * centers
Definition: kmeans.h:107
double y
Definition: liblwgeom.h:328
unsigned int max_iterations
Definition: kmeans.h:114
static int lwkmeans_pt_closest(const Pointer *objs, size_t num_objs, const Pointer a)
Definition: lwkmeans.c:19
size_t num_objs
Definition: kmeans.h:100
unsigned int k
Definition: kmeans.h:110
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition: liblwgeom.h:85
Pointer * objs
Definition: kmeans.h:97
void * lwalloc(size_t size)
Definition: lwutil.c:229
int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members) ...
Definition: lwgeom.c:1346
kmeans_distance_method distance_method
Definition: kmeans.h:85
kmeans_centroid_method centroid_method
Definition: kmeans.h:88
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
Definition: lwutil.c:190
if(!(yy_init))