PostGIS  2.3.8dev-r@@SVN_REVISION@@
lwkmeans.c
Go to the documentation of this file.
1 #include <float.h>
2 #include <math.h>
3 
4 #include "kmeans.h"
5 #include "liblwgeom_internal.h"
6 
7 
8 static double lwkmeans_pt_distance(const Pointer a, const Pointer b)
9 {
10  POINT2D *pa = (POINT2D*)a;
11  POINT2D *pb = (POINT2D*)b;
12 
13  double dx = (pa->x - pb->x);
14  double dy = (pa->y - pb->y);
15 
16  return dx*dx + dy*dy;
17 }
18 
19 static int lwkmeans_pt_closest(const Pointer * objs, size_t num_objs, const Pointer a)
20 {
21  int i;
22  double d;
23  double d_closest = FLT_MAX;
24  int closest = -1;
25 
26  assert(num_objs > 0);
27 
28  for (i = 0; i < num_objs; i++)
29  {
30  /* Skip nulls/empties */
31  if (!objs[i])
32  continue;
33 
34  d = lwkmeans_pt_distance(objs[i], a);
35  if (d < d_closest)
36  {
37  d_closest = d;
38  closest = i;
39  }
40  }
41 
42  return closest;
43 }
44 
45 static void lwkmeans_pt_centroid(const Pointer * objs, const int * clusters, size_t num_objs, int cluster, Pointer centroid)
46 {
47  int i;
48  int num_cluster = 0;
49  POINT2D sum;
50  POINT2D **pts = (POINT2D**)objs;
51  POINT2D *center = (POINT2D*)centroid;
52 
53  sum.x = sum.y = 0.0;
54 
55  if (num_objs <= 0) return;
56 
57  for (i = 0; i < num_objs; i++)
58  {
59  /* Skip points that are not of interest */
60  if (clusters[i] != cluster) continue;
61 
62  sum.x += pts[i]->x;
63  sum.y += pts[i]->y;
64  num_cluster++;
65  }
66  if (num_cluster)
67  {
68  sum.x /= num_cluster;
69  sum.y /= num_cluster;
70  *center = sum;
71  }
72  return;
73 }
74 
75 
76 int *
77 lwgeom_cluster_2d_kmeans(const LWGEOM **geoms, int ngeoms, int k)
78 {
79  int i;
80  int num_centroids = 0;
81  LWGEOM **centroids;
82  POINT2D *centers_raw;
83  const POINT2D *cp;
84  POINT2D min = { DBL_MAX, DBL_MAX };
85  POINT2D max = { -DBL_MAX, -DBL_MAX };
86  double dx, dy;
87  kmeans_config config;
88  kmeans_result result;
89  int *seen;
90  int sidx = 0;
91 
92  assert(k>0);
93  assert(ngeoms>0);
94  assert(geoms);
95 
96  if (ngeoms<k)
97  {
98  lwerror("%s: number of geometries is less than the number of clusters requested", __func__);
99  }
100 
101  /* We'll hold the temporary centroid objects here */
102  centroids = lwalloc(sizeof(LWGEOM*) * ngeoms);
103 
104  /* The vector of cluster means. We have to allocate a */
105  /* chunk of memory for these because we'll be mutating them */
106  /* in the kmeans algorithm */
107  centers_raw = lwalloc(sizeof(POINT2D) * k);
108 
109  /* K-means configuration setup */
110  config.objs = lwalloc(sizeof(Pointer) * ngeoms);
111  config.num_objs = ngeoms;
112  config.clusters = lwalloc(sizeof(int) * ngeoms);
113  config.centers = lwalloc(sizeof(Pointer) * k);
114  config.k = k;
115  config.max_iterations = 0;
118 
119  /* Clean the memory */
120  memset(config.objs, 0, sizeof(Pointer) * ngeoms);
121  memset(config.clusters, 0, sizeof(int) * ngeoms);
122  memset(config.centers, 0, sizeof(Pointer) * k);
123 
124  /* Prepare the list of object pointers for K-means */
125  for (i = 0; i < ngeoms; i++)
126  {
127  const LWGEOM *geom = geoms[i];
128  LWPOINT *lwpoint;
129 
130  /* Null/empty geometries get a NULL pointer */
131  if ((!geom) || lwgeom_is_empty(geom))
132  {
133  config.objs[i] = NULL;
134  continue;
135  }
136 
137  /* If the input is a point, use its coordinates */
138  /* If its not a point, convert it to one via centroid */
139  if (lwgeom_get_type(geom) != POINTTYPE)
140  {
142  if ((!centroid) || lwgeom_is_empty(centroid))
143  {
144  config.objs[i] = NULL;
145  continue;
146  }
147  centroids[num_centroids++] = centroid;
148  lwpoint = lwgeom_as_lwpoint(centroid);
149  }
150  else
151  {
152  lwpoint = lwgeom_as_lwpoint(geom);
153  }
154 
155  /* Store a pointer to the POINT2D we are interested in */
156  cp = getPoint2d_cp(lwpoint->point, 0);
157  config.objs[i] = (Pointer)cp;
158 
159  /* Since we're already here, let's calculate the extrema of the set */
160  if (cp->x < min.x) min.x = cp->x;
161  if (cp->y < min.y) min.y = cp->y;
162  if (cp->x > max.x) max.x = cp->x;
163  if (cp->y > max.y) max.y = cp->y;
164  }
165 
166  /*
167  * We map a uniform assignment of points in the area covered by the set
168  * onto actual points in the set
169  */
170  dx = (max.x - min.x)/k;
171  dy = (max.y - min.y)/k;
172  seen = lwalloc(sizeof(int)*config.k);
173  for (i = 0; i < k; i++)
174  {
175  int closest;
176  POINT2D p;
177  int j;
178 
179  /* Calculate a point in the range */
180  p.x = min.x + dx * (i + 0.5);
181  p.y = min.y + dy * (i + 0.5);
182 
183  /* Find the data point closest to the calculated point */
184  closest = lwkmeans_pt_closest(config.objs, config.num_objs, &p);
185 
186  /* If something is terrible wrong w/ data, cannot find a closest */
187  if (closest < 0)
188  lwerror("unable to calculate cluster seed points, too many NULLs or empties?");
189 
190  /* Ensure we aren't already using that point as a seed */
191  j = 0;
192  while(j < sidx)
193  {
194  if (seen[j] == closest)
195  {
196  int k, t;
197  for (k = 1; k < config.num_objs; k++)
198  {
199  t = (closest + k) % config.num_objs;
200  if (config.objs[t])
201  {
202  closest = t;
203  break;
204  }
205  }
206  j = 0;
207  }
208  else
209  {
210  j++;
211  }
212  }
213  seen[sidx++] = closest;
214 
215  /* Copy the point coordinates into the initial centers array */
216  /* This is ugly, but the centers array is an array of */
217  /* pointers to points, not an array of points */
218  centers_raw[i] = *((POINT2D*)config.objs[closest]);
219  config.centers[i] = &(centers_raw[i]);
220  }
221 
222  result = kmeans(&config);
223 
224  /* Before error handling, might as well clean up all the inputs */
225  lwfree(config.objs);
226  lwfree(config.centers);
227  lwfree(centers_raw);
228  lwfree(centroids);
229  lwfree(seen);
230 
231  /* Good result */
232  if (result == KMEANS_OK)
233  return config.clusters;
234 
235  /* Bad result, not going to need the answer */
236  lwfree(config.clusters);
237  if (result == KMEANS_EXCEEDED_MAX_ITERATIONS)
238  {
239  lwerror("%s did not converge after %d iterations", __func__, config.max_iterations);
240  return NULL;
241  }
242 
243  /* Unknown error */
244  return NULL;
245 }
246 
int * clusters
Definition: kmeans.h:120
void lwfree(void *mem)
Definition: lwutil.c:242
void * Pointer
Definition: kmeans.h:59
uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition: lwgeom.c:842
kmeans_result kmeans(kmeans_config *config)
Definition: kmeans.c:249
kmeans_result
Definition: kmeans.h:61
Datum centroid(PG_FUNCTION_ARGS)
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
Definition: lwgeom.c:93
POINTARRAY * point
Definition: liblwgeom.h:410
static double lwkmeans_pt_distance(const Pointer a, const Pointer b)
Definition: lwkmeans.c:8
double x
Definition: liblwgeom.h:327
int * lwgeom_cluster_2d_kmeans(const LWGEOM **geoms, int ngeoms, int k)
Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster ...
Definition: lwkmeans.c:77
const POINT2D * getPoint2d_cp(const POINTARRAY *pa, int n)
Returns a POINT2D pointer into the POINTARRAY serialized_ptlist, suitable for reading from...
Definition: lwgeom_api.c:485
static void lwkmeans_pt_centroid(const Pointer *objs, const int *clusters, size_t num_objs, int cluster, Pointer centroid)
Definition: lwkmeans.c:45
Pointer * centers
Definition: kmeans.h:107
double y
Definition: liblwgeom.h:327
unsigned int max_iterations
Definition: kmeans.h:114
static int lwkmeans_pt_closest(const Pointer *objs, size_t num_objs, const Pointer a)
Definition: lwkmeans.c:19
size_t num_objs
Definition: kmeans.h:100
unsigned int k
Definition: kmeans.h:110
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition: liblwgeom.h:84
Pointer * objs
Definition: kmeans.h:97
void * lwalloc(size_t size)
Definition: lwutil.c:227
int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members) ...
Definition: lwgeom.c:1310
kmeans_distance_method distance_method
Definition: kmeans.h:85
kmeans_centroid_method centroid_method
Definition: kmeans.h:88
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
Definition: lwutil.c:102
if(!(yy_init))