PostGIS  2.4.9dev-r@@SVN_REVISION@@

◆ lwgeom_cluster_2d_kmeans()

int* lwgeom_cluster_2d_kmeans ( const LWGEOM **  geoms,
int  ngeoms,
int  k 
)

Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster each geometry is in.

Parameters
geomsthe input array of LWGEOM pointers
ngeomsthe number of elements in the array
kthe number of clusters to calculate

Definition at line 77 of file lwkmeans.c.

References kmeans_config::centers, centroid(), kmeans_config::centroid_method, kmeans_config::clusters, kmeans_config::distance_method, getPoint2d_cp(), if(), kmeans_config::k, kmeans(), KMEANS_EXCEEDED_MAX_ITERATIONS, KMEANS_OK, lwalloc(), lwerror(), lwfree(), lwgeom_as_lwpoint(), lwgeom_centroid(), lwgeom_get_type(), lwgeom_is_empty(), lwkmeans_pt_centroid(), lwkmeans_pt_closest(), lwkmeans_pt_distance(), kmeans_config::max_iterations, kmeans_config::num_objs, kmeans_config::objs, LWPOINT::point, POINTTYPE, POINT2D::x, and POINT2D::y.

Referenced by ST_ClusterKMeans(), and test_kmeans().

78 {
79  int i;
80  int num_centroids = 0;
81  LWGEOM **centroids;
82  POINT2D *centers_raw;
83  const POINT2D *cp;
84  POINT2D min = { DBL_MAX, DBL_MAX };
85  POINT2D max = { -DBL_MAX, -DBL_MAX };
86  double dx, dy;
87  kmeans_config config;
88  kmeans_result result;
89  int *seen;
90  int sidx = 0;
91 
92  assert(k>0);
93  assert(ngeoms>0);
94  assert(geoms);
95 
96  /* Initialize our static structs */
97  memset(&config, 0, sizeof(kmeans_config));
98  memset(&result, 0, sizeof(kmeans_result));
99 
100  if (ngeoms<k)
101  {
102  lwerror("%s: number of geometries is less than the number of clusters requested", __func__);
103  }
104 
105  /* We'll hold the temporary centroid objects here */
106  centroids = lwalloc(sizeof(LWGEOM*) * ngeoms);
107  memset(centroids, 0, sizeof(LWGEOM*) * ngeoms);
108 
109  /* The vector of cluster means. We have to allocate a */
110  /* chunk of memory for these because we'll be mutating them */
111  /* in the kmeans algorithm */
112  centers_raw = lwalloc(sizeof(POINT2D) * k);
113  memset(centers_raw, 0, sizeof(POINT2D) * k);
114 
115  /* K-means configuration setup */
116  config.objs = lwalloc(sizeof(Pointer) * ngeoms);
117  config.num_objs = ngeoms;
118  config.clusters = lwalloc(sizeof(int) * ngeoms);
119  config.centers = lwalloc(sizeof(Pointer) * k);
120  config.k = k;
121  config.max_iterations = 0;
124 
125  /* Clean the memory */
126  memset(config.objs, 0, sizeof(Pointer) * ngeoms);
127  memset(config.clusters, 0, sizeof(int) * ngeoms);
128  memset(config.centers, 0, sizeof(Pointer) * k);
129 
130  /* Prepare the list of object pointers for K-means */
131  for (i = 0; i < ngeoms; i++)
132  {
133  const LWGEOM *geom = geoms[i];
134  LWPOINT *lwpoint;
135 
136  /* Null/empty geometries get a NULL pointer */
137  if ((!geom) || lwgeom_is_empty(geom))
138  {
139  config.objs[i] = NULL;
140  continue;
141  }
142 
143  /* If the input is a point, use its coordinates */
144  /* If its not a point, convert it to one via centroid */
145  if (lwgeom_get_type(geom) != POINTTYPE)
146  {
148  if ((!centroid) || lwgeom_is_empty(centroid))
149  {
150  config.objs[i] = NULL;
151  continue;
152  }
153  centroids[num_centroids++] = centroid;
154  lwpoint = lwgeom_as_lwpoint(centroid);
155  }
156  else
157  {
158  lwpoint = lwgeom_as_lwpoint(geom);
159  }
160 
161  /* Store a pointer to the POINT2D we are interested in */
162  cp = getPoint2d_cp(lwpoint->point, 0);
163  config.objs[i] = (Pointer)cp;
164 
165  /* Since we're already here, let's calculate the extrema of the set */
166  if (cp->x < min.x) min.x = cp->x;
167  if (cp->y < min.y) min.y = cp->y;
168  if (cp->x > max.x) max.x = cp->x;
169  if (cp->y > max.y) max.y = cp->y;
170  }
171 
172  /*
173  * We map a uniform assignment of points in the area covered by the set
174  * onto actual points in the set
175  */
176  dx = (max.x - min.x)/k;
177  dy = (max.y - min.y)/k;
178  seen = lwalloc(sizeof(int)*config.k);
179  memset(seen, 0, sizeof(int)*config.k);
180  for (i = 0; i < k; i++)
181  {
182  int closest;
183  POINT2D p;
184  int j;
185 
186  /* Calculate a point in the range */
187  p.x = min.x + dx * (i + 0.5);
188  p.y = min.y + dy * (i + 0.5);
189 
190  /* Find the data point closest to the calculated point */
191  closest = lwkmeans_pt_closest(config.objs, config.num_objs, &p);
192 
193  /* If something is terrible wrong w/ data, cannot find a closest */
194  if (closest < 0)
195  lwerror("unable to calculate cluster seed points, too many NULLs or empties?");
196 
197  /* Ensure we aren't already using that point as a seed */
198  j = 0;
199  while(j < sidx)
200  {
201  if (seen[j] == closest)
202  {
203  int k, t;
204  for (k = 1; k < config.num_objs; k++)
205  {
206  t = (closest + k) % config.num_objs;
207  if (config.objs[t])
208  {
209  closest = t;
210  break;
211  }
212  }
213  j = 0;
214  }
215  else
216  {
217  j++;
218  }
219  }
220  seen[sidx++] = closest;
221 
222  /* Copy the point coordinates into the initial centers array */
223  /* This is ugly, but the centers array is an array of */
224  /* pointers to points, not an array of points */
225  centers_raw[i] = *((POINT2D*)config.objs[closest]);
226  config.centers[i] = &(centers_raw[i]);
227  }
228 
229  result = kmeans(&config);
230 
231  /* Before error handling, might as well clean up all the inputs */
232  lwfree(config.objs);
233  lwfree(config.centers);
234  lwfree(centers_raw);
235  lwfree(centroids);
236  lwfree(seen);
237 
238  /* Good result */
239  if (result == KMEANS_OK)
240  return config.clusters;
241 
242  /* Bad result, not going to need the answer */
243  lwfree(config.clusters);
244  if (result == KMEANS_EXCEEDED_MAX_ITERATIONS)
245  {
246  lwerror("%s did not converge after %d iterations", __func__, config.max_iterations);
247  return NULL;
248  }
249 
250  /* Unknown error */
251  return NULL;
252 }
int * clusters
Definition: kmeans.h:120
void lwfree(void *mem)
Definition: lwutil.c:244
void * Pointer
Definition: kmeans.h:59
uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition: lwgeom.c:878
kmeans_result kmeans(kmeans_config *config)
Definition: kmeans.c:249
kmeans_result
Definition: kmeans.h:61
Datum centroid(PG_FUNCTION_ARGS)
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
Definition: lwgeom.c:129
POINTARRAY * point
Definition: liblwgeom.h:411
static double lwkmeans_pt_distance(const Pointer a, const Pointer b)
Definition: lwkmeans.c:8
double x
Definition: liblwgeom.h:328
const POINT2D * getPoint2d_cp(const POINTARRAY *pa, int n)
Returns a POINT2D pointer into the POINTARRAY serialized_ptlist, suitable for reading from...
Definition: lwgeom_api.c:373
static void lwkmeans_pt_centroid(const Pointer *objs, const int *clusters, size_t num_objs, int cluster, Pointer centroid)
Definition: lwkmeans.c:45
Pointer * centers
Definition: kmeans.h:107
double y
Definition: liblwgeom.h:328
unsigned int max_iterations
Definition: kmeans.h:114
static int lwkmeans_pt_closest(const Pointer *objs, size_t num_objs, const Pointer a)
Definition: lwkmeans.c:19
size_t num_objs
Definition: kmeans.h:100
unsigned int k
Definition: kmeans.h:110
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition: liblwgeom.h:85
Pointer * objs
Definition: kmeans.h:97
void * lwalloc(size_t size)
Definition: lwutil.c:229
int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members) ...
Definition: lwgeom.c:1346
kmeans_distance_method distance_method
Definition: kmeans.h:85
kmeans_centroid_method centroid_method
Definition: kmeans.h:88
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
Definition: lwutil.c:190
if(!(yy_init))
Here is the call graph for this function:
Here is the caller graph for this function: