PostGIS  2.3.7dev-r@@SVN_REVISION@@
int* lwgeom_cluster_2d_kmeans ( const LWGEOM **  geoms,
int  ngeoms,
int  k 
)

Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster each geometry is in.

Parameters
geomsthe input array of LWGEOM pointers
ngeomsthe number of elements in the array
kthe number of clusters to calculate

Definition at line 77 of file lwkmeans.c.

References kmeans_config::centers, centroid(), kmeans_config::centroid_method, kmeans_config::clusters, kmeans_config::distance_method, getPoint2d_cp(), kmeans_config::k, kmeans(), KMEANS_EXCEEDED_MAX_ITERATIONS, KMEANS_OK, lwalloc(), lwerror(), lwfree(), lwgeom_as_lwpoint(), lwgeom_centroid(), lwgeom_get_type(), lwgeom_is_empty(), lwkmeans_pt_centroid(), lwkmeans_pt_closest(), lwkmeans_pt_distance(), kmeans_config::max_iterations, kmeans_config::num_objs, kmeans_config::objs, LWPOINT::point, POINTTYPE, POINT2D::x, and POINT2D::y.

Referenced by ST_ClusterKMeans(), and test_kmeans().

78 {
79  int i;
80  int num_centroids = 0;
81  LWGEOM **centroids;
82  POINT2D *centers_raw;
83  const POINT2D *cp;
84  POINT2D min = { DBL_MAX, DBL_MAX };
85  POINT2D max = { -DBL_MAX, -DBL_MAX };
86  double dx, dy;
87  kmeans_config config;
88  kmeans_result result;
89  int *seen;
90  int sidx = 0;
91 
92  assert(k>0);
93  assert(ngeoms>0);
94  assert(geoms);
95 
96  if (ngeoms<k)
97  {
98  lwerror("%s: number of geometries is less than the number of clusters requested", __func__);
99  }
100 
101  /* We'll hold the temporary centroid objects here */
102  centroids = lwalloc(sizeof(LWGEOM*) * ngeoms);
103 
104  /* The vector of cluster means. We have to allocate a */
105  /* chunk of memory for these because we'll be mutating them */
106  /* in the kmeans algorithm */
107  centers_raw = lwalloc(sizeof(POINT2D) * k);
108 
109  /* K-means configuration setup */
110  config.objs = lwalloc(sizeof(Pointer) * ngeoms);
111  config.num_objs = ngeoms;
112  config.clusters = lwalloc(sizeof(int) * ngeoms);
113  config.centers = lwalloc(sizeof(Pointer) * k);
114  config.k = k;
115  config.max_iterations = 0;
118 
119  /* Clean the memory */
120  memset(config.objs, 0, sizeof(Pointer) * ngeoms);
121  memset(config.clusters, 0, sizeof(int) * ngeoms);
122  memset(config.centers, 0, sizeof(Pointer) * k);
123 
124  /* Prepare the list of object pointers for K-means */
125  for (i = 0; i < ngeoms; i++)
126  {
127  const LWGEOM *geom = geoms[i];
128  LWPOINT *lwpoint;
129 
130  /* Null/empty geometries get a NULL pointer */
131  if ((!geom) || lwgeom_is_empty(geom))
132  {
133  config.objs[i] = NULL;
134  continue;
135  }
136 
137  /* If the input is a point, use its coordinates */
138  /* If its not a point, convert it to one via centroid */
139  if (lwgeom_get_type(geom) != POINTTYPE)
140  {
142  if ((!centroid) || lwgeom_is_empty(centroid))
143  {
144  config.objs[i] = NULL;
145  continue;
146  }
147  centroids[num_centroids++] = centroid;
148  lwpoint = lwgeom_as_lwpoint(centroid);
149  }
150  else
151  {
152  lwpoint = lwgeom_as_lwpoint(geom);
153  }
154 
155  /* Store a pointer to the POINT2D we are interested in */
156  cp = getPoint2d_cp(lwpoint->point, 0);
157  config.objs[i] = (Pointer)cp;
158 
159  /* Since we're already here, let's calculate the extrema of the set */
160  if (cp->x < min.x) min.x = cp->x;
161  if (cp->y < min.y) min.y = cp->y;
162  if (cp->x > max.x) max.x = cp->x;
163  if (cp->y > max.y) max.y = cp->y;
164  }
165 
166  /*
167  * We map a uniform assignment of points in the area covered by the set
168  * onto actual points in the set
169  */
170  dx = (max.x - min.x)/k;
171  dy = (max.y - min.y)/k;
172  seen = lwalloc(sizeof(int)*config.k);
173  for (i = 0; i < k; i++)
174  {
175  int closest;
176  POINT2D p;
177  int j;
178 
179  /* Calculate a point in the range */
180  p.x = min.x + dx * (i + 0.5);
181  p.y = min.y + dy * (i + 0.5);
182 
183  /* Find the data point closest to the calculated point */
184  closest = lwkmeans_pt_closest(config.objs, config.num_objs, &p);
185 
186  /* If something is terrible wrong w/ data, cannot find a closest */
187  if (closest < 0)
188  lwerror("unable to calculate cluster seed points, too many NULLs or empties?");
189 
190  /* Ensure we aren't already using that point as a seed */
191  j = 0;
192  while(j < sidx)
193  {
194  if (seen[j] == closest)
195  {
196  closest = (closest + 1) % config.num_objs;
197  j = 0;
198  }
199  else
200  {
201  j++;
202  }
203  }
204  seen[sidx++] = closest;
205 
206  /* Copy the point coordinates into the initial centers array */
207  /* This is ugly, but the centers array is an array of */
208  /* pointers to points, not an array of points */
209  centers_raw[i] = *((POINT2D*)config.objs[closest]);
210  config.centers[i] = &(centers_raw[i]);
211  }
212 
213  result = kmeans(&config);
214 
215  /* Before error handling, might as well clean up all the inputs */
216  lwfree(config.objs);
217  lwfree(config.centers);
218  lwfree(centers_raw);
219  lwfree(centroids);
220  lwfree(seen);
221 
222  /* Good result */
223  if (result == KMEANS_OK)
224  return config.clusters;
225 
226  /* Bad result, not going to need the answer */
227  lwfree(config.clusters);
228  if (result == KMEANS_EXCEEDED_MAX_ITERATIONS)
229  {
230  lwerror("%s did not converge after %d iterations", __func__, config.max_iterations);
231  return NULL;
232  }
233 
234  /* Unknown error */
235  return NULL;
236 }
int * clusters
Definition: kmeans.h:120
void lwfree(void *mem)
Definition: lwutil.c:242
void * Pointer
Definition: kmeans.h:59
uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition: lwgeom.c:842
kmeans_result kmeans(kmeans_config *config)
Definition: kmeans.c:249
kmeans_result
Definition: kmeans.h:61
Datum centroid(PG_FUNCTION_ARGS)
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
Definition: lwgeom.c:93
POINTARRAY * point
Definition: liblwgeom.h:410
static double lwkmeans_pt_distance(const Pointer a, const Pointer b)
Definition: lwkmeans.c:8
double x
Definition: liblwgeom.h:327
const POINT2D * getPoint2d_cp(const POINTARRAY *pa, int n)
Returns a POINT2D pointer into the POINTARRAY serialized_ptlist, suitable for reading from...
Definition: lwgeom_api.c:485
static void lwkmeans_pt_centroid(const Pointer *objs, const int *clusters, size_t num_objs, int cluster, Pointer centroid)
Definition: lwkmeans.c:45
Pointer * centers
Definition: kmeans.h:107
double y
Definition: liblwgeom.h:327
unsigned int max_iterations
Definition: kmeans.h:114
static int lwkmeans_pt_closest(const Pointer *objs, size_t num_objs, const Pointer a)
Definition: lwkmeans.c:19
size_t num_objs
Definition: kmeans.h:100
unsigned int k
Definition: kmeans.h:110
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition: liblwgeom.h:84
Pointer * objs
Definition: kmeans.h:97
void * lwalloc(size_t size)
Definition: lwutil.c:227
int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members) ...
Definition: lwgeom.c:1310
kmeans_distance_method distance_method
Definition: kmeans.h:85
kmeans_centroid_method centroid_method
Definition: kmeans.h:88
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
Definition: lwutil.c:102

Here is the call graph for this function:

Here is the caller graph for this function: