PostGIS  2.3.8dev-r@@SVN_REVISION@@

◆ lwgeom_cluster_2d_kmeans()

int* lwgeom_cluster_2d_kmeans ( const LWGEOM **  geoms,
int  ngeoms,
int  k 
)

Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster each geometry is in.

Parameters
geomsthe input array of LWGEOM pointers
ngeomsthe number of elements in the array
kthe number of clusters to calculate

Definition at line 77 of file lwkmeans.c.

References kmeans_config::centers, centroid(), kmeans_config::centroid_method, kmeans_config::clusters, kmeans_config::distance_method, getPoint2d_cp(), if(), kmeans_config::k, kmeans(), KMEANS_EXCEEDED_MAX_ITERATIONS, KMEANS_OK, lwalloc(), lwerror(), lwfree(), lwgeom_as_lwpoint(), lwgeom_centroid(), lwgeom_get_type(), lwgeom_is_empty(), lwkmeans_pt_centroid(), lwkmeans_pt_closest(), lwkmeans_pt_distance(), kmeans_config::max_iterations, kmeans_config::num_objs, kmeans_config::objs, LWPOINT::point, POINTTYPE, POINT2D::x, and POINT2D::y.

Referenced by ST_ClusterKMeans(), and test_kmeans().

78 {
79  int i;
80  int num_centroids = 0;
81  LWGEOM **centroids;
82  POINT2D *centers_raw;
83  const POINT2D *cp;
84  POINT2D min = { DBL_MAX, DBL_MAX };
85  POINT2D max = { -DBL_MAX, -DBL_MAX };
86  double dx, dy;
87  kmeans_config config;
88  kmeans_result result;
89  int *seen;
90  int sidx = 0;
91 
92  assert(k>0);
93  assert(ngeoms>0);
94  assert(geoms);
95 
96  if (ngeoms<k)
97  {
98  lwerror("%s: number of geometries is less than the number of clusters requested", __func__);
99  }
100 
101  /* We'll hold the temporary centroid objects here */
102  centroids = lwalloc(sizeof(LWGEOM*) * ngeoms);
103 
104  /* The vector of cluster means. We have to allocate a */
105  /* chunk of memory for these because we'll be mutating them */
106  /* in the kmeans algorithm */
107  centers_raw = lwalloc(sizeof(POINT2D) * k);
108 
109  /* K-means configuration setup */
110  config.objs = lwalloc(sizeof(Pointer) * ngeoms);
111  config.num_objs = ngeoms;
112  config.clusters = lwalloc(sizeof(int) * ngeoms);
113  config.centers = lwalloc(sizeof(Pointer) * k);
114  config.k = k;
115  config.max_iterations = 0;
118 
119  /* Clean the memory */
120  memset(config.objs, 0, sizeof(Pointer) * ngeoms);
121  memset(config.clusters, 0, sizeof(int) * ngeoms);
122  memset(config.centers, 0, sizeof(Pointer) * k);
123 
124  /* Prepare the list of object pointers for K-means */
125  for (i = 0; i < ngeoms; i++)
126  {
127  const LWGEOM *geom = geoms[i];
128  LWPOINT *lwpoint;
129 
130  /* Null/empty geometries get a NULL pointer */
131  if ((!geom) || lwgeom_is_empty(geom))
132  {
133  config.objs[i] = NULL;
134  continue;
135  }
136 
137  /* If the input is a point, use its coordinates */
138  /* If its not a point, convert it to one via centroid */
139  if (lwgeom_get_type(geom) != POINTTYPE)
140  {
142  if ((!centroid) || lwgeom_is_empty(centroid))
143  {
144  config.objs[i] = NULL;
145  continue;
146  }
147  centroids[num_centroids++] = centroid;
148  lwpoint = lwgeom_as_lwpoint(centroid);
149  }
150  else
151  {
152  lwpoint = lwgeom_as_lwpoint(geom);
153  }
154 
155  /* Store a pointer to the POINT2D we are interested in */
156  cp = getPoint2d_cp(lwpoint->point, 0);
157  config.objs[i] = (Pointer)cp;
158 
159  /* Since we're already here, let's calculate the extrema of the set */
160  if (cp->x < min.x) min.x = cp->x;
161  if (cp->y < min.y) min.y = cp->y;
162  if (cp->x > max.x) max.x = cp->x;
163  if (cp->y > max.y) max.y = cp->y;
164  }
165 
166  /*
167  * We map a uniform assignment of points in the area covered by the set
168  * onto actual points in the set
169  */
170  dx = (max.x - min.x)/k;
171  dy = (max.y - min.y)/k;
172  seen = lwalloc(sizeof(int)*config.k);
173  for (i = 0; i < k; i++)
174  {
175  int closest;
176  POINT2D p;
177  int j;
178 
179  /* Calculate a point in the range */
180  p.x = min.x + dx * (i + 0.5);
181  p.y = min.y + dy * (i + 0.5);
182 
183  /* Find the data point closest to the calculated point */
184  closest = lwkmeans_pt_closest(config.objs, config.num_objs, &p);
185 
186  /* If something is terrible wrong w/ data, cannot find a closest */
187  if (closest < 0)
188  lwerror("unable to calculate cluster seed points, too many NULLs or empties?");
189 
190  /* Ensure we aren't already using that point as a seed */
191  j = 0;
192  while(j < sidx)
193  {
194  if (seen[j] == closest)
195  {
196  int k, t;
197  for (k = 1; k < config.num_objs; k++)
198  {
199  t = (closest + k) % config.num_objs;
200  if (config.objs[t])
201  {
202  closest = t;
203  break;
204  }
205  }
206  j = 0;
207  }
208  else
209  {
210  j++;
211  }
212  }
213  seen[sidx++] = closest;
214 
215  /* Copy the point coordinates into the initial centers array */
216  /* This is ugly, but the centers array is an array of */
217  /* pointers to points, not an array of points */
218  centers_raw[i] = *((POINT2D*)config.objs[closest]);
219  config.centers[i] = &(centers_raw[i]);
220  }
221 
222  result = kmeans(&config);
223 
224  /* Before error handling, might as well clean up all the inputs */
225  lwfree(config.objs);
226  lwfree(config.centers);
227  lwfree(centers_raw);
228  lwfree(centroids);
229  lwfree(seen);
230 
231  /* Good result */
232  if (result == KMEANS_OK)
233  return config.clusters;
234 
235  /* Bad result, not going to need the answer */
236  lwfree(config.clusters);
237  if (result == KMEANS_EXCEEDED_MAX_ITERATIONS)
238  {
239  lwerror("%s did not converge after %d iterations", __func__, config.max_iterations);
240  return NULL;
241  }
242 
243  /* Unknown error */
244  return NULL;
245 }
int * clusters
Definition: kmeans.h:120
void lwfree(void *mem)
Definition: lwutil.c:242
void * Pointer
Definition: kmeans.h:59
uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition: lwgeom.c:842
kmeans_result kmeans(kmeans_config *config)
Definition: kmeans.c:249
kmeans_result
Definition: kmeans.h:61
Datum centroid(PG_FUNCTION_ARGS)
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
Definition: lwgeom.c:93
POINTARRAY * point
Definition: liblwgeom.h:410
static double lwkmeans_pt_distance(const Pointer a, const Pointer b)
Definition: lwkmeans.c:8
double x
Definition: liblwgeom.h:327
const POINT2D * getPoint2d_cp(const POINTARRAY *pa, int n)
Returns a POINT2D pointer into the POINTARRAY serialized_ptlist, suitable for reading from...
Definition: lwgeom_api.c:485
static void lwkmeans_pt_centroid(const Pointer *objs, const int *clusters, size_t num_objs, int cluster, Pointer centroid)
Definition: lwkmeans.c:45
Pointer * centers
Definition: kmeans.h:107
double y
Definition: liblwgeom.h:327
unsigned int max_iterations
Definition: kmeans.h:114
static int lwkmeans_pt_closest(const Pointer *objs, size_t num_objs, const Pointer a)
Definition: lwkmeans.c:19
size_t num_objs
Definition: kmeans.h:100
unsigned int k
Definition: kmeans.h:110
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition: liblwgeom.h:84
Pointer * objs
Definition: kmeans.h:97
void * lwalloc(size_t size)
Definition: lwutil.c:227
int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members) ...
Definition: lwgeom.c:1310
kmeans_distance_method distance_method
Definition: kmeans.h:85
kmeans_centroid_method centroid_method
Definition: kmeans.h:88
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
Definition: lwutil.c:102
if(!(yy_init))
Here is the call graph for this function:
Here is the caller graph for this function: