PostGIS  2.5.7dev-r@@SVN_REVISION@@

◆ kmeans_init()

static void kmeans_init ( POINT2D **  objs,
int *  clusters,
uint32_t  n,
POINT2D **  centers,
POINT2D centers_raw,
uint32_t  k 
)
static

Definition at line 132 of file lwkmeans.c.

133 {
134  double* distances;
135  uint32_t p1 = 0, p2 = 0;
136  uint32_t i, j;
137  uint32_t duplicate_count = 1; /* a point is a duplicate of itself */
138  double max_dst = -1;
139  double dst_p1, dst_p2;
140 
141  /* k=0, k=1: "clustering" is just input validation */
142  assert(k > 1);
143 
144  /* k >= 2: find two distant points greedily */
145  for (i = 1; i < n; i++)
146  {
147  /* skip null */
148  if (!objs[i]) continue;
149 
150  /* reinit if first element happened to be null */
151  if (!objs[p1] && !objs[p2])
152  {
153  p1 = i;
154  p2 = i;
155  continue;
156  }
157 
158  /* if we found a larger distance, replace our choice */
159  dst_p1 = distance2d_sqr_pt_pt(objs[i], objs[p1]);
160  dst_p2 = distance2d_sqr_pt_pt(objs[i], objs[p2]);
161  if ((dst_p1 > max_dst) || (dst_p2 > max_dst))
162  {
163  max_dst = fmax(dst_p1, dst_p2);
164  if (dst_p1 > dst_p2)
165  p2 = i;
166  else
167  p1 = i;
168  }
169  if ((dst_p1 == 0) || (dst_p2 == 0)) duplicate_count++;
170  }
171  if (duplicate_count > 1)
172  lwnotice(
173  "%s: there are at least %u duplicate inputs, number of output clusters may be less than you requested",
174  __func__,
175  duplicate_count);
176 
177  /* by now two points should be found and non-same */
178  assert(p1 != p2 && objs[p1] && objs[p2] && max_dst >= 0);
179 
180  /* accept these two points */
181  centers_raw[0] = *((POINT2D *)objs[p1]);
182  centers[0] = &(centers_raw[0]);
183  centers_raw[1] = *((POINT2D *)objs[p2]);
184  centers[1] = &(centers_raw[1]);
185 
186  if (k > 2)
187  {
188  /* array of minimum distance to a point from accepted cluster centers */
189  distances = lwalloc(sizeof(double) * n);
190 
191  /* initialize array with distance to first object */
192  for (j = 0; j < n; j++)
193  {
194  if (objs[j])
195  distances[j] = distance2d_sqr_pt_pt(objs[j], centers[0]);
196  else
197  distances[j] = -1;
198  }
199  distances[p1] = -1;
200  distances[p2] = -1;
201 
202  /* loop i on clusters, skip 0 and 1 as found already */
203  for (i = 2; i < k; i++)
204  {
205  uint32_t candidate_center = 0;
206  double max_distance = -DBL_MAX;
207 
208  /* loop j on objs */
209  for (j = 0; j < n; j++)
210  {
211  /* empty objs and accepted clusters are already marked with distance = -1 */
212  if (distances[j] < 0) continue;
213 
214  /* update minimal distance with previosuly accepted cluster */
215  distances[j] = fmin(distance2d_sqr_pt_pt(objs[j], centers[i - 1]), distances[j]);
216 
217  /* greedily take a point that's farthest from any of accepted clusters */
218  if (distances[j] > max_distance)
219  {
220  candidate_center = j;
221  max_distance = distances[j];
222  }
223  }
224 
225  /* Checked earlier by counting entries on input, just in case */
226  assert(max_distance >= 0);
227 
228  /* accept candidate to centers */
229  distances[candidate_center] = -1;
230  /* Copy the point coordinates into the initial centers array
231  * Centers array is an array of pointers to points, not an array of points */
232  centers_raw[i] = *((POINT2D *)objs[candidate_center]);
233  centers[i] = &(centers_raw[i]);
234  }
235  lwfree(distances);
236  }
237 }
double distance2d_sqr_pt_pt(const POINT2D *p1, const POINT2D *p2)
Definition: measures.c:2323
void lwfree(void *mem)
Definition: lwutil.c:244
void * lwalloc(size_t size)
Definition: lwutil.c:229
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
Definition: lwutil.c:177
unsigned int uint32_t
Definition: uthash.h:78

References distance2d_sqr_pt_pt(), lwalloc(), lwfree(), and lwnotice().

Referenced by lwgeom_cluster_2d_kmeans().

Here is the call graph for this function:
Here is the caller graph for this function: