PostGIS  3.0.6dev-r@@SVN_REVISION@@

◆ kmeans_init()

static void kmeans_init ( POINT2D **  objs,
uint32_t  n,
POINT2D **  centers,
POINT2D centers_raw,
uint32_t  k 
)
static

Definition at line 129 of file lwkmeans.c.

130 {
131  double* distances;
132  uint32_t p1 = 0, p2 = 0;
133  uint32_t i, j;
134  uint32_t duplicate_count = 1; /* a point is a duplicate of itself */
135  double max_dst = -1, current_distance;
136  double dst_p1, dst_p2;
137 
138  /* k=0, k=1: "clustering" is just input validation */
139  assert(k > 1);
140 
141  /* k >= 2: find two distant points greedily */
142  for (i = 1; i < n; i++)
143  {
144  /* skip null */
145  if (!objs[i]) continue;
146 
147  /* reinit if first element happened to be null */
148  if (!objs[p1] && !objs[p2])
149  {
150  p1 = i;
151  p2 = i;
152  continue;
153  }
154 
155  /* if we found a larger distance, replace our choice */
156  dst_p1 = distance2d_sqr_pt_pt(objs[i], objs[p1]);
157  dst_p2 = distance2d_sqr_pt_pt(objs[i], objs[p2]);
158  if ((dst_p1 > max_dst) || (dst_p2 > max_dst))
159  {
160  if (dst_p1 > dst_p2)
161  {
162  max_dst = dst_p1;
163  p2 = i;
164  }
165  else
166  {
167  max_dst = dst_p2;
168  p1 = i;
169  }
170  }
171  if ((dst_p1 == 0) || (dst_p2 == 0)) duplicate_count++;
172  }
173  if (duplicate_count > 1)
174  lwnotice(
175  "%s: there are at least %u duplicate inputs, number of output clusters may be less than you requested",
176  __func__,
177  duplicate_count);
178 
179  /* by now two points should be found and non-same */
180  assert(p1 != p2 && objs[p1] && objs[p2] && max_dst >= 0);
181 
182  /* accept these two points */
183  centers_raw[0] = *((POINT2D *)objs[p1]);
184  centers[0] = &(centers_raw[0]);
185  centers_raw[1] = *((POINT2D *)objs[p2]);
186  centers[1] = &(centers_raw[1]);
187 
188  if (k > 2)
189  {
190  /* array of minimum distance to a point from accepted cluster centers */
191  distances = lwalloc(sizeof(double) * n);
192 
193  /* initialize array with distance to first object */
194  for (j = 0; j < n; j++)
195  {
196  if (objs[j])
197  distances[j] = distance2d_sqr_pt_pt(objs[j], centers[0]);
198  else
199  distances[j] = -1;
200  }
201  distances[p1] = -1;
202  distances[p2] = -1;
203 
204  /* loop i on clusters, skip 0 and 1 as found already */
205  for (i = 2; i < k; i++)
206  {
207  uint32_t candidate_center = 0;
208  double max_distance = -DBL_MAX;
209 
210  /* loop j on objs */
211  for (j = 0; j < n; j++)
212  {
213  /* empty objs and accepted clusters are already marked with distance = -1 */
214  if (distances[j] < 0) continue;
215 
216  /* update minimal distance with previosuly accepted cluster */
217  current_distance = distance2d_sqr_pt_pt(objs[j], centers[i - 1]);
218  if (current_distance < distances[j])
219  distances[j] = current_distance;
220 
221  /* greedily take a point that's farthest from any of accepted clusters */
222  if (distances[j] > max_distance)
223  {
224  candidate_center = j;
225  max_distance = distances[j];
226  }
227  }
228 
229  /* Checked earlier by counting entries on input, just in case */
230  assert(max_distance >= 0);
231 
232  /* accept candidate to centers */
233  distances[candidate_center] = -1;
234  /* Copy the point coordinates into the initial centers array
235  * Centers array is an array of pointers to points, not an array of points */
236  centers_raw[i] = *((POINT2D *)objs[candidate_center]);
237  centers[i] = &(centers_raw[i]);
238  }
239  lwfree(distances);
240  }
241 }
void lwfree(void *mem)
Definition: lwutil.c:242
void * lwalloc(size_t size)
Definition: lwutil.c:227
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
Definition: lwutil.c:177
static double distance2d_sqr_pt_pt(const POINT2D *p1, const POINT2D *p2)
Definition: lwinline.h:35

References distance2d_sqr_pt_pt(), lwalloc(), lwfree(), and lwnotice().

Referenced by lwgeom_cluster_2d_kmeans().

Here is the call graph for this function:
Here is the caller graph for this function: