PostGIS  3.4.0dev-r@@SVN_REVISION@@

◆ kmeans_init()

static void kmeans_init ( POINT4D objs,
uint32_t  n,
POINT4D centers,
uint32_t  k 
)
static

Definition at line 175 of file lwkmeans.c.

176 {
177  double *distances;
178  uint32_t p1 = 0, p2 = 0;
179  uint32_t duplicate_count = 1; /* a point is a duplicate of itself */
180  double max_dst = -1;
181 
182  /* k=0, k=1: any point will do */
183  assert(n > 0);
184  if (k < 2)
185  {
186  centers[0] = objs[0];
187  return;
188  }
189 
190  /* k >= 2: find two distant points greedily */
191  for (uint32_t i = 1; i < n; i++)
192  {
193  /* if we found a larger distance, replace our choice */
194  double dst_p1 = distance3d_sqr_pt4d_pt4d(&objs[i], &objs[p1]);
195  double dst_p2 = distance3d_sqr_pt4d_pt4d(&objs[i], &objs[p2]);
196  if ((dst_p1 > max_dst) || (dst_p2 > max_dst))
197  {
198  if (dst_p1 > dst_p2)
199  {
200  max_dst = dst_p1;
201  p2 = i;
202  }
203  else
204  {
205  max_dst = dst_p2;
206  p1 = i;
207  }
208  }
209  if ((dst_p1 == 0) || (dst_p2 == 0))
210  duplicate_count++;
211  }
212  if (duplicate_count > 1)
213  lwnotice(
214  "%s: there are at least %u duplicate inputs, number of output clusters may be less than you requested",
215  __func__,
216  duplicate_count);
217 
218  /* by now two points should be found and non-same */
219  assert(p1 != p2 && max_dst >= 0);
220 
221  /* accept these two points */
222  centers[0] = objs[p1];
223  centers[1] = objs[p2];
224 
225  if (k > 2)
226  {
227  /* array of minimum distance to a point from accepted cluster centers */
228  distances = lwalloc(sizeof(double) * n);
229 
230  /* initialize array with distance to first object */
231  for (uint32_t j = 0; j < n; j++)
232  distances[j] = distance3d_sqr_pt4d_pt4d(&objs[j], &centers[0]);
233  distances[p1] = -1;
234  distances[p2] = -1;
235 
236  /* loop i on clusters, skip 0 and 1 as found already */
237  for (uint32_t i = 2; i < k; i++)
238  {
239  uint32_t candidate_center = 0;
240  double max_distance = -DBL_MAX;
241 
242  /* loop j on objs */
243  for (uint32_t j = 0; j < n; j++)
244  {
245  /* empty objs and accepted clusters are already marked with distance = -1 */
246  if (distances[j] < 0)
247  continue;
248 
249  /* update minimal distance with previosuly accepted cluster */
250  double current_distance = distance3d_sqr_pt4d_pt4d(&objs[j], &centers[i - 1]);
251  if (current_distance < distances[j])
252  distances[j] = current_distance;
253 
254  /* greedily take a point that's farthest from any of accepted clusters */
255  if (distances[j] > max_distance)
256  {
257  candidate_center = j;
258  max_distance = distances[j];
259  }
260  }
261 
262  /* Checked earlier by counting entries on input, just in case */
263  assert(max_distance >= 0);
264 
265  /* accept candidate to centers */
266  distances[candidate_center] = -1;
267  /* Copy the point coordinates into the initial centers array
268  * Centers array is an array of pointers to points, not an array of points */
269  centers[i] = objs[candidate_center];
270  }
271  lwfree(distances);
272  }
273 }
void lwfree(void *mem)
Definition: lwutil.c:242
void * lwalloc(size_t size)
Definition: lwutil.c:227
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
Definition: lwutil.c:177
static double distance3d_sqr_pt4d_pt4d(const POINT4D *p1, const POINT4D *p2)
Definition: lwkmeans.c:31

References distance3d_sqr_pt4d_pt4d(), lwalloc(), lwfree(), and lwnotice().

Referenced by kmeans().

Here is the call graph for this function:
Here is the caller graph for this function: