PostGIS  3.1.6dev-r@@SVN_REVISION@@

◆ kmeans_init()

static void kmeans_init ( POINT4D objs,
uint32_t  n,
POINT4D centers,
uint32_t  k 
)
static

Definition at line 108 of file lwkmeans.c.

109 {
110  double *distances;
111  uint32_t p1 = 0, p2 = 0;
112  uint32_t i, j;
113  uint32_t duplicate_count = 1; /* a point is a duplicate of itself */
114  double max_dst = -1, current_distance;
115  double dst_p1, dst_p2;
116 
117  /* k=0, k=1: "clustering" is just input validation */
118  assert(k > 1);
119 
120  /* k >= 2: find two distant points greedily */
121  for (i = 1; i < n; i++)
122  {
123  /* if we found a larger distance, replace our choice */
124  dst_p1 = distance3d_sqr_pt4d_pt4d(&objs[i], &objs[p1]);
125  dst_p2 = distance3d_sqr_pt4d_pt4d(&objs[i], &objs[p2]);
126  if ((dst_p1 > max_dst) || (dst_p2 > max_dst))
127  {
128  if (dst_p1 > dst_p2)
129  {
130  max_dst = dst_p1;
131  p2 = i;
132  }
133  else
134  {
135  max_dst = dst_p2;
136  p1 = i;
137  }
138  }
139  if ((dst_p1 == 0) || (dst_p2 == 0))
140  duplicate_count++;
141  }
142  if (duplicate_count > 1)
143  lwnotice(
144  "%s: there are at least %u duplicate inputs, number of output clusters may be less than you requested",
145  __func__,
146  duplicate_count);
147 
148  /* by now two points should be found and non-same */
149  assert(p1 != p2 && max_dst >= 0);
150 
151  /* accept these two points */
152  centers[0] = objs[p1];
153  centers[1] = objs[p2];
154 
155  if (k > 2)
156  {
157  /* array of minimum distance to a point from accepted cluster centers */
158  distances = lwalloc(sizeof(double) * n);
159 
160  /* initialize array with distance to first object */
161  for (j = 0; j < n; j++)
162  distances[j] = distance3d_sqr_pt4d_pt4d(&objs[j], &centers[0]);
163  distances[p1] = -1;
164  distances[p2] = -1;
165 
166  /* loop i on clusters, skip 0 and 1 as found already */
167  for (i = 2; i < k; i++)
168  {
169  uint32_t candidate_center = 0;
170  double max_distance = -DBL_MAX;
171 
172  /* loop j on objs */
173  for (j = 0; j < n; j++)
174  {
175  /* empty objs and accepted clusters are already marked with distance = -1 */
176  if (distances[j] < 0)
177  continue;
178 
179  /* update minimal distance with previosuly accepted cluster */
180  current_distance = distance3d_sqr_pt4d_pt4d(&objs[j], &centers[i - 1]);
181  if (current_distance < distances[j])
182  distances[j] = current_distance;
183 
184  /* greedily take a point that's farthest from any of accepted clusters */
185  if (distances[j] > max_distance)
186  {
187  candidate_center = j;
188  max_distance = distances[j];
189  }
190  }
191 
192  /* Checked earlier by counting entries on input, just in case */
193  assert(max_distance >= 0);
194 
195  /* accept candidate to centers */
196  distances[candidate_center] = -1;
197  /* Copy the point coordinates into the initial centers array
198  * Centers array is an array of pointers to points, not an array of points */
199  centers[i] = objs[candidate_center];
200  }
201  lwfree(distances);
202  }
203 }
void lwfree(void *mem)
Definition: lwutil.c:242
void * lwalloc(size_t size)
Definition: lwutil.c:227
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
Definition: lwutil.c:177
static double distance3d_sqr_pt4d_pt4d(const POINT4D *p1, const POINT4D *p2)
Definition: lwkmeans.c:23

References distance3d_sqr_pt4d_pt4d(), lwalloc(), lwfree(), and lwnotice().

Referenced by lwgeom_cluster_kmeans().

Here is the call graph for this function:
Here is the caller graph for this function: