14 #define KMEANS_NULL_CLUSTER -1
20 #define KMEANS_MAX_ITERATIONS 1000
28 uint32_t cluster, curr_cluster;
30 for (i = 0; i < n; i++)
46 for (cluster = 1; cluster < k; cluster++)
52 curr_cluster = cluster;
57 clusters[i] = (int) curr_cluster;
67 memset(weights, 0,
sizeof(uint32_t) * k);
68 for (i = 0; i < k; i++)
73 for (i = 0; i < n; i++)
75 cluster = clusters[i];
78 centers[cluster]->
x += objs[i]->
x;
79 centers[cluster]->
y += objs[i]->
y;
80 weights[cluster] += 1;
83 for (i = 0; i < k; i++)
87 centers[i]->
x /= weights[i];
88 centers[i]->
y /= weights[i];
99 size_t clusters_sz =
sizeof(int) * n;
102 weights =
lwalloc(
sizeof(
int) * k);
105 clusters_last =
lwalloc(clusters_sz);
112 memcpy(clusters_last, clusters, clusters_sz);
114 update_r(objs, clusters, n, centers, k);
118 converged = memcmp(clusters_last, clusters, clusters_sz) == 0;
124 lwerror(
"%s did not converge after %d iterations", __func__, i);
132 uint32_t p1 = 0, p2 = 0;
134 uint32_t duplicate_count = 1;
135 double max_dst = -1, current_distance;
136 double dst_p1, dst_p2;
142 for (i = 1; i < n; i++)
145 if (!objs[i])
continue;
148 if (!objs[p1] && !objs[p2])
158 if ((dst_p1 > max_dst) || (dst_p2 > max_dst))
171 if ((dst_p1 == 0) || (dst_p2 == 0)) duplicate_count++;
173 if (duplicate_count > 1)
175 "%s: there are at least %u duplicate inputs, number of output clusters may be less than you requested",
180 assert(p1 != p2 && objs[p1] && objs[p2] && max_dst >= 0);
183 centers_raw[0] = *((
POINT2D *)objs[p1]);
184 centers[0] = &(centers_raw[0]);
185 centers_raw[1] = *((
POINT2D *)objs[p2]);
186 centers[1] = &(centers_raw[1]);
191 distances =
lwalloc(
sizeof(
double) * n);
194 for (j = 0; j < n; j++)
205 for (i = 2; i < k; i++)
207 uint32_t candidate_center = 0;
208 double max_distance = -DBL_MAX;
211 for (j = 0; j < n; j++)
214 if (distances[j] < 0)
continue;
218 if (current_distance < distances[j])
219 distances[j] = current_distance;
222 if (distances[j] > max_distance)
224 candidate_center = j;
225 max_distance = distances[j];
230 assert(max_distance >= 0);
233 distances[candidate_center] = -1;
236 centers_raw[i] = *((
POINT2D *)objs[candidate_center]);
237 centers[i] = &(centers_raw[i]);
247 uint32_t num_centroids = 0;
248 uint32_t num_non_empty = 0;
270 lwerror(
"%s: number of geometries is less than the number of clusters requested, not all clusters will get data", __func__);
276 memset(centroids, 0,
sizeof(
LWGEOM*) * n);
281 memset(centers_raw, 0,
sizeof(
POINT2D) * k);
285 clusters =
lwalloc(
sizeof(
int) * n);
289 memset(objs, 0,
sizeof(
POINT2D*) * n);
290 memset(clusters, 0,
sizeof(
int) * n);
291 memset(centers, 0,
sizeof(
POINT2D*) * k);
294 for (i = 0; i < n; i++)
296 const LWGEOM* geom = geoms[i];
313 centroids[num_centroids++] =
centroid;
325 if (num_non_empty < k)
327 lwnotice(
"%s: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data", __func__);
334 result =
kmeans(objs, clusters, n, centers, k);
340 for (i = 0; i < n; i++)
342 if (k == 0 || !objs[i])
357 if (result)
return clusters;
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
void lwgeom_free(LWGEOM *geom)
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
void * lwalloc(size_t size)
#define LW_TRUE
Return types for functions with status returns.
#define LW_ON_INTERRUPT(x)
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
static const POINT2D * getPoint2d_cp(const POINTARRAY *pa, uint32_t n)
Returns a POINT2D pointer into the POINTARRAY serialized_ptlist, suitable for reading from.
static double distance2d_sqr_pt_pt(const POINT2D *p1, const POINT2D *p2)
static uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
static int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members)
static LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
#define KMEANS_MAX_ITERATIONS
int * lwgeom_cluster_2d_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster ...
static void update_r(POINT2D **objs, int *clusters, uint32_t n, POINT2D **centers, uint32_t k)
static void kmeans_init(POINT2D **objs, uint32_t n, POINT2D **centers, POINT2D *centers_raw, uint32_t k)
#define KMEANS_NULL_CLUSTER
static void update_means(POINT2D **objs, int *clusters, uint32_t n, POINT2D **centers, uint32_t *weights, uint32_t k)
static int kmeans(POINT2D **objs, int *clusters, uint32_t n, POINT2D **centers, uint32_t k)
static double distance(double x1, double y1, double x2, double y2)
Datum centroid(PG_FUNCTION_ARGS)