17 #define KMEANS_NULL_CLUSTER -1
23 #define KMEANS_MAX_ITERATIONS 1000
33 for (i = 0; i < n; i++)
49 for (cluster = 1; cluster < k; cluster++)
55 curr_cluster = cluster;
60 clusters[i] = (int) curr_cluster;
70 memset(weights, 0,
sizeof(
uint32_t) * k);
71 for (i = 0; i < k; i++)
76 for (i = 0; i < n; i++)
78 cluster = clusters[i];
81 centers[cluster]->
x += objs[i]->
x;
82 centers[cluster]->
y += objs[i]->
y;
83 weights[cluster] += 1;
86 for (i = 0; i < k; i++)
90 centers[i]->
x /= weights[i];
91 centers[i]->
y /= weights[i];
102 size_t clusters_sz =
sizeof(int) * n;
105 weights =
lwalloc(
sizeof(
int) * k);
108 clusters_last =
lwalloc(clusters_sz);
115 memcpy(clusters_last, clusters, clusters_sz);
117 update_r(objs, clusters, n, centers, k);
121 converged = memcmp(clusters_last, clusters, clusters_sz) == 0;
127 lwerror(
"%s did not converge after %d iterations", __func__, i);
139 double dst_p1, dst_p2;
145 for (i = 1; i < n; i++)
148 if (!objs[i])
continue;
151 if (!objs[p1] && !objs[p2])
161 if ((dst_p1 > max_dst) || (dst_p2 > max_dst))
163 max_dst = fmax(dst_p1, dst_p2);
169 if ((dst_p1 == 0) || (dst_p2 == 0)) duplicate_count++;
171 if (duplicate_count > 1)
173 "%s: there are at least %u duplicate inputs, number of output clusters may be less than you requested",
178 assert(p1 != p2 && objs[p1] && objs[p2] && max_dst >= 0);
181 centers_raw[0] = *((
POINT2D *)objs[p1]);
182 centers[0] = &(centers_raw[0]);
183 centers_raw[1] = *((
POINT2D *)objs[p2]);
184 centers[1] = &(centers_raw[1]);
189 distances =
lwalloc(
sizeof(
double) * n);
192 for (j = 0; j < n; j++)
203 for (i = 2; i < k; i++)
206 double max_distance = -DBL_MAX;
209 for (j = 0; j < n; j++)
212 if (distances[j] < 0)
continue;
218 if (distances[j] > max_distance)
220 candidate_center = j;
221 max_distance = distances[j];
226 assert(max_distance >= 0);
229 distances[candidate_center] = -1;
232 centers_raw[i] = *((
POINT2D *)objs[candidate_center]);
233 centers[i] = &(centers_raw[i]);
266 lwerror(
"%s: number of geometries is less than the number of clusters requested, not all clusters will get data", __func__);
272 memset(centroids, 0,
sizeof(
LWGEOM*) * n);
277 memset(centers_raw, 0,
sizeof(
POINT2D) * k);
281 clusters =
lwalloc(
sizeof(
int) * n);
285 memset(objs, 0,
sizeof(
POINT2D*) * n);
286 memset(clusters, 0,
sizeof(
int) * n);
287 memset(centers, 0,
sizeof(
POINT2D*) * k);
290 for (i = 0; i < n; i++)
292 const LWGEOM* geom = geoms[i];
309 centroids[num_centroids++] =
centroid;
321 if (num_non_empty < k)
323 lwnotice(
"%s: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data", __func__);
329 kmeans_init(objs, clusters, n, centers, centers_raw, k);
330 result =
kmeans(objs, clusters, n, centers, k);
336 for (i = 0; i < n; i++)
338 if (k == 0 || !objs[i])
353 if (result)
return clusters;
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
void lwgeom_free(LWGEOM *geom)
double distance2d_sqr_pt_pt(const POINT2D *p1, const POINT2D *p2)
uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members)
void * lwalloc(size_t size)
#define LW_TRUE
Return types for functions with status returns.
const POINT2D * getPoint2d_cp(const POINTARRAY *pa, uint32_t n)
Returns a POINT2D pointer into the POINTARRAY serialized_ptlist, suitable for reading from.
#define LW_ON_INTERRUPT(x)
Datum distance(PG_FUNCTION_ARGS)
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
#define KMEANS_MAX_ITERATIONS
int * lwgeom_cluster_2d_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster ...
static void kmeans_init(POINT2D **objs, int *clusters, uint32_t n, POINT2D **centers, POINT2D *centers_raw, uint32_t k)
static void update_r(POINT2D **objs, int *clusters, uint32_t n, POINT2D **centers, uint32_t k)
#define KMEANS_NULL_CLUSTER
static void update_means(POINT2D **objs, int *clusters, uint32_t n, POINT2D **centers, uint32_t *weights, uint32_t k)
static int kmeans(POINT2D **objs, int *clusters, uint32_t n, POINT2D **centers, uint32_t k)
Datum centroid(PG_FUNCTION_ARGS)