14 #define KMEANS_NULL_CLUSTER -1
20 #define KMEANS_MAX_ITERATIONS 1000
33 double hside = p2->
x - p1->
x;
34 double vside = p2->
y - p1->
y;
35 double zside = p2->
z - p1->
z;
37 return hside * hside + vside * vside + zside * zside;
54 double max_radius_sq = max_radius * max_radius;
57 uint32_t first_cluster_to_split = 0;
58 for (; first_cluster_to_split < k; first_cluster_to_split++)
59 if (radii[first_cluster_to_split] > max_radius_sq)
61 if (first_cluster_to_split == k)
65 uint32_t *temp_clusters =
lwalloc(
sizeof(uint32_t) * n);
66 double *temp_radii =
lwalloc(
sizeof(
double) * n);
71 for (uint32_t cluster = first_cluster_to_split; cluster < k; cluster++)
73 if (radii[cluster] <= max_radius_sq)
77 uint32_t cluster_size = 0;
78 for (uint32_t i = 0; i < n; i++)
79 if (clusters[i] == cluster)
80 temp_objs[cluster_size++] = objs[i];
81 if (cluster_size <= 1)
85 kmeans(temp_objs, temp_clusters, cluster_size, temp_centers, temp_radii, 2, 0);
89 for (uint32_t i = 0; i < n; i++)
90 if (clusters[i] == cluster)
91 if (temp_clusters[d++])
94 centers[cluster] = temp_centers[0];
95 centers[new_k] = temp_centers[1];
96 radii[cluster] = temp_radii[0];
97 radii[new_k] = temp_radii[1];
113 memset(radii, 0,
sizeof(
double) * k);
115 for (uint32_t i = 0; i < n; i++)
121 uint32_t curr_cluster = 0;
124 for (uint32_t cluster = 1; cluster < k; cluster++)
130 curr_cluster = cluster;
135 if (clusters[i] != curr_cluster)
138 clusters[i] = curr_cluster;
141 if (radii[curr_cluster] < curr_distance)
142 radii[curr_cluster] = curr_distance;
151 memset(centers, 0,
sizeof(
POINT4D) * k);
153 for (uint32_t i = 0; i < n; i++)
155 uint32_t cluster = clusters[i];
156 centers[cluster].
x += objs[i].
x * objs[i].
m;
157 centers[cluster].
y += objs[i].
y * objs[i].
m;
158 centers[cluster].
z += objs[i].
z * objs[i].
m;
159 centers[cluster].
m += objs[i].
m;
162 for (uint32_t i = 0; i < k; i++)
166 centers[i].
x /= centers[i].
m;
167 centers[i].
y /= centers[i].
m;
168 centers[i].
z /= centers[i].
m;
178 uint32_t p1 = 0, p2 = 0;
179 uint32_t duplicate_count = 1;
186 centers[0] = objs[0];
191 for (uint32_t i = 1; i < n; i++)
196 if ((dst_p1 > max_dst) || (dst_p2 > max_dst))
209 if ((dst_p1 == 0) || (dst_p2 == 0))
212 if (duplicate_count > 1)
214 "%s: there are at least %u duplicate inputs, number of output clusters may be less than you requested",
219 assert(p1 != p2 && max_dst >= 0);
222 centers[0] = objs[p1];
223 centers[1] = objs[p2];
228 distances =
lwalloc(
sizeof(
double) * n);
231 for (uint32_t j = 0; j < n; j++)
237 for (uint32_t i = 2; i < k; i++)
239 uint32_t candidate_center = 0;
240 double max_distance = -DBL_MAX;
243 for (uint32_t j = 0; j < n; j++)
246 if (distances[j] < 0)
251 if (current_distance < distances[j])
252 distances[j] = current_distance;
255 if (distances[j] > max_distance)
257 candidate_center = j;
258 max_distance = distances[j];
263 assert(max_distance >= 0);
266 distances[candidate_center] = -1;
269 centers[i] = objs[candidate_center];
285 uint32_t cur_k = min_k;
289 update_r(objs, clusters, n, centers, radii, cur_k);
297 converged =
update_r(objs, clusters, n, centers, radii, cur_k);
302 if (!converged || !max_radius)
306 uint32_t new_k =
improve_structure(objs, clusters, n, centers, radii, cur_k, max_radius);
323 uint32_t num_non_empty = 0;
327 assert(max_radius >= 0);
333 "%s: number of geometries is less than the number of clusters requested, not all clusters will get data",
342 uint8_t *geom_valid =
lwalloc(
sizeof(uint8_t) * n);
343 memset(geom_valid, 0,
sizeof(uint8_t) * n);
346 int *clusters =
lwalloc(
sizeof(
int) * n);
347 for (uint32_t i = 0; i < n; i++)
352 memset(centers, 0,
sizeof(
POINT4D) * n);
355 double *radii =
lwalloc(
sizeof(
double) * n);
356 memset(radii, 0,
sizeof(
double) * n);
359 for (uint32_t i = 0; i < n; i++)
361 const LWGEOM *geom = geoms[i];
380 lwerror(
"%s has an input point geometry with weight in M less or equal to 0",
410 objs_dense[num_non_empty++] = out;
413 if (num_non_empty < k)
416 "%s: number of non-empty geometries (%d) is less than the number of clusters (%d) requested, not all clusters will get data",
425 if (num_non_empty > 0)
427 uint32_t *clusters_dense =
lwalloc(
sizeof(uint32_t) * num_non_empty);
428 memset(clusters_dense, 0,
sizeof(uint32_t) * num_non_empty);
429 uint32_t output_cluster_count =
kmeans(objs_dense, clusters_dense, num_non_empty, centers, radii, k, max_radius);
432 for (uint32_t i = 0; i < n; i++)
434 clusters[i] = (int)clusters_dense[d++];
436 converged = output_cluster_count > 0;
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
double lwpoint_get_m(const LWPOINT *point)
void lwgeom_free(LWGEOM *geom)
double lwpoint_get_x(const LWPOINT *point)
int lwgeom_has_z(const LWGEOM *geom)
Return LW_TRUE if geometry has Z ordinates.
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
const GBOX * lwgeom_get_bbox(const LWGEOM *lwgeom)
Get a non-empty geometry bounding box, computing and caching it if not already there.
void * lwalloc(size_t size)
double lwpoint_get_z(const LWPOINT *point)
#define LW_TRUE
Return types for functions with status returns.
int lwgeom_has_m(const LWGEOM *geom)
Return LW_TRUE if geometry has M ordinates.
double lwpoint_get_y(const LWPOINT *point)
#define LW_ON_INTERRUPT(x)
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
static uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
static int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members)
static LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
#define KMEANS_MAX_ITERATIONS
static uint32_t kmeans(POINT4D *objs, uint32_t *clusters, uint32_t n, POINT4D *centers, double *radii, uint32_t min_k, double max_radius)
static void kmeans_init(POINT4D *objs, uint32_t n, POINT4D *centers, uint32_t k)
static uint8_t update_r(POINT4D *objs, uint32_t *clusters, uint32_t n, POINT4D *centers, double *radii, uint32_t k)
int * lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k, double max_radius)
Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster ...
static void update_means(POINT4D *objs, uint32_t *clusters, uint32_t n, POINT4D *centers, uint32_t k)
#define KMEANS_NULL_CLUSTER
static double distance3d_sqr_pt4d_pt4d(const POINT4D *p1, const POINT4D *p2)
static uint32_t improve_structure(POINT4D *objs, uint32_t *clusters, uint32_t n, POINT4D *centers, double *radii, uint32_t k, double max_radius)
static double distance(double x1, double y1, double x2, double y2)
Datum centroid(PG_FUNCTION_ARGS)