14 #define KMEANS_NULL_CLUSTER -1
20 #define KMEANS_MAX_ITERATIONS 1000
25 double hside = p2->
x - p1->
x;
26 double vside = p2->
y - p1->
y;
27 double zside = p2->
z - p1->
z;
29 return hside * hside + vside * vside + zside * zside;
37 for (uint32_t i = 0; i < n; i++)
46 for (
int cluster = 1; cluster < (int)k; cluster++)
52 curr_cluster = cluster;
57 if (clusters[i] != curr_cluster)
60 clusters[i] = curr_cluster;
69 memset(centers, 0,
sizeof(
POINT4D) * k);
70 for (uint32_t i = 0; i < n; i++)
72 int cluster = clusters[i];
73 centers[cluster].
x += objs[i].
x * objs[i].
m;
74 centers[cluster].
y += objs[i].
y * objs[i].
m;
75 centers[cluster].
z += objs[i].
z * objs[i].
m;
76 centers[cluster].
m += objs[i].
m;
78 for (uint32_t i = 0; i < k; i++)
82 centers[i].
x /= centers[i].
m;
83 centers[i].
y /= centers[i].
m;
84 centers[i].
z /= centers[i].
m;
97 converged =
update_r(objs, clusters, n, centers, k);
111 uint32_t p1 = 0, p2 = 0;
113 uint32_t duplicate_count = 1;
114 double max_dst = -1, current_distance;
115 double dst_p1, dst_p2;
121 for (i = 1; i < n; i++)
126 if ((dst_p1 > max_dst) || (dst_p2 > max_dst))
139 if ((dst_p1 == 0) || (dst_p2 == 0))
142 if (duplicate_count > 1)
144 "%s: there are at least %u duplicate inputs, number of output clusters may be less than you requested",
149 assert(p1 != p2 && max_dst >= 0);
152 centers[0] = objs[p1];
153 centers[1] = objs[p2];
158 distances =
lwalloc(
sizeof(
double) * n);
161 for (j = 0; j < n; j++)
167 for (i = 2; i < k; i++)
169 uint32_t candidate_center = 0;
170 double max_distance = -DBL_MAX;
173 for (j = 0; j < n; j++)
176 if (distances[j] < 0)
181 if (current_distance < distances[j])
182 distances[j] = current_distance;
185 if (distances[j] > max_distance)
187 candidate_center = j;
188 max_distance = distances[j];
193 assert(max_distance >= 0);
196 distances[candidate_center] = -1;
199 centers[i] = objs[candidate_center];
208 uint32_t num_non_empty = 0;
218 "%s: number of geometries is less than the number of clusters requested, not all clusters will get data",
227 uint8_t *geom_valid =
lwalloc(
sizeof(uint8_t) * n);
228 memset(geom_valid, 0,
sizeof(uint8_t) * n);
231 int *clusters =
lwalloc(
sizeof(
int) * n);
232 memset(clusters, 0,
sizeof(
int) * n);
236 memset(centers, 0,
sizeof(
POINT4D) * k);
239 for (uint32_t i = 0; i < n; i++)
241 const LWGEOM *geom = geoms[i];
260 lwerror(
"%s has an input point geometry with weight in M less or equal to 0",
290 objs[num_non_empty++] = out;
293 if (num_non_empty < k)
296 "%s: number of non-empty geometries (%d) is less than the number of clusters (%d) requested, not all clusters will get data",
305 int *clusters_dense =
lwalloc(
sizeof(
int) * num_non_empty);
306 memset(clusters_dense, 0,
sizeof(
int) * num_non_empty);
309 converged =
kmeans(objs, clusters_dense, num_non_empty, centers, k);
314 for (uint32_t i = 0; i < n; i++)
316 clusters[i] = clusters_dense[d++];
325 for (uint32_t i = 0; i < n; i++)
332 for (uint32_t i = 0; i < n; i++)
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
double lwpoint_get_m(const LWPOINT *point)
void lwgeom_free(LWGEOM *geom)
double lwpoint_get_x(const LWPOINT *point)
int lwgeom_has_z(const LWGEOM *geom)
Return LW_TRUE if geometry has Z ordinates.
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
const GBOX * lwgeom_get_bbox(const LWGEOM *lwgeom)
Get a non-empty geometry bounding box, computing and caching it if not already there.
void * lwalloc(size_t size)
double lwpoint_get_z(const LWPOINT *point)
#define LW_TRUE
Return types for functions with status returns.
int lwgeom_has_m(const LWGEOM *geom)
Return LW_TRUE if geometry has M ordinates.
double lwpoint_get_y(const LWPOINT *point)
#define LW_ON_INTERRUPT(x)
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
static uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
static int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members)
static LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
static uint8_t kmeans(POINT4D *objs, int *clusters, uint32_t n, POINT4D *centers, uint32_t k)
#define KMEANS_MAX_ITERATIONS
static void kmeans_init(POINT4D *objs, uint32_t n, POINT4D *centers, uint32_t k)
static void update_means(POINT4D *objs, int *clusters, uint32_t n, POINT4D *centers, uint32_t k)
int * lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster ...
static uint8_t update_r(POINT4D *objs, int *clusters, uint32_t n, POINT4D *centers, uint32_t k)
#define KMEANS_NULL_CLUSTER
static double distance3d_sqr_pt4d_pt4d(const POINT4D *p1, const POINT4D *p2)
static double distance(double x1, double y1, double x2, double y2)
Datum centroid(PG_FUNCTION_ARGS)