14 #define KMEANS_NULL_CLUSTER -1 
   20 #define KMEANS_MAX_ITERATIONS 1000 
   33         double hside = p2->
x - p1->
x;
 
   34         double vside = p2->
y - p1->
y;
 
   35         double zside = p2->
z - p1->
z;
 
   37         return hside * hside + vside * vside + zside * zside;
 
   54         double max_radius_sq = max_radius * max_radius;
 
   57         uint32_t first_cluster_to_split = 0;
 
   58         for (; first_cluster_to_split < k; first_cluster_to_split++)
 
   59                 if (radii[first_cluster_to_split] > max_radius_sq)
 
   61         if (first_cluster_to_split == k)
 
   65         uint32_t *temp_clusters = 
lwalloc(
sizeof(uint32_t) * n);
 
   66         double *temp_radii = 
lwalloc(
sizeof(
double) * n);
 
   71         for (uint32_t cluster = first_cluster_to_split; cluster < k; cluster++)
 
   73                 if (radii[cluster] <= max_radius_sq)
 
   77                 uint32_t cluster_size = 0;
 
   78                 for (uint32_t i = 0; i < n; i++)
 
   79                         if (clusters[i] == cluster)
 
   80                                 temp_objs[cluster_size++] = objs[i];
 
   81                 if (cluster_size <= 1)
 
   85                 kmeans(temp_objs, temp_clusters, cluster_size, temp_centers, temp_radii, 2, 0);
 
   89                 for (uint32_t i = 0; i < n; i++)
 
   90                         if (clusters[i] == cluster)
 
   91                                 if (temp_clusters[d++])
 
   94                 centers[cluster] = temp_centers[0];
 
   95                 centers[new_k] = temp_centers[1];
 
   96                 radii[cluster] = temp_radii[0];
 
   97                 radii[new_k] = temp_radii[1];
 
  113                 memset(radii, 0, 
sizeof(
double) * k);
 
  115         for (uint32_t i = 0; i < n; i++)
 
  121                 uint32_t curr_cluster = 0;
 
  124                 for (uint32_t cluster = 1; cluster < k; cluster++)
 
  130                                 curr_cluster = cluster;
 
  135                 if (clusters[i] != curr_cluster)
 
  138                         clusters[i] = curr_cluster;
 
  141                         if (radii[curr_cluster] < curr_distance)
 
  142                                 radii[curr_cluster] = curr_distance;
 
  151         memset(centers, 0, 
sizeof(
POINT4D) * k);
 
  153         for (uint32_t i = 0; i < n; i++)
 
  155                 uint32_t cluster = clusters[i];
 
  156                 centers[cluster].
x += objs[i].
x * objs[i].
m;
 
  157                 centers[cluster].
y += objs[i].
y * objs[i].
m;
 
  158                 centers[cluster].
z += objs[i].
z * objs[i].
m;
 
  159                 centers[cluster].
m += objs[i].
m;
 
  162         for (uint32_t i = 0; i < k; i++)
 
  166                         centers[i].
x /= centers[i].
m;
 
  167                         centers[i].
y /= centers[i].
m;
 
  168                         centers[i].
z /= centers[i].
m;
 
  178         uint32_t p1 = 0, p2 = 0;
 
  179         uint32_t duplicate_count = 1; 
 
  186                 centers[0] = objs[0];
 
  191         for (uint32_t i = 1; i < n; i++)
 
  196                 if ((dst_p1 > max_dst) || (dst_p2 > max_dst))
 
  209                 if ((dst_p1 == 0) || (dst_p2 == 0))
 
  212         if (duplicate_count > 1)
 
  214                     "%s: there are at least %u duplicate inputs, number of output clusters may be less than you requested",
 
  219         assert(p1 != p2 && max_dst >= 0);
 
  222         centers[0] = objs[p1];
 
  223         centers[1] = objs[p2];
 
  228                 distances = 
lwalloc(
sizeof(
double) * n);
 
  231                 for (uint32_t j = 0; j < n; j++)
 
  237                 for (uint32_t i = 2; i < k; i++)
 
  239                         uint32_t candidate_center = 0;
 
  240                         double max_distance = -DBL_MAX;
 
  243                         for (uint32_t j = 0; j < n; j++)
 
  246                                 if (distances[j] < 0)
 
  251                                 if (current_distance < distances[j])
 
  252                                         distances[j] = current_distance;
 
  255                                 if (distances[j] > max_distance)
 
  257                                         candidate_center = j;
 
  258                                         max_distance = distances[j];
 
  263                         assert(max_distance >= 0);
 
  266                         distances[candidate_center] = -1;
 
  269                         centers[i] = objs[candidate_center];
 
  285         uint32_t cur_k = min_k;
 
  289         update_r(objs, clusters, n, centers, radii, cur_k);
 
  297                         converged = 
update_r(objs, clusters, n, centers, radii, cur_k);
 
  302                 if (!converged || !max_radius)
 
  306                 uint32_t new_k = 
improve_structure(objs, clusters, n, centers, radii, cur_k, max_radius);
 
  323         uint32_t num_non_empty = 0;
 
  327         assert(max_radius >= 0);
 
  333                     "%s: number of geometries is less than the number of clusters requested, not all clusters will get data",
 
  342         uint8_t *geom_valid = 
lwalloc(
sizeof(uint8_t) * n);
 
  343         memset(geom_valid, 0, 
sizeof(uint8_t) * n);
 
  346         int *clusters = 
lwalloc(
sizeof(
int) * n);
 
  347         for (uint32_t i = 0; i < n; i++)
 
  352         memset(centers, 0, 
sizeof(
POINT4D) * n);
 
  355         double *radii = 
lwalloc(
sizeof(
double) * n);
 
  356         memset(radii, 0, 
sizeof(
double) * n);
 
  359         for (uint32_t i = 0; i < n; i++)
 
  361                 const LWGEOM *geom = geoms[i];
 
  380                                         lwerror(
"%s has an input point geometry with weight in M less or equal to 0",
 
  410                 objs_dense[num_non_empty++] = out;
 
  413         if (num_non_empty < k)
 
  416                     "%s: number of non-empty geometries (%d) is less than the number of clusters (%d) requested, not all clusters will get data",
 
  425         if (num_non_empty > 0)
 
  427                 uint32_t *clusters_dense = 
lwalloc(
sizeof(uint32_t) * num_non_empty);
 
  428                 memset(clusters_dense, 0, 
sizeof(uint32_t) * num_non_empty);
 
  429                 uint32_t output_cluster_count = 
kmeans(objs_dense, clusters_dense, num_non_empty, centers, radii, k, max_radius);
 
  432                 for (uint32_t i = 0; i < n; i++)
 
  434                                 clusters[i] = (int)clusters_dense[d++];
 
  436                 converged = output_cluster_count > 0;
 
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
LWGEOM * lwgeom_centroid(const LWGEOM *geom)
double lwpoint_get_m(const LWPOINT *point)
void lwgeom_free(LWGEOM *geom)
double lwpoint_get_x(const LWPOINT *point)
int lwgeom_has_z(const LWGEOM *geom)
Return LW_TRUE if geometry has Z ordinates.
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
const GBOX * lwgeom_get_bbox(const LWGEOM *lwgeom)
Get a non-empty geometry bounding box, computing and caching it if not already there.
void * lwalloc(size_t size)
double lwpoint_get_z(const LWPOINT *point)
#define LW_TRUE
Return types for functions with status returns.
int lwgeom_has_m(const LWGEOM *geom)
Return LW_TRUE if geometry has M ordinates.
double lwpoint_get_y(const LWPOINT *point)
#define LW_ON_INTERRUPT(x)
void lwerror(const char *fmt,...)
Write a notice out to the error handler.
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
static uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
static int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members)
static LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
#define KMEANS_MAX_ITERATIONS
static uint32_t kmeans(POINT4D *objs, uint32_t *clusters, uint32_t n, POINT4D *centers, double *radii, uint32_t min_k, double max_radius)
static void kmeans_init(POINT4D *objs, uint32_t n, POINT4D *centers, uint32_t k)
static uint8_t update_r(POINT4D *objs, uint32_t *clusters, uint32_t n, POINT4D *centers, double *radii, uint32_t k)
int * lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k, double max_radius)
Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster ...
static void update_means(POINT4D *objs, uint32_t *clusters, uint32_t n, POINT4D *centers, uint32_t k)
#define KMEANS_NULL_CLUSTER
static double distance3d_sqr_pt4d_pt4d(const POINT4D *p1, const POINT4D *p2)
static uint32_t improve_structure(POINT4D *objs, uint32_t *clusters, uint32_t n, POINT4D *centers, double *radii, uint32_t k, double max_radius)
static double distance(double x1, double y1, double x2, double y2)
Datum centroid(PG_FUNCTION_ARGS)