PostGIS  2.4.9dev-r@@SVN_REVISION@@
kmeans.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2 *
3 * kmeans.h
4 * Generic k-means implementation
5 *
6 * Copyright (c) 2016, Paul Ramsey <pramsey@cleverelephant.ca>
7 *
8 *------------------------------------------------------------------------*/
9 
10 
11 #include <stdlib.h>
12 #include "liblwgeom_internal.h"
13 
14 /*
15 * Simple k-means implementation for arbitrary data structures
16 *
17 * Since k-means partitions based on inter-object "distance" the same
18 * machinery can be used to support any object type that can calculate a
19 * "distance" between pairs.
20 *
21 * To use the k-means infrastructure, just fill out the kmeans_config
22 * structure and invoke the kmeans() function.
23 */
24 
25 /*
26 * Threaded calculation is available using pthreads, which practically
27 * means UNIX platforms only, unless you're building with a posix
28 * compatible environment.
29 *
30 * #define KMEANS_THREADED
31 */
32 
33 /*
34 * When clustering lists with NULL elements, they will get this as
35 * their cluster number. (All the other clusters will be non-negative)
36 */
37 #define KMEANS_NULL_CLUSTER -1
38 
39 /*
40 * If the algorithm doesn't converge within this number of iterations,
41 * it will return with a failure error code.
42 */
43 #define KMEANS_MAX_ITERATIONS 1000
44 
45 /*
46 * The code doesn't try to figure out how many threads to use, so
47 * best to set this to the number of cores you expect to have
48 * available. The threshold is the the value of k*n at which to
49 * move to multi-threading.
50 */
51 #ifdef KMEANS_THREADED
52 #define KMEANS_THR_MAX 4
53 #define KMEANS_THR_THRESHOLD 250000
54 #endif
55 
56 #define kmeans_malloc(size) lwalloc(size)
57 #define kmeans_free(ptr) lwfree(ptr)
58 
59 typedef void * Pointer;
60 
61 typedef enum {
66 
67 /*
68 * Prototype for the distance calculating function
69 */
70 typedef double (*kmeans_distance_method) (const Pointer a, const Pointer b);
71 
72 /*
73 * Prototype for the centroid calculating function
74 * @param objs the list of all objects in the calculation
75 * @param clusters the list of cluster numbers for each object
76 * @param num_objs the number of objects/cluster numbers in the previous arrays
77 * @param cluster the cluster number we are actually generating a centroid for here
78 * @param centroid the object to write the centroid result into (already allocated)
79 */
80 typedef void (*kmeans_centroid_method) (const Pointer * objs, const int * clusters, size_t num_objs, int cluster, Pointer centroid);
81 
82 typedef struct kmeans_config
83 {
84  /* Function returns the "distance" between any pair of objects */
86 
87  /* Function returns the "centroid" of a collection of objects */
89 
90  /* An array of objects to be analyzed. User allocates this array */
91  /* and is responsible for freeing it. */
92  /* For objects that are not capable of participating in the distance */
93  /* calculations, but for which you still want included in the process */
94  /* (for examples, database nulls, or geometry empties) use a NULL */
95  /* value in this list. All NULL values will be returned in the */
96  /* KMEANS_NULL_CLUSTER. */
98 
99  /* Number of objects in the preceding array */
100  size_t num_objs;
101 
102  /* An array of inital centers for the algorithm */
103  /* Can be randomly assigned, or using proportions, */
104  /* unfortunately the algorithm is sensitive to starting */
105  /* points, so using a "better" set of starting points */
106  /* might be wise. User allocates and is responsible for freeing. */
108 
109  /* Number of means we are calculating, length of preceding array */
110  unsigned int k;
111 
112  /* Maximum number of times to iterate the algorithm, or 0 for */
113  /* library default */
114  unsigned int max_iterations;
115 
116  /* Iteration counter */
117  unsigned int total_iterations;
118 
119  /* Array to fill in with cluster numbers. User allocates and frees. */
120  int * clusters;
121 
122 } kmeans_config;
123 
124 /* This is where the magic happens. */
126 
int * clusters
Definition: kmeans.h:120
double(* kmeans_distance_method)(const Pointer a, const Pointer b)
Definition: kmeans.h:70
void * Pointer
Definition: kmeans.h:59
kmeans_result
Definition: kmeans.h:61
kmeans_result kmeans(kmeans_config *config)
Definition: kmeans.c:249
Datum centroid(PG_FUNCTION_ARGS)
Pointer * centers
Definition: kmeans.h:107
unsigned int total_iterations
Definition: kmeans.h:117
struct kmeans_config kmeans_config
unsigned int max_iterations
Definition: kmeans.h:114
size_t num_objs
Definition: kmeans.h:100
unsigned int k
Definition: kmeans.h:110
void(* kmeans_centroid_method)(const Pointer *objs, const int *clusters, size_t num_objs, int cluster, Pointer centroid)
Definition: kmeans.h:80
Pointer * objs
Definition: kmeans.h:97
kmeans_distance_method distance_method
Definition: kmeans.h:85
kmeans_centroid_method centroid_method
Definition: kmeans.h:88