PostGIS
2.4.9dev-r@@SVN_REVISION@@
kmeans.h
Go to the documentation of this file.
1
/*-------------------------------------------------------------------------
2
*
3
* kmeans.h
4
* Generic k-means implementation
5
*
6
* Copyright (c) 2016, Paul Ramsey <pramsey@cleverelephant.ca>
7
*
8
*------------------------------------------------------------------------*/
9
10
11
#include <stdlib.h>
12
#include "
liblwgeom_internal.h
"
13
14
/*
15
* Simple k-means implementation for arbitrary data structures
16
*
17
* Since k-means partitions based on inter-object "distance" the same
18
* machinery can be used to support any object type that can calculate a
19
* "distance" between pairs.
20
*
21
* To use the k-means infrastructure, just fill out the kmeans_config
22
* structure and invoke the kmeans() function.
23
*/
24
25
/*
26
* Threaded calculation is available using pthreads, which practically
27
* means UNIX platforms only, unless you're building with a posix
28
* compatible environment.
29
*
30
* #define KMEANS_THREADED
31
*/
32
33
/*
34
* When clustering lists with NULL elements, they will get this as
35
* their cluster number. (All the other clusters will be non-negative)
36
*/
37
#define KMEANS_NULL_CLUSTER -1
38
39
/*
40
* If the algorithm doesn't converge within this number of iterations,
41
* it will return with a failure error code.
42
*/
43
#define KMEANS_MAX_ITERATIONS 1000
44
45
/*
46
* The code doesn't try to figure out how many threads to use, so
47
* best to set this to the number of cores you expect to have
48
* available. The threshold is the the value of k*n at which to
49
* move to multi-threading.
50
*/
51
#ifdef KMEANS_THREADED
52
#define KMEANS_THR_MAX 4
53
#define KMEANS_THR_THRESHOLD 250000
54
#endif
55
56
#define kmeans_malloc(size) lwalloc(size)
57
#define kmeans_free(ptr) lwfree(ptr)
58
59
typedef
void
*
Pointer
;
60
61
typedef
enum
{
62
KMEANS_OK
,
63
KMEANS_EXCEEDED_MAX_ITERATIONS
,
64
KMEANS_ERROR
65
}
kmeans_result
;
66
67
/*
68
* Prototype for the distance calculating function
69
*/
70
typedef
double (*
kmeans_distance_method
) (
const
Pointer
a,
const
Pointer
b);
71
72
/*
73
* Prototype for the centroid calculating function
74
* @param objs the list of all objects in the calculation
75
* @param clusters the list of cluster numbers for each object
76
* @param num_objs the number of objects/cluster numbers in the previous arrays
77
* @param cluster the cluster number we are actually generating a centroid for here
78
* @param centroid the object to write the centroid result into (already allocated)
79
*/
80
typedef
void (*
kmeans_centroid_method
) (
const
Pointer
* objs,
const
int
* clusters,
size_t
num_objs,
int
cluster,
Pointer
centroid
);
81
82
typedef
struct
kmeans_config
83
{
84
/* Function returns the "distance" between any pair of objects */
85
kmeans_distance_method
distance_method
;
86
87
/* Function returns the "centroid" of a collection of objects */
88
kmeans_centroid_method
centroid_method
;
89
90
/* An array of objects to be analyzed. User allocates this array */
91
/* and is responsible for freeing it. */
92
/* For objects that are not capable of participating in the distance */
93
/* calculations, but for which you still want included in the process */
94
/* (for examples, database nulls, or geometry empties) use a NULL */
95
/* value in this list. All NULL values will be returned in the */
96
/* KMEANS_NULL_CLUSTER. */
97
Pointer
*
objs
;
98
99
/* Number of objects in the preceding array */
100
size_t
num_objs
;
101
102
/* An array of inital centers for the algorithm */
103
/* Can be randomly assigned, or using proportions, */
104
/* unfortunately the algorithm is sensitive to starting */
105
/* points, so using a "better" set of starting points */
106
/* might be wise. User allocates and is responsible for freeing. */
107
Pointer
*
centers
;
108
109
/* Number of means we are calculating, length of preceding array */
110
unsigned
int
k
;
111
112
/* Maximum number of times to iterate the algorithm, or 0 for */
113
/* library default */
114
unsigned
int
max_iterations
;
115
116
/* Iteration counter */
117
unsigned
int
total_iterations
;
118
119
/* Array to fill in with cluster numbers. User allocates and frees. */
120
int
*
clusters
;
121
122
}
kmeans_config
;
123
124
/* This is where the magic happens. */
125
kmeans_result
kmeans
(
kmeans_config
*config);
126
kmeans_config::clusters
int * clusters
Definition:
kmeans.h:120
kmeans_distance_method
double(* kmeans_distance_method)(const Pointer a, const Pointer b)
Definition:
kmeans.h:70
Pointer
void * Pointer
Definition:
kmeans.h:59
kmeans_result
kmeans_result
Definition:
kmeans.h:61
kmeans
kmeans_result kmeans(kmeans_config *config)
Definition:
kmeans.c:249
centroid
Datum centroid(PG_FUNCTION_ARGS)
Definition:
postgis/lwgeom_geos.c:1348
liblwgeom_internal.h
kmeans_config
Definition:
kmeans.h:82
kmeans_config::centers
Pointer * centers
Definition:
kmeans.h:107
KMEANS_EXCEEDED_MAX_ITERATIONS
Definition:
kmeans.h:63
kmeans_config::total_iterations
unsigned int total_iterations
Definition:
kmeans.h:117
kmeans_config
struct kmeans_config kmeans_config
kmeans_config::max_iterations
unsigned int max_iterations
Definition:
kmeans.h:114
kmeans_config::num_objs
size_t num_objs
Definition:
kmeans.h:100
KMEANS_ERROR
Definition:
kmeans.h:64
kmeans_config::k
unsigned int k
Definition:
kmeans.h:110
KMEANS_OK
Definition:
kmeans.h:62
kmeans_centroid_method
void(* kmeans_centroid_method)(const Pointer *objs, const int *clusters, size_t num_objs, int cluster, Pointer centroid)
Definition:
kmeans.h:80
kmeans_config::objs
Pointer * objs
Definition:
kmeans.h:97
kmeans_config::distance_method
kmeans_distance_method distance_method
Definition:
kmeans.h:85
kmeans_config::centroid_method
kmeans_centroid_method centroid_method
Definition:
kmeans.h:88
liblwgeom
kmeans.h
Generated by
1.8.13