PostGIS  2.4.9dev-r@@SVN_REVISION@@
lwgeom_window.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * PostGIS is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * PostGIS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18  *
19  **********************************************************************
20  *
21  * Copyright 2016 Paul Ramsey <pramsey@cleverelephant.ca>
22  * Copyright 2016 Daniel Baston <dbaston@gmail.com>
23  *
24  **********************************************************************/
25 
26 #include "../postgis_config.h"
27 
28 /* PostgreSQL */
29 #include "postgres.h"
30 #include "funcapi.h"
31 #include "windowapi.h"
32 
33 /* PostGIS */
34 #include "liblwgeom.h"
35 #include "lwunionfind.h" /* TODO move into liblwgeom.h ? */
36 #include "lwgeom_geos.h"
37 #include "lwgeom_log.h"
38 #include "lwgeom_pg.h"
39 
40 extern Datum ST_ClusterDBSCAN(PG_FUNCTION_ARGS);
41 extern Datum ST_ClusterKMeans(PG_FUNCTION_ARGS);
42 
43 typedef struct {
44  bool isdone;
45  bool isnull;
46  int result[1];
47  /* variable length */
49 
50 typedef struct
51 {
53  bool is_null; /* NULL may result from a NULL geometry input, or it may be used by
54  algorithms such as DBSCAN that do not assign all inputs to a
55  cluster. */
57 
58 typedef struct
59 {
60  char is_error;
61  dbscan_cluster_result cluster_assignments[1];
63 
64 static LWGEOM*
65 read_lwgeom_from_partition(WindowObject win_obj, uint32_t i, bool* is_null)
66 {
67  GSERIALIZED* g;
68  Datum arg = WinGetFuncArgInPartition(win_obj, 0, i, WINDOW_SEEK_HEAD, false, is_null, NULL);
69 
70  if (*is_null) {
71  /* So that the indexes in our clustering input array can match our partition positions,
72  * toss an empty point into the clustering inputs, as a pass-through.
73  * NOTE: this will cause gaps in the output cluster id sequence.
74  * */
76  }
77 
78  g = (GSERIALIZED*) PG_DETOAST_DATUM_COPY(arg);
79  return lwgeom_from_gserialized(g);
80 }
81 
83 Datum ST_ClusterDBSCAN(PG_FUNCTION_ARGS)
84 {
85  WindowObject win_obj = PG_WINDOW_OBJECT();
86  uint32_t row = WinGetCurrentPosition(win_obj);
87  uint32_t ngeoms = WinGetPartitionRowCount(win_obj);
88  dbscan_context* context = WinGetPartitionLocalMemory(win_obj, sizeof(dbscan_context) + ngeoms * sizeof(dbscan_cluster_result));
89 
90  if (row == 0) /* beginning of the partition; do all of the work now */
91  {
92  uint32_t i;
93  uint32_t* result_ids;
94  LWGEOM** geoms;
95  char* is_in_cluster = NULL;
96  UNIONFIND* uf;
97  bool tolerance_is_null;
98  bool minpoints_is_null;
99  Datum tolerance_datum = WinGetFuncArgCurrent(win_obj, 1, &tolerance_is_null);
100  Datum minpoints_datum = WinGetFuncArgCurrent(win_obj, 2, &minpoints_is_null);
101  double tolerance = DatumGetFloat8(tolerance_datum);
102  int minpoints = DatumGetInt32(minpoints_datum);
103 
104  context->is_error = LW_TRUE; /* until proven otherwise */
105 
106  /* Validate input parameters */
107  if (tolerance_is_null || tolerance < 0)
108  {
109  lwpgerror("Tolerance must be a positive number", tolerance);
110  PG_RETURN_NULL();
111  }
112  if (minpoints_is_null || minpoints < 0)
113  {
114  lwpgerror("Minpoints must be a positive number", minpoints);
115  }
116 
117  initGEOS(lwnotice, lwgeom_geos_error);
118  geoms = lwalloc(ngeoms * sizeof(LWGEOM*));
119  uf = UF_create(ngeoms);
120  for (i = 0; i < ngeoms; i++)
121  {
122  geoms[i] = read_lwgeom_from_partition(win_obj, i, &(context->cluster_assignments[i].is_null));
123 
124  if (!geoms[i]) {
125  /* TODO release memory ? */
126  lwpgerror("Error reading geometry.");
127  PG_RETURN_NULL();
128  }
129  }
130 
131  if (union_dbscan(geoms, ngeoms, uf, tolerance, minpoints, minpoints > 1 ? &is_in_cluster : NULL) == LW_SUCCESS)
132  context->is_error = LW_FALSE;
133 
134  for (i = 0; i < ngeoms; i++)
135  {
136  lwgeom_free(geoms[i]);
137  }
138  lwfree(geoms);
139 
140  if (context->is_error)
141  {
142  UF_destroy(uf);
143  if (is_in_cluster)
144  lwfree(is_in_cluster);
145  lwpgerror("Error during clustering");
146  PG_RETURN_NULL();
147  }
148 
149  result_ids = UF_get_collapsed_cluster_ids(uf, is_in_cluster);
150  for (i = 0; i < ngeoms; i++)
151  {
152  if (minpoints > 1 && !is_in_cluster[i])
153  {
154  context->cluster_assignments[i].is_null = LW_TRUE;
155  }
156  else
157  {
158  context->cluster_assignments[i].cluster_id = result_ids[i];
159  }
160  }
161 
162  lwfree(result_ids);
163  UF_destroy(uf);
164  }
165 
166  if (context->cluster_assignments[row].is_null)
167  PG_RETURN_NULL();
168 
169  PG_RETURN_INT32(context->cluster_assignments[row].cluster_id);
170 }
171 
173 Datum ST_ClusterKMeans(PG_FUNCTION_ARGS)
174 {
175  WindowObject winobj = PG_WINDOW_OBJECT();
176  kmeans_context *context;
177  int64 curpos, rowcount;
178 
179  rowcount = WinGetPartitionRowCount(winobj);
180  context = (kmeans_context *)
181  WinGetPartitionLocalMemory(winobj,
182  sizeof(kmeans_context) + sizeof(int) * rowcount);
183 
184  if (!context->isdone)
185  {
186  int i, k, N;
187  bool isnull, isout;
188  LWGEOM **geoms;
189  int *r;
190 
191  /* What is K? If it's NULL or invalid, we can't procede */
192  k = DatumGetInt32(WinGetFuncArgCurrent(winobj, 1, &isnull));
193  if (isnull || k <= 0)
194  {
195  context->isdone = true;
196  context->isnull = true;
197  PG_RETURN_NULL();
198  }
199 
200  /* We also need a non-zero N */
201  N = (int) WinGetPartitionRowCount(winobj);
202  if (N <= 0)
203  {
204  context->isdone = true;
205  context->isnull = true;
206  PG_RETURN_NULL();
207  }
208 
209  /* Error out if N < K */
210  if (N<k)
211  {
212  lwpgerror("K (%d) must be smaller than the number of rows in the group (%d)", k, N);
213  }
214 
215  /* Read all the geometries from the partition window into a list */
216  geoms = palloc(sizeof(LWGEOM*) * N);
217  for (i = 0; i < N; i++)
218  {
219  GSERIALIZED *g;
220  Datum arg = WinGetFuncArgInPartition(winobj, 0, i,
221  WINDOW_SEEK_HEAD, false, &isnull, &isout);
222 
223  /* Null geometries are entered as NULL pointers */
224  if (isnull)
225  {
226  geoms[i] = NULL;
227  continue;
228  }
229 
230  g = (GSERIALIZED*)PG_DETOAST_DATUM_COPY(arg);
231  geoms[i] = lwgeom_from_gserialized(g);
232  }
233 
234  /* Calculate k-means on the list! */
235  r = lwgeom_cluster_2d_kmeans((const LWGEOM **)geoms, N, k);
236 
237  /* Clean up */
238  for (i = 0; i < N; i++)
239  if (geoms[i])
240  lwgeom_free(geoms[i]);
241 
242  pfree(geoms);
243 
244  if (!r)
245  {
246  context->isdone = true;
247  context->isnull = true;
248  PG_RETURN_NULL();
249  }
250 
251  /* Safe the result */
252  memcpy(context->result, r, sizeof(int) * N);
253  pfree(r);
254  context->isdone = true;
255  }
256 
257  if (context->isnull)
258  PG_RETURN_NULL();
259 
260  curpos = WinGetCurrentPosition(winobj);
261  PG_RETURN_INT32(context->result[curpos]);
262 }
dbscan_cluster_result cluster_assignments[1]
Definition: lwgeom_window.c:61
void UF_destroy(UNIONFIND *uf)
Definition: lwunionfind.c:53
char * r
Definition: cu_in_wkt.c:24
void lwnotice(const char *fmt,...)
Write a notice out to the notice handler.
Definition: lwutil.c:177
void lwfree(void *mem)
Definition: lwutil.c:244
LWGEOM * lwgeom_from_gserialized(const GSERIALIZED *g)
Allocate a new LWGEOM from a GSERIALIZED.
static LWGEOM * read_lwgeom_from_partition(WindowObject win_obj, uint32_t i, bool *is_null)
Definition: lwgeom_window.c:65
void lwgeom_free(LWGEOM *geom)
Definition: lwgeom.c:1099
#define LW_SUCCESS
Definition: liblwgeom.h:80
LWPOINT * lwpoint_construct_empty(int srid, char hasz, char hasm)
Definition: lwpoint.c:151
int union_dbscan(LWGEOM **geoms, uint32_t num_geoms, UNIONFIND *uf, double eps, uint32_t min_points, char **is_in_cluster_ret)
unsigned int uint32_t
Definition: uthash.h:78
PG_FUNCTION_INFO_V1(ST_ClusterDBSCAN)
void lwgeom_geos_error(const char *fmt,...)
#define LW_FALSE
Definition: liblwgeom.h:77
#define LW_TRUE
Return types for functions with status returns.
Definition: liblwgeom.h:76
int * lwgeom_cluster_2d_kmeans(const LWGEOM **geoms, int ngeoms, int k)
Take a list of LWGEOMs and a number of clusters and return an integer array indicating which cluster ...
Definition: lwkmeans.c:77
UNIONFIND * UF_create(uint32_t N)
Definition: lwunionfind.c:34
Datum ST_ClusterDBSCAN(PG_FUNCTION_ARGS)
Definition: lwgeom_window.c:83
LWGEOM * lwpoint_as_lwgeom(const LWPOINT *obj)
Definition: lwgeom.c:303
void * lwalloc(size_t size)
Definition: lwutil.c:229
Datum ST_ClusterKMeans(PG_FUNCTION_ARGS)
This library is the generic geometry handling section of PostGIS.
uint32_t * UF_get_collapsed_cluster_ids(UNIONFIND *uf, const char *is_in_cluster)
Definition: lwunionfind.c:145