PostGIS 3.7.0dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches
lwgeom_geos_cluster.c
Go to the documentation of this file.
1/**********************************************************************
2 *
3 * PostGIS - Spatial Types for PostgreSQL
4 * http://postgis.net
5 *
6 * PostGIS is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * PostGIS is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18 *
19 **********************************************************************
20 *
21 * Copyright 2015-2016 Daniel Baston <dbaston@gmail.com>
22 *
23 **********************************************************************/
24
25#include <string.h>
26#include "liblwgeom.h"
27#include "liblwgeom_internal.h"
28#include "lwgeom_log.h"
29#include "lwgeom_geos.h"
30#include "lwunionfind.h"
31
32static const int STRTREE_NODE_CAPACITY = 10;
33
34/* Utility struct used to accumulate items in GEOSSTRtree_query callback */
36{
40};
41
42/* Utility struct to keep GEOSSTRtree and associated structures to be freed after use */
43struct STRTree
44{
45 GEOSSTRtree* tree;
46 GEOSGeometry** envelopes;
47 uint32_t* geom_ids;
48 uint32_t num_geoms;
49};
50
51static struct STRTree make_strtree(void** geoms, uint32_t num_geoms, char is_lwgeom);
52static void destroy_strtree(struct STRTree * tree);
53static int combine_geometries(UNIONFIND* uf, void** geoms, uint32_t num_geoms, void*** clustersGeoms, uint32_t* num_clusters, char is_lwgeom);
54
55/* Make a minimal GEOSGeometry* whose Envelope covers the same 2D extent as
56 * the supplied GBOX. This is faster and uses less memory than building a
57 * five-point polygon with GBOX2GEOS.
58 */
59static GEOSGeometry*
61{
62 if (lwgeom_is_empty(g))
63 return GEOSGeom_createEmptyPolygon();
64
65 if (lwgeom_get_type(g) == POINTTYPE) {
66 const POINT2D* pt = getPoint2d_cp(lwgeom_as_lwpoint(g)->point, 0);
67 return make_geos_point(pt->x, pt->y);
68 } else {
69 const GBOX* box = lwgeom_get_bbox(g);
70 if (!box)
71 return NULL;
72
73 return make_geos_segment(box->xmin, box->ymin, box->xmax, box->ymax);
74 }
75}
76
79static struct STRTree
80make_strtree(void** geoms, uint32_t num_geoms, char is_lwgeom)
81{
82 struct STRTree tree;
83 tree.envelopes = 0;
84 tree.num_geoms = 0;
85 tree.geom_ids = 0;
86
87 tree.tree = GEOSSTRtree_create(STRTREE_NODE_CAPACITY);
88 if (tree.tree == NULL)
89 {
90 return tree;
91 }
92 tree.geom_ids = lwalloc(num_geoms * sizeof(uint32_t));
93 tree.num_geoms = num_geoms;
94
95 if (is_lwgeom)
96 {
97 uint32_t i;
98 tree.envelopes = lwalloc(num_geoms * sizeof(GEOSGeometry*));
99 for (i = 0; i < num_geoms; i++)
100 {
101 tree.geom_ids[i] = i;
102 tree.envelopes[i] = geos_envelope_surrogate(geoms[i]);
103 GEOSSTRtree_insert(tree.tree, tree.envelopes[i], &(tree.geom_ids[i]));
104 }
105 }
106 else
107 {
108 uint32_t i;
109 tree.envelopes = NULL;
110 for (i = 0; i < num_geoms; i++)
111 {
112 tree.geom_ids[i] = i;
113 GEOSSTRtree_insert(tree.tree, geoms[i], &(tree.geom_ids[i]));
114 }
115 }
116
117 return tree;
118}
119
121static void
123{
124 size_t i;
125 GEOSSTRtree_destroy(tree->tree);
126
127 if (tree->envelopes)
128 {
129 for (i = 0; i < tree->num_geoms; i++)
130 {
131 GEOSGeom_destroy(tree->envelopes[i]);
132 }
133 lwfree(tree->envelopes);
134 }
135 lwfree(tree->geom_ids);
136}
137
138static void
139query_accumulate(void* item, void* userdata)
140{
141 struct QueryContext *cxt = userdata;
142 if (!cxt->items_found)
143 {
144 cxt->items_found_size = 8;
145 cxt->items_found = lwalloc(cxt->items_found_size * sizeof(void*));
146 }
147
148 if (cxt->num_items_found >= cxt->items_found_size)
149 {
150 cxt->items_found_size = 2 * cxt->items_found_size;
151 cxt->items_found = lwrealloc(cxt->items_found, cxt->items_found_size * sizeof(void*));
152 }
153 cxt->items_found[cxt->num_items_found++] = item;
154}
155
156/* Identify intersecting geometries and mark them as being in the same set */
157int
158union_intersecting_pairs(GEOSGeometry** geoms, uint32_t num_geoms, UNIONFIND* uf)
159{
160 uint32_t p, i;
161 struct STRTree tree;
162 struct QueryContext cxt =
163 {
164 .items_found = NULL,
165 .num_items_found = 0,
166 .items_found_size = 0
167 };
168 int success = LW_SUCCESS;
169
170 if (num_geoms <= 1)
171 return LW_SUCCESS;
172
173 tree = make_strtree((void**) geoms, num_geoms, LW_FALSE);
174 if (tree.tree == NULL)
175 {
176 destroy_strtree(&tree);
177 return LW_FAILURE;
178 }
179
180 for (p = 0; p < num_geoms; p++)
181 {
182 const GEOSPreparedGeometry* prep = NULL;
183
184 if (!geoms[p] || GEOSisEmpty(geoms[p]))
185 continue;
186
187 cxt.num_items_found = 0;
188 GEOSSTRtree_query(tree.tree, geoms[p], &query_accumulate, &cxt);
189
190 for (i = 0; i < cxt.num_items_found; i++)
191 {
192 uint32_t q = *((uint32_t*) cxt.items_found[i]);
193
194 if (p != q && UF_find(uf, p) != UF_find(uf, q))
195 {
196 int geos_type = GEOSGeomTypeId(geoms[p]);
197 int geos_result;
198
199 /* Don't build prepared a geometry around a Point or MultiPoint -
200 * there are some problems in the implementation, and it's not clear
201 * there would be a performance benefit in any case. (See #3433)
202 */
203 if (geos_type != GEOS_POINT && geos_type != GEOS_MULTIPOINT)
204 {
205 /* Lazy initialize prepared geometry */
206 if (prep == NULL)
207 {
208 prep = GEOSPrepare(geoms[p]);
209 }
210 geos_result = GEOSPreparedIntersects(prep, geoms[q]);
211 }
212 else
213 {
214 geos_result = GEOSIntersects(geoms[p], geoms[q]);
215 }
216 if (geos_result > 1)
217 {
218 success = LW_FAILURE;
219 break;
220 }
221 else if (geos_result)
222 {
223 UF_union(uf, p, q);
224 }
225 }
226 }
227
228 if (prep)
229 GEOSPreparedGeom_destroy(prep);
230
231 if (!success)
232 break;
233 }
234
235 if (cxt.items_found)
236 lwfree(cxt.items_found);
237
238 destroy_strtree(&tree);
239 return success;
240}
241
245int
246cluster_intersecting(GEOSGeometry** geoms, uint32_t num_geoms, GEOSGeometry*** clusterGeoms, uint32_t* num_clusters)
247{
248 int cluster_success;
249 UNIONFIND* uf = UF_create(num_geoms);
250
251 if (union_intersecting_pairs(geoms, num_geoms, uf) == LW_FAILURE)
252 {
253 UF_destroy(uf);
254 return LW_FAILURE;
255 }
256
257 cluster_success = combine_geometries(uf, (void**) geoms, num_geoms, (void***) clusterGeoms, num_clusters, 0);
258 UF_destroy(uf);
259 return cluster_success;
260}
261
262static int
263dbscan_update_context(GEOSSTRtree* tree, struct QueryContext* cxt, LWGEOM** geoms, uint32_t p, double eps)
264{
265 cxt->num_items_found = 0;
266
267 GEOSGeometry* query_envelope;
268
270
271 if (geoms[p]->type == POINTTYPE)
272 {
273 const POINT2D* pt = getPoint2d_cp(lwgeom_as_lwpoint(geoms[p])->point, 0);
274 query_envelope = make_geos_segment( pt->x - eps, pt->y - eps, pt->x + eps, pt->y + eps );
275 } else {
276 const GBOX* box = lwgeom_get_bbox(geoms[p]);
277 query_envelope = make_geos_segment( box->xmin - eps, box->ymin - eps, box->xmax + eps, box->ymax + eps );
278 }
279
280 if (!query_envelope)
281 return LW_FAILURE;
282
283 GEOSSTRtree_query(tree, query_envelope, &query_accumulate, cxt);
284
285 GEOSGeom_destroy(query_envelope);
286
287 return LW_SUCCESS;
288}
289
290/* Union p's cluster with q's cluster, if q is not a border point of another cluster.
291 * Applicable to DBSCAN with minpoints > 1.
292 */
293static void
294union_if_available(UNIONFIND* uf, uint32_t p, uint32_t q, char* is_in_core, char* in_a_cluster)
295{
296 if (in_a_cluster[q])
297 {
298 /* Can we merge p's cluster with q's cluster? We can do this only
299 * if both p and q are considered _core_ points of their respective
300 * clusters.
301 */
302 if (is_in_core[q])
303 {
304 UF_union(uf, p, q);
305 }
306 }
307 else
308 {
309 UF_union(uf, p, q);
310 in_a_cluster[q] = LW_TRUE;
311 }
312}
313
314/* An optimized DBSCAN union for the case where min_points == 1.
315 * If min_points == 1, then we don't care how many neighbors we find; we can union clusters
316 * on the fly, as we go through the distance calculations. This potentially allows us
317 * to avoid some distance computations altogether.
318 */
319static int
320union_dbscan_minpoints_1(LWGEOM** geoms, uint32_t num_geoms, UNIONFIND* uf, double eps, char** in_a_cluster_ret)
321{
322 uint32_t p, i;
323 struct STRTree tree;
324 struct QueryContext cxt =
325 {
326 .items_found = NULL,
327 .num_items_found = 0,
328 .items_found_size = 0
329 };
330 int success = LW_SUCCESS;
331
332 if (in_a_cluster_ret)
333 {
334 char* in_a_cluster = lwalloc(num_geoms * sizeof(char));
335 for (i = 0; i < num_geoms; i++)
336 in_a_cluster[i] = LW_TRUE;
337 *in_a_cluster_ret = in_a_cluster;
338 }
339
340 if (num_geoms <= 1)
341 return LW_SUCCESS;
342
343 tree = make_strtree((void**) geoms, num_geoms, LW_TRUE);
344 if (tree.tree == NULL)
345 {
346 destroy_strtree(&tree);
347 return LW_FAILURE;
348 }
349
350 for (p = 0; p < num_geoms; p++)
351 {
352 int rv = LW_SUCCESS;
353 if (lwgeom_is_empty(geoms[p]))
354 continue;
355
356 rv = dbscan_update_context(tree.tree, &cxt, geoms, p, eps);
357 if (rv == LW_FAILURE)
358 {
359 destroy_strtree(&tree);
360 return LW_FAILURE;
361 }
362 for (i = 0; i < cxt.num_items_found; i++)
363 {
364 uint32_t q = *((uint32_t*) cxt.items_found[i]);
365
366 if (UF_find(uf, p) != UF_find(uf, q))
367 {
368 double mindist = lwgeom_mindistance2d_tolerance(geoms[p], geoms[q], eps);
369 if (mindist == FLT_MAX)
370 {
371 success = LW_FAILURE;
372 break;
373 }
374
375 if (mindist <= eps)
376 UF_union(uf, p, q);
377 }
378 }
379 }
380
381 if (cxt.items_found)
382 lwfree(cxt.items_found);
383
384 destroy_strtree(&tree);
385
386 return success;
387}
388
389static int
390union_dbscan_general(LWGEOM** geoms, uint32_t num_geoms, UNIONFIND* uf, double eps, uint32_t min_points, char** in_a_cluster_ret)
391{
392 uint32_t p, i;
393 struct STRTree tree;
394 struct QueryContext cxt =
395 {
396 .items_found = NULL,
397 .num_items_found = 0,
398 .items_found_size = 0
399 };
400 int success = LW_SUCCESS;
401 uint32_t* neighbors;
402 char* in_a_cluster;
403 char* is_in_core;
404
405 in_a_cluster = lwalloc(num_geoms * sizeof(char));
406 memset(in_a_cluster, 0, num_geoms * sizeof(char));
407
408 if (in_a_cluster_ret)
409 *in_a_cluster_ret = in_a_cluster;
410
411 /* Bail if we don't even have enough inputs to make a cluster. */
412 if (num_geoms < min_points)
413 {
414 if (!in_a_cluster_ret)
415 lwfree(in_a_cluster);
416 return LW_SUCCESS;
417 }
418
419 tree = make_strtree((void**) geoms, num_geoms, LW_TRUE);
420 if (tree.tree == NULL)
421 {
422 destroy_strtree(&tree);
423 return LW_FAILURE;
424 }
425
426 is_in_core = lwalloc(num_geoms * sizeof(char));
427 memset(is_in_core, 0, num_geoms * sizeof(char));
428 neighbors = lwalloc(min_points * sizeof(uint32_t));
429
430 for (p = 0; p < num_geoms; p++)
431 {
432 uint32_t num_neighbors = 0;
433 int rv;
434
435 if (lwgeom_is_empty(geoms[p]))
436 continue;
437
438 rv = dbscan_update_context(tree.tree, &cxt, geoms, p, eps);
439 if (rv == LW_FAILURE)
440 {
441 destroy_strtree(&tree);
442 return LW_FAILURE;
443 }
444
445 /* We didn't find enough points to do anything, even if they are all within eps. */
446 if (cxt.num_items_found < min_points)
447 continue;
448
449 for (i = 0; i < cxt.num_items_found; i++)
450 {
451 uint32_t q = *((uint32_t*) cxt.items_found[i]);
452
453 if (num_neighbors >= min_points)
454 {
455 /* If we've already identified p as a core point, and it's already
456 * in the same cluster in q, then there's nothing to learn by
457 * computing the distance.
458 */
459 if (UF_find(uf, p) == UF_find(uf, q))
460 continue;
461
462 /* Similarly, if q is already identified as a border point of another
463 * cluster, there's no point figuring out what the distance is.
464 */
465 if (in_a_cluster[q] && !is_in_core[q])
466 continue;
467 }
468
469 double mindist = lwgeom_mindistance2d_tolerance(geoms[p], geoms[q], eps);
470 if (mindist == FLT_MAX)
471 {
472 success = LW_FAILURE;
473 break;
474 }
475
476 if (mindist <= eps)
477 {
478 /* If we haven't hit min_points yet, we don't know if we can union p and q.
479 * Just set q aside for now.
480 */
481 if (num_neighbors < min_points)
482 {
483 neighbors[num_neighbors++] = q;
484
485 /* If we just hit min_points, we can now union all of the neighbor geometries
486 * we've been saving.
487 */
488 if (num_neighbors == min_points)
489 {
490 uint32_t j;
491 is_in_core[p] = LW_TRUE;
492 in_a_cluster[p] = LW_TRUE;
493 for (j = 0; j < num_neighbors; j++)
494 {
495 union_if_available(uf, p, neighbors[j], is_in_core, in_a_cluster);
496 }
497 }
498 }
499 else
500 {
501 /* If we're above min_points, no need to store our neighbors, just go ahead
502 * and union them now. This may allow us to cut out some distance
503 * computations.
504 */
505 union_if_available(uf, p, q, is_in_core, in_a_cluster);
506 }
507 }
508 }
509
510 if (!success)
511 break;
512 }
513
514 lwfree(neighbors);
515 lwfree(is_in_core);
516
517 /* Free in_a_cluster if we're not giving it to our caller */
518 if (!in_a_cluster_ret)
519 lwfree(in_a_cluster);
520
521 if (cxt.items_found)
522 lwfree(cxt.items_found);
523
524 destroy_strtree(&tree);
525 return success;
526}
527
528int union_dbscan(LWGEOM** geoms, uint32_t num_geoms, UNIONFIND* uf, double eps, uint32_t min_points, char** in_a_cluster_ret)
529{
530 if (min_points <= 1)
531 return union_dbscan_minpoints_1(geoms, num_geoms, uf, eps, in_a_cluster_ret);
532 else
533 return union_dbscan_general(geoms, num_geoms, uf, eps, min_points, in_a_cluster_ret);
534}
535
539int
540cluster_within_distance(LWGEOM** geoms, uint32_t num_geoms, double tolerance, LWGEOM*** clusterGeoms, uint32_t* num_clusters)
541{
542 int cluster_success;
543 UNIONFIND* uf = UF_create(num_geoms);
544
545 if (union_dbscan(geoms, num_geoms, uf, tolerance, 1, NULL) == LW_FAILURE)
546 {
547 UF_destroy(uf);
548 return LW_FAILURE;
549 }
550
551 cluster_success = combine_geometries(uf, (void**) geoms, num_geoms, (void***) clusterGeoms, num_clusters, 1);
552 UF_destroy(uf);
553 return cluster_success;
554}
555
559static int
560combine_geometries(UNIONFIND* uf, void** geoms, uint32_t num_geoms, void*** clusterGeoms, uint32_t* num_clusters, char is_lwgeom)
561{
562 size_t i, j, k;
563
564 /* Combine components of each cluster into their own GeometryCollection */
565 *num_clusters = uf->num_clusters;
566 *clusterGeoms = lwalloc(*num_clusters * sizeof(void*));
567
568 void** geoms_in_cluster = lwalloc(num_geoms * sizeof(void*));
569 uint32_t* ordered_components = UF_ordered_by_cluster(uf);
570 for (i = 0, j = 0, k = 0; i < num_geoms; i++)
571 {
572 geoms_in_cluster[j++] = geoms[ordered_components[i]];
573
574 /* Is this the last geometry in the component? */
575 if ((i == num_geoms - 1) ||
576 (UF_find(uf, ordered_components[i]) != UF_find(uf, ordered_components[i+1])))
577 {
578 if (k >= uf->num_clusters) {
579 /* Should not get here - it means that we have more clusters than uf->num_clusters thinks we should. */
580 return LW_FAILURE;
581 }
582
583 if (is_lwgeom)
584 {
585 LWGEOM** components = lwalloc(j * sizeof(LWGEOM*));
586 memcpy(components, geoms_in_cluster, j * sizeof(LWGEOM*));
587 (*clusterGeoms)[k++] = lwcollection_construct(COLLECTIONTYPE, components[0]->srid, NULL, j, (LWGEOM**) components);
588 }
589 else
590 {
591 int srid = GEOSGetSRID(((GEOSGeometry**) geoms_in_cluster)[0]);
592 GEOSGeometry* combined = GEOSGeom_createCollection(GEOS_GEOMETRYCOLLECTION, (GEOSGeometry**) geoms_in_cluster, j);
593 GEOSSetSRID(combined, srid);
594 (*clusterGeoms)[k++] = combined;
595 }
596 j = 0;
597 }
598 }
599
600 lwfree(geoms_in_cluster);
601 lwfree(ordered_components);
602
603 return LW_SUCCESS;
604}
GEOSGeometry * make_geos_segment(double x1, double y1, double x2, double y2)
GEOSGeometry * make_geos_point(double x, double y)
LWCOLLECTION * lwcollection_construct(uint8_t type, int32_t srid, GBOX *bbox, uint32_t ngeoms, LWGEOM **geoms)
#define LW_FALSE
Definition liblwgeom.h:94
#define COLLECTIONTYPE
Definition liblwgeom.h:108
void * lwrealloc(void *mem, size_t size)
Definition lwutil.c:242
#define LW_FAILURE
Definition liblwgeom.h:96
#define LW_SUCCESS
Definition liblwgeom.h:97
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition liblwgeom.h:102
void * lwalloc(size_t size)
Definition lwutil.c:227
void lwfree(void *mem)
Definition lwutil.c:248
double lwgeom_mindistance2d_tolerance(const LWGEOM *lw1, const LWGEOM *lw2, double tolerance)
Function handling min distance calculations and dwithin calculations.
Definition measures.c:222
#define LW_TRUE
Return types for functions with status returns.
Definition liblwgeom.h:93
const GBOX * lwgeom_get_bbox(const LWGEOM *lwgeom)
Get a non-empty geometry bounding box, computing and caching it if not already there.
Definition lwgeom.c:771
This library is the generic geometry handling section of PostGIS.
#define LW_ON_INTERRUPT(x)
static int dbscan_update_context(GEOSSTRtree *tree, struct QueryContext *cxt, LWGEOM **geoms, uint32_t p, double eps)
static int union_dbscan_general(LWGEOM **geoms, uint32_t num_geoms, UNIONFIND *uf, double eps, uint32_t min_points, char **in_a_cluster_ret)
int union_dbscan(LWGEOM **geoms, uint32_t num_geoms, UNIONFIND *uf, double eps, uint32_t min_points, char **in_a_cluster_ret)
static int union_dbscan_minpoints_1(LWGEOM **geoms, uint32_t num_geoms, UNIONFIND *uf, double eps, char **in_a_cluster_ret)
static void query_accumulate(void *item, void *userdata)
static struct STRTree make_strtree(void **geoms, uint32_t num_geoms, char is_lwgeom)
Make a GEOSSTRtree that stores a pointer to a variable containing the array index of the input geoms.
static void destroy_strtree(struct STRTree *tree)
Clean up STRTree after use.
static const int STRTREE_NODE_CAPACITY
static GEOSGeometry * geos_envelope_surrogate(const LWGEOM *g)
static void union_if_available(UNIONFIND *uf, uint32_t p, uint32_t q, char *is_in_core, char *in_a_cluster)
int cluster_within_distance(LWGEOM **geoms, uint32_t num_geoms, double tolerance, LWGEOM ***clusterGeoms, uint32_t *num_clusters)
Takes an array of LWGEOM* and constructs an array of LWGEOM*, where each element in the constructed a...
int union_intersecting_pairs(GEOSGeometry **geoms, uint32_t num_geoms, UNIONFIND *uf)
int cluster_intersecting(GEOSGeometry **geoms, uint32_t num_geoms, GEOSGeometry ***clusterGeoms, uint32_t *num_clusters)
Takes an array of GEOSGeometry* and constructs an array of GEOSGeometry*, where each element in the c...
static int combine_geometries(UNIONFIND *uf, void **geoms, uint32_t num_geoms, void ***clustersGeoms, uint32_t *num_clusters, char is_lwgeom)
Uses a UNIONFIND to identify the set with which each input geometry is associated,...
static uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition lwinline.h:141
static LWPOINT * lwgeom_as_lwpoint(const LWGEOM *lwgeom)
Definition lwinline.h:127
static int lwgeom_is_empty(const LWGEOM *geom)
Return true or false depending on whether a geometry is an "empty" geometry (no vertices members)
Definition lwinline.h:199
static const POINT2D * getPoint2d_cp(const POINTARRAY *pa, uint32_t n)
Returns a POINT2D pointer into the POINTARRAY serialized_ptlist, suitable for reading from.
Definition lwinline.h:97
void UF_destroy(UNIONFIND *uf)
Definition lwunionfind.c:54
uint32_t UF_find(UNIONFIND *uf, uint32_t i)
Definition lwunionfind.c:62
uint32_t * UF_ordered_by_cluster(UNIONFIND *uf)
UNIONFIND * UF_create(uint32_t N)
Definition lwunionfind.c:35
void UF_union(UNIONFIND *uf, uint32_t i, uint32_t j)
Definition lwunionfind.c:85
double ymax
Definition liblwgeom.h:357
double xmax
Definition liblwgeom.h:355
double ymin
Definition liblwgeom.h:356
double xmin
Definition liblwgeom.h:354
double y
Definition liblwgeom.h:390
double x
Definition liblwgeom.h:390
uint32_t items_found_size
GEOSGeometry ** envelopes
uint32_t * geom_ids
uint32_t num_geoms
GEOSSTRtree * tree
uint32_t num_clusters
Definition lwunionfind.h:35