PostGIS  2.2.7dev-r@@SVN_REVISION@@
gserialized_gist_nd.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * Copyright 2009 Paul Ramsey <pramsey@cleverelephant.ca>
5  *
6  * This is free software; you can redistribute and/or modify it under
7  * the terms of the GNU General Public Licence. See the COPYING file.
8  *
9  **********************************************************************/
10 
11 /*
12 ** R-Tree Bibliography
13 **
14 ** [1] A. Guttman. R-tree: a dynamic index structure for spatial searching.
15 ** Proceedings of the ACM SIGMOD Conference, pp 47-57, June 1984.
16 ** [2] C.-H. Ang and T. C. Tan. New linear node splitting algorithm for
17 ** R-Trees. Advances in Spatial Databases - 5th International Symposium,
18 ** 1997
19 ** [3] N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger. The R*tree: an
20 ** efficient and robust access method for points and rectangles.
21 ** Proceedings of the ACM SIGMOD Conference. June 1990.
22 */
23 
24 #include "postgres.h"
25 #include "access/gist.h" /* For GiST */
26 #include "access/itup.h"
27 #include "access/skey.h"
28 
29 #include "../postgis_config.h"
30 
31 /*#define POSTGIS_DEBUG_LEVEL 4*/
32 
33 #include "liblwgeom.h" /* For standard geometry types. */
34 #include "lwgeom_pg.h" /* For debugging macros. */
35 #include "gserialized_gist.h" /* For utility functions. */
36 #include "geography.h"
37 
38 #include <assert.h>
39 
40 
41 /* Fall back to older finite() if necessary */
42 #ifndef HAVE_ISFINITE
43 # ifdef HAVE_GNU_ISFINITE
44 # define _GNU_SOURCE
45 # else
46 # define isfinite finite
47 # endif
48 #endif
49 
50 /*
51 ** When is a node split not so good? If more than 90% of the entries
52 ** end up in one of the children.
53 */
54 #define LIMIT_RATIO 0.1
55 
56 /*
57 ** For debugging
58 */
59 #if POSTGIS_DEBUG_LEVEL > 0
60 static int geog_counter_leaf = 0;
61 static int geog_counter_internal = 0;
62 #endif
63 
64 /*
65 ** ND Index key type stub prototypes
66 */
67 Datum gidx_out(PG_FUNCTION_ARGS);
68 Datum gidx_in(PG_FUNCTION_ARGS);
69 
70 /*
71 ** ND GiST prototypes
72 */
73 Datum gserialized_gist_consistent(PG_FUNCTION_ARGS);
74 Datum gserialized_gist_compress(PG_FUNCTION_ARGS);
75 Datum gserialized_gist_decompress(PG_FUNCTION_ARGS);
76 Datum gserialized_gist_penalty(PG_FUNCTION_ARGS);
77 Datum gserialized_gist_picksplit(PG_FUNCTION_ARGS);
78 Datum gserialized_gist_union(PG_FUNCTION_ARGS);
79 Datum gserialized_gist_same(PG_FUNCTION_ARGS);
80 Datum gserialized_gist_distance(PG_FUNCTION_ARGS);
81 Datum gserialized_gist_geog_distance(PG_FUNCTION_ARGS);
82 
83 /*
84 ** ND Operator prototypes
85 */
86 Datum gserialized_overlaps(PG_FUNCTION_ARGS);
87 Datum gserialized_contains(PG_FUNCTION_ARGS);
88 Datum gserialized_within(PG_FUNCTION_ARGS);
89 Datum gserialized_distance_nd(PG_FUNCTION_ARGS);
90 
91 /*
92 ** GIDX true/false test function type
93 */
94 typedef bool (*gidx_predicate)(GIDX *a, GIDX *b);
95 
96 
97 /* Allocate a new copy of GIDX */
98 static GIDX* gidx_copy(GIDX *b)
99 {
100  GIDX *c = (GIDX*)palloc(VARSIZE(b));
101  POSTGIS_DEBUGF(5, "copied gidx (%p) to gidx (%p)", b, c);
102  memcpy((void*)c, (void*)b, VARSIZE(b));
103  return c;
104 }
105 
106 
107 /* Ensure all minimums are below maximums. */
108 static inline void gidx_validate(GIDX *b)
109 {
110  int i;
111  Assert(b);
112  POSTGIS_DEBUGF(5,"validating gidx (%s)", gidx_to_string(b));
113  for ( i = 0; i < GIDX_NDIMS(b); i++ )
114  {
115  if ( GIDX_GET_MIN(b,i) > GIDX_GET_MAX(b,i) )
116  {
117  float tmp;
118  tmp = GIDX_GET_MIN(b,i);
119  GIDX_SET_MIN(b,i,GIDX_GET_MAX(b,i));
120  GIDX_SET_MAX(b,i,tmp);
121  }
122  }
123  return;
124 }
125 
126 /* An "unknown" GIDX is used to represent the bounds of an EMPTY
127  geometry or other-wise unindexable geometry (like one with NaN
128  or Inf bounds) */
129 static inline bool gidx_is_unknown(const GIDX *a)
130 {
131  size_t size = VARSIZE(a) - VARHDRSZ;
132  /* "unknown" gidx objects have a too-small size of one float */
133  if ( size <= 0.0 )
134  return TRUE;
135  return FALSE;
136 }
137 
138 static inline void gidx_set_unknown(GIDX *a)
139 {
140  SET_VARSIZE(a, VARHDRSZ);
141 }
142 
143 /* Enlarge b_union to contain b_new. If b_new contains more
144  dimensions than b_union, expand b_union to contain those dimensions. */
145 static void gidx_merge(GIDX **b_union, GIDX *b_new)
146 {
147  int i, dims_union, dims_new;
148  Assert(b_union);
149  Assert(*b_union);
150  Assert(b_new);
151 
152  /* Can't merge an unknown into any thing */
153  if( gidx_is_unknown(b_new) )
154  return;
155 
156  /* Merge of unknown and known is known */
157  if( gidx_is_unknown(*b_union) )
158  {
159  *b_union = b_new;
160  return;
161  }
162 
163  dims_union = GIDX_NDIMS(*b_union);
164  dims_new = GIDX_NDIMS(b_new);
165 
166  POSTGIS_DEBUGF(4, "merging gidx (%s) into gidx (%s)", gidx_to_string(b_new), gidx_to_string(*b_union));
167 
168  if ( dims_new > dims_union )
169  {
170  POSTGIS_DEBUGF(5, "reallocating b_union from %d dims to %d dims", dims_union, dims_new);
171  *b_union = (GIDX*)repalloc(*b_union, GIDX_SIZE(dims_new));
172  SET_VARSIZE(*b_union, VARSIZE(b_new));
173  dims_union = dims_new;
174  }
175 
176  for ( i = 0; i < dims_new; i++ )
177  {
178  /* Adjust minimums */
179  GIDX_SET_MIN(*b_union, i, Min(GIDX_GET_MIN(*b_union,i),GIDX_GET_MIN(b_new,i)));
180  /* Adjust maximums */
181  GIDX_SET_MAX(*b_union, i, Max(GIDX_GET_MAX(*b_union,i),GIDX_GET_MAX(b_new,i)));
182  }
183 
184  POSTGIS_DEBUGF(5, "merge complete (%s)", gidx_to_string(*b_union));
185  return;
186 }
187 
188 /* Calculate the volume (in n-d units) of the GIDX */
189 static float gidx_volume(GIDX *a)
190 {
191  float result;
192  int i;
193  if ( a == NULL || gidx_is_unknown(a) )
194  {
195  /* elog(ERROR, "gidx_volume received a null argument"); */
196  return 0.0;
197  }
198  result = GIDX_GET_MAX(a,0) - GIDX_GET_MIN(a,0);
199  for ( i = 1; i < GIDX_NDIMS(a); i++ )
200  result *= (GIDX_GET_MAX(a,i) - GIDX_GET_MIN(a,i));
201  POSTGIS_DEBUGF(5, "calculated volume of %s as %.8g", gidx_to_string(a), result);
202  return result;
203 }
204 
205 /* Ensure the first argument has the higher dimensionality. */
206 static void gidx_dimensionality_check(GIDX **a, GIDX **b)
207 {
208  if ( GIDX_NDIMS(*a) < GIDX_NDIMS(*b) )
209  {
210  GIDX *tmp = *b;
211  *b = *a;
212  *a = tmp;
213  }
214 }
215 
216 /* Calculate the volume of the union of the boxes. Avoids creating an intermediate box. */
217 static float gidx_union_volume(GIDX *a, GIDX *b)
218 {
219  float result;
220  int i;
221  int ndims_a, ndims_b;
222 
223  POSTGIS_DEBUG(5,"entered function");
224 
225  if ( a == NULL && b == NULL )
226  {
227  elog(ERROR, "gidx_union_volume received two null arguments");
228  return 0.0;
229  }
230 
231  if ( a == NULL || gidx_is_unknown(a) )
232  return gidx_volume(b);
233 
234  if ( b == NULL || gidx_is_unknown(b) )
235  return gidx_volume(a);
236 
237  if ( gidx_is_unknown(a) && gidx_is_unknown(b) )
238  {
239  return 0.0;
240  }
241 
242  /* Ensure 'a' has the most dimensions. */
244 
245  ndims_a = GIDX_NDIMS(a);
246  ndims_b = GIDX_NDIMS(b);
247 
248  /* Initialize with maximal length of first dimension. */
249  result = Max(GIDX_GET_MAX(a,0),GIDX_GET_MAX(b,0)) - Min(GIDX_GET_MIN(a,0),GIDX_GET_MIN(b,0));
250 
251  /* Multiply by maximal length of remaining dimensions. */
252  for ( i = 1; i < ndims_b; i++ )
253  {
254  result *= (Max(GIDX_GET_MAX(a,i),GIDX_GET_MAX(b,i)) - Min(GIDX_GET_MIN(a,i),GIDX_GET_MIN(b,i)));
255  }
256 
257  /* Add in dimensions of higher dimensional box. */
258  for ( i = ndims_b; i < ndims_a; i++ )
259  {
260  result *= (GIDX_GET_MAX(a,i) - GIDX_GET_MIN(a,i));
261  }
262 
263  POSTGIS_DEBUGF(5, "volume( %s union %s ) = %.12g", gidx_to_string(a), gidx_to_string(b), result);
264 
265  return result;
266 }
267 
268 /* Calculate the volume of the intersection of the boxes. */
269 static float gidx_inter_volume(GIDX *a, GIDX *b)
270 {
271  int i;
272  float result;
273 
274  POSTGIS_DEBUG(5,"entered function");
275 
276  if ( a == NULL || b == NULL )
277  {
278  elog(ERROR, "gidx_inter_volume received a null argument");
279  return 0.0;
280  }
281 
282  if ( gidx_is_unknown(a) || gidx_is_unknown(b) )
283  {
284  return 0.0;
285  }
286 
287  /* Ensure 'a' has the most dimensions. */
289 
290  /* Initialize with minimal length of first dimension. */
291  result = Min(GIDX_GET_MAX(a,0),GIDX_GET_MAX(b,0)) - Max(GIDX_GET_MIN(a,0),GIDX_GET_MIN(b,0));
292 
293  /* If they are disjoint (max < min) then return zero. */
294  if ( result < 0.0 ) return 0.0;
295 
296  /* Continue for remaining dimensions. */
297  for ( i = 1; i < GIDX_NDIMS(b); i++ )
298  {
299  float width = Min(GIDX_GET_MAX(a,i),GIDX_GET_MAX(b,i)) - Max(GIDX_GET_MIN(a,i),GIDX_GET_MIN(b,i));
300  if ( width < 0.0 ) return 0.0;
301  /* Multiply by minimal length of remaining dimensions. */
302  result *= width;
303  }
304  POSTGIS_DEBUGF(5, "volume( %s intersection %s ) = %.12g", gidx_to_string(a), gidx_to_string(b), result);
305  return result;
306 }
307 
308 /*
309 ** Overlapping GIDX box test.
310 **
311 ** Box(A) Overlaps Box(B) IFF for every dimension d:
312 ** min(A,d) <= max(B,d) && max(A,d) => min(B,d)
313 **
314 ** Any missing dimension is assumed by convention to
315 ** overlap whatever finite range available on the
316 ** other operand. See
317 ** http://lists.osgeo.org/pipermail/postgis-devel/2015-February/024757.html
318 **
319 ** Empty boxes never overlap.
320 */
321 static bool gidx_overlaps(GIDX *a, GIDX *b)
322 {
323  int i;
324  int ndims_b;
325  POSTGIS_DEBUG(5, "entered function");
326 
327  if ( (a == NULL) || (b == NULL) ) return FALSE;
328 
329  if ( gidx_is_unknown(a) || gidx_is_unknown(b) )
330  return FALSE;
331 
332  /* Ensure 'a' has the most dimensions. */
334 
335  ndims_b = GIDX_NDIMS(b);
336 
337  /* compare only up to dimensions of (b), missing dimensions always overlap */
338  for ( i = 0; i < ndims_b; i++ )
339  {
340  if ( GIDX_GET_MIN(a,i) > GIDX_GET_MAX(b,i) )
341  return FALSE;
342  if ( GIDX_GET_MIN(b,i) > GIDX_GET_MAX(a,i) )
343  return FALSE;
344  }
345 
346  return TRUE;
347 }
348 
349 /*
350 ** Containment GIDX test.
351 **
352 ** Box(A) CONTAINS Box(B) IFF (pt(A)LL < pt(B)LL) && (pt(A)UR > pt(B)UR)
353 */
354 static bool gidx_contains(GIDX *a, GIDX *b)
355 {
356  int i, dims_a, dims_b;
357 
358  POSTGIS_DEBUG(5, "entered function");
359 
360  if ( (a == NULL) || (b == NULL) ) return FALSE;
361 
362  if ( gidx_is_unknown(a) || gidx_is_unknown(b) )
363  return FALSE;
364 
365  dims_a = GIDX_NDIMS(a);
366  dims_b = GIDX_NDIMS(b);
367 
368  if ( dims_a < dims_b )
369  {
370  /*
371  ** If (b) is of higher dimensionality than (a) it can only be contained
372  ** if those higher dimensions are zeroes.
373  */
374  for (i = dims_a; i < dims_b; i++)
375  {
376  if ( GIDX_GET_MIN(b,i) != 0 )
377  return FALSE;
378  if ( GIDX_GET_MAX(b,i) != 0 )
379  return FALSE;
380  }
381  }
382 
383  /* Excess dimensions of (a), don't matter, it just has to contain (b) in (b)'s dimensions */
384  for (i = 0; i < Min(dims_a, dims_b); i++)
385  {
386  if ( GIDX_GET_MIN(a,i) > GIDX_GET_MIN(b,i) )
387  return FALSE;
388  if ( GIDX_GET_MAX(a,i) < GIDX_GET_MAX(b,i) )
389  return FALSE;
390  }
391 
392  return TRUE;
393 }
394 
395 /*
396 ** Equality GIDX test.
397 **
398 ** Box(A) EQUALS Box(B) IFF (pt(A)LL == pt(B)LL) && (pt(A)UR == pt(B)UR)
399 */
400 static bool gidx_equals(GIDX *a, GIDX *b)
401 {
402  int i;
403 
404  POSTGIS_DEBUG(5, "entered function");
405 
406  if ( (a == NULL) && (b == NULL) ) return TRUE;
407  if ( (a == NULL) || (b == NULL) ) return FALSE;
408 
409  if ( gidx_is_unknown(a) && gidx_is_unknown(b) )
410  return TRUE;
411 
412  if ( gidx_is_unknown(a) || gidx_is_unknown(b) )
413  return FALSE;
414 
415  /* Ensure 'a' has the most dimensions. */
417 
418  /* For all shared dimensions min(a) == min(b), max(a) == max(b) */
419  for (i = 0; i < GIDX_NDIMS(b); i++)
420  {
421  if ( GIDX_GET_MIN(a,i) != GIDX_GET_MIN(b,i) )
422  return FALSE;
423  if ( GIDX_GET_MAX(a,i) != GIDX_GET_MAX(b,i) )
424  return FALSE;
425  }
426  /* For all unshared dimensions min(a) == 0.0, max(a) == 0.0 */
427  for (i = GIDX_NDIMS(b); i < GIDX_NDIMS(a); i++)
428  {
429  if ( GIDX_GET_MIN(a,i) != 0.0 )
430  return FALSE;
431  if ( GIDX_GET_MAX(a,i) != 0.0 )
432  return FALSE;
433  }
434  return TRUE;
435 }
436 
441 static int
442 gserialized_datum_predicate(Datum gs1, Datum gs2, gidx_predicate predicate)
443 {
444  /* Put aside some stack memory and use it for GIDX pointers. */
445  char boxmem1[GIDX_MAX_SIZE];
446  char boxmem2[GIDX_MAX_SIZE];
447  GIDX *gidx1 = (GIDX*)boxmem1;
448  GIDX *gidx2 = (GIDX*)boxmem2;
449 
450  POSTGIS_DEBUG(3, "entered function");
451 
452  /* Must be able to build box for each arguement (ie, not empty geometry)
453  and predicate function to return true. */
454  if ( (gserialized_datum_get_gidx_p(gs1, gidx1) == LW_SUCCESS) &&
455  (gserialized_datum_get_gidx_p(gs2, gidx2) == LW_SUCCESS) &&
456  predicate(gidx1, gidx2) )
457  {
458  POSTGIS_DEBUGF(3, "got boxes %s and %s", gidx_to_string(gidx1), gidx_to_string(gidx2));
459  return LW_TRUE;
460  }
461  return LW_FALSE;
462 }
463 
467 #if POSTGIS_PGSQL_VERSION < 95
468 static double gidx_distance_leaf_centroid(const GIDX *a, const GIDX *b)
469 {
470  int ndims, i;
471  double sum = 0;
472 
473  /* Base computation on least available dimensions */
474  ndims = Min(GIDX_NDIMS(b), GIDX_NDIMS(a));
475  for ( i = 0; i < ndims; ++i )
476  {
477  double ca, cb, d;
478  double amin = GIDX_GET_MIN(a,i);
479  double amax = GIDX_GET_MAX(a,i);
480  double bmin = GIDX_GET_MIN(b,i);
481  double bmax = GIDX_GET_MAX(b,i);
482  ca = amin + ( ( amax - amin ) / 2.0 );
483  cb = bmin + ( ( bmax - bmin ) / 2.0 );
484  d = ca - cb;
485  if ( ! isfinite(d) )
486  {
487  /* Can happen if a dimension was padded with FLT_MAX,
488  * effectively meaning "infinite range". In that case
489  * we take that dimension as adding 0 to the total
490  * distance.
491  */
492  continue;
493  }
494  sum += d * d;
495 /*
496  POSTGIS_DEBUGF(3, " centroid of A for dimension %d is %g", i, ca);
497  POSTGIS_DEBUGF(3, " centroid of B for dimension %d is %g", i, cb);
498  POSTGIS_DEBUGF(3, " distance on dimension %d is %g, squared as %g, grows sum to %g", i, d, d*d, sum);
499 */
500  }
501  return sqrt(sum);
502 }
503 #endif
504 
508 static double gidx_distance(const GIDX *a, const GIDX *b, int m_is_time)
509 {
510  int ndims, i;
511  double sum = 0;
512 
513  /* Base computation on least available dimensions */
514  ndims = Min(GIDX_NDIMS(b), GIDX_NDIMS(a));
515  for ( i = 0; i < ndims; ++i )
516  {
517  double d;
518  double amin = GIDX_GET_MIN(a,i);
519  double amax = GIDX_GET_MAX(a,i);
520  double bmin = GIDX_GET_MIN(b,i);
521  double bmax = GIDX_GET_MAX(b,i);
522  POSTGIS_DEBUGF(3, "A %g - %g", amin, amax);
523  POSTGIS_DEBUGF(3, "B %g - %g", bmin, bmax);
524 
525  if ( ( amin <= bmax && amax >= bmin ) )
526  {
527  /* overlaps */
528  d = 0;
529  }
530  else if ( i == 4 && m_is_time )
531  {
532  return FLT_MAX;
533  }
534  else if ( bmax < amin )
535  {
536  /* is "left" */
537  d = amin - bmax;
538  }
539  else
540  {
541  /* is "right" */
542  assert( bmin > amax );
543  d = bmin - amax;
544  }
545  if ( ! isfinite(d) )
546  {
547  /* Can happen if coordinates are corrupted/NaN */
548  continue;
549  }
550  sum += d * d;
551  POSTGIS_DEBUGF(3, "dist %g, squared %g, grows sum to %g", d, d*d, sum);
552  }
553  return sqrt(sum);
554 }
555 
556 #if POSTGIS_PGSQL_VERSION < 95
557 static double gidx_distance_node_centroid(const GIDX *node, const GIDX *query)
558 {
559  int i;
560  double sum = 0;
561 
562  /* Base computation on least available dimensions */
563  int ndims = Min(GIDX_NDIMS(node), GIDX_NDIMS(query));
564 
565  for ( i = 0; i < ndims; ++i )
566  {
567  double d;
568  double amin = GIDX_GET_MIN(query,i);
569  double amax = GIDX_GET_MAX(query,i);
570  double bmin = GIDX_GET_MIN(node,i);
571  double bmax = GIDX_GET_MAX(node,i);
572  double ca = amin + ( ( amax - amin ) / 2.0 );
573 
574  if ( ( ca <= bmax && ca >= bmin ) )
575  {
576  /* overlaps */
577  d = 0;
578  }
579  else if ( bmax < ca )
580  {
581  /* is "left" */
582  d = ca - bmax;
583  }
584  else
585  {
586  /* is "right" */
587  assert( bmin > ca );
588  d = bmin - ca;
589  }
590  if ( ! isfinite(d) )
591  {
592  /* Can happen if coordinates are corrupted/NaN */
593  continue;
594  }
595  sum += d * d;
596  POSTGIS_DEBUGF(3, "dist %g, squared %g, grows sum to %g", d, d*d, sum);
597  }
598  return sqrt(sum);
599 }
600 #else /* POSTGIS_PGSQL_VERSION >= 95 */
601 static double gidx_distance_m(const GIDX *a, const GIDX *b)
602 {
603  int mdim_a, mdim_b;
604  double d, amin, amax, bmin, bmax;
605 
606  /* Base computation on least available dimensions */
607  mdim_a = GIDX_NDIMS(a) - 1;
608  mdim_b = GIDX_NDIMS(b) - 1;
609 
610  amin = GIDX_GET_MIN(a,mdim_a);
611  amax = GIDX_GET_MAX(a,mdim_a);
612  bmin = GIDX_GET_MIN(b,mdim_b);
613  bmax = GIDX_GET_MAX(b,mdim_b);
614 
615  if ( ( amin <= bmax && amax >= bmin ) )
616  {
617  /* overlaps */
618  d = 0;
619  }
620  else if ( bmax < amin )
621  {
622  /* is "left" */
623  d = amin - bmax;
624  }
625  else
626  {
627  /* is "right" */
628  assert( bmin > amax );
629  d = bmin - amax;
630  }
631 
632  return d;
633 }
634 #endif /* POSTGIS_PGSQL_VERSION >= 96 */
635 
639 GSERIALIZED*
641 {
642  char boxmem[GIDX_MAX_SIZE];
643  GIDX *gidx = (GIDX*)boxmem;
644  float fdistance = (float)distance;
645 
646  /* Get our bounding box out of the geography, return right away if
647  input is an EMPTY geometry. */
648  if ( gserialized_get_gidx_p(g, gidx) == LW_FAILURE )
649  {
650  return g;
651  }
652 
653  gidx_expand(gidx, fdistance);
654 
655  return gserialized_set_gidx(g, gidx);
656 }
657 
658 /***********************************************************************
659 * GiST N-D Index Operator Functions
660 */
661 
662 /*
663 * Do centroid to centroid n-d distance if you don't have
664 * re-check available (PgSQL 9.5+), do "real" n-d distance
665 * if you do
666 */
668 Datum gserialized_distance_nd(PG_FUNCTION_ARGS)
669 {
670  char b1mem[GIDX_MAX_SIZE];
671  GIDX *b1 = (GIDX*)b1mem;
672  char b2mem[GIDX_MAX_SIZE];
673  GIDX *b2 = (GIDX*)b2mem;
674 
675 #if POSTGIS_PGSQL_VERSION < 95
676 
677  /* Centroid-to-centroid distance */
678  Datum gs1 = PG_GETARG_DATUM(0);
679  Datum gs2 = PG_GETARG_DATUM(1);
680  double box_distance = FLT_MAX;
681 
682  /* Must be able to build box for each argument (ie, not empty geometry). */
683  if ( (gserialized_datum_get_gidx_p(gs1, b1) == LW_SUCCESS) &&
684  (gserialized_datum_get_gidx_p(gs2, b2) == LW_SUCCESS) )
685  {
686  box_distance = gidx_distance_leaf_centroid(b1, b2);
687  POSTGIS_DEBUGF(3, "got boxes %s and %s", gidx_to_string(b1), gidx_to_string(b2));
688  }
689  PG_RETURN_FLOAT8(box_distance);
690 
691 #else /* POSTGIS_PGSQL_VERSION >= 96 */
692 
693  /* Feature-to-feature distance */
694  GSERIALIZED *geom1 = PG_GETARG_GSERIALIZED_P(0);
695  GSERIALIZED *geom2 = PG_GETARG_GSERIALIZED_P(1);
696  LWGEOM *lw1 = lwgeom_from_gserialized(geom1);
697  LWGEOM *lw2 = lwgeom_from_gserialized(geom2);
698  LWGEOM *closest;
699  double distance;
700 
701 
702  /* Find an exact shortest line w/ the dimensions we support */
703  if ( lwgeom_has_z(lw1) && lwgeom_has_z(lw2) )
704  {
705  closest = lwgeom_closest_line_3d(lw1, lw2);
706  distance = lwgeom_length(closest);
707  }
708  else
709  {
710  closest = lwgeom_closest_line(lw1, lw2);
711  distance = lwgeom_length_2d(closest);
712  }
713 
714  /* Un-sqrt the distance so we can add extra terms */
715  distance = distance*distance;
716 
717  /* Can only add the M term if both objects have M */
718  if ( lwgeom_has_m(lw1) && lwgeom_has_m(lw2) )
719  {
720  double m1, m2;
721  int usebox = false;
722 
723  if ( lwgeom_get_type(lw1) == POINTTYPE )
724  {
725  POINT4D p;
726  lwpoint_getPoint4d_p((LWPOINT*)lw1, &p);
727  m1 = p.m;
728  }
729  else if ( lwgeom_get_type(lw1) == LINETYPE )
730  {
731  LWPOINT *lwp1 = lwline_get_lwpoint(lwgeom_as_lwline(closest), 0);
732  m1 = lwgeom_interpolate_point(lw1, lwp1);
733  lwpoint_free(lwp1);
734  }
735  else
736  {
737  usebox = true;
738  }
739 
740  if ( lwgeom_get_type(lw2) == POINTTYPE )
741  {
742  POINT4D p;
743  lwpoint_getPoint4d_p((LWPOINT*)lw2, &p);
744  m2 = p.m;
745  }
746  else if ( lwgeom_get_type(lw2) == LINETYPE )
747  {
748  LWPOINT *lwp2 = lwline_get_lwpoint(lwgeom_as_lwline(closest), 1);
749  m2 = lwgeom_interpolate_point(lw2, lwp2);
750  lwpoint_free(lwp2);
751  }
752  else
753  {
754  usebox = true;
755  }
756 
757  if ( usebox )
758  {
759  double d;
760  gserialized_get_gidx_p(geom1, b1);
761  gserialized_get_gidx_p(geom2, b2);
762  d = gidx_distance_m(b1, b2);
763  distance += d*d;
764  }
765  else
766  {
767  distance += (m2-m1)*(m2-m1);
768  }
769  }
770 
771  lwgeom_free(closest);
772 
773  PG_FREE_IF_COPY(geom1, 0);
774  PG_FREE_IF_COPY(geom2, 1);
775  PG_RETURN_FLOAT8(sqrt(distance));
776 #endif /* POSTGIS_PGSQL_VERSION >= 96 */
777 }
778 
779 /*
780 ** '~' and operator function. Based on two serialized return true if
781 ** the first is contained by the second.
782 */
784 Datum gserialized_within(PG_FUNCTION_ARGS)
785 {
786  if ( gserialized_datum_predicate(PG_GETARG_DATUM(1), PG_GETARG_DATUM(0), gidx_contains) == LW_TRUE )
787  {
788  PG_RETURN_BOOL(TRUE);
789  }
790 
791  PG_RETURN_BOOL(FALSE);
792 }
793 
794 /*
795 ** '@' and operator function. Based on two serialized return true if
796 ** the first contains the second.
797 */
799 Datum gserialized_contains(PG_FUNCTION_ARGS)
800 {
801  if ( gserialized_datum_predicate(PG_GETARG_DATUM(0),PG_GETARG_DATUM(1), gidx_contains) == LW_TRUE )
802  {
803  PG_RETURN_BOOL(TRUE);
804  }
805 
806  PG_RETURN_BOOL(FALSE);
807 }
808 
809 /*
810 ** '&&' operator function. Based on two serialized return true if
811 ** they overlap and false otherwise.
812 */
814 Datum gserialized_overlaps(PG_FUNCTION_ARGS)
815 {
816  if ( gserialized_datum_predicate(PG_GETARG_DATUM(0),PG_GETARG_DATUM(1), gidx_overlaps) == LW_TRUE )
817  {
818  PG_RETURN_BOOL(TRUE);
819  }
820 
821  PG_RETURN_BOOL(FALSE);
822 }
823 
824 /***********************************************************************
825 * GiST Index Support Functions
826 */
827 
828 /*
829 ** GiST support function. Given a geography, return a "compressed"
830 ** version. In this case, we convert the geography into a geocentric
831 ** bounding box. If the geography already has the box embedded in it
832 ** we pull that out and hand it back.
833 */
835 Datum gserialized_gist_compress(PG_FUNCTION_ARGS)
836 {
837  GISTENTRY *entry_in = (GISTENTRY*)PG_GETARG_POINTER(0);
838  GISTENTRY *entry_out = NULL;
839  char gidxmem[GIDX_MAX_SIZE];
840  GIDX *bbox_out = (GIDX*)gidxmem;
841  int result = LW_SUCCESS;
842  int i;
843 
844  POSTGIS_DEBUG(4, "[GIST] 'compress' function called");
845 
846  /*
847  ** Not a leaf key? There's nothing to do.
848  ** Return the input unchanged.
849  */
850  if ( ! entry_in->leafkey )
851  {
852  POSTGIS_DEBUG(4, "[GIST] non-leafkey entry, returning input unaltered");
853  PG_RETURN_POINTER(entry_in);
854  }
855 
856  POSTGIS_DEBUG(4, "[GIST] processing leafkey input");
857  entry_out = palloc(sizeof(GISTENTRY));
858 
859  /*
860  ** Null key? Make a copy of the input entry and
861  ** return.
862  */
863  if ( DatumGetPointer(entry_in->key) == NULL )
864  {
865  POSTGIS_DEBUG(4, "[GIST] leafkey is null");
866  gistentryinit(*entry_out, (Datum) 0, entry_in->rel,
867  entry_in->page, entry_in->offset, FALSE);
868  POSTGIS_DEBUG(4, "[GIST] returning copy of input");
869  PG_RETURN_POINTER(entry_out);
870  }
871 
872  /* Extract our index key from the GiST entry. */
873  result = gserialized_datum_get_gidx_p(entry_in->key, bbox_out);
874 
875  /* Is the bounding box valid (non-empty, non-infinite) ?
876  * If not, use the "unknown" GIDX. */
877  if ( result == LW_FAILURE )
878  {
879  POSTGIS_DEBUG(4, "[GIST] empty geometry!");
880  gidx_set_unknown(bbox_out);
881  gistentryinit(*entry_out, PointerGetDatum(gidx_copy(bbox_out)),
882  entry_in->rel, entry_in->page,
883  entry_in->offset, FALSE);
884  PG_RETURN_POINTER(entry_out);
885  }
886 
887  POSTGIS_DEBUGF(4, "[GIST] got entry_in->key: %s", gidx_to_string(bbox_out));
888 
889  /* Check all the dimensions for finite values.
890  * If not, use the "unknown" GIDX as a key */
891  for ( i = 0; i < GIDX_NDIMS(bbox_out); i++ )
892  {
893  if ( ! isfinite(GIDX_GET_MAX(bbox_out, i))
894  || ! isfinite(GIDX_GET_MIN(bbox_out, i)) )
895  {
896  gidx_set_unknown(bbox_out);
897  gistentryinit(*entry_out,
898  PointerGetDatum(gidx_copy(bbox_out)),
899  entry_in->rel, entry_in->page,
900  entry_in->offset, FALSE);
901  PG_RETURN_POINTER(entry_out);
902  }
903  }
904 
905  /* Enure bounding box has minimums below maximums. */
906  gidx_validate(bbox_out);
907 
908  /* Prepare GISTENTRY for return. */
909  gistentryinit(*entry_out, PointerGetDatum(gidx_copy(bbox_out)),
910  entry_in->rel, entry_in->page, entry_in->offset, FALSE);
911 
912  /* Return GISTENTRY. */
913  POSTGIS_DEBUG(4, "[GIST] 'compress' function complete");
914  PG_RETURN_POINTER(entry_out);
915 }
916 
917 /*
918 ** GiST support function.
919 ** Decompress an entry. Unused for geography, so we return.
920 */
922 Datum gserialized_gist_decompress(PG_FUNCTION_ARGS)
923 {
924  POSTGIS_DEBUG(5, "[GIST] 'decompress' function called");
925  /* We don't decompress. Just return the input. */
926  PG_RETURN_POINTER(PG_GETARG_POINTER(0));
927 }
928 
929 /*
930 ** GiST support function. Called from gserialized_gist_consistent below.
931 */
932 static inline bool gserialized_gist_consistent_leaf(GIDX *key, GIDX *query, StrategyNumber strategy)
933 {
934  bool retval;
935 
936  POSTGIS_DEBUGF(4, "[GIST] leaf consistent, strategy [%d], count[%i]",
937  strategy, geog_counter_leaf++);
938 
939  switch (strategy)
940  {
941  case RTOverlapStrategyNumber:
942  retval = (bool) gidx_overlaps(key, query);
943  break;
944  case RTSameStrategyNumber:
945  retval = (bool) gidx_equals(key, query);
946  break;
947  case RTContainsStrategyNumber:
948  case RTOldContainsStrategyNumber:
949  retval = (bool) gidx_contains(key, query);
950  break;
951  case RTContainedByStrategyNumber:
952  case RTOldContainedByStrategyNumber:
953  retval = (bool) gidx_contains(query, key);
954  break;
955  default:
956  retval = FALSE;
957  }
958 
959  return (retval);
960 }
961 
962 /*
963 ** GiST support function. Called from gserialized_gist_consistent below.
964 */
965 static inline bool gserialized_gist_consistent_internal(GIDX *key, GIDX *query, StrategyNumber strategy)
966 {
967  bool retval;
968 
969  POSTGIS_DEBUGF(4, "[GIST] internal consistent, strategy [%d], count[%i], query[%s], key[%s]",
970  strategy, geog_counter_internal++, gidx_to_string(query), gidx_to_string(key) );
971 
972  switch (strategy)
973  {
974  case RTOverlapStrategyNumber:
975  retval = (bool) gidx_overlaps(key, query);
976  break;
977  case RTSameStrategyNumber:
978  case RTContainsStrategyNumber:
979  case RTOldContainsStrategyNumber:
980  retval = (bool) gidx_contains(key, query);
981  break;
982  case RTContainedByStrategyNumber:
983  case RTOldContainedByStrategyNumber:
984  retval = (bool) gidx_overlaps(key, query);
985  break;
986  default:
987  retval = FALSE;
988  }
989 
990  return (retval);
991 }
992 
993 /*
994 ** GiST support function. Take in a query and an entry and see what the
995 ** relationship is, based on the query strategy.
996 */
998 Datum gserialized_gist_consistent(PG_FUNCTION_ARGS)
999 {
1000  GISTENTRY *entry = (GISTENTRY*) PG_GETARG_POINTER(0);
1001  StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
1002  bool result;
1003  char gidxmem[GIDX_MAX_SIZE];
1004  GIDX *query_gbox_index = (GIDX*)gidxmem;
1005 
1006  /* PostgreSQL 8.4 and later require the RECHECK flag to be set here,
1007  rather than being supplied as part of the operator class definition */
1008  bool *recheck = (bool *) PG_GETARG_POINTER(4);
1009 
1010  /* We set recheck to false to avoid repeatedly pulling every "possibly matched" geometry
1011  out during index scans. For cases when the geometries are large, rechecking
1012  can make things twice as slow. */
1013  *recheck = false;
1014 
1015  POSTGIS_DEBUG(4, "[GIST] 'consistent' function called");
1016 
1017  /* Quick sanity check on query argument. */
1018  if ( DatumGetPointer(PG_GETARG_DATUM(1)) == NULL )
1019  {
1020  POSTGIS_DEBUG(4, "[GIST] null query pointer (!?!), returning false");
1021  PG_RETURN_BOOL(FALSE); /* NULL query! This is screwy! */
1022  }
1023 
1024  /* Quick sanity check on entry key. */
1025  if ( DatumGetPointer(entry->key) == NULL )
1026  {
1027  POSTGIS_DEBUG(4, "[GIST] null index entry, returning false");
1028  PG_RETURN_BOOL(FALSE); /* NULL entry! */
1029  }
1030 
1031  /* Null box should never make this far. */
1032  if ( gserialized_datum_get_gidx_p(PG_GETARG_DATUM(1), query_gbox_index) == LW_FAILURE )
1033  {
1034  POSTGIS_DEBUG(4, "[GIST] null query_gbox_index!");
1035  PG_RETURN_BOOL(FALSE);
1036  }
1037 
1038  /* Treat leaf node tests different from internal nodes */
1039  if (GIST_LEAF(entry))
1040  {
1042  (GIDX*)DatumGetPointer(entry->key),
1043  query_gbox_index, strategy);
1044  }
1045  else
1046  {
1048  (GIDX*)DatumGetPointer(entry->key),
1049  query_gbox_index, strategy);
1050  }
1051 
1052  PG_RETURN_BOOL(result);
1053 }
1054 
1055 
1056 /*
1057 ** GiST support function. Calculate the "penalty" cost of adding this entry into an existing entry.
1058 ** Calculate the change in volume of the old entry once the new entry is added.
1059 ** TODO: Re-evaluate this in light of R*Tree penalty approaches.
1060 */
1062 Datum gserialized_gist_penalty(PG_FUNCTION_ARGS)
1063 {
1064  GISTENTRY *origentry = (GISTENTRY*) PG_GETARG_POINTER(0);
1065  GISTENTRY *newentry = (GISTENTRY*) PG_GETARG_POINTER(1);
1066  float *result = (float*) PG_GETARG_POINTER(2);
1067  GIDX *gbox_index_orig, *gbox_index_new;
1068  float size_union, size_orig;
1069 
1070  POSTGIS_DEBUG(4, "[GIST] 'penalty' function called");
1071 
1072  gbox_index_orig = (GIDX*)DatumGetPointer(origentry->key);
1073  gbox_index_new = (GIDX*)DatumGetPointer(newentry->key);
1074 
1075  /* Drop out if we're dealing with null inputs. Shouldn't happen. */
1076  if ( (gbox_index_orig == NULL) && (gbox_index_new == NULL) )
1077  {
1078  POSTGIS_DEBUG(4, "[GIST] both inputs NULL! returning penalty of zero");
1079  *result = 0.0;
1080  PG_RETURN_FLOAT8(*result);
1081  }
1082 
1083  /* Calculate the size difference of the boxes (volume difference in this case). */
1084  size_union = gidx_union_volume(gbox_index_orig, gbox_index_new);
1085  size_orig = gidx_volume(gbox_index_orig);
1086  *result = size_union - size_orig;
1087 
1088  /* All things being equal, we prefer to expand small boxes rather than large boxes.
1089  NOTE: This code seemed to be causing badly balanced trees to be built
1090  and therefore has been commented out. Not sure why it didn't work,
1091  but it didn't.
1092  if( FP_IS_ZERO(*result) )
1093  if( FP_IS_ZERO(size_orig) )
1094  *result = 0.0;
1095  else
1096  *result = 1.0 - (1.0/(1.0 + size_orig));
1097  else
1098  *result += 1.0;
1099  */
1100 
1101  POSTGIS_DEBUGF(4, "[GIST] union size (%.12f), original size (%.12f), penalty (%.12f)", size_union, size_orig, *result);
1102 
1103  PG_RETURN_POINTER(result);
1104 }
1105 
1106 /*
1107 ** GiST support function. Merge all the boxes in a page into one master box.
1108 */
1110 Datum gserialized_gist_union(PG_FUNCTION_ARGS)
1111 {
1112  GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
1113  int *sizep = (int *) PG_GETARG_POINTER(1); /* Size of the return value */
1114  int numranges, i;
1115  GIDX *box_cur, *box_union;
1116 
1117  POSTGIS_DEBUG(4, "[GIST] 'union' function called");
1118 
1119  numranges = entryvec->n;
1120 
1121  box_cur = (GIDX*) DatumGetPointer(entryvec->vector[0].key);
1122 
1123  box_union = gidx_copy(box_cur);
1124 
1125  for ( i = 1; i < numranges; i++ )
1126  {
1127  box_cur = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1128  gidx_merge(&box_union, box_cur);
1129  }
1130 
1131  *sizep = VARSIZE(box_union);
1132 
1133  POSTGIS_DEBUGF(4, "[GIST] union called, numranges(%i), pageunion %s", numranges, gidx_to_string(box_union));
1134 
1135  PG_RETURN_POINTER(box_union);
1136 
1137 }
1138 
1139 /*
1140 ** GiST support function. Test equality of keys.
1141 */
1143 Datum gserialized_gist_same(PG_FUNCTION_ARGS)
1144 {
1145  GIDX *b1 = (GIDX*)PG_GETARG_POINTER(0);
1146  GIDX *b2 = (GIDX*)PG_GETARG_POINTER(1);
1147  bool *result = (bool*)PG_GETARG_POINTER(2);
1148 
1149  POSTGIS_DEBUG(4, "[GIST] 'same' function called");
1150 
1151  *result = gidx_equals(b1, b2);
1152 
1153  PG_RETURN_POINTER(result);
1154 }
1155 
1156 
1157 
1158 
1160 Datum gserialized_gist_geog_distance(PG_FUNCTION_ARGS)
1161 {
1162  GISTENTRY *entry = (GISTENTRY*) PG_GETARG_POINTER(0);
1163  Datum query_datum = PG_GETARG_DATUM(1);
1164  StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
1165 #if POSTGIS_PGSQL_VERSION >= 95
1166  bool *recheck = (bool *) PG_GETARG_POINTER(4);
1167 #endif
1168  char query_box_mem[GIDX_MAX_SIZE];
1169  GIDX *query_box = (GIDX*)query_box_mem;
1170  GIDX *entry_box;
1171  double distance;
1172 
1173  POSTGIS_DEBUGF(3, "[GIST] '%s' function called", __func__);
1174 
1175  /* We are using '13' as the gist geography distance <-> strategy number */
1176  if ( strategy != 13 )
1177  {
1178  elog(ERROR, "unrecognized strategy number: %d", strategy);
1179  PG_RETURN_FLOAT8(FLT_MAX);
1180  }
1181 
1182  /* Null box should never make this far. */
1183  if ( gserialized_datum_get_gidx_p(query_datum, query_box) == LW_FAILURE )
1184  {
1185  POSTGIS_DEBUG(2, "[GIST] null query_gbox_index!");
1186  PG_RETURN_FLOAT8(FLT_MAX);
1187  }
1188 
1189 #if POSTGIS_PGSQL_VERSION >= 95
1190  /* When we hit leaf nodes, it's time to turn on recheck */
1191  if (GIST_LEAF(entry))
1192  {
1193  *recheck = true;
1194  }
1195 #endif
1196 
1197  /* Get the entry box */
1198  entry_box = (GIDX*)DatumGetPointer(entry->key);
1199 
1200  /* Return distances from key-based tests should always be */
1201  /* the minimum possible distance, box-to-box */
1202  /* We scale up to "world units" so that the box-to-box distances */
1203  /* compare reasonably with the over-the-spheroid distances that */
1204  /* the recheck process will turn up */
1205  distance = WGS84_RADIUS * gidx_distance(entry_box, query_box, 0);
1206  POSTGIS_DEBUGF(2, "[GIST] '%s' got distance %g", __func__, distance);
1207 
1208  PG_RETURN_FLOAT8(distance);
1209 }
1210 
1211 
1212 /*
1213 ** GiST support function.
1214 ** Take in a query and an entry and return the "distance" between them.
1215 **
1216 ** Given an index entry p and a query value q, this function determines the
1217 ** index entry's "distance" from the query value. This function must be
1218 ** supplied if the operator class contains any ordering operators. A query
1219 ** using the ordering operator will be implemented by returning index entries
1220 ** with the smallest "distance" values first, so the results must be consistent
1221 ** with the operator's semantics. For a leaf index entry the result just
1222 ** represents the distance to the index entry; for an internal tree node, the
1223 ** result must be the smallest distance that any child entry could have.
1224 **
1225 ** Strategy 13 is centroid-based distance tests <<->>
1226 ** Strategy 20 is cpa based distance tests |=|
1227 */
1229 Datum gserialized_gist_distance(PG_FUNCTION_ARGS)
1230 {
1231  GISTENTRY *entry = (GISTENTRY*) PG_GETARG_POINTER(0);
1232  StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
1233  char query_box_mem[GIDX_MAX_SIZE];
1234  GIDX *query_box = (GIDX*)query_box_mem;
1235  GIDX *entry_box;
1236 #if POSTGIS_PGSQL_VERSION >= 95
1237  bool *recheck = (bool *) PG_GETARG_POINTER(4);
1238 #endif
1239 
1240  double distance;
1241 
1242  POSTGIS_DEBUG(4, "[GIST] 'distance' function called");
1243 
1244  /* Strategy 13 is <<->> */
1245  /* Strategy 20 is |=| */
1246  if ( strategy != 13 && strategy != 20 ) {
1247  elog(ERROR, "unrecognized strategy number: %d", strategy);
1248  PG_RETURN_FLOAT8(FLT_MAX);
1249  }
1250 
1251  /* Null box should never make this far. */
1252  if ( gserialized_datum_get_gidx_p(PG_GETARG_DATUM(1), query_box) == LW_FAILURE )
1253  {
1254  POSTGIS_DEBUG(4, "[GIST] null query_gbox_index!");
1255  PG_RETURN_FLOAT8(FLT_MAX);
1256  }
1257 
1258  /* Get the entry box */
1259  entry_box = (GIDX*)DatumGetPointer(entry->key);
1260 
1261 #if POSTGIS_PGSQL_VERSION >= 95
1262 
1263  /* Strategy 20 is |=| */
1264  distance = gidx_distance(entry_box, query_box, strategy == 20);
1265 
1266  /* Treat leaf node tests different from internal nodes */
1267  if (GIST_LEAF(entry))
1268  *recheck = true;
1269 #else
1270 
1271  if ( strategy == 20 )
1272  {
1273  elog(ERROR, "You need PostgreSQL 9.5.0 or higher in order to use |=| with index");
1274  PG_RETURN_FLOAT8(FLT_MAX);
1275  }
1276 
1277  /* Treat leaf node tests different from internal nodes */
1278  if (GIST_LEAF(entry))
1279  {
1280  /* Calculate distance to leaves */
1281  distance = (double)gidx_distance_leaf_centroid(entry_box, query_box);
1282  }
1283  else
1284  {
1285  /* Calculate distance for internal nodes */
1286  distance = (double)gidx_distance_node_centroid(entry_box, query_box);
1287  }
1288 #endif
1289  PG_RETURN_FLOAT8(distance);
1290 }
1291 
1292 
1293 /*
1294 ** Utility function to add entries to the axis partition lists in the
1295 ** picksplit function.
1296 */
1297 static void gserialized_gist_picksplit_addlist(OffsetNumber *list, GIDX **box_union, GIDX *box_current, int *pos, int num)
1298 {
1299  if ( *pos )
1300  gidx_merge(box_union, box_current);
1301  else
1302  memcpy((void*)(*box_union), (void*)box_current, VARSIZE(box_current));
1303  list[*pos] = num;
1304  (*pos)++;
1305 }
1306 
1307 /*
1308 ** Utility function check whether the number of entries two halves of the
1309 ** space constitute a "bad ratio" (poor balance).
1310 */
1312 {
1313  POSTGIS_DEBUGF(4, "[GIST] checking split ratio (%d, %d)", x, y);
1314  if ( (y == 0) || (((float)x / (float)y) < LIMIT_RATIO) ||
1315  (x == 0) || (((float)y / (float)x) < LIMIT_RATIO) )
1316  return TRUE;
1317 
1318  return FALSE;
1319 }
1320 
1321 static bool gserialized_gist_picksplit_badratios(int *pos, int dims)
1322 {
1323  int i;
1324  for ( i = 0; i < dims; i++ )
1325  {
1326  if ( gserialized_gist_picksplit_badratio(pos[2*i],pos[2*i+1]) == FALSE )
1327  return FALSE;
1328  }
1329  return TRUE;
1330 }
1331 
1332 
1333 /*
1334 ** Where the picksplit algorithm cannot find any basis for splitting one way
1335 ** or another, we simply split the overflowing node in half.
1336 */
1337 static void gserialized_gist_picksplit_fallback(GistEntryVector *entryvec, GIST_SPLITVEC *v)
1338 {
1339  OffsetNumber i, maxoff;
1340  GIDX *unionL = NULL;
1341  GIDX *unionR = NULL;
1342  int nbytes;
1343 
1344  POSTGIS_DEBUG(4, "[GIST] in fallback picksplit function");
1345 
1346  maxoff = entryvec->n - 1;
1347 
1348  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
1349  v->spl_left = (OffsetNumber*) palloc(nbytes);
1350  v->spl_right = (OffsetNumber*) palloc(nbytes);
1351  v->spl_nleft = v->spl_nright = 0;
1352 
1353  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1354  {
1355  GIDX *cur = (GIDX*)DatumGetPointer(entryvec->vector[i].key);
1356 
1357  if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
1358  {
1359  v->spl_left[v->spl_nleft] = i;
1360  if (unionL == NULL)
1361  {
1362  unionL = gidx_copy(cur);
1363  }
1364  else
1365  {
1366  gidx_merge(&unionL, cur);
1367  }
1368  v->spl_nleft++;
1369  }
1370  else
1371  {
1372  v->spl_right[v->spl_nright] = i;
1373  if (unionR == NULL)
1374  {
1375  unionR = gidx_copy(cur);
1376  }
1377  else
1378  {
1379  gidx_merge(&unionR, cur);
1380  }
1381  v->spl_nright++;
1382  }
1383  }
1384 
1385  if (v->spl_ldatum_exists)
1386  gidx_merge(&unionL, (GIDX*)DatumGetPointer(v->spl_ldatum));
1387 
1388  v->spl_ldatum = PointerGetDatum(unionL);
1389 
1390  if (v->spl_rdatum_exists)
1391  gidx_merge(&unionR, (GIDX*)DatumGetPointer(v->spl_rdatum));
1392 
1393  v->spl_rdatum = PointerGetDatum(unionR);
1394  v->spl_ldatum_exists = v->spl_rdatum_exists = false;
1395 }
1396 
1397 
1398 
1399 static void gserialized_gist_picksplit_constructsplit(GIST_SPLITVEC *v, OffsetNumber *list1, int nlist1, GIDX **union1, OffsetNumber *list2, int nlist2, GIDX **union2)
1400 {
1401  bool firstToLeft = true;
1402 
1403  POSTGIS_DEBUG(4, "[GIST] picksplit in constructsplit function");
1404 
1405  if (v->spl_ldatum_exists || v->spl_rdatum_exists)
1406  {
1407  if (v->spl_ldatum_exists && v->spl_rdatum_exists)
1408  {
1409  GIDX *LRl = gidx_copy(*union1);
1410  GIDX *LRr = gidx_copy(*union2);
1411  GIDX *RLl = gidx_copy(*union2);
1412  GIDX *RLr = gidx_copy(*union1);
1413  double sizeLR, sizeRL;
1414 
1415  gidx_merge(&LRl, (GIDX*)DatumGetPointer(v->spl_ldatum));
1416  gidx_merge(&LRr, (GIDX*)DatumGetPointer(v->spl_rdatum));
1417  gidx_merge(&RLl, (GIDX*)DatumGetPointer(v->spl_ldatum));
1418  gidx_merge(&RLr, (GIDX*)DatumGetPointer(v->spl_rdatum));
1419 
1420  sizeLR = gidx_inter_volume(LRl,LRr);
1421  sizeRL = gidx_inter_volume(RLl,RLr);
1422 
1423  POSTGIS_DEBUGF(4, "[GIST] sizeLR / sizeRL == %.12g / %.12g", sizeLR, sizeRL);
1424 
1425  if (sizeLR > sizeRL)
1426  firstToLeft = false;
1427 
1428  }
1429  else
1430  {
1431  float p1, p2;
1432  GISTENTRY oldUnion, addon;
1433 
1434  gistentryinit(oldUnion, (v->spl_ldatum_exists) ? v->spl_ldatum : v->spl_rdatum,
1435  NULL, NULL, InvalidOffsetNumber, FALSE);
1436 
1437  gistentryinit(addon, PointerGetDatum(*union1), NULL, NULL, InvalidOffsetNumber, FALSE);
1438  DirectFunctionCall3(gserialized_gist_penalty, PointerGetDatum(&oldUnion), PointerGetDatum(&addon), PointerGetDatum(&p1));
1439  gistentryinit(addon, PointerGetDatum(*union2), NULL, NULL, InvalidOffsetNumber, FALSE);
1440  DirectFunctionCall3(gserialized_gist_penalty, PointerGetDatum(&oldUnion), PointerGetDatum(&addon), PointerGetDatum(&p2));
1441 
1442  POSTGIS_DEBUGF(4, "[GIST] p1 / p2 == %.12g / %.12g", p1, p2);
1443 
1444  if ((v->spl_ldatum_exists && p1 > p2) || (v->spl_rdatum_exists && p1 < p2))
1445  firstToLeft = false;
1446  }
1447  }
1448 
1449  POSTGIS_DEBUGF(4, "[GIST] firstToLeft == %d", firstToLeft);
1450 
1451  if (firstToLeft)
1452  {
1453  v->spl_left = list1;
1454  v->spl_right = list2;
1455  v->spl_nleft = nlist1;
1456  v->spl_nright = nlist2;
1457  if (v->spl_ldatum_exists)
1458  gidx_merge(union1, (GIDX*)DatumGetPointer(v->spl_ldatum));
1459  v->spl_ldatum = PointerGetDatum(*union1);
1460  if (v->spl_rdatum_exists)
1461  gidx_merge(union2, (GIDX*)DatumGetPointer(v->spl_rdatum));
1462  v->spl_rdatum = PointerGetDatum(*union2);
1463  }
1464  else
1465  {
1466  v->spl_left = list2;
1467  v->spl_right = list1;
1468  v->spl_nleft = nlist2;
1469  v->spl_nright = nlist1;
1470  if (v->spl_ldatum_exists)
1471  gidx_merge(union2, (GIDX*)DatumGetPointer(v->spl_ldatum));
1472  v->spl_ldatum = PointerGetDatum(*union2);
1473  if (v->spl_rdatum_exists)
1474  gidx_merge(union1, (GIDX*)DatumGetPointer(v->spl_rdatum));
1475  v->spl_rdatum = PointerGetDatum(*union1);
1476  }
1477 
1478  v->spl_ldatum_exists = v->spl_rdatum_exists = false;
1479 }
1480 
1481 
1482 #define BELOW(d) (2*(d))
1483 #define ABOVE(d) ((2*(d))+1)
1484 
1485 /*
1486 ** GiST support function. Split an overflowing node into two new nodes.
1487 ** Uses linear algorithm from Ang & Tan [2], dividing node extent into
1488 ** four quadrants, and splitting along the axis that most evenly distributes
1489 ** entries between the new nodes.
1490 ** TODO: Re-evaluate this in light of R*Tree picksplit approaches.
1491 */
1493 Datum gserialized_gist_picksplit(PG_FUNCTION_ARGS)
1494 {
1495 
1496  GistEntryVector *entryvec = (GistEntryVector*) PG_GETARG_POINTER(0);
1497 
1498  GIST_SPLITVEC *v = (GIST_SPLITVEC*) PG_GETARG_POINTER(1);
1499  OffsetNumber i;
1500  /* One union box for each half of the space. */
1501  GIDX **box_union;
1502  /* One offset number list for each half of the space. */
1503  OffsetNumber **list;
1504  /* One position index for each half of the space. */
1505  int *pos;
1506  GIDX *box_pageunion;
1507  GIDX *box_current;
1508  int direction = -1;
1509  bool all_entries_equal = true;
1510  OffsetNumber max_offset;
1511  int nbytes, ndims_pageunion, d;
1512  int posmin = entryvec->n;
1513 
1514  POSTGIS_DEBUG(4, "[GIST] 'picksplit' function called");
1515 
1516  /*
1517  ** First calculate the bounding box and maximum number of dimensions in this page.
1518  */
1519 
1520  max_offset = entryvec->n - 1;
1521  box_current = (GIDX*) DatumGetPointer(entryvec->vector[FirstOffsetNumber].key);
1522  box_pageunion = gidx_copy(box_current);
1523 
1524  /* Calculate the containing box (box_pageunion) for the whole page we are going to split. */
1525  for ( i = OffsetNumberNext(FirstOffsetNumber); i <= max_offset; i = OffsetNumberNext(i) )
1526  {
1527  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1528 
1529  if ( all_entries_equal == true && ! gidx_equals (box_pageunion, box_current) )
1530  all_entries_equal = false;
1531 
1532  gidx_merge( &box_pageunion, box_current );
1533  }
1534 
1535  POSTGIS_DEBUGF(3, "[GIST] box_pageunion: %s", gidx_to_string(box_pageunion));
1536 
1537  /* Every box in the page is the same! So, we split and just put half the boxes in each child. */
1538  if ( all_entries_equal )
1539  {
1540  POSTGIS_DEBUG(4, "[GIST] picksplit finds all entries equal!");
1542  PG_RETURN_POINTER(v);
1543  }
1544 
1545  /* Initialize memory structures. */
1546  nbytes = (max_offset + 2) * sizeof(OffsetNumber);
1547  ndims_pageunion = GIDX_NDIMS(box_pageunion);
1548  POSTGIS_DEBUGF(4, "[GIST] ndims_pageunion == %d", ndims_pageunion);
1549  pos = palloc(2*ndims_pageunion * sizeof(int));
1550  list = palloc(2*ndims_pageunion * sizeof(OffsetNumber*));
1551  box_union = palloc(2*ndims_pageunion * sizeof(GIDX*));
1552  for ( d = 0; d < ndims_pageunion; d++ )
1553  {
1554  list[BELOW(d)] = (OffsetNumber*) palloc(nbytes);
1555  list[ABOVE(d)] = (OffsetNumber*) palloc(nbytes);
1556  box_union[BELOW(d)] = gidx_new(ndims_pageunion);
1557  box_union[ABOVE(d)] = gidx_new(ndims_pageunion);
1558  pos[BELOW(d)] = 0;
1559  pos[ABOVE(d)] = 0;
1560  }
1561 
1562  /*
1563  ** Assign each entry in the node to the volume partitions it belongs to,
1564  ** such as "above the x/y plane, left of the y/z plane, below the x/z plane".
1565  ** Each entry thereby ends up in three of the six partitions.
1566  */
1567  POSTGIS_DEBUG(4, "[GIST] 'picksplit' calculating best split axis");
1568  for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) )
1569  {
1570  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1571 
1572  for ( d = 0; d < ndims_pageunion; d++ )
1573  {
1574  if ( GIDX_GET_MIN(box_current,d)-GIDX_GET_MIN(box_pageunion,d) < GIDX_GET_MAX(box_pageunion,d)-GIDX_GET_MAX(box_current,d) )
1575  {
1576  gserialized_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1577  }
1578  else
1579  {
1580  gserialized_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1581  }
1582 
1583  }
1584 
1585  }
1586 
1587  /*
1588  ** "Bad disposition", too many entries fell into one octant of the space, so no matter which
1589  ** plane we choose to split on, we're going to end up with a mostly full node. Where the
1590  ** data is pretty homogeneous (lots of duplicates) entries that are equidistant from the
1591  ** sides of the page union box can occasionally all end up in one place, leading
1592  ** to this condition.
1593  */
1594  if ( gserialized_gist_picksplit_badratios(pos,ndims_pageunion) == TRUE )
1595  {
1596  /*
1597  ** Instead we split on center points and see if we do better.
1598  ** First calculate the average center point for each axis.
1599  */
1600  double *avgCenter = palloc(ndims_pageunion * sizeof(double));
1601 
1602  for ( d = 0; d < ndims_pageunion; d++ )
1603  {
1604  avgCenter[d] = 0.0;
1605  }
1606 
1607  POSTGIS_DEBUG(4, "[GIST] picksplit can't find good split axis, trying center point method");
1608 
1609  for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) )
1610  {
1611  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1612  for ( d = 0; d < ndims_pageunion; d++ )
1613  {
1614  avgCenter[d] += (GIDX_GET_MAX(box_current,d) + GIDX_GET_MIN(box_current,d)) / 2.0;
1615  }
1616  }
1617  for ( d = 0; d < ndims_pageunion; d++ )
1618  {
1619  avgCenter[d] /= max_offset;
1620  pos[BELOW(d)] = pos[ABOVE(d)] = 0; /* Re-initialize our counters. */
1621  POSTGIS_DEBUGF(4, "[GIST] picksplit average center point[%d] = %.12g", d, avgCenter[d]);
1622  }
1623 
1624  /* For each of our entries... */
1625  for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) )
1626  {
1627  double center;
1628  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1629 
1630  for ( d = 0; d < ndims_pageunion; d++ )
1631  {
1632  center = (GIDX_GET_MIN(box_current,d)+GIDX_GET_MAX(box_current,d))/2.0;
1633  if ( center < avgCenter[d] )
1634  gserialized_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1635  else if ( FPeq(center, avgCenter[d]) )
1636  if ( pos[BELOW(d)] > pos[ABOVE(d)] )
1637  gserialized_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1638  else
1639  gserialized_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1640  else
1641  gserialized_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1642  }
1643 
1644  }
1645 
1646  /* Do we have a good disposition now? If not, screw it, just cut the node in half. */
1647  if ( gserialized_gist_picksplit_badratios(pos,ndims_pageunion) == TRUE )
1648  {
1649  POSTGIS_DEBUG(4, "[GIST] picksplit still cannot find a good split! just cutting the node in half");
1651  PG_RETURN_POINTER(v);
1652  }
1653 
1654  }
1655 
1656  /*
1657  ** Now, what splitting plane gives us the most even ratio of
1658  ** entries in our child pages? Since each split region has been apportioned entries
1659  ** against the same number of total entries, the axis that has the smallest maximum
1660  ** number of entries in its regions is the most evenly distributed.
1661  ** TODO: what if the distributions are equal in two or more axes?
1662  */
1663  for ( d = 0; d < ndims_pageunion; d++ )
1664  {
1665  int posd = Max(pos[ABOVE(d)],pos[BELOW(d)]);
1666  if ( posd < posmin )
1667  {
1668  direction = d;
1669  posmin = posd;
1670  }
1671  }
1672  if ( direction == -1 || posmin == entryvec->n )
1673  {
1674  /* ERROR OUT HERE */
1675  elog(ERROR, "Error in building split, unable to determine split direction.");
1676  }
1677 
1678  POSTGIS_DEBUGF(3, "[GIST] 'picksplit' splitting on axis %d", direction);
1679 
1681  pos[BELOW(direction)],
1682  &(box_union[BELOW(direction)]),
1683  list[ABOVE(direction)],
1684  pos[ABOVE(direction)],
1685  &(box_union[ABOVE(direction)]) );
1686 
1687  POSTGIS_DEBUGF(4, "[GIST] spl_ldatum: %s", gidx_to_string((GIDX*)v->spl_ldatum));
1688  POSTGIS_DEBUGF(4, "[GIST] spl_rdatum: %s", gidx_to_string((GIDX*)v->spl_rdatum));
1689 
1690  POSTGIS_DEBUGF(4, "[GIST] axis %d: parent range (%.12g, %.12g) left range (%.12g, %.12g), right range (%.12g, %.12g)",
1691  direction,
1692  GIDX_GET_MIN(box_pageunion, direction), GIDX_GET_MAX(box_pageunion, direction),
1693  GIDX_GET_MIN((GIDX*)v->spl_ldatum, direction), GIDX_GET_MAX((GIDX*)v->spl_ldatum, direction),
1694  GIDX_GET_MIN((GIDX*)v->spl_rdatum, direction), GIDX_GET_MAX((GIDX*)v->spl_rdatum, direction) );
1695 
1696  PG_RETURN_POINTER(v);
1697 
1698 }
1699 
1700 /*
1701 ** The GIDX key must be defined as a PostgreSQL type, even though it is only
1702 ** ever used internally. These no-op stubs are used to bind the type.
1703 */
1705 Datum gidx_in(PG_FUNCTION_ARGS)
1706 {
1707  ereport(ERROR,(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1708  errmsg("function gidx_in not implemented")));
1709  PG_RETURN_POINTER(NULL);
1710 }
1711 
1713 Datum gidx_out(PG_FUNCTION_ARGS)
1714 {
1715  GIDX *box = (GIDX *) PG_GETARG_POINTER(0);
1716  char *result = gidx_to_string(box);
1717  PG_RETURN_CSTRING(result);
1718 }
#define LINETYPE
Definition: liblwgeom.h:71
static void gidx_merge(GIDX **b_union, GIDX *b_new)
static GIDX * gidx_copy(GIDX *b)
static double gidx_distance_node_centroid(const GIDX *node, const GIDX *query)
Datum gserialized_within(PG_FUNCTION_ARGS)
double m
Definition: liblwgeom.h:336
Datum gserialized_gist_decompress(PG_FUNCTION_ARGS)
static bool gidx_is_unknown(const GIDX *a)
Datum gserialized_gist_geog_distance(PG_FUNCTION_ARGS)
LWGEOM * lwgeom_from_gserialized(const GSERIALIZED *g)
Allocate a new LWGEOM from a GSERIALIZED.
#define WGS84_RADIUS
Definition: liblwgeom.h:116
PG_FUNCTION_INFO_V1(gserialized_distance_nd)
uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition: lwgeom.c:829
static float gidx_volume(GIDX *a)
Datum gserialized_contains(PG_FUNCTION_ARGS)
void lwpoint_free(LWPOINT *pt)
Definition: lwpoint.c:182
void lwgeom_free(LWGEOM *geom)
Definition: lwgeom.c:1050
static void gserialized_gist_picksplit_constructsplit(GIST_SPLITVEC *v, OffsetNumber *list1, int nlist1, GIDX **union1, OffsetNumber *list2, int nlist2, GIDX **union2)
Datum gserialized_gist_penalty(PG_FUNCTION_ARGS)
#define LW_SUCCESS
Definition: liblwgeom.h:65
static void gidx_set_unknown(GIDX *a)
static float gidx_union_volume(GIDX *a, GIDX *b)
LWGEOM * lwgeom_closest_line_3d(const LWGEOM *lw1, const LWGEOM *lw2)
Definition: measures3d.c:64
static int gserialized_datum_predicate(Datum gs1, Datum gs2, gidx_predicate predicate)
Support function.
double lwgeom_length_2d(const LWGEOM *geom)
Definition: lwgeom.c:1668
Datum gserialized_gist_union(PG_FUNCTION_ARGS)
int lwgeom_has_z(const LWGEOM *geom)
Return LW_TRUE if geometry has Z ordinates.
Definition: lwgeom.c:836
static bool gserialized_gist_consistent_leaf(GIDX *key, GIDX *query, StrategyNumber strategy)
double lwgeom_interpolate_point(const LWGEOM *lwin, const LWPOINT *lwpt)
Find the measure value at the location on the line closest to the point.
#define LW_FAILURE
Definition: liblwgeom.h:64
static void gidx_dimensionality_check(GIDX **a, GIDX **b)
static bool gidx_equals(GIDX *a, GIDX *b)
GSERIALIZED * gserialized_expand(GSERIALIZED *g, double distance)
Return a GSERIALIZED with an expanded bounding box.
#define LIMIT_RATIO
static void gidx_validate(GIDX *b)
Datum gserialized_gist_distance(PG_FUNCTION_ARGS)
Datum gserialized_gist_compress(PG_FUNCTION_ARGS)
Datum gserialized_distance_nd(PG_FUNCTION_ARGS)
#define LW_FALSE
Definition: liblwgeom.h:62
#define BELOW(d)
static double gidx_distance(const GIDX *a, const GIDX *b, int m_is_time)
Calculate the box->box distance.
Datum gidx_in(PG_FUNCTION_ARGS)
#define LW_TRUE
Return types for functions with status returns.
Definition: liblwgeom.h:61
static bool gserialized_gist_picksplit_badratios(int *pos, int dims)
Datum gserialized_gist_same(PG_FUNCTION_ARGS)
static int gserialized_gist_picksplit_badratio(int x, int y)
static void gserialized_gist_picksplit_fallback(GistEntryVector *entryvec, GIST_SPLITVEC *v)
static double gidx_distance_leaf_centroid(const GIDX *a, const GIDX *b)
Calculate the centroid->centroid distance between the boxes.
int lwpoint_getPoint4d_p(const LWPOINT *point, POINT4D *out)
Definition: lwpoint.c:44
static float gidx_inter_volume(GIDX *a, GIDX *b)
tuple x
Definition: pixval.py:53
Datum distance(PG_FUNCTION_ARGS)
LWLINE * lwgeom_as_lwline(const LWGEOM *lwgeom)
Definition: lwgeom.c:89
bool(* gidx_predicate)(GIDX *a, GIDX *b)
static void gserialized_gist_picksplit_addlist(OffsetNumber *list, GIDX **box_union, GIDX *box_current, int *pos, int num)
Datum gserialized_gist_picksplit(PG_FUNCTION_ARGS)
static bool gidx_overlaps(GIDX *a, GIDX *b)
#define FALSE
Definition: dbfopen.c:168
Datum gserialized_overlaps(PG_FUNCTION_ARGS)
static bool gidx_contains(GIDX *a, GIDX *b)
double lwgeom_length(const LWGEOM *geom)
Definition: lwgeom.c:1646
#define POINTTYPE
LWTYPE numbers, used internally by PostGIS.
Definition: liblwgeom.h:70
#define FPeq(A, B)
Definition: box2d.c:11
LWPOINT * lwline_get_lwpoint(const LWLINE *line, int where)
Returns freshly allocated LWPOINT that corresponds to the index where.
Definition: lwline.c:295
LWGEOM * lwgeom_closest_line(const LWGEOM *lw1, const LWGEOM *lw2)
Definition: measures.c:28
Datum gserialized_gist_consistent(PG_FUNCTION_ARGS)
int lwgeom_has_m(const LWGEOM *geom)
Return LW_TRUE if geometry has M ordinates.
Definition: lwgeom.c:843
#define ABOVE(d)
#define TRUE
Definition: dbfopen.c:169
static bool gserialized_gist_consistent_internal(GIDX *key, GIDX *query, StrategyNumber strategy)
Datum gidx_out(PG_FUNCTION_ARGS)
tuple y
Definition: pixval.py:54
This library is the generic geometry handling section of PostGIS.