PostGIS  2.5.0dev-r@@SVN_REVISION@@
gserialized_estimate.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * PostGIS is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * PostGIS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18  *
19  **********************************************************************
20  *
21  * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
22  *
23  **********************************************************************/
24 
25 
26 
27 /**********************************************************************
28  THEORY OF OPERATION
29 
30 The ANALYZE command hooks to a callback (gserialized_analyze_nd) that
31 calculates (compute_gserialized_stats_mode) two histograms of occurrences of
32 features, one for the 2D domain (and the && operator) and one for the
33 ND domain (and the &&& operator).
34 
35 Queries in PostgreSQL call into the selectivity sub-system to find out
36 the relative effectiveness of different clauses in sub-setting
37 relations. Queries with constant arguments call gserialized_gist_sel,
38 queries with relations on both sides call gserialized_gist_joinsel.
39 
40 gserialized_gist_sel sums up the values in the histogram that overlap
41 the constant search box.
42 
43 gserialized_gist_joinsel sums up the product of the overlapping
44 cells in each relation's histogram.
45 
46 Depending on the operator and type, the mode of selectivity calculation
47 will be 2D or ND.
48 
49 - geometry && geometry ==> 2D
50 - geometry &&& geometry ==> ND
51 - geography && geography ==> ND
52 
53 The 2D mode is put in effect by retrieving the 2D histogram from the
54 statistics cache and then allowing the generic ND calculations to
55 go to work.
56 
57 TO DO: More testing and examination of the &&& operator and mixed
58 dimensionality cases. (2D geometry) &&& (3D column), etc.
59 
60 **********************************************************************/
61 
62 #include "postgres.h"
63 
64 #include "access/genam.h"
65 #include "access/gin.h"
66 #include "access/gist.h"
67 #include "access/gist_private.h"
68 #include "access/gistscan.h"
69 #include "utils/datum.h"
70 #include "access/heapam.h"
71 #include "catalog/index.h"
72 #include "catalog/pg_am.h"
73 #include "miscadmin.h"
74 #include "storage/lmgr.h"
75 #include "catalog/namespace.h"
76 #include "catalog/indexing.h"
77 #if PG_VERSION_NUM >= 100000
78 #include "utils/regproc.h"
79 #include "utils/varlena.h"
80 #endif
81 #include "utils/tqual.h"
82 #include "utils/builtins.h"
83 #include "utils/datum.h"
84 #include "utils/snapmgr.h"
85 #include "utils/fmgroids.h"
86 #include "funcapi.h"
87 #include "access/heapam.h"
88 #include "catalog/pg_type.h"
89 #include "access/relscan.h"
90 
91 #include "executor/spi.h"
92 #include "fmgr.h"
93 #include "commands/vacuum.h"
94 #include "nodes/relation.h"
95 #include "parser/parsetree.h"
96 #include "utils/array.h"
97 #include "utils/lsyscache.h"
98 #include "utils/builtins.h"
99 #include "utils/syscache.h"
100 #include "utils/rel.h"
101 #include "utils/selfuncs.h"
102 
103 #include "../postgis_config.h"
104 
105 #if POSTGIS_PGSQL_VERSION >= 93
106  #include "access/htup_details.h"
107 #endif
108 
109 #include "stringbuffer.h"
110 #include "liblwgeom.h"
111 #include "lwgeom_pg.h" /* For debugging macros. */
112 #include "gserialized_gist.h" /* For index common functions */
113 
114 #include <math.h>
115 #if HAVE_IEEEFP_H
116 #include <ieeefp.h>
117 #endif
118 #include <float.h>
119 #include <string.h>
120 #include <stdio.h>
121 #include <errno.h>
122 #include <ctype.h>
123 
124 
125 /************************************************************************/
126 
127 
128 /* Fall back to older finite() if necessary */
129 #ifndef HAVE_ISFINITE
130 # ifdef HAVE_GNU_ISFINITE
131 # define _GNU_SOURCE
132 # else
133 # define isfinite finite
134 # endif
135 #endif
136 
137 
138 /* Prototypes */
139 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS);
140 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS);
141 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS);
142 Datum gserialized_gist_sel(PG_FUNCTION_ARGS);
143 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS);
144 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
145 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
146 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
147 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS);
148 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
149 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
150 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
151 
152 /* Local prototypes */
153 static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type);
154 static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type);
155 
156 
157 /* Old Prototype */
158 Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
159 
160 /*
161 * Assign a number to the n-dimensional statistics kind
162 *
163 * tgl suggested:
164 *
165 * 1-100: reserved for assignment by the core Postgres project
166 * 100-199: reserved for assignment by PostGIS
167 * 200-9999: reserved for other globally-known stats kinds
168 * 10000-32767: reserved for private site-local use
169 */
170 #define STATISTIC_KIND_ND 102
171 #define STATISTIC_KIND_2D 103
172 #define STATISTIC_SLOT_ND 0
173 #define STATISTIC_SLOT_2D 1
174 
175 /*
176 * To look-up the spatial index associated with a table we
177 * need to find GIST indexes using our spatial keys.
178 */
179 #define INDEX_KEY_ND "gidx"
180 #define INDEX_KEY_2D "box2df"
181 
182 /*
183 * The SD factor restricts the size of the statistics histogram
184 * based on the standard deviation of the extent of the data.
185 * SDFACTOR is the number of standard deviations from the mean
186 * the histogram will extend.
187 */
188 #define SDFACTOR 3.25
189 
195 #define ND_DIMS 4
196 
203 #define MIN_DIMENSION_WIDTH 0.000000001
204 
208 #define DEFAULT_ND_SEL 0.0001
209 #define DEFAULT_ND_JOINSEL 0.001
210 
214 #define FALLBACK_ND_SEL 0.2
215 #define FALLBACK_ND_JOINSEL 0.3
216 
222 typedef struct ND_BOX_T
223 {
224  float4 min[ND_DIMS];
225  float4 max[ND_DIMS];
226 } ND_BOX;
227 
231 typedef struct ND_IBOX_T
232 {
233  int min[ND_DIMS];
234  int max[ND_DIMS];
235 } ND_IBOX;
236 
237 
244 typedef struct ND_STATS_T
245 {
246  /* Dimensionality of the histogram. */
247  float4 ndims;
248 
249  /* Size of n-d histogram in each dimension. */
250  float4 size[ND_DIMS];
251 
252  /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
254 
255  /* How many rows in the table itself? */
257 
258  /* How many rows were in the sample that built this histogram? */
260 
261  /* How many not-Null/Empty features were in the sample? */
263 
264  /* How many features actually got sampled in the histogram? */
266 
267  /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
269 
270  /* How many cells did those histogram features cover? */
271  /* Since we are pro-rating coverage, this number should */
272  /* now always equal histogram_features */
274 
275  /* Variable length # of floats for histogram */
276  float4 value[1];
277 } ND_STATS;
278 
279 
280 
281 
288 static int
289 gbox_ndims(const GBOX* gbox)
290 {
291  int dims = 2;
292  if ( FLAGS_GET_GEODETIC(gbox->flags) )
293  return 3;
294  if ( FLAGS_GET_Z(gbox->flags) )
295  dims++;
296  if ( FLAGS_GET_M(gbox->flags) )
297  dims++;
298  return dims;
299 }
300 
306 static int
307 text_p_get_mode(const text *txt)
308 {
309  int mode = 2;
310  if (VARSIZE(txt) - VARHDRSZ <= 0)
311  return mode;
312  char *modestr = (char*)VARDATA(txt);
313  if ( modestr[0] == 'N' )
314  mode = 0;
315  return mode;
316 }
317 
318 
/*
* qsort() comparator for int values, ascending order.
* Returns -1, 0 or 1.
*/
static int
cmp_int (const void *a, const void *b)
{
	const int lhs = *((const int*)a);
	const int rhs = *((const int*)b);

	/* Difference of two comparisons avoids overflow of (lhs - rhs) */
	return (lhs > rhs) - (lhs < rhs);
}
335 
340 static int
341 range_quintile(int *vals, int nvals)
342 {
343  qsort(vals, nvals, sizeof(int), cmp_int);
344  return vals[4*nvals/5] - vals[nvals/5];
345 }
346 
/*
* Sum the first nvals entries of vals.
*
* The accumulator is a double, matching the element and return types;
* accumulating in a float would silently discard precision.
* Returns 0 when nvals is zero or negative.
*/
static double
total_double(const double *vals, int nvals)
{
	int i;
	double total = 0.0;

	for ( i = 0; i < nvals; i++ )
		total += vals[i];

	return total;
}
361 
362 #if POSTGIS_DEBUG_LEVEL >= 3
363 
/*
* Sum the first nvals entries of vals. Returns 0 when nvals is
* zero or negative.
*/
static int
total_int(const int *vals, int nvals)
{
	const int *cursor = vals;
	int remaining = nvals;
	int sum = 0;

	while ( remaining-- > 0 )
		sum += *cursor++;

	return sum;
}
378 
/*
* Arithmetic mean of the first nvals entries of vals, computed from
* the integer total.
*/
static double
avg(const int *vals, int nvals)
{
	const double sum = (double)total_int(vals, nvals);
	return sum / (double)nvals;
}
388 
/*
* Population standard deviation (divides by nvals, not nvals-1) of
* the first nvals entries of vals.
*/
static double
stddev(const int *vals, int nvals)
{
	const double mean = avg(vals, nvals);
	double sum_sq = 0;
	int k;

	/* Accumulate squared deviations from the mean */
	for ( k = 0; k < nvals; k++ )
	{
		const double delta = mean - (double)(vals[k]);
		sum_sq += delta * delta;
	}

	return sqrt(sum_sq / nvals);
}
407 #endif /* POSTGIS_DEBUG_LEVEL >= 3 */
408 
413 static int
414 nd_stats_value_index(const ND_STATS *stats, int *indexes)
415 {
416  int d;
417  int accum = 1, vdx = 0;
418 
419  /* Calculate the index into the 1-d values array that the (i,j,k,l) */
420  /* n-d histogram coordinate implies. */
421  /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
422  for ( d = 0; d < (int)(stats->ndims); d++ )
423  {
424  int size = (int)(stats->size[d]);
425  if ( indexes[d] < 0 || indexes[d] >= size )
426  {
427  POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
428  return -1;
429  }
430  vdx += indexes[d] * accum;
431  accum *= size;
432  }
433  return vdx;
434 }
435 
439 static char*
440 nd_box_to_json(const ND_BOX *nd_box, int ndims)
441 {
442  char *rv;
443  int i;
445 
446  stringbuffer_append(sb, "{\"min\":[");
447  for ( i = 0; i < ndims; i++ )
448  {
449  if ( i ) stringbuffer_append(sb, ",");
450  stringbuffer_aprintf(sb, "%.6g", nd_box->min[i]);
451  }
452  stringbuffer_append(sb, "],\"max\":[");
453  for ( i = 0; i < ndims; i++ )
454  {
455  if ( i ) stringbuffer_append(sb, ",");
456  stringbuffer_aprintf(sb, "%.6g", nd_box->max[i]);
457  }
458  stringbuffer_append(sb, "]}");
459 
462  return rv;
463 }
464 
465 
470 static char*
471 nd_stats_to_json(const ND_STATS *nd_stats)
472 {
473  char *json_extent, *str;
474  int d;
476  int ndims = (int)roundf(nd_stats->ndims);
477 
478  stringbuffer_append(sb, "{");
479  stringbuffer_aprintf(sb, "\"ndims\":%d,", ndims);
480 
481  /* Size */
482  stringbuffer_append(sb, "\"size\":[");
483  for ( d = 0; d < ndims; d++ )
484  {
485  if ( d ) stringbuffer_append(sb, ",");
486  stringbuffer_aprintf(sb, "%d", (int)roundf(nd_stats->size[d]));
487  }
488  stringbuffer_append(sb, "],");
489 
490  /* Extent */
491  json_extent = nd_box_to_json(&(nd_stats->extent), ndims);
492  stringbuffer_aprintf(sb, "\"extent\":%s,", json_extent);
493  pfree(json_extent);
494 
495  stringbuffer_aprintf(sb, "\"table_features\":%d,", (int)roundf(nd_stats->table_features));
496  stringbuffer_aprintf(sb, "\"sample_features\":%d,", (int)roundf(nd_stats->sample_features));
497  stringbuffer_aprintf(sb, "\"not_null_features\":%d,", (int)roundf(nd_stats->not_null_features));
498  stringbuffer_aprintf(sb, "\"histogram_features\":%d,", (int)roundf(nd_stats->histogram_features));
499  stringbuffer_aprintf(sb, "\"histogram_cells\":%d,", (int)roundf(nd_stats->histogram_cells));
500  stringbuffer_aprintf(sb, "\"cells_covered\":%d", (int)roundf(nd_stats->cells_covered));
501  stringbuffer_append(sb, "}");
502 
503  str = stringbuffer_getstringcopy(sb);
505  return str;
506 }
507 
508 
514 // static char*
515 // nd_stats_to_grid(const ND_STATS *stats)
516 // {
517 // char *rv;
518 // int j, k;
519 // int sizex = (int)roundf(stats->size[0]);
520 // int sizey = (int)roundf(stats->size[1]);
521 // stringbuffer_t *sb = stringbuffer_create();
522 //
523 // for ( k = 0; k < sizey; k++ )
524 // {
525 // for ( j = 0; j < sizex; j++ )
526 // {
527 // stringbuffer_aprintf(sb, "%3d ", (int)roundf(stats->value[j + k*sizex]));
528 // }
529 // stringbuffer_append(sb, "\n");
530 // }
531 //
532 // rv = stringbuffer_getstringcopy(sb);
533 // stringbuffer_destroy(sb);
534 // return rv;
535 // }
536 
537 
539 static int
540 nd_box_merge(const ND_BOX *source, ND_BOX *target)
541 {
542  int d;
543  for ( d = 0; d < ND_DIMS; d++ )
544  {
545  target->min[d] = Min(target->min[d], source->min[d]);
546  target->max[d] = Max(target->max[d], source->max[d]);
547  }
548  return true;
549 }
550 
552 static int
554 {
555  memset(a, 0, sizeof(ND_BOX));
556  return true;
557 }
558 
564 static int
566 {
567  int d;
568  for ( d = 0; d < ND_DIMS; d++ )
569  {
570  a->min[d] = FLT_MAX;
571  a->max[d] = -1 * FLT_MAX;
572  }
573  return true;
574 }
575 
577 static void
578 nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
579 {
580  int d = 0;
581  POSTGIS_DEBUGF(3, " %s", gbox_to_string(gbox));
582 
583  nd_box_init(nd_box);
584  nd_box->min[d] = gbox->xmin;
585  nd_box->max[d] = gbox->xmax;
586  d++;
587  nd_box->min[d] = gbox->ymin;
588  nd_box->max[d] = gbox->ymax;
589  d++;
590  if ( FLAGS_GET_GEODETIC(gbox->flags) )
591  {
592  nd_box->min[d] = gbox->zmin;
593  nd_box->max[d] = gbox->zmax;
594  return;
595  }
596  if ( FLAGS_GET_Z(gbox->flags) )
597  {
598  nd_box->min[d] = gbox->zmin;
599  nd_box->max[d] = gbox->zmax;
600  d++;
601  }
602  if ( FLAGS_GET_M(gbox->flags) )
603  {
604  nd_box->min[d] = gbox->mmin;
605  nd_box->max[d] = gbox->mmax;
606  d++;
607  }
608  return;
609 }
610 
614 static int
615 nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
616 {
617  int d;
618  for ( d = 0; d < ndims; d++ )
619  {
620  if ( (a->min[d] > b->max[d]) || (a->max[d] < b->min[d]) )
621  return false;
622  }
623  return true;
624 }
625 
629 static int
630 nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
631 {
632  int d;
633  for ( d = 0; d < ndims; d++ )
634  {
635  if ( ! ((a->min[d] < b->min[d]) && (a->max[d] > b->max[d])) )
636  return false;
637  }
638  return true;
639 }
640 
645 static int
646 nd_box_expand(ND_BOX *nd_box, double expansion_factor)
647 {
648  int d;
649  double size;
650  for ( d = 0; d < ND_DIMS; d++ )
651  {
652  size = nd_box->max[d] - nd_box->min[d];
653  if ( size <= 0 ) continue;
654  nd_box->min[d] -= size * expansion_factor / 2;
655  nd_box->max[d] += size * expansion_factor / 2;
656  }
657  return true;
658 }
659 
664 static inline int
665 nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
666 {
667  int d;
668 
669  POSTGIS_DEBUGF(4, " nd_box: %s", nd_box_to_json(nd_box, nd_stats->ndims));
670 
671  /* Initialize ibox */
672  memset(nd_ibox, 0, sizeof(ND_IBOX));
673 
674  /* In each dimension... */
675  for ( d = 0; d < nd_stats->ndims; d++ )
676  {
677  double smin = nd_stats->extent.min[d];
678  double smax = nd_stats->extent.max[d];
679  double width = smax - smin;
680  int size = roundf(nd_stats->size[d]);
681 
682  /* ... find cells the box overlaps with in this dimension */
683  nd_ibox->min[d] = floor(size * (nd_box->min[d] - smin) / width);
684  nd_ibox->max[d] = floor(size * (nd_box->max[d] - smin) / width);
685 
686  POSTGIS_DEBUGF(5, " stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
687  POSTGIS_DEBUGF(5, " overlap: dim %d: (%d, %d)", d, nd_ibox->min[d], nd_ibox->max[d]);
688 
689  /* Push any out-of range values into range */
690  nd_ibox->min[d] = Max(nd_ibox->min[d], 0);
691  nd_ibox->max[d] = Min(nd_ibox->max[d], size-1);
692  }
693  return true;
694 }
695 
699 static inline double
700 nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
701 {
702  int d;
703  bool covered = true;
704  double ivol = 1.0;
705  double vol2 = 1.0;
706  double vol1 = 1.0;
707 
708  for ( d = 0 ; d < ndims; d++ )
709  {
710  if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
711  return 0.0; /* Disjoint */
712 
713  if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
714  covered = false;
715  }
716 
717  if ( covered )
718  return 1.0;
719 
720  for ( d = 0; d < ndims; d++ )
721  {
722  double width1 = b1->max[d] - b1->min[d];
723  double width2 = b2->max[d] - b2->min[d];
724  double imin, imax, iwidth;
725 
726  vol1 *= width1;
727  vol2 *= width2;
728 
729  imin = Max(b1->min[d], b2->min[d]);
730  imax = Min(b1->max[d], b2->max[d]);
731  iwidth = imax - imin;
732  iwidth = Max(0.0, iwidth);
733 
734  ivol *= iwidth;
735  }
736 
737  if ( vol2 == 0.0 )
738  return vol2;
739 
740  return ivol / vol2;
741 }
742 
743 
759 static int
760 nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
761 {
762  /* How many bins shall we use in figuring out the distribution? */
763  static int num_bins = 50;
764  int d, i, k, range;
765  int counts[num_bins];
766  double smin, smax; /* Spatial min, spatial max */
767  double swidth; /* Spatial width of dimension */
768 #if POSTGIS_DEBUG_LEVEL >= 3
769  double average, sdev, sdev_ratio;
770 #endif
771  int bmin, bmax; /* Bin min, bin max */
772  const ND_BOX *ndb;
773 
774  /* For each dimension... */
775  for ( d = 0; d < ndims; d++ )
776  {
777  /* Initialize counts for this dimension */
778  memset(counts, 0, sizeof(int)*num_bins);
779 
780  smin = extent->min[d];
781  smax = extent->max[d];
782  swidth = smax - smin;
783 
784  /* Don't try and calculate distribution of overly narrow dimensions */
785  if ( swidth < MIN_DIMENSION_WIDTH )
786  {
787  distribution[d] = 0;
788  continue;
789  }
790 
791  /* Sum up the overlaps of each feature with the dimensional bins */
792  for ( i = 0; i < num_boxes; i++ )
793  {
794  double minoffset, maxoffset;
795 
796  /* Skip null entries */
797  ndb = nd_boxes[i];
798  if ( ! ndb ) continue;
799 
800  /* Where does box fall relative to the working range */
801  minoffset = ndb->min[d] - smin;
802  maxoffset = ndb->max[d] - smin;
803 
804  /* Skip boxes that our outside our working range */
805  if ( minoffset < 0 || minoffset > swidth ||
806  maxoffset < 0 || maxoffset > swidth )
807  {
808  continue;
809  }
810 
811  /* What bins does this range correspond to? */
812  bmin = num_bins * (minoffset) / swidth;
813  bmax = num_bins * (maxoffset) / swidth;
814 
815  POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
816 
817  /* Increment the counts in all the bins this feature overlaps */
818  for ( k = bmin; k <= bmax; k++ )
819  {
820  counts[k] += 1;
821  }
822 
823  }
824 
825  /* How dispersed is the distribution of features across bins? */
826  range = range_quintile(counts, num_bins);
827 
828 #if POSTGIS_DEBUG_LEVEL >= 3
829  average = avg(counts, num_bins);
830  sdev = stddev(counts, num_bins);
831  sdev_ratio = sdev/average;
832 
833  POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
834  POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
835  POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
836  POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
837 #endif
838 
839  distribution[d] = range;
840  }
841 
842  return true;
843 }
844 
850 static inline int
851 nd_increment(ND_IBOX *ibox, int ndims, int *counter)
852 {
853  int d = 0;
854 
855  while ( d < ndims )
856  {
857  if ( counter[d] < ibox->max[d] )
858  {
859  counter[d] += 1;
860  break;
861  }
862  counter[d] = ibox->min[d];
863  d++;
864  }
865  /* That's it, cannot increment any more! */
866  if ( d == ndims )
867  return false;
868 
869  /* Increment complete! */
870  return true;
871 }
872 
873 static ND_STATS*
874 pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
875 {
876  int stats_kind = STATISTIC_KIND_ND;
877  int rv;
878  ND_STATS *nd_stats;
879 
880  /* If we're in 2D mode, set the kind appropriately */
881  if ( mode == 2 ) stats_kind = STATISTIC_KIND_2D;
882 
883  /* Then read the geom status histogram from that */
884 
885 #if POSTGIS_PGSQL_VERSION < 100
886  float4 *floatptr;
887  int nvalues;
888 
889  rv = get_attstatsslot(stats_tuple, 0, 0, stats_kind, InvalidOid,
890  NULL, NULL, NULL, &floatptr, &nvalues);
891 
892  if ( ! rv ) {
893  POSTGIS_DEBUGF(2,
894  "no slot of kind %d in stats tuple", stats_kind);
895  return NULL;
896  }
897 
898  /* Clone the stats here so we can release the attstatsslot immediately */
899  nd_stats = palloc(sizeof(float) * nvalues);
900  memcpy(nd_stats, floatptr, sizeof(float) * nvalues);
901 
902  /* Clean up */
903  free_attstatsslot(0, NULL, 0, floatptr, nvalues);
904 #else /* PostgreSQL 10 or higher */
905  AttStatsSlot sslot;
906  rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
907  ATTSTATSSLOT_NUMBERS);
908  if ( ! rv ) {
909  POSTGIS_DEBUGF(2,
910  "no slot of kind %d in stats tuple", stats_kind);
911  return NULL;
912  }
913 
914  /* Clone the stats here so we can release the attstatsslot immediately */
915  nd_stats = palloc(sizeof(float4) * sslot.nnumbers);
916  memcpy(nd_stats, sslot.numbers, sizeof(float4) * sslot.nnumbers);
917 
918  free_attstatsslot(&sslot);
919 #endif
920 
921  return nd_stats;
922 }
923 
928 static ND_STATS*
929 pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
930 {
931  HeapTuple stats_tuple = NULL;
932  ND_STATS *nd_stats;
933 
934  /* First pull the stats tuple for the whole tree */
935  if ( ! only_parent )
936  {
937  POSTGIS_DEBUGF(2, "searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
938  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(true));
939  if ( stats_tuple )
940  POSTGIS_DEBUGF(2, "found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
941  }
942  /* Fall-back to main table stats only, if not found for whole tree or explicitly ignored */
943  if ( only_parent || ! stats_tuple )
944  {
945  POSTGIS_DEBUGF(2, "searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
946  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(false));
947  if ( stats_tuple )
948  POSTGIS_DEBUGF(2, "found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
949  }
950  if ( ! stats_tuple )
951  {
952  POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
953  return NULL;
954  }
955 
956  nd_stats = pg_nd_stats_from_tuple(stats_tuple, mode);
957  ReleaseSysCache(stats_tuple);
958  if ( ! nd_stats )
959  {
960  POSTGIS_DEBUGF(2,
961  "histogram for attribute %d of table \"%s\" does not exist?",
962  att_num, get_rel_name(table_oid));
963  }
964 
965  return nd_stats;
966 }
967 
976 static ND_STATS*
977 pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
978 {
979  const char *att_name = text2cstring(att_text);
980  AttrNumber att_num;
981 
982  /* We know the name? Look up the num */
983  if ( att_text )
984  {
985  /* Get the attribute number */
986  att_num = get_attnum(table_oid, att_name);
987  if ( ! att_num ) {
988  elog(ERROR, "attribute \"%s\" does not exist", att_name);
989  return NULL;
990  }
991  }
992  else
993  {
994  elog(ERROR, "attribute name is null");
995  return NULL;
996  }
997 
998  return pg_get_nd_stats(table_oid, att_num, mode, only_parent);
999 }
1000 
1014 static float8
1016 {
1017  int ncells1, ncells2;
1018  int ndims1, ndims2, ndims;
1019  double ntuples_max;
1020  double ntuples_not_null1, ntuples_not_null2;
1021 
1022  ND_BOX extent1, extent2;
1023  ND_IBOX ibox1, ibox2;
1024  int at1[ND_DIMS];
1025  int at2[ND_DIMS];
1026  double min1[ND_DIMS];
1027  double width1[ND_DIMS];
1028  double cellsize1[ND_DIMS];
1029  int size2[ND_DIMS];
1030  double min2[ND_DIMS];
1031  double width2[ND_DIMS];
1032  double cellsize2[ND_DIMS];
1033  int size1[ND_DIMS];
1034  int d;
1035  double val = 0;
1036  float8 selectivity;
1037 
1038  /* Drop out on null inputs */
1039  if ( ! ( s1 && s2 ) )
1040  {
1041  elog(NOTICE, " estimate_join_selectivity called with null inputs");
1042  return FALLBACK_ND_SEL;
1043  }
1044 
1045  /* We need to know how many cells each side has... */
1046  ncells1 = (int)roundf(s1->histogram_cells);
1047  ncells2 = (int)roundf(s2->histogram_cells);
1048 
1049  /* ...so that we can drive the summation loop with the smaller histogram. */
1050  if ( ncells1 > ncells2 )
1051  {
1052  const ND_STATS *stats_tmp = s1;
1053  s1 = s2;
1054  s2 = stats_tmp;
1055  }
1056 
1057  POSTGIS_DEBUGF(3, "s1: %s", nd_stats_to_json(s1));
1058  POSTGIS_DEBUGF(3, "s2: %s", nd_stats_to_json(s2));
1059 
1060  /* Re-read that info after the swap */
1061  ncells1 = (int)roundf(s1->histogram_cells);
1062  ncells2 = (int)roundf(s2->histogram_cells);
1063 
1064  /* Q: What's the largest possible join size these relations can create? */
1065  /* A: The product of the # of non-null rows in each relation. */
1066  ntuples_not_null1 = s1->table_features * (s1->not_null_features / s1->sample_features);
1067  ntuples_not_null2 = s2->table_features * (s2->not_null_features / s2->sample_features);
1068  ntuples_max = ntuples_not_null1 * ntuples_not_null2;
1069 
1070  /* Get the ndims as ints */
1071  ndims1 = (int)roundf(s1->ndims);
1072  ndims2 = (int)roundf(s2->ndims);
1073  ndims = Max(ndims1, ndims2);
1074 
1075  /* Get the extents */
1076  extent1 = s1->extent;
1077  extent2 = s2->extent;
1078 
1079  /* If relation stats do not intersect, join is very very selective. */
1080  if ( ! nd_box_intersects(&extent1, &extent2, ndims) )
1081  {
1082  POSTGIS_DEBUG(3, "relation stats do not intersect, returning 0");
1083  PG_RETURN_FLOAT8(0.0);
1084  }
1085 
1086  /*
1087  * First find the index range of the part of the smaller
1088  * histogram that overlaps the larger one.
1089  */
1090  if ( ! nd_box_overlap(s1, &extent2, &ibox1) )
1091  {
1092  POSTGIS_DEBUG(3, "could not calculate overlap of relations");
1093  PG_RETURN_FLOAT8(FALLBACK_ND_JOINSEL);
1094  }
1095 
1096  /* Initialize counters / constants on s1 */
1097  for ( d = 0; d < ndims1; d++ )
1098  {
1099  at1[d] = ibox1.min[d];
1100  min1[d] = s1->extent.min[d];
1101  width1[d] = s1->extent.max[d] - s1->extent.min[d];
1102  size1[d] = (int)roundf(s1->size[d]);
1103  cellsize1[d] = width1[d] / size1[d];
1104  }
1105 
1106  /* Initialize counters / constants on s2 */
1107  for ( d = 0; d < ndims2; d++ )
1108  {
1109  min2[d] = s2->extent.min[d];
1110  width2[d] = s2->extent.max[d] - s2->extent.min[d];
1111  size2[d] = (int)roundf(s2->size[d]);
1112  cellsize2[d] = width2[d] / size2[d];
1113  }
1114 
1115  /* For each affected cell of s1... */
1116  do
1117  {
1118  double val1;
1119  /* Construct the bounds of this cell */
1120  ND_BOX nd_cell1;
1121  nd_box_init(&nd_cell1);
1122  for ( d = 0; d < ndims1; d++ )
1123  {
1124  nd_cell1.min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1125  nd_cell1.max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1126  }
1127 
1128  /* Find the cells of s2 that cell1 overlaps.. */
1129  nd_box_overlap(s2, &nd_cell1, &ibox2);
1130 
1131  /* Initialize counter */
1132  for ( d = 0; d < ndims2; d++ )
1133  {
1134  at2[d] = ibox2.min[d];
1135  }
1136 
1137  POSTGIS_DEBUGF(3, "at1 %d,%d %s", at1[0], at1[1], nd_box_to_json(&nd_cell1, ndims1));
1138 
1139  /* Get the value at this cell */
1140  val1 = s1->value[nd_stats_value_index(s1, at1)];
1141 
1142  /* For each overlapped cell of s2... */
1143  do
1144  {
1145  double ratio2;
1146  double val2;
1147 
1148  /* Construct the bounds of this cell */
1149  ND_BOX nd_cell2;
1150  nd_box_init(&nd_cell2);
1151  for ( d = 0; d < ndims2; d++ )
1152  {
1153  nd_cell2.min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1154  nd_cell2.max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1155  }
1156 
1157  POSTGIS_DEBUGF(3, " at2 %d,%d %s", at2[0], at2[1], nd_box_to_json(&nd_cell2, ndims2));
1158 
1159  /* Calculate overlap ratio of the cells */
1160  ratio2 = nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1161 
1162  /* Multiply the cell counts, scaled by overlap ratio */
1163  val2 = s2->value[nd_stats_value_index(s2, at2)];
1164  POSTGIS_DEBUGF(3, " val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1165  val += val1 * (val2 * ratio2);
1166  }
1167  while ( nd_increment(&ibox2, ndims2, at2) );
1168 
1169  }
1170  while( nd_increment(&ibox1, ndims1, at1) );
1171 
1172  POSTGIS_DEBUGF(3, "val of histogram = %g", val);
1173 
1174  /*
1175  * In order to compare our total cell count "val" to the
1176  * ntuples_max, we need to scale val up to reflect a full
1177  * table estimate. So, multiply by ratio of table size to
1178  * sample size.
1179  */
1180  val *= (s1->table_features / s1->sample_features);
1181  val *= (s2->table_features / s2->sample_features);
1182 
1183  POSTGIS_DEBUGF(3, "val scaled to full table size = %g", val);
1184 
1185  /*
1186  * Because the cell counts are over-determined due to
1187  * double counting of features that overlap multiple cells
1188  * (see the compute_gserialized_stats routine)
1189  * we also have to scale our cell count "val" *down*
1190  * to adjust for the double counting.
1191  */
1192 // val /= (s1->cells_covered / s1->histogram_features);
1193 // val /= (s2->cells_covered / s2->histogram_features);
1194 
1195  /*
1196  * Finally, the selectivity is the estimated number of
1197  * rows to be returned divided by the maximum possible
1198  * number of rows that can be returned.
1199  */
1200  selectivity = val / ntuples_max;
1201 
1202  /* Guard against over-estimates and crazy numbers :) */
1203  if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1204  {
1205  selectivity = DEFAULT_ND_JOINSEL;
1206  }
1207  else if ( selectivity > 1.0 )
1208  {
1209  selectivity = 1.0;
1210  }
1211 
1212  return selectivity;
1213 }
1214 
1220 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
1221 {
1222  PG_RETURN_DATUM(DirectFunctionCall5(
1224  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1225  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1226  Int32GetDatum(0) /* ND mode */
1227  ));
1228 }
1229 
1235 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
1236 {
1237  PG_RETURN_DATUM(DirectFunctionCall5(
1239  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1240  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1241  Int32GetDatum(2) /* 2D mode */
1242  ));
1243 }
1244 
1254 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
1255 {
1256  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1257  /* Oid operator = PG_GETARG_OID(1); */
1258  List *args = (List *) PG_GETARG_POINTER(2);
1259  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1260  int mode = PG_GETARG_INT32(4);
1261 
1262  Node *arg1, *arg2;
1263  Var *var1, *var2;
1264  Oid relid1, relid2;
1265 
1266  ND_STATS *stats1, *stats2;
1267  float8 selectivity;
1268 
1269  /* Only respond to an inner join/unknown context join */
1270  if (jointype != JOIN_INNER)
1271  {
1272  elog(DEBUG1, "%s: jointype %d not supported", __func__, jointype);
1273  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1274  }
1275 
1276  /* Find Oids of the geometry columns we are working with */
1277  arg1 = (Node*) linitial(args);
1278  arg2 = (Node*) lsecond(args);
1279  var1 = (Var*) arg1;
1280  var2 = (Var*) arg2;
1281 
1282  /* We only do column joins right now, no functional joins */
1283  /* TODO: handle g1 && ST_Expand(g2) */
1284  if (!IsA(arg1, Var) || !IsA(arg2, Var))
1285  {
1286  elog(DEBUG1, "%s called with arguments that are not column references", __func__);
1287  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1288  }
1289 
1290  /* What are the Oids of our tables/relations? */
1291  relid1 = getrelid(var1->varno, root->parse->rtable);
1292  relid2 = getrelid(var2->varno, root->parse->rtable);
1293 
1294  POSTGIS_DEBUGF(3, "using relations \"%s\" Oid(%d), \"%s\" Oid(%d)",
1295  get_rel_name(relid1) ? get_rel_name(relid1) : "NULL", relid1, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2);
1296 
1297  /* Pull the stats from the stats system. */
1298  stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
1299  stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
1300 
1301  /* If we can't get stats, we have to stop here! */
1302  if ( ! stats1 )
1303  {
1304  POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid1) ? get_rel_name(relid1) : "NULL" , relid1);
1305  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1306  }
1307  else if ( ! stats2 )
1308  {
1309  POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2);
1310  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1311  }
1312 
1313  selectivity = estimate_join_selectivity(stats1, stats2);
1314  POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
1315 
1316  pfree(stats1);
1317  pfree(stats2);
1318  PG_RETURN_FLOAT8(selectivity);
1319 }
1320 
1321 
1322 
1323 
1342 static void
1343 compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1344  int sample_rows, double total_rows, int mode)
1345 {
1346  MemoryContext old_context;
1347  int d, i; /* Counters */
1348  int notnull_cnt = 0; /* # not null rows in the sample */
1349  int null_cnt = 0; /* # null rows in the sample */
1350  int histogram_features = 0; /* # rows that actually got counted in the histogram */
1351 
1352  ND_STATS *nd_stats; /* Our histogram */
1353  size_t nd_stats_size; /* Size to allocate */
1354 
1355  double total_width = 0; /* # of bytes used by sample */
1356  double total_sample_volume = 0; /* Area/volume coverage of the sample */
1357  double total_cell_count = 0; /* # of cells in histogram affected by sample */
1358 
1359  ND_BOX sum; /* Sum of extents of sample boxes */
1360  ND_BOX avg; /* Avg of extents of sample boxes */
1361  ND_BOX stddev; /* StdDev of extents of sample boxes */
1362 
1363  const ND_BOX **sample_boxes; /* ND_BOXes for each of the sample features */
1364  ND_BOX sample_extent; /* Extent of the raw sample */
1365  int histo_size[ND_DIMS]; /* histogram nrows, ncols, etc */
1366  ND_BOX histo_extent; /* Spatial extent of the histogram */
1367  ND_BOX histo_extent_new; /* Temporary variable */
1368  int histo_cells_target; /* Number of cells we will shoot for, given the stats target */
1369  int histo_cells; /* Number of cells in the histogram */
1370  int histo_cells_new = 1; /* Temporary variable */
1371 
1372  int ndims = 2; /* Dimensionality of the sample */
1373  int histo_ndims = 0; /* Dimensionality of the histogram */
1374  double sample_distribution[ND_DIMS]; /* How homogeneous is distribution of sample in each axis? */
1375  double total_distribution; /* Total of sample_distribution */
1376 
1377  int stats_slot; /* What slot is this data going into? (2D vs ND) */
1378  int stats_kind; /* And this is what? (2D vs ND) */
1379 
1380  /* Initialize sum and stddev */
1381  nd_box_init(&sum);
1382  nd_box_init(&stddev);
1383 
1384  /*
1385  * This is where gserialized_analyze_nd
1386  * should put its' custom parameters.
1387  */
1388  /* void *mystats = stats->extra_data; */
1389 
1390  POSTGIS_DEBUG(2, "compute_gserialized_stats called");
1391  POSTGIS_DEBUGF(3, " # sample_rows: %d", sample_rows);
1392  POSTGIS_DEBUGF(3, " estimate of total_rows: %.6g", total_rows);
1393 
1394  /*
1395  * We might need less space, but don't think
1396  * its worth saving...
1397  */
1398  sample_boxes = palloc(sizeof(ND_BOX*) * sample_rows);
1399 
1400  /*
1401  * First scan:
1402  * o read boxes
1403  * o find dimensionality of the sample
1404  * o find extent of the sample
1405  * o count null-infinite/not-null values
1406  * o compute total_width
1407  * o compute total features's box area (for avgFeatureArea)
1408  * o sum features box coordinates (for standard deviation)
1409  */
1410  for ( i = 0; i < sample_rows; i++ )
1411  {
1412  Datum datum;
1413  GSERIALIZED *geom;
1414  GBOX gbox;
1415  ND_BOX *nd_box;
1416  bool is_null;
1417  bool is_copy;
1418 
1419  datum = fetchfunc(stats, i, &is_null);
1420 
1421  /* Skip all NULLs. */
1422  if ( is_null )
1423  {
1424  POSTGIS_DEBUGF(4, " skipped null geometry %d", i);
1425  null_cnt++;
1426  continue;
1427  }
1428 
1429  /* Read the bounds from the gserialized. */
1430  geom = (GSERIALIZED *)PG_DETOAST_DATUM(datum);
1431  is_copy = VARATT_IS_EXTENDED(datum);
1432  if ( LW_FAILURE == gserialized_get_gbox_p(geom, &gbox) )
1433  {
1434  /* Skip empties too. */
1435  POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
1436  continue;
1437  }
1438 
1439  /* If we're in 2D mode, zero out the higher dimensions for "safety" */
1440  if ( mode == 2 )
1441  gbox.zmin = gbox.zmax = gbox.mmin = gbox.mmax = 0.0;
1442 
1443  /* Check bounds for validity (finite and not NaN) */
1444  if ( ! gbox_is_valid(&gbox) )
1445  {
1446  POSTGIS_DEBUGF(3, " skipped infinite/nan geometry %d", i);
1447  continue;
1448  }
1449 
1450  /*
1451  * In N-D mode, set the ndims to the maximum dimensionality found
1452  * in the sample. Otherwise, leave at ndims == 2.
1453  */
1454  if ( mode != 2 )
1455  ndims = Max(gbox_ndims(&gbox), ndims);
1456 
1457  /* Convert gbox to n-d box */
1458  nd_box = palloc(sizeof(ND_BOX));
1459  nd_box_from_gbox(&gbox, nd_box);
1460 
1461  /* Cache n-d bounding box */
1462  sample_boxes[notnull_cnt] = nd_box;
1463 
1464  /* Initialize sample extent before merging first entry */
1465  if ( ! notnull_cnt )
1466  nd_box_init_bounds(&sample_extent);
1467 
1468  /* Add current sample to overall sample extent */
1469  nd_box_merge(nd_box, &sample_extent);
1470 
1471  /* How many bytes does this sample use? */
1472  total_width += VARSIZE(geom);
1473 
1474  /* Add bounds coordinates to sums for stddev calculation */
1475  for ( d = 0; d < ndims; d++ )
1476  {
1477  sum.min[d] += nd_box->min[d];
1478  sum.max[d] += nd_box->max[d];
1479  }
1480 
1481  /* Increment our "good feature" count */
1482  notnull_cnt++;
1483 
1484  /* Free up memory if our sample geometry was copied */
1485  if ( is_copy )
1486  pfree(geom);
1487 
1488  /* Give backend a chance of interrupting us */
1489  vacuum_delay_point();
1490  }
1491 
1492  /*
1493  * We'll build a histogram having stats->attr->attstattarget cells
1494  * on each side, within reason... we'll use ndims*10000 as the
1495  * maximum number of cells.
1496  * Also, if we're sampling a relatively small table, we'll try to ensure that
1497  * we have an average of 5 features for each cell so the histogram isn't
1498  * so sparse.
1499  */
1500  histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
1501  histo_cells_target = Min(histo_cells_target, ndims * 10000);
1502  histo_cells_target = Min(histo_cells_target, (int)(total_rows/5));
1503  POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
1504  POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
1505 
1506  /* If there's no useful features, we can't work out stats */
1507  if ( ! notnull_cnt )
1508  {
1509  elog(NOTICE, "no non-null/empty features, unable to compute statistics");
1510  stats->stats_valid = false;
1511  return;
1512  }
1513 
1514  POSTGIS_DEBUGF(3, " sample_extent: %s", nd_box_to_json(&sample_extent, ndims));
1515 
1516  /*
1517  * Second scan:
1518  * o compute standard deviation
1519  */
1520  for ( d = 0; d < ndims; d++ )
1521  {
1522  /* Calculate average bounds values */
1523  avg.min[d] = sum.min[d] / notnull_cnt;
1524  avg.max[d] = sum.max[d] / notnull_cnt;
1525 
1526  /* Calculate standard deviation for this dimension bounds */
1527  for ( i = 0; i < notnull_cnt; i++ )
1528  {
1529  const ND_BOX *ndb = sample_boxes[i];
1530  stddev.min[d] += (ndb->min[d] - avg.min[d]) * (ndb->min[d] - avg.min[d]);
1531  stddev.max[d] += (ndb->max[d] - avg.max[d]) * (ndb->max[d] - avg.max[d]);
1532  }
1533  stddev.min[d] = sqrt(stddev.min[d] / notnull_cnt);
1534  stddev.max[d] = sqrt(stddev.max[d] / notnull_cnt);
1535 
1536  /* Histogram bounds for this dimension bounds is avg +/- SDFACTOR * stdev */
1537  histo_extent.min[d] = Max(avg.min[d] - SDFACTOR * stddev.min[d], sample_extent.min[d]);
1538  histo_extent.max[d] = Min(avg.max[d] + SDFACTOR * stddev.max[d], sample_extent.max[d]);
1539  }
1540 
1541  /*
1542  * Third scan:
1543  * o skip hard deviants
1544  * o compute new histogram box
1545  */
1546  nd_box_init_bounds(&histo_extent_new);
1547  for ( i = 0; i < notnull_cnt; i++ )
1548  {
1549  const ND_BOX *ndb = sample_boxes[i];
1550  /* Skip any hard deviants (boxes entirely outside our histo_extent */
1551  if ( ! nd_box_intersects(&histo_extent, ndb, ndims) )
1552  {
1553  POSTGIS_DEBUGF(4, " feature %d is a hard deviant, skipped", i);
1554  sample_boxes[i] = NULL;
1555  continue;
1556  }
1557  /* Expand our new box to fit all the other features. */
1558  nd_box_merge(ndb, &histo_extent_new);
1559  }
1560  /*
1561  * Expand the box slightly (1%) to avoid edge effects
1562  * with objects that are on the boundary
1563  */
1564  nd_box_expand(&histo_extent_new, 0.01);
1565  histo_extent = histo_extent_new;
1566 
1567  /*
1568  * How should we allocate our histogram cells to the
1569  * different dimensions? We can't do it by raw dimensional width,
1570  * because in x/y/z space, the z can have different units
1571  * from the x/y. Similarly for x/y/t space.
1572  * So, we instead calculate how much features overlap
1573  * each other in their dimension to figure out which
1574  * dimensions have useful selectivity characteristics (more
1575  * variability in density) and therefor would find
1576  * more cells useful (to distinguish between dense places and
1577  * homogeneous places).
1578  */
1579  nd_box_array_distribution(sample_boxes, notnull_cnt, &histo_extent, ndims,
1580  sample_distribution);
1581 
1582  /*
1583  * The sample_distribution array now tells us how spread out the
1584  * data is in each dimension, so we use that data to allocate
1585  * the histogram cells we have available.
1586  * At this point, histo_cells_target is the approximate target number
1587  * of cells.
1588  */
1589 
1590  /*
1591  * Some dimensions have basically a uniform distribution, we want
1592  * to allocate no cells to those dimensions, only to dimensions
1593  * that have some interesting differences in data distribution.
1594  * Here we count up the number of interesting dimensions
1595  */
1596  for ( d = 0; d < ndims; d++ )
1597  {
1598  if ( sample_distribution[d] > 0 )
1599  histo_ndims++;
1600  }
1601 
1602  if ( histo_ndims == 0 )
1603  {
1604  /* Special case: all our dimensions had low variability! */
1605  /* We just divide the cells up evenly */
1606  POSTGIS_DEBUG(3, " special case: no axes have variability");
1607  histo_cells_new = 1;
1608  for ( d = 0; d < ndims; d++ )
1609  {
1610  histo_size[d] = 1 + (int)pow((double)histo_cells_target, 1/(double)ndims);
1611  POSTGIS_DEBUGF(3, " histo_size[d]: %d", histo_size[d]);
1612  histo_cells_new *= histo_size[d];
1613  }
1614  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1615  }
1616  else
1617  {
1618  /*
1619  * We're going to express the amount of variability in each dimension
1620  * as a proportion of the total variability and allocate cells in that
1621  * dimension relative to that proportion.
1622  */
1623  POSTGIS_DEBUG(3, " allocating histogram axes based on axis variability");
1624  total_distribution = total_double(sample_distribution, ndims); /* First get the total */
1625  POSTGIS_DEBUGF(3, " total_distribution: %.8g", total_distribution);
1626  histo_cells_new = 1; /* For the number of cells in the final histogram */
1627  for ( d = 0; d < ndims; d++ )
1628  {
1629  if ( sample_distribution[d] == 0 ) /* Uninteresting dimensions don't get any room */
1630  {
1631  histo_size[d] = 1;
1632  }
1633  else /* Interesting dimension */
1634  {
1635  /* How does this dims variability compare to the total? */
1636  float edge_ratio = (float)sample_distribution[d] / (float)total_distribution;
1637  /*
1638  * Scale the target cells number by the # of dims and ratio,
1639  * then take the appropriate root to get the estimated number of cells
1640  * on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
1641  */
1642  histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
1643  /* If something goes awry, just give this dim one slot */
1644  if ( ! histo_size[d] )
1645  histo_size[d] = 1;
1646  }
1647  histo_cells_new *= histo_size[d];
1648  }
1649  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1650  }
1651 
1652  /* Update histo_cells to the actual number of cells we need to allocate */
1653  histo_cells = histo_cells_new;
1654  POSTGIS_DEBUGF(3, " histo_cells: %d", histo_cells);
1655 
1656  /*
1657  * Create the histogram (ND_STATS) in the stats memory context
1658  */
1659  old_context = MemoryContextSwitchTo(stats->anl_context);
1660  nd_stats_size = sizeof(ND_STATS) + ((histo_cells - 1) * sizeof(float4));
1661  nd_stats = palloc(nd_stats_size);
1662  memset(nd_stats, 0, nd_stats_size); /* Initialize all values to 0 */
1663  MemoryContextSwitchTo(old_context);
1664 
1665  /* Initialize the #ND_STATS objects */
1666  nd_stats->ndims = ndims;
1667  nd_stats->extent = histo_extent;
1668  nd_stats->sample_features = sample_rows;
1669  nd_stats->table_features = total_rows;
1670  nd_stats->not_null_features = notnull_cnt;
1671  /* Copy in the histogram dimensions */
1672  for ( d = 0; d < ndims; d++ )
1673  nd_stats->size[d] = histo_size[d];
1674 
1675  /*
1676  * Fourth scan:
1677  * o fill histogram values with the proportion of
1678  * features' bbox overlaps: a feature's bvol
1679  * can fully overlap (1) or partially overlap
1680  * (fraction of 1) an histogram cell.
1681  *
1682  * Note that we are filling each cell with the "portion of
1683  * the feature's box that overlaps the cell". So, if we sum
1684  * up the values in the histogram, we could get the
1685  * histogram feature count.
1686  *
1687  */
1688  for ( i = 0; i < notnull_cnt; i++ )
1689  {
1690  const ND_BOX *nd_box;
1691  ND_IBOX nd_ibox;
1692  int at[ND_DIMS];
1693  int d;
1694  double num_cells = 0;
1695  double tmp_volume = 1.0;
1696  double min[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1697  double max[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1698  double cellsize[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1699 
1700  nd_box = sample_boxes[i];
1701  if ( ! nd_box ) continue; /* Skip Null'ed out hard deviants */
1702 
1703  /* Give backend a chance of interrupting us */
1704  vacuum_delay_point();
1705 
1706  /* Find the cells that overlap with this box and put them into the ND_IBOX */
1707  nd_box_overlap(nd_stats, nd_box, &nd_ibox);
1708  memset(at, 0, sizeof(int)*ND_DIMS);
1709 
1710  POSTGIS_DEBUGF(3, " feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1711  nd_ibox.min[0], nd_ibox.min[1], nd_ibox.min[2], nd_ibox.min[3],
1712  nd_ibox.max[0], nd_ibox.max[1], nd_ibox.max[2], nd_ibox.max[3]);
1713 
1714  for ( d = 0; d < nd_stats->ndims; d++ )
1715  {
1716  /* Initialize the starting values */
1717  at[d] = nd_ibox.min[d];
1718  min[d] = nd_stats->extent.min[d];
1719  max[d] = nd_stats->extent.max[d];
1720  cellsize[d] = (max[d] - min[d])/(nd_stats->size[d]);
1721 
1722  /* What's the volume (area) of this feature's box? */
1723  tmp_volume *= (nd_box->max[d] - nd_box->min[d]);
1724  }
1725 
1726  /* Add feature volume (area) to our total */
1727  total_sample_volume += tmp_volume;
1728 
1729  /*
1730  * Move through all the overlaped histogram cells values and
1731  * add the box overlap proportion to them.
1732  */
1733  do
1734  {
1735  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1736  double ratio;
1737  /* Create a box for this histogram cell */
1738  for ( d = 0; d < nd_stats->ndims; d++ )
1739  {
1740  nd_cell.min[d] = min[d] + (at[d]+0) * cellsize[d];
1741  nd_cell.max[d] = min[d] + (at[d]+1) * cellsize[d];
1742  }
1743 
1744  /*
1745  * If a feature box is completely inside one cell the ratio will be
1746  * 1.0. If a feature box is 50% in two cells, each cell will get
1747  * 0.5 added on.
1748  */
1749  ratio = nd_box_ratio(&nd_cell, nd_box, nd_stats->ndims);
1750  nd_stats->value[nd_stats_value_index(nd_stats, at)] += ratio;
1751  num_cells += ratio;
1752  POSTGIS_DEBUGF(3, " ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1753  POSTGIS_DEBUGF(3, " at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1754  }
1755  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1756 
1757  /* Keep track of overall number of overlaps counted */
1758  total_cell_count += num_cells;
1759  /* How many features have we added to this histogram? */
1760  histogram_features++;
1761  }
1762 
1763  POSTGIS_DEBUGF(3, " histogram_features: %d", histogram_features);
1764  POSTGIS_DEBUGF(3, " sample_rows: %d", sample_rows);
1765  POSTGIS_DEBUGF(3, " table_rows: %.6g", total_rows);
1766 
1767  /* Error out if we got no sample information */
1768  if ( ! histogram_features )
1769  {
1770  POSTGIS_DEBUG(3, " no stats have been gathered");
1771  elog(NOTICE, " no features lie in the stats histogram, invalid stats");
1772  stats->stats_valid = false;
1773  return;
1774  }
1775 
1776  nd_stats->histogram_features = histogram_features;
1777  nd_stats->histogram_cells = histo_cells;
1778  nd_stats->cells_covered = total_cell_count;
1779 
1780  /* Put this histogram data into the right slot/kind */
1781  if ( mode == 2 )
1782  {
1783  stats_slot = STATISTIC_SLOT_2D;
1784  stats_kind = STATISTIC_KIND_2D;
1785  }
1786  else
1787  {
1788  stats_slot = STATISTIC_SLOT_ND;
1789  stats_kind = STATISTIC_KIND_ND;
1790  }
1791 
1792  /* Write the statistics data */
1793  stats->stakind[stats_slot] = stats_kind;
1794  stats->staop[stats_slot] = InvalidOid;
1795  stats->stanumbers[stats_slot] = (float4*)nd_stats;
1796  stats->numnumbers[stats_slot] = nd_stats_size/sizeof(float4);
1797  stats->stanullfrac = (float4)null_cnt/sample_rows;
1798  stats->stawidth = total_width/notnull_cnt;
1799  stats->stadistinct = -1.0;
1800  stats->stats_valid = true;
1801 
1802  POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1803  POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1804  POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
1805  POSTGIS_DEBUGF(3, " out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1806  POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
1807  POSTGIS_DEBUG (3, " out: distinct values: all (no check done)");
1808  POSTGIS_DEBUGF(3, " out: %s", nd_stats_to_json(nd_stats));
1809  /*
1810  POSTGIS_DEBUGF(3, " out histogram:\n%s", nd_stats_to_grid(nd_stats));
1811  */
1812 
1813  return;
1814 }
1815 
1816 
1834 static void
1835 compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1836  int sample_rows, double total_rows)
1837 {
1838  /* 2D Mode */
1839  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
1840  /* ND Mode */
1841  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 0);
1842 }
1843 
1844 
1873 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
1874 {
1875  VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1876  Form_pg_attribute attr = stats->attr;
1877 
1878  POSTGIS_DEBUG(2, "gserialized_analyze_nd called");
1879 
1880  /* If the attstattarget column is negative, use the default value */
1881  /* NB: it is okay to scribble on stats->attr since it's a copy */
1882  if (attr->attstattarget < 0)
1883  attr->attstattarget = default_statistics_target;
1884 
1885  POSTGIS_DEBUGF(3, " attribute stat target: %d", attr->attstattarget);
1886 
1887  /* Setup the minimum rows and the algorithm function */
1888  stats->minrows = 300 * stats->attr->attstattarget;
1889  stats->compute_stats = compute_gserialized_stats;
1890 
1891  POSTGIS_DEBUGF(3, " minrows: %d", stats->minrows);
1892 
1893  /* Indicate we are done successfully */
1894  PG_RETURN_BOOL(true);
1895 }
1896 
1909 static float8
1910 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
1911 {
1912  int d; /* counter */
1913  float8 selectivity;
1914  ND_BOX nd_box;
1915  ND_IBOX nd_ibox;
1916  int at[ND_DIMS];
1917  double cell_size[ND_DIMS];
1918  double min[ND_DIMS];
1919  double max[ND_DIMS];
1920  double total_count = 0.0;
1921  int ndims_max;
1922 
1923  /* Calculate the overlap of the box on the histogram */
1924  if ( ! nd_stats )
1925  {
1926  elog(NOTICE, " estimate_selectivity called with null input");
1927  return FALLBACK_ND_SEL;
1928  }
1929 
1930  ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1931 
1932  /* Initialize nd_box. */
1933  nd_box_from_gbox(box, &nd_box);
1934 
1935  /*
1936  * To return 2D stats on an ND sample, we need to make the
1937  * 2D box cover the full range of the other dimensions in the
1938  * histogram.
1939  */
1940  POSTGIS_DEBUGF(3, " mode: %d", mode);
1941  if ( mode == 2 )
1942  {
1943  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1944  ndims_max = 2;
1945  }
1946 
1947  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1948  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1949 
1950  /*
1951  * Search box completely misses histogram extent?
1952  * We have to intersect in all N dimensions or else we have
1953  * zero interaction under the &&& operator. It's important
1954  * to short circuit in this case, as some of the tests below
1955  * will return junk results when run on non-intersecting inputs.
1956  */
1957  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1958  {
1959  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1960  return 0.0;
1961  }
1962 
1963  /* Search box completely contains histogram extent! */
1964  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1965  {
1966  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1967  return 1.0;
1968  }
1969 
1970  /* Calculate the overlap of the box on the histogram */
1971  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
1972  {
1973  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
1974  return FALLBACK_ND_SEL;
1975  }
1976 
1977  /* Work out some measurements of the histogram */
1978  for ( d = 0; d < nd_stats->ndims; d++ )
1979  {
1980  /* Cell size in each dim */
1981  min[d] = nd_stats->extent.min[d];
1982  max[d] = nd_stats->extent.max[d];
1983  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
1984  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
1985 
1986  /* Initialize the counter */
1987  at[d] = nd_ibox.min[d];
1988  }
1989 
1990  /* Move through all the overlap values and sum them */
1991  do
1992  {
1993  float cell_count, ratio;
1994  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1995 
1996  /* We have to pro-rate partially overlapped cells. */
1997  for ( d = 0; d < nd_stats->ndims; d++ )
1998  {
1999  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2000  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2001  }
2002 
2003  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2004  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2005 
2006  /* Add the pro-rated count for this cell to the overall total */
2007  total_count += cell_count * ratio;
2008  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2009  }
2010  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2011 
2012  /* Scale by the number of features in our histogram to get the proportion */
2013  selectivity = total_count / nd_stats->histogram_features;
2014 
2015  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2016  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2017  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2018  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2019 
2020  /* Prevent rounding overflows */
2021  if (selectivity > 1.0) selectivity = 1.0;
2022  else if (selectivity < 0.0) selectivity = 0.0;
2023 
2024  return selectivity;
2025 }
2026 
2027 
2028 
2034 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
2035 {
2036  Oid table_oid = PG_GETARG_OID(0);
2037  text *att_text = PG_GETARG_TEXT_P(1);
2038  ND_STATS *nd_stats;
2039  char *str;
2040  text *json;
2041  int mode = 2; /* default to 2D mode */
2042  bool only_parent = false; /* default to whole tree stats */
2043 
2044  /* Check if we've been asked to not use 2d mode */
2045  if ( ! PG_ARGISNULL(2) )
2046  mode = text_p_get_mode(PG_GETARG_TEXT_P(2));
2047 
2048  /* Check if we've been asked to only use stats from parent */
2049  if ( ! PG_ARGISNULL(3) )
2050  only_parent = PG_GETARG_BOOL(3);
2051 
2052  /* Retrieve the stats object */
2053  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, only_parent);
2054  if ( ! nd_stats )
2055  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text2cstring(att_text));
2056 
2057  /* Convert to JSON */
2058  str = nd_stats_to_json(nd_stats);
2059  json = cstring2text(str);
2060  pfree(str);
2061  pfree(nd_stats);
2062  PG_RETURN_TEXT_P(json);
2063 }
2064 
2065 
2071 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
2072 {
2073  Oid table_oid = PG_GETARG_OID(0);
2074  text *att_text = PG_GETARG_TEXT_P(1);
2075  Datum geom_datum = PG_GETARG_DATUM(2);
2076  GBOX gbox; /* search box read from gserialized datum */
2077  float8 selectivity = 0;
2078  ND_STATS *nd_stats;
2079  int mode = 2; /* 2D mode by default */
2080 
2081  /* Check if we've been asked to not use 2d mode */
2082  if ( ! PG_ARGISNULL(3) )
2083  mode = text_p_get_mode(PG_GETARG_TEXT_P(3));
2084 
2085  /* Retrieve the stats object */
2086  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, false);
2087 
2088  if ( ! nd_stats )
2089  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text2cstring(att_text));
2090 
2091  /* Calculate the gbox */
2092  if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
2093  elog(ERROR, "unable to calculate bounding box from geometry");
2094 
2095  POSTGIS_DEBUGF(3, " %s", gbox_to_string(&gbox));
2096 
2097  /* Do the estimation */
2098  selectivity = estimate_selectivity(&gbox, nd_stats, mode);
2099 
2100  pfree(nd_stats);
2101  PG_RETURN_FLOAT8(selectivity);
2102 }
2103 
2104 
2110 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
2111 {
2112  Oid table_oid1 = PG_GETARG_OID(0);
2113  text *att_text1 = PG_GETARG_TEXT_P(1);
2114  Oid table_oid2 = PG_GETARG_OID(2);
2115  text *att_text2 = PG_GETARG_TEXT_P(3);
2116  ND_STATS *nd_stats1, *nd_stats2;
2117  float8 selectivity = 0;
2118  int mode = 2; /* 2D mode by default */
2119 
2120 
2121  /* Retrieve the stats object */
2122  nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode, false);
2123  nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode, false);
2124 
2125  if ( ! nd_stats1 )
2126  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text2cstring(att_text1));
2127 
2128  if ( ! nd_stats2 )
2129  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid2), text2cstring(att_text2));
2130 
2131  /* Check if we've been asked to not use 2d mode */
2132  if ( ! PG_ARGISNULL(4) )
2133  {
2134  text *modetxt = PG_GETARG_TEXT_P(4);
2135  char *modestr = text2cstring(modetxt);
2136  if ( modestr[0] == 'N' )
2137  mode = 0;
2138  }
2139 
2140  /* Do the estimation */
2141  selectivity = estimate_join_selectivity(nd_stats1, nd_stats2);
2142 
2143  pfree(nd_stats1);
2144  pfree(nd_stats2);
2145  PG_RETURN_FLOAT8(selectivity);
2146 }
2147 
2153 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
2154 {
2155  PG_RETURN_DATUM(DirectFunctionCall5(
2157  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2158  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2159  Int32GetDatum(2) /* 2-D mode */
2160  ));
2161 }
2162 
2168 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
2169 {
2170  PG_RETURN_DATUM(DirectFunctionCall5(
2172  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2173  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2174  Int32GetDatum(0) /* N-D mode */
2175  ));
2176 }
2177 
2192 Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
2193 {
2194  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2195  /* Oid operator_oid = PG_GETARG_OID(1); */
2196  List *args = (List *) PG_GETARG_POINTER(2);
2197  /* int varRelid = PG_GETARG_INT32(3); */
2198  int mode = PG_GETARG_INT32(4);
2199 
2200  VariableStatData vardata;
2201  ND_STATS *nd_stats = NULL;
2202 
2203  Node *other;
2204  Var *self;
2205  GBOX search_box;
2206  float8 selectivity = 0;
2207 
2208  POSTGIS_DEBUG(2, "gserialized_gist_sel called");
2209 
2210  /*
2211  * TODO: This is a big one,
2212  * All this statistics code *only* tries to generate a valid
2213  * selectivity for && and &&&. That leaves all the other
2214  * geometry operators with bad stats! The selectivity
2215  * calculation should take account of the incoming operator
2216  * type and do the right thing.
2217  */
2218 
2219  /* Fail if not a binary opclause (probably shouldn't happen) */
2220  if (list_length(args) != 2)
2221  {
2222  POSTGIS_DEBUG(3, "gserialized_gist_sel: not a binary opclause");
2223  PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
2224  }
2225 
2226  /* Find the constant part */
2227  other = (Node *) linitial(args);
2228  if ( ! IsA(other, Const) )
2229  {
2230  self = (Var *)other;
2231  other = (Node *) lsecond(args);
2232  }
2233  else
2234  {
2235  self = (Var *) lsecond(args);
2236  }
2237 
2238  if ( ! IsA(other, Const) )
2239  {
2240  POSTGIS_DEBUG(3, " no constant arguments - returning a default selectivity");
2241  PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
2242  }
2243 
2244  /* Convert the constant to a BOX */
2245  if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
2246  {
2247  POSTGIS_DEBUG(3, "search box is EMPTY");
2248  PG_RETURN_FLOAT8(0.0);
2249  }
2250  POSTGIS_DEBUGF(4, " requested search box is: %s", gbox_to_string(&search_box));
2251 
2252  /* Get pg_statistic row */
2253  examine_variable(root, (Node*)self, 0, &vardata);
2254  if ( vardata.statsTuple ) {
2255  nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
2256  }
2257  ReleaseVariableStats(vardata);
2258 
2259  if ( ! nd_stats )
2260  {
2261  POSTGIS_DEBUG(3, " unable to load stats from syscache, not analyzed yet?");
2262  PG_RETURN_FLOAT8(FALLBACK_ND_SEL);
2263  }
2264 
2265  POSTGIS_DEBUGF(4, " got stats:\n%s", nd_stats_to_json(nd_stats));
2266 
2267  /* Do the estimation! */
2268  selectivity = estimate_selectivity(&search_box, nd_stats, mode);
2269  POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);
2270 
2271  pfree(nd_stats);
2272  PG_RETURN_FLOAT8(selectivity);
2273 }
2274 
2275 
2276 
2283 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
2284 {
2285  char *nsp = NULL;
2286  char *tbl = NULL;
2287  text *col = NULL;
2288  char *nsp_tbl = NULL;
2289  Oid tbl_oid, idx_oid;
2290  ND_STATS *nd_stats;
2291  GBOX *gbox = NULL;
2292  bool only_parent = false;
2293  int key_type;
2294 
2295  if ( PG_NARGS() == 4 )
2296  {
2297  nsp = text2cstring(PG_GETARG_TEXT_P(0));
2298  tbl = text2cstring(PG_GETARG_TEXT_P(1));
2299  col = PG_GETARG_TEXT_P(2);
2300  only_parent = PG_GETARG_BOOL(3);
2301  nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 6);
2302  sprintf(nsp_tbl, "\"%s\".\"%s\"", nsp, tbl);
2303  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2304  pfree(nsp_tbl);
2305  }
2306  else if ( PG_NARGS() == 3 )
2307  {
2308  nsp = text2cstring(PG_GETARG_TEXT_P(0));
2309  tbl = text2cstring(PG_GETARG_TEXT_P(1));
2310  col = PG_GETARG_TEXT_P(2);
2311  nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 6);
2312  sprintf(nsp_tbl, "\"%s\".\"%s\"", nsp, tbl);
2313  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2314  pfree(nsp_tbl);
2315  }
2316  else if ( PG_NARGS() == 2 )
2317  {
2318  tbl = text2cstring(PG_GETARG_TEXT_P(0));
2319  col = PG_GETARG_TEXT_P(1);
2320  nsp_tbl = palloc(strlen(tbl) + 3);
2321  sprintf(nsp_tbl, "\"%s\"", tbl);
2322  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2323  pfree(nsp_tbl);
2324  }
2325  else
2326  {
2327  elog(ERROR, "estimated_extent() called with wrong number of arguments");
2328  PG_RETURN_NULL();
2329  }
2330 
2331 #if 1
2332  /* Read the extent from the head of the spatial index, if there is one */
2333  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type);
2334  if (!idx_oid)
2335  elog(DEBUG2, "index for \"%s.%s\" does not exist", tbl, text2cstring(col));
2336  gbox = spatial_index_read_extent(idx_oid, key_type);
2337 #endif
2338 
2339  /* Fall back to reading the stats, if no index answer */
2340  if (!gbox)
2341  {
2342  /* Estimated extent only returns 2D bounds, so use mode 2 */
2343  nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2, only_parent);
2344 
2345  /* Error out on no stats */
2346  if ( ! nd_stats ) {
2347  elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, text2cstring(col));
2348  PG_RETURN_NULL();
2349  }
2350 
2351  /* Construct the box */
2352  gbox = palloc(sizeof(GBOX));
2353  FLAGS_SET_GEODETIC(gbox->flags, 0);
2354  FLAGS_SET_Z(gbox->flags, 0);
2355  FLAGS_SET_M(gbox->flags, 0);
2356  gbox->xmin = nd_stats->extent.min[0];
2357  gbox->xmax = nd_stats->extent.max[0];
2358  gbox->ymin = nd_stats->extent.min[1];
2359  gbox->ymax = nd_stats->extent.max[1];
2360  pfree(nd_stats);
2361  }
2362 
2363  PG_RETURN_POINTER(gbox);
2364 }
2365 
2373 Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
2374 {
2375  if ( PG_NARGS() == 3 )
2376  {
2377  PG_RETURN_DATUM(
2378  DirectFunctionCall3(gserialized_estimated_extent,
2379  PG_GETARG_DATUM(0),
2380  PG_GETARG_DATUM(1),
2381  PG_GETARG_DATUM(2)));
2382  }
2383  else if ( PG_NARGS() == 2 )
2384  {
2385  PG_RETURN_DATUM(
2386  DirectFunctionCall2(gserialized_estimated_extent,
2387  PG_GETARG_DATUM(0),
2388  PG_GETARG_DATUM(1)));
2389  }
2390 
2391  elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
2392  PG_RETURN_NULL();
2393 }
2394 
2395 /************************************************************************/
2396 
2397 static Oid
2398 typname_to_oid(const char *typname)
2399 {
2400  Oid typoid = TypenameGetTypid(typname);
2401  if (OidIsValid(typoid) && get_typisdefined(typoid))
2402  return typoid;
2403  else
2404  return InvalidOid;
2405 }
2406 
2407 static Oid
2408 table_get_spatial_index(Oid tbl_oid, text *col, int *key_type)
2409 {
2410  Relation tbl_rel;
2411  ListCell *lc;
2412  List *idx_list;
2413  Oid result = InvalidOid;
2414  char *colname = text2cstring(col);
2415 
2416  /* Lookup our spatial index key types */
2417  Oid b2d_oid = typname_to_oid(INDEX_KEY_2D);
2418  Oid gdx_oid = typname_to_oid(INDEX_KEY_ND);
2419 
2420  if (!(b2d_oid && gdx_oid))
2421  return InvalidOid;
2422 
2423  tbl_rel = RelationIdGetRelation(tbl_oid);
2424  idx_list = RelationGetIndexList(tbl_rel);
2425  RelationClose(tbl_rel);
2426 
2427  /* For each index associated with this table... */
2428  foreach(lc, idx_list)
2429  {
2430  Form_pg_class idx_form;
2431  HeapTuple idx_tup;
2432  int idx_relam;
2433  Oid idx_oid = lfirst_oid(lc);
2434 
2435  idx_tup = SearchSysCache1(RELOID, ObjectIdGetDatum(idx_oid));
2436  if (!HeapTupleIsValid(idx_tup))
2437  elog(ERROR, "%s: unable to lookup index %u in syscache", __func__, idx_oid);
2438  idx_form = (Form_pg_class) GETSTRUCT(idx_tup);
2439  idx_relam = idx_form->relam;
2440  ReleaseSysCache(idx_tup);
2441 
2442  /* Does the index use a GIST access method? */
2443  if (idx_relam == GIST_AM_OID)
2444  {
2445  Form_pg_attribute att;
2446  Oid atttypid;
2447  /* Is the index on the column name we are looking for? */
2448  HeapTuple att_tup = SearchSysCache2(ATTNAME,
2449  ObjectIdGetDatum(idx_oid),
2450  PointerGetDatum(colname));
2451  if (!HeapTupleIsValid(att_tup))
2452  continue;
2453 
2454  att = (Form_pg_attribute) GETSTRUCT(att_tup);
2455  atttypid = att->atttypid;
2456  ReleaseSysCache(att_tup);
2457 
2458  /* Is the column actually spatial? */
2459  if (b2d_oid == atttypid || gdx_oid == atttypid)
2460  {
2461  /* Save result, clean up, and break out */
2462  result = idx_oid;
2463  if (key_type)
2464  *key_type = (atttypid == b2d_oid ? STATISTIC_SLOT_2D : STATISTIC_SLOT_ND);
2465  break;
2466  }
2467  }
2468  }
2469  return result;
2470 }
2471 
2472 static GBOX *
2473 spatial_index_read_extent(Oid idx_oid, int key_type)
2474 {
2475  BOX2DF *bounds_2df = NULL;
2476  GIDX *bounds_gidx = NULL;
2477  GBOX *gbox = NULL;
2478 
2479  if (!idx_oid)
2480  return NULL;
2481 
2482  Relation idx_rel = index_open(idx_oid, AccessExclusiveLock);
2483  Buffer buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2484  Page page = (Page) BufferGetPage(buffer);
2485  OffsetNumber offset = FirstOffsetNumber;
2486  unsigned long offset_max = PageGetMaxOffsetNumber(page);
2487  while (offset <= offset_max)
2488  {
2489  ItemId iid = PageGetItemId(page, offset);
2490  if (!iid)
2491  {
2492  ReleaseBuffer(buffer);
2493  index_close(idx_rel, AccessExclusiveLock);
2494  return NULL;
2495  }
2496  IndexTuple ituple = (IndexTuple) PageGetItem(page, iid);
2497  if (!GistTupleIsInvalid(ituple))
2498  {
2499  bool isnull;
2500  Datum idx_attr = index_getattr(ituple, 1, idx_rel->rd_att, &isnull);
2501  if (!isnull)
2502  {
2503  if (key_type == STATISTIC_SLOT_2D)
2504  {
2505  BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2506  if (bounds_2df)
2507  box2df_merge(bounds_2df, b);
2508  else
2509  bounds_2df = box2df_copy(b);
2510  }
2511  else
2512  {
2513  GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2514  if (bounds_gidx)
2515  gidx_merge(&bounds_gidx, b);
2516  else
2517  bounds_gidx = gidx_copy(b);
2518  }
2519  }
2520  }
2521  offset++;
2522  }
2523 
2524  ReleaseBuffer(buffer);
2525  index_close(idx_rel, AccessExclusiveLock);
2526 
2527  if (key_type == STATISTIC_SLOT_2D && bounds_2df)
2528  {
2529  if (box2df_is_empty(bounds_2df))
2530  return NULL;
2531  gbox = gbox_new(0);
2532  box2df_to_gbox_p(bounds_2df, gbox);
2533  }
2534  else if (key_type == STATISTIC_SLOT_ND && bounds_gidx)
2535  {
2536  if (gidx_is_unknown(bounds_gidx))
2537  return NULL;
2538  gbox = gbox_new(0);
2539  gbox_from_gidx(bounds_gidx, gbox, 0);
2540  }
2541  else
2542  return NULL;
2543 
2544  return gbox;
2545 }
2546 
2547 /*
2548 CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
2549  RETURNS box2d
2550  AS '$libdir/postgis-2.5','_postgis_gserialized_index_extent'
2551  LANGUAGE 'c' STABLE STRICT;
2552 */
2553 
2555 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
2556 {
2557  GBOX *gbox = NULL;
2558  int key_type;
2559  Oid tbl_oid = PG_GETARG_DATUM(0);
2560  text *col = PG_GETARG_TEXT_P(1);
2561 
2562  Oid idx_oid = table_get_spatial_index(tbl_oid, col, &key_type);
2563  if (!idx_oid)
2564  PG_RETURN_NULL();
2565 
2566  gbox = spatial_index_read_extent(idx_oid, key_type);
2567  if (!gbox)
2568  PG_RETURN_NULL();
2569  else
2570  PG_RETURN_POINTER(gbox);
2571 }
2572 
int gserialized_get_gbox_p(const GSERIALIZED *g, GBOX *box)
Read the bounding box off a serialization and calculate one if it is not already there.
Definition: g_serialized.c:639
#define INDEX_KEY_ND
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one...
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
GBOX * gbox_new(uint8_t flags)
Create a new gbox with the dimensionality indicated by the flags.
Definition: g_box.c:39
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
#define DEFAULT_ND_JOINSEL
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
Definition: stringbuffer.c:35
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition: g_box.c:204
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
Definition: g_box.c:399
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define FLAGS_GET_GEODETIC(flags)
Definition: liblwgeom.h:142
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string...
Definition: stringbuffer.c:160
double xmax
Definition: liblwgeom.h:292
static Oid typname_to_oid(const char *typname)
#define ND_DIMS
The maximum number of dimensions our code can handle.
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
#define FALLBACK_ND_JOINSEL
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
BOX2DF * box2df_copy(BOX2DF *b)
#define FLAGS_SET_GEODETIC(flags, value)
Definition: liblwgeom.h:148
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
#define LW_FAILURE
Definition: liblwgeom.h:78
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided...
Definition: stringbuffer.c:253
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
#define FLAGS_SET_Z(flags, value)
Definition: liblwgeom.h:145
Datum buffer(PG_FUNCTION_ARGS)
double zmax
Definition: liblwgeom.h:296
double ymin
Definition: liblwgeom.h:293
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension...
double xmin
Definition: liblwgeom.h:291
#define INDEX_KEY_2D
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array...
float4 size[ND_DIMS]
static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type)
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
int min[ND_DIMS]
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
char * text2cstring(const text *textptr)
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
double ymax
Definition: liblwgeom.h:294
N-dimensional box index type.
#define FLAGS_GET_Z(flags)
Macros for manipulating the 'flags' byte.
Definition: liblwgeom.h:139
static int range_quintile(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator...
uint8_t flags
Definition: liblwgeom.h:290
static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type)
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
float4 max[ND_DIMS]
void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
Definition: stringbuffer.c:134
#define STATISTIC_KIND_2D
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
Definition: stringbuffer.c:78
int max[ND_DIMS]
bool gidx_is_unknown(const GIDX *a)
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
float4 min[ND_DIMS]
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Create a printable view of the ND_STATS histogram.
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
double mmin
Definition: liblwgeom.h:297
#define SDFACTOR
double zmin
Definition: liblwgeom.h:295
#define FLAGS_GET_M(flags)
Definition: liblwgeom.h:140
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
double mmax
Definition: liblwgeom.h:298
#define STATISTIC_SLOT_ND
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
N-dimensional statistics structure.
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
GIDX * gidx_copy(GIDX *b)
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
bool box2df_is_empty(const BOX2DF *a)
This library is the generic geometry handling section of PostGIS.
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
#define STATISTIC_KIND_ND
#define FLAGS_SET_M(flags, value)
Definition: liblwgeom.h:146