PostGIS  2.5.0beta1dev-r@@SVN_REVISION@@
gserialized_estimate.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * PostGIS is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * PostGIS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18  *
19  **********************************************************************
20  *
21  * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
22  *
23  **********************************************************************/
24 
25 
26 
27 /**********************************************************************
28  THEORY OF OPERATION
29 
30 The ANALYZE command hooks to a callback (gserialized_analyze_nd) that
31 calculates (compute_gserialized_stats_mode) two histograms of occurrences of
32 features, one for the 2D domain (and the && operator) and one for the
33 ND domain (and the &&& operator).
34 
35 Queries in PostgreSQL call into the selectivity sub-system to find out
36 the relative effectiveness of different clauses in sub-setting
37 relations. Queries with constant arguments call gserialized_gist_sel,
38 queries with relations on both sides call gserialized_gist_joinsel.
39 
40 gserialized_gist_sel sums up the values in the histogram that overlap
41 the constant search box.
42 
43 gserialized_gist_joinsel sums up the product of the overlapping
44 cells in each relation's histogram.
45 
46 Depending on the operator and type, the mode of selectivity calculation
47 will be 2D or ND.
48 
49 - geometry && geometry ==> 2D
50 - geometry &&& geometry ==> ND
51 - geography && geography ==> ND
52 
53 The 2D mode is put in effect by retrieving the 2D histogram from the
54 statistics cache and then allowing the generic ND calculations to
55 go to work.
56 
57 TO DO: More testing and examination of the &&& operator and mixed
58 dimensionality cases. (2D geometry) &&& (3D column), etc.
59 
60 **********************************************************************/
61 
62 #include "postgres.h"
63 
64 #include "access/genam.h"
65 #include "access/gin.h"
66 #include "access/gist.h"
67 #include "access/gist_private.h"
68 #include "access/gistscan.h"
69 #include "utils/datum.h"
70 #include "access/heapam.h"
71 #include "catalog/index.h"
72 #include "catalog/pg_am.h"
73 #include "miscadmin.h"
74 #include "storage/lmgr.h"
75 #include "catalog/namespace.h"
76 #include "catalog/indexing.h"
77 #if PG_VERSION_NUM >= 100000
78 #include "utils/regproc.h"
79 #include "utils/varlena.h"
80 #endif
81 #include "utils/tqual.h"
82 #include "utils/builtins.h"
83 #include "utils/datum.h"
84 #include "utils/snapmgr.h"
85 #include "utils/fmgroids.h"
86 #include "funcapi.h"
87 #include "access/heapam.h"
88 #include "catalog/pg_type.h"
89 #include "access/relscan.h"
90 
91 #include "executor/spi.h"
92 #include "fmgr.h"
93 #include "commands/vacuum.h"
94 #include "nodes/relation.h"
95 #include "parser/parsetree.h"
96 #include "utils/array.h"
97 #include "utils/lsyscache.h"
98 #include "utils/builtins.h"
99 #include "utils/syscache.h"
100 #include "utils/rel.h"
101 #include "utils/selfuncs.h"
102 
103 #include "../postgis_config.h"
104 
105 #if POSTGIS_PGSQL_VERSION >= 93
106  #include "access/htup_details.h"
107 #endif
108 
109 #include "stringbuffer.h"
110 #include "liblwgeom.h"
111 #include "lwgeom_pg.h" /* For debugging macros. */
112 #include "gserialized_gist.h" /* For index common functions */
113 
114 #include <math.h>
115 #if HAVE_IEEEFP_H
116 #include <ieeefp.h>
117 #endif
118 #include <float.h>
119 #include <string.h>
120 #include <stdio.h>
121 #include <errno.h>
122 #include <ctype.h>
123 
124 
125 /************************************************************************/
126 
127 
128 /* Fall back to older finite() if necessary */
/*
 * HAVE_ISFINITE / HAVE_GNU_ISFINITE are presumably set by the build
 * configuration. When isfinite() was not detected, either request the GNU
 * variant through _GNU_SOURCE or alias isfinite to the older finite().
 * NOTE(review): _GNU_SOURCE is defined here after <math.h> and the other
 * system headers have already been included above, so it presumably cannot
 * change what those headers declare -- confirm.
 */
129 #ifndef HAVE_ISFINITE
130 # ifdef HAVE_GNU_ISFINITE
131 # define _GNU_SOURCE
132 # else
133 # define isfinite finite
134 # endif
135 #endif
136 
137 
138 /* Prototypes */
139 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS);
140 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS);
141 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS);
142 Datum gserialized_gist_sel(PG_FUNCTION_ARGS);
143 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS);
144 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
145 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
146 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
147 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS);
148 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
149 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
150 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
151 
152 /* Local prototypes */
153 static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type);
154 static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type);
155 
156 
157 /* Old Prototype */
158 Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
159 
160 /*
161 * Assign a number to the n-dimensional statistics kind
162 *
163 * tgl suggested:
164 *
165 * 1-100: reserved for assignment by the core Postgres project
166 * 100-199: reserved for assignment by PostGIS
167 * 200-9999: reserved for other globally-known stats kinds
168 * 10000-32767: reserved for private site-local use
169 */
170 #define STATISTIC_KIND_ND 102
171 #define STATISTIC_KIND_2D 103
172 #define STATISTIC_SLOT_ND 0
173 #define STATISTIC_SLOT_2D 1
174 
175 /*
176 * To look-up the spatial index associated with a table we
177 * need to find GIST indexes using our spatial keys.
178 */
179 #define INDEX_KEY_ND "gidx"
180 #define INDEX_KEY_2D "box2df"
181 
182 /*
183 * The SD factor restricts the size of the statistics histogram
184 * based on the standard deviation of the extent of the data.
185 * SDFACTOR is the number of standard deviations from the mean
186 * the histogram will extend.
187 */
188 #define SDFACTOR 3.25
189 
/*
 * Maximum number of dimensions handled by the statistics code. It sizes the
 * min/max arrays of ND_BOX and ND_IBOX and the size array of ND_STATS.
 */
195 #define ND_DIMS 4
196 
/*
 * Narrowest extent width for which a per-dimension distribution is computed:
 * nd_box_array_distribution() reports 0 for any dimension narrower than this.
 */
203 #define MIN_DIMENSION_WIDTH 0.000000001
204 
/*
 * Default selectivities. DEFAULT_ND_JOINSEL is what the join estimator
 * returns when the join type is unsupported, the arguments are not plain
 * columns, statistics are missing, or the computed value is not a usable
 * number. DEFAULT_ND_SEL is presumably the restriction-clause counterpart
 * (its use is not shown in this part of the file -- confirm).
 */
208 #define DEFAULT_ND_SEL 0.0001
209 #define DEFAULT_ND_JOINSEL 0.001
210 
/*
 * Fallback selectivities used by estimate_join_selectivity() when it cannot
 * run at all: FALLBACK_ND_SEL on null histogram inputs, FALLBACK_ND_JOINSEL
 * when the overlap of the two histograms cannot be calculated.
 */
214 #define FALLBACK_ND_SEL 0.2
215 #define FALLBACK_ND_JOINSEL 0.3
216 
/*
 * N-dimensional box with single-precision bounds. Slot d of min/max holds
 * the lower/upper bound in dimension d; up to ND_DIMS dimensions are stored.
 */
222 typedef struct ND_BOX_T
223 {
224  float4 min[ND_DIMS];
225  float4 max[ND_DIMS];
226 } ND_BOX;
227 
/*
 * Integer counterpart of ND_BOX. nd_box_overlap() fills one with the lowest
 * and highest histogram cell index touched in each dimension.
 */
231 typedef struct ND_IBOX_T
232 {
233  int min[ND_DIMS];
234  int max[ND_DIMS];
235 } ND_IBOX;
236 
237 
244 typedef struct ND_STATS_T
245 {
246  /* Dimensionality of the histogram. */
247  float4 ndims;
248 
249  /* Size of n-d histogram in each dimension. */
250  float4 size[ND_DIMS];
251 
252  /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
254 
255  /* How many rows in the table itself? */
257 
258  /* How many rows were in the sample that built this histogram? */
260 
261  /* How many not-Null/Empty features were in the sample? */
263 
264  /* How many features actually got sampled in the histogram? */
266 
267  /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
269 
270  /* How many cells did those histogram features cover? */
271  /* Since we are pro-rating coverage, this number should */
272  /* now always equal histogram_features */
274 
275  /* Variable length # of floats for histogram */
276  float4 value[1];
277 } ND_STATS;
278 
279 
280 
281 
288 static int
289 gbox_ndims(const GBOX* gbox)
290 {
291  int dims = 2;
292  if ( FLAGS_GET_GEODETIC(gbox->flags) )
293  return 3;
294  if ( FLAGS_GET_Z(gbox->flags) )
295  dims++;
296  if ( FLAGS_GET_M(gbox->flags) )
297  dims++;
298  return dims;
299 }
300 
306 static int
307 text_p_get_mode(const text *txt)
308 {
309  int mode = 2;
310  char *modestr;
311  if (VARSIZE(txt) - VARHDRSZ <= 0)
312  return mode;
313  modestr = (char*)VARDATA(txt);
314  if ( modestr[0] == 'N' )
315  mode = 0;
316  return mode;
317 }
318 
319 
/**
* qsort() comparator for ints, ascending order.
* Returns -1, 0 or 1 when *a is smaller than, equal to or larger than *b.
*/
static int
cmp_int (const void *a, const void *b)
{
	const int lhs = *((const int*)a);
	const int rhs = *((const int*)b);

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}
336 
341 static int
342 range_quintile(int *vals, int nvals)
343 {
344  qsort(vals, nvals, sizeof(int), cmp_int);
345  return vals[4*nvals/5] - vals[nvals/5];
346 }
347 
/**
* Sum the first nvals entries of an array of doubles.
* The sum is accumulated in double precision so that no precision is
* lost relative to the inputs. Returns 0 when nvals is not positive.
*/
static double
total_double(const double *vals, int nvals)
{
	int i;
	double total = 0.0;

	for ( i = 0; i < nvals; i++ )
		total += vals[i];

	return total;
}
362 
363 #if POSTGIS_DEBUG_LEVEL >= 3
364 
/**
* Sum the first nvals entries of an array of ints.
* Returns 0 when nvals is not positive.
*/
static int
total_int(const int *vals, int nvals)
{
	int sum = 0;
	int idx = 0;

	while ( idx < nvals )
	{
		sum += vals[idx];
		idx++;
	}

	return sum;
}
379 
/**
* Arithmetic mean of the first nvals entries of an int array,
* computed as the integer total divided by nvals in double precision.
*/
static double
avg(const int *vals, int nvals)
{
	const double sum = (double)total_int(vals, nvals);

	return sum / (double)nvals;
}
389 
/**
* Population standard deviation of the first nvals entries of an
* int array: square root of the mean squared distance from the mean.
*/
static double
stddev(const int *vals, int nvals)
{
	const double mean = avg(vals, nvals);
	double sum_sq = 0;
	int k;

	/* Accumulate squared distances from the mean */
	for ( k = 0; k < nvals; k++ )
	{
		const double sample = (double)(vals[k]);
		sum_sq += (mean - sample) * (mean - sample);
	}

	return sqrt(sum_sq / nvals);
}
408 #endif /* POSTGIS_DEBUG_LEVEL >= 3 */
409 
414 static int
415 nd_stats_value_index(const ND_STATS *stats, int *indexes)
416 {
417  int d;
418  int accum = 1, vdx = 0;
419 
420  /* Calculate the index into the 1-d values array that the (i,j,k,l) */
421  /* n-d histogram coordinate implies. */
422  /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
423  for ( d = 0; d < (int)(stats->ndims); d++ )
424  {
425  int size = (int)(stats->size[d]);
426  if ( indexes[d] < 0 || indexes[d] >= size )
427  {
428  POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
429  return -1;
430  }
431  vdx += indexes[d] * accum;
432  accum *= size;
433  }
434  return vdx;
435 }
436 
440 static char*
441 nd_box_to_json(const ND_BOX *nd_box, int ndims)
442 {
443  char *rv;
444  int i;
446 
447  stringbuffer_append(sb, "{\"min\":[");
448  for ( i = 0; i < ndims; i++ )
449  {
450  if ( i ) stringbuffer_append(sb, ",");
451  stringbuffer_aprintf(sb, "%.6g", nd_box->min[i]);
452  }
453  stringbuffer_append(sb, "],\"max\":[");
454  for ( i = 0; i < ndims; i++ )
455  {
456  if ( i ) stringbuffer_append(sb, ",");
457  stringbuffer_aprintf(sb, "%.6g", nd_box->max[i]);
458  }
459  stringbuffer_append(sb, "]}");
460 
463  return rv;
464 }
465 
466 
471 static char*
472 nd_stats_to_json(const ND_STATS *nd_stats)
473 {
474  char *json_extent, *str;
475  int d;
477  int ndims = (int)roundf(nd_stats->ndims);
478 
479  stringbuffer_append(sb, "{");
480  stringbuffer_aprintf(sb, "\"ndims\":%d,", ndims);
481 
482  /* Size */
483  stringbuffer_append(sb, "\"size\":[");
484  for ( d = 0; d < ndims; d++ )
485  {
486  if ( d ) stringbuffer_append(sb, ",");
487  stringbuffer_aprintf(sb, "%d", (int)roundf(nd_stats->size[d]));
488  }
489  stringbuffer_append(sb, "],");
490 
491  /* Extent */
492  json_extent = nd_box_to_json(&(nd_stats->extent), ndims);
493  stringbuffer_aprintf(sb, "\"extent\":%s,", json_extent);
494  pfree(json_extent);
495 
496  stringbuffer_aprintf(sb, "\"table_features\":%d,", (int)roundf(nd_stats->table_features));
497  stringbuffer_aprintf(sb, "\"sample_features\":%d,", (int)roundf(nd_stats->sample_features));
498  stringbuffer_aprintf(sb, "\"not_null_features\":%d,", (int)roundf(nd_stats->not_null_features));
499  stringbuffer_aprintf(sb, "\"histogram_features\":%d,", (int)roundf(nd_stats->histogram_features));
500  stringbuffer_aprintf(sb, "\"histogram_cells\":%d,", (int)roundf(nd_stats->histogram_cells));
501  stringbuffer_aprintf(sb, "\"cells_covered\":%d", (int)roundf(nd_stats->cells_covered));
502  stringbuffer_append(sb, "}");
503 
504  str = stringbuffer_getstringcopy(sb);
506  return str;
507 }
508 
509 
515 // static char*
516 // nd_stats_to_grid(const ND_STATS *stats)
517 // {
518 // char *rv;
519 // int j, k;
520 // int sizex = (int)roundf(stats->size[0]);
521 // int sizey = (int)roundf(stats->size[1]);
522 // stringbuffer_t *sb = stringbuffer_create();
523 //
524 // for ( k = 0; k < sizey; k++ )
525 // {
526 // for ( j = 0; j < sizex; j++ )
527 // {
528 // stringbuffer_aprintf(sb, "%3d ", (int)roundf(stats->value[j + k*sizex]));
529 // }
530 // stringbuffer_append(sb, "\n");
531 // }
532 //
533 // rv = stringbuffer_getstringcopy(sb);
534 // stringbuffer_destroy(sb);
535 // return rv;
536 // }
537 
538 
540 static int
541 nd_box_merge(const ND_BOX *source, ND_BOX *target)
542 {
543  int d;
544  for ( d = 0; d < ND_DIMS; d++ )
545  {
546  target->min[d] = Min(target->min[d], source->min[d]);
547  target->max[d] = Max(target->max[d], source->max[d]);
548  }
549  return true;
550 }
551 
553 static int
555 {
556  memset(a, 0, sizeof(ND_BOX));
557  return true;
558 }
559 
565 static int
567 {
568  int d;
569  for ( d = 0; d < ND_DIMS; d++ )
570  {
571  a->min[d] = FLT_MAX;
572  a->max[d] = -1 * FLT_MAX;
573  }
574  return true;
575 }
576 
578 static void
579 nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
580 {
581  int d = 0;
582  POSTGIS_DEBUGF(3, " %s", gbox_to_string(gbox));
583 
584  nd_box_init(nd_box);
585  nd_box->min[d] = gbox->xmin;
586  nd_box->max[d] = gbox->xmax;
587  d++;
588  nd_box->min[d] = gbox->ymin;
589  nd_box->max[d] = gbox->ymax;
590  d++;
591  if ( FLAGS_GET_GEODETIC(gbox->flags) )
592  {
593  nd_box->min[d] = gbox->zmin;
594  nd_box->max[d] = gbox->zmax;
595  return;
596  }
597  if ( FLAGS_GET_Z(gbox->flags) )
598  {
599  nd_box->min[d] = gbox->zmin;
600  nd_box->max[d] = gbox->zmax;
601  d++;
602  }
603  if ( FLAGS_GET_M(gbox->flags) )
604  {
605  nd_box->min[d] = gbox->mmin;
606  nd_box->max[d] = gbox->mmax;
607  d++;
608  }
609  return;
610 }
611 
615 static int
616 nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
617 {
618  int d;
619  for ( d = 0; d < ndims; d++ )
620  {
621  if ( (a->min[d] > b->max[d]) || (a->max[d] < b->min[d]) )
622  return false;
623  }
624  return true;
625 }
626 
630 static int
631 nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
632 {
633  int d;
634  for ( d = 0; d < ndims; d++ )
635  {
636  if ( ! ((a->min[d] < b->min[d]) && (a->max[d] > b->max[d])) )
637  return false;
638  }
639  return true;
640 }
641 
646 static int
647 nd_box_expand(ND_BOX *nd_box, double expansion_factor)
648 {
649  int d;
650  double size;
651  for ( d = 0; d < ND_DIMS; d++ )
652  {
653  size = nd_box->max[d] - nd_box->min[d];
654  if ( size <= 0 ) continue;
655  nd_box->min[d] -= size * expansion_factor / 2;
656  nd_box->max[d] += size * expansion_factor / 2;
657  }
658  return true;
659 }
660 
665 static inline int
666 nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
667 {
668  int d;
669 
670  POSTGIS_DEBUGF(4, " nd_box: %s", nd_box_to_json(nd_box, nd_stats->ndims));
671 
672  /* Initialize ibox */
673  memset(nd_ibox, 0, sizeof(ND_IBOX));
674 
675  /* In each dimension... */
676  for ( d = 0; d < nd_stats->ndims; d++ )
677  {
678  double smin = nd_stats->extent.min[d];
679  double smax = nd_stats->extent.max[d];
680  double width = smax - smin;
681  int size = roundf(nd_stats->size[d]);
682 
683  /* ... find cells the box overlaps with in this dimension */
684  nd_ibox->min[d] = floor(size * (nd_box->min[d] - smin) / width);
685  nd_ibox->max[d] = floor(size * (nd_box->max[d] - smin) / width);
686 
687  POSTGIS_DEBUGF(5, " stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
688  POSTGIS_DEBUGF(5, " overlap: dim %d: (%d, %d)", d, nd_ibox->min[d], nd_ibox->max[d]);
689 
690  /* Push any out-of range values into range */
691  nd_ibox->min[d] = Max(nd_ibox->min[d], 0);
692  nd_ibox->max[d] = Min(nd_ibox->max[d], size-1);
693  }
694  return true;
695 }
696 
700 static inline double
701 nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
702 {
703  int d;
704  bool covered = true;
705  double ivol = 1.0;
706  double vol2 = 1.0;
707  double vol1 = 1.0;
708 
709  for ( d = 0 ; d < ndims; d++ )
710  {
711  if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
712  return 0.0; /* Disjoint */
713 
714  if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
715  covered = false;
716  }
717 
718  if ( covered )
719  return 1.0;
720 
721  for ( d = 0; d < ndims; d++ )
722  {
723  double width1 = b1->max[d] - b1->min[d];
724  double width2 = b2->max[d] - b2->min[d];
725  double imin, imax, iwidth;
726 
727  vol1 *= width1;
728  vol2 *= width2;
729 
730  imin = Max(b1->min[d], b2->min[d]);
731  imax = Min(b1->max[d], b2->max[d]);
732  iwidth = imax - imin;
733  iwidth = Max(0.0, iwidth);
734 
735  ivol *= iwidth;
736  }
737 
738  if ( vol2 == 0.0 )
739  return vol2;
740 
741  return ivol / vol2;
742 }
743 
744 
760 static int
761 nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
762 {
763  /* How many bins shall we use in figuring out the distribution? */
764  static int num_bins = 50;
765  int d, i, k, range;
766  int counts[num_bins];
767  double smin, smax; /* Spatial min, spatial max */
768  double swidth; /* Spatial width of dimension */
769 #if POSTGIS_DEBUG_LEVEL >= 3
770  double average, sdev, sdev_ratio;
771 #endif
772  int bmin, bmax; /* Bin min, bin max */
773  const ND_BOX *ndb;
774 
775  /* For each dimension... */
776  for ( d = 0; d < ndims; d++ )
777  {
778  /* Initialize counts for this dimension */
779  memset(counts, 0, sizeof(int)*num_bins);
780 
781  smin = extent->min[d];
782  smax = extent->max[d];
783  swidth = smax - smin;
784 
785  /* Don't try and calculate distribution of overly narrow dimensions */
786  if ( swidth < MIN_DIMENSION_WIDTH )
787  {
788  distribution[d] = 0;
789  continue;
790  }
791 
792  /* Sum up the overlaps of each feature with the dimensional bins */
793  for ( i = 0; i < num_boxes; i++ )
794  {
795  double minoffset, maxoffset;
796 
797  /* Skip null entries */
798  ndb = nd_boxes[i];
799  if ( ! ndb ) continue;
800 
801  /* Where does box fall relative to the working range */
802  minoffset = ndb->min[d] - smin;
803  maxoffset = ndb->max[d] - smin;
804 
805  /* Skip boxes that our outside our working range */
806  if ( minoffset < 0 || minoffset > swidth ||
807  maxoffset < 0 || maxoffset > swidth )
808  {
809  continue;
810  }
811 
812  /* What bins does this range correspond to? */
813  bmin = num_bins * (minoffset) / swidth;
814  bmax = num_bins * (maxoffset) / swidth;
815 
816  POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
817 
818  /* Increment the counts in all the bins this feature overlaps */
819  for ( k = bmin; k <= bmax; k++ )
820  {
821  counts[k] += 1;
822  }
823 
824  }
825 
826  /* How dispersed is the distribution of features across bins? */
827  range = range_quintile(counts, num_bins);
828 
829 #if POSTGIS_DEBUG_LEVEL >= 3
830  average = avg(counts, num_bins);
831  sdev = stddev(counts, num_bins);
832  sdev_ratio = sdev/average;
833 
834  POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
835  POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
836  POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
837  POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
838 #endif
839 
840  distribution[d] = range;
841  }
842 
843  return true;
844 }
845 
851 static inline int
852 nd_increment(ND_IBOX *ibox, int ndims, int *counter)
853 {
854  int d = 0;
855 
856  while ( d < ndims )
857  {
858  if ( counter[d] < ibox->max[d] )
859  {
860  counter[d] += 1;
861  break;
862  }
863  counter[d] = ibox->min[d];
864  d++;
865  }
866  /* That's it, cannot increment any more! */
867  if ( d == ndims )
868  return false;
869 
870  /* Increment complete! */
871  return true;
872 }
873 
874 static ND_STATS*
875 pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
876 {
877  int stats_kind = STATISTIC_KIND_ND;
878  int rv;
879  ND_STATS *nd_stats;
880 
881  /* If we're in 2D mode, set the kind appropriately */
882  if ( mode == 2 ) stats_kind = STATISTIC_KIND_2D;
883 
884  /* Then read the geom status histogram from that */
885 
886 #if POSTGIS_PGSQL_VERSION < 100
887  {
888  float4 *floatptr;
889  int nvalues;
890 
891  rv = get_attstatsslot(stats_tuple, 0, 0, stats_kind, InvalidOid,
892  NULL, NULL, NULL, &floatptr, &nvalues);
893 
894  if ( ! rv ) {
895  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
896  return NULL;
897  }
898 
899  /* Clone the stats here so we can release the attstatsslot immediately */
900  nd_stats = palloc(sizeof(float) * nvalues);
901  memcpy(nd_stats, floatptr, sizeof(float) * nvalues);
902 
903  /* Clean up */
904  free_attstatsslot(0, NULL, 0, floatptr, nvalues);
905  }
906 #else /* PostgreSQL 10 or higher */
907  {
908  AttStatsSlot sslot;
909  rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
910  ATTSTATSSLOT_NUMBERS);
911  if ( ! rv ) {
912  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
913  return NULL;
914  }
915 
916  /* Clone the stats here so we can release the attstatsslot immediately */
917  nd_stats = palloc(sizeof(float4) * sslot.nnumbers);
918  memcpy(nd_stats, sslot.numbers, sizeof(float4) * sslot.nnumbers);
919 
920  free_attstatsslot(&sslot);
921  }
922 #endif
923 
924  return nd_stats;
925 }
926 
931 static ND_STATS*
932 pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
933 {
934  HeapTuple stats_tuple = NULL;
935  ND_STATS *nd_stats;
936 
937  /* First pull the stats tuple for the whole tree */
938  if ( ! only_parent )
939  {
940  POSTGIS_DEBUGF(2, "searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
941  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(true));
942  if ( stats_tuple )
943  POSTGIS_DEBUGF(2, "found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
944  }
945  /* Fall-back to main table stats only, if not found for whole tree or explicitly ignored */
946  if ( only_parent || ! stats_tuple )
947  {
948  POSTGIS_DEBUGF(2, "searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
949  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(false));
950  if ( stats_tuple )
951  POSTGIS_DEBUGF(2, "found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
952  }
953  if ( ! stats_tuple )
954  {
955  POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
956  return NULL;
957  }
958 
959  nd_stats = pg_nd_stats_from_tuple(stats_tuple, mode);
960  ReleaseSysCache(stats_tuple);
961  if ( ! nd_stats )
962  {
963  POSTGIS_DEBUGF(2,
964  "histogram for attribute %d of table \"%s\" does not exist?",
965  att_num, get_rel_name(table_oid));
966  }
967 
968  return nd_stats;
969 }
970 
979 static ND_STATS*
980 pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
981 {
982  const char *att_name = text_to_cstring(att_text);
983  AttrNumber att_num;
984 
985  /* We know the name? Look up the num */
986  if ( att_text )
987  {
988  /* Get the attribute number */
989  att_num = get_attnum(table_oid, att_name);
990  if ( ! att_num ) {
991  elog(ERROR, "attribute \"%s\" does not exist", att_name);
992  return NULL;
993  }
994  }
995  else
996  {
997  elog(ERROR, "attribute name is null");
998  return NULL;
999  }
1000 
1001  return pg_get_nd_stats(table_oid, att_num, mode, only_parent);
1002 }
1003 
1017 static float8
1019 {
1020  int ncells1, ncells2;
1021  int ndims1, ndims2, ndims;
1022  double ntuples_max;
1023  double ntuples_not_null1, ntuples_not_null2;
1024 
1025  ND_BOX extent1, extent2;
1026  ND_IBOX ibox1, ibox2;
1027  int at1[ND_DIMS];
1028  int at2[ND_DIMS];
1029  double min1[ND_DIMS];
1030  double width1[ND_DIMS];
1031  double cellsize1[ND_DIMS];
1032  int size2[ND_DIMS];
1033  double min2[ND_DIMS];
1034  double width2[ND_DIMS];
1035  double cellsize2[ND_DIMS];
1036  int size1[ND_DIMS];
1037  int d;
1038  double val = 0;
1039  float8 selectivity;
1040 
1041  /* Drop out on null inputs */
1042  if ( ! ( s1 && s2 ) )
1043  {
1044  elog(NOTICE, " estimate_join_selectivity called with null inputs");
1045  return FALLBACK_ND_SEL;
1046  }
1047 
1048  /* We need to know how many cells each side has... */
1049  ncells1 = (int)roundf(s1->histogram_cells);
1050  ncells2 = (int)roundf(s2->histogram_cells);
1051 
1052  /* ...so that we can drive the summation loop with the smaller histogram. */
1053  if ( ncells1 > ncells2 )
1054  {
1055  const ND_STATS *stats_tmp = s1;
1056  s1 = s2;
1057  s2 = stats_tmp;
1058  }
1059 
1060  POSTGIS_DEBUGF(3, "s1: %s", nd_stats_to_json(s1));
1061  POSTGIS_DEBUGF(3, "s2: %s", nd_stats_to_json(s2));
1062 
1063  /* Re-read that info after the swap */
1064  ncells1 = (int)roundf(s1->histogram_cells);
1065  ncells2 = (int)roundf(s2->histogram_cells);
1066 
1067  /* Q: What's the largest possible join size these relations can create? */
1068  /* A: The product of the # of non-null rows in each relation. */
1069  ntuples_not_null1 = s1->table_features * (s1->not_null_features / s1->sample_features);
1070  ntuples_not_null2 = s2->table_features * (s2->not_null_features / s2->sample_features);
1071  ntuples_max = ntuples_not_null1 * ntuples_not_null2;
1072 
1073  /* Get the ndims as ints */
1074  ndims1 = (int)roundf(s1->ndims);
1075  ndims2 = (int)roundf(s2->ndims);
1076  ndims = Max(ndims1, ndims2);
1077 
1078  /* Get the extents */
1079  extent1 = s1->extent;
1080  extent2 = s2->extent;
1081 
1082  /* If relation stats do not intersect, join is very very selective. */
1083  if ( ! nd_box_intersects(&extent1, &extent2, ndims) )
1084  {
1085  POSTGIS_DEBUG(3, "relation stats do not intersect, returning 0");
1086  PG_RETURN_FLOAT8(0.0);
1087  }
1088 
1089  /*
1090  * First find the index range of the part of the smaller
1091  * histogram that overlaps the larger one.
1092  */
1093  if ( ! nd_box_overlap(s1, &extent2, &ibox1) )
1094  {
1095  POSTGIS_DEBUG(3, "could not calculate overlap of relations");
1096  PG_RETURN_FLOAT8(FALLBACK_ND_JOINSEL);
1097  }
1098 
1099  /* Initialize counters / constants on s1 */
1100  for ( d = 0; d < ndims1; d++ )
1101  {
1102  at1[d] = ibox1.min[d];
1103  min1[d] = s1->extent.min[d];
1104  width1[d] = s1->extent.max[d] - s1->extent.min[d];
1105  size1[d] = (int)roundf(s1->size[d]);
1106  cellsize1[d] = width1[d] / size1[d];
1107  }
1108 
1109  /* Initialize counters / constants on s2 */
1110  for ( d = 0; d < ndims2; d++ )
1111  {
1112  min2[d] = s2->extent.min[d];
1113  width2[d] = s2->extent.max[d] - s2->extent.min[d];
1114  size2[d] = (int)roundf(s2->size[d]);
1115  cellsize2[d] = width2[d] / size2[d];
1116  }
1117 
1118  /* For each affected cell of s1... */
1119  do
1120  {
1121  double val1;
1122  /* Construct the bounds of this cell */
1123  ND_BOX nd_cell1;
1124  nd_box_init(&nd_cell1);
1125  for ( d = 0; d < ndims1; d++ )
1126  {
1127  nd_cell1.min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1128  nd_cell1.max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1129  }
1130 
1131  /* Find the cells of s2 that cell1 overlaps.. */
1132  nd_box_overlap(s2, &nd_cell1, &ibox2);
1133 
1134  /* Initialize counter */
1135  for ( d = 0; d < ndims2; d++ )
1136  {
1137  at2[d] = ibox2.min[d];
1138  }
1139 
1140  POSTGIS_DEBUGF(3, "at1 %d,%d %s", at1[0], at1[1], nd_box_to_json(&nd_cell1, ndims1));
1141 
1142  /* Get the value at this cell */
1143  val1 = s1->value[nd_stats_value_index(s1, at1)];
1144 
1145  /* For each overlapped cell of s2... */
1146  do
1147  {
1148  double ratio2;
1149  double val2;
1150 
1151  /* Construct the bounds of this cell */
1152  ND_BOX nd_cell2;
1153  nd_box_init(&nd_cell2);
1154  for ( d = 0; d < ndims2; d++ )
1155  {
1156  nd_cell2.min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1157  nd_cell2.max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1158  }
1159 
1160  POSTGIS_DEBUGF(3, " at2 %d,%d %s", at2[0], at2[1], nd_box_to_json(&nd_cell2, ndims2));
1161 
1162  /* Calculate overlap ratio of the cells */
1163  ratio2 = nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1164 
1165  /* Multiply the cell counts, scaled by overlap ratio */
1166  val2 = s2->value[nd_stats_value_index(s2, at2)];
1167  POSTGIS_DEBUGF(3, " val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1168  val += val1 * (val2 * ratio2);
1169  }
1170  while ( nd_increment(&ibox2, ndims2, at2) );
1171 
1172  }
1173  while( nd_increment(&ibox1, ndims1, at1) );
1174 
1175  POSTGIS_DEBUGF(3, "val of histogram = %g", val);
1176 
1177  /*
1178  * In order to compare our total cell count "val" to the
1179  * ntuples_max, we need to scale val up to reflect a full
1180  * table estimate. So, multiply by ratio of table size to
1181  * sample size.
1182  */
1183  val *= (s1->table_features / s1->sample_features);
1184  val *= (s2->table_features / s2->sample_features);
1185 
1186  POSTGIS_DEBUGF(3, "val scaled to full table size = %g", val);
1187 
1188  /*
1189  * Because the cell counts are over-determined due to
1190  * double counting of features that overlap multiple cells
1191  * (see the compute_gserialized_stats routine)
1192  * we also have to scale our cell count "val" *down*
1193  * to adjust for the double counting.
1194  */
1195 // val /= (s1->cells_covered / s1->histogram_features);
1196 // val /= (s2->cells_covered / s2->histogram_features);
1197 
1198  /*
1199  * Finally, the selectivity is the estimated number of
1200  * rows to be returned divided by the maximum possible
1201  * number of rows that can be returned.
1202  */
1203  selectivity = val / ntuples_max;
1204 
1205  /* Guard against over-estimates and crazy numbers :) */
1206  if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1207  {
1208  selectivity = DEFAULT_ND_JOINSEL;
1209  }
1210  else if ( selectivity > 1.0 )
1211  {
1212  selectivity = 1.0;
1213  }
1214 
1215  return selectivity;
1216 }
1217 
1223 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
1224 {
1225  PG_RETURN_DATUM(DirectFunctionCall5(
1227  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1228  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1229  Int32GetDatum(0) /* ND mode */
1230  ));
1231 }
1232 
1238 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
1239 {
1240  PG_RETURN_DATUM(DirectFunctionCall5(
1242  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1243  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1244  Int32GetDatum(2) /* 2D mode */
1245  ));
1246 }
1247 
1257 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
1258 {
1259  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1260  /* Oid operator = PG_GETARG_OID(1); */
1261  List *args = (List *) PG_GETARG_POINTER(2);
1262  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1263  int mode = PG_GETARG_INT32(4);
1264 
1265  Node *arg1, *arg2;
1266  Var *var1, *var2;
1267  Oid relid1, relid2;
1268 
1269  ND_STATS *stats1, *stats2;
1270  float8 selectivity;
1271 
1272  /* Only respond to an inner join/unknown context join */
1273  if (jointype != JOIN_INNER)
1274  {
1275  elog(DEBUG1, "%s: jointype %d not supported", __func__, jointype);
1276  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1277  }
1278 
1279  /* Find Oids of the geometry columns we are working with */
1280  arg1 = (Node*) linitial(args);
1281  arg2 = (Node*) lsecond(args);
1282  var1 = (Var*) arg1;
1283  var2 = (Var*) arg2;
1284 
1285  /* We only do column joins right now, no functional joins */
1286  /* TODO: handle g1 && ST_Expand(g2) */
1287  if (!IsA(arg1, Var) || !IsA(arg2, Var))
1288  {
1289  elog(DEBUG1, "%s called with arguments that are not column references", __func__);
1290  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1291  }
1292 
1293  /* What are the Oids of our tables/relations? */
1294  relid1 = getrelid(var1->varno, root->parse->rtable);
1295  relid2 = getrelid(var2->varno, root->parse->rtable);
1296 
1297  POSTGIS_DEBUGF(3, "using relations \"%s\" Oid(%d), \"%s\" Oid(%d)",
1298  get_rel_name(relid1) ? get_rel_name(relid1) : "NULL", relid1, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2);
1299 
1300  /* Pull the stats from the stats system. */
1301  stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
1302  stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
1303 
1304  /* If we can't get stats, we have to stop here! */
1305  if ( ! stats1 )
1306  {
1307  POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid1) ? get_rel_name(relid1) : "NULL" , relid1);
1308  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1309  }
1310  else if ( ! stats2 )
1311  {
1312  POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2);
1313  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1314  }
1315 
1316  selectivity = estimate_join_selectivity(stats1, stats2);
1317  POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
1318 
1319  pfree(stats1);
1320  pfree(stats2);
1321  PG_RETURN_FLOAT8(selectivity);
1322 }
1323 
1324 
1325 
1326 
1345 static void
1346 compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1347  int sample_rows, double total_rows, int mode)
1348 {
1349  MemoryContext old_context;
1350  int d, i; /* Counters */
1351  int notnull_cnt = 0; /* # not null rows in the sample */
1352  int null_cnt = 0; /* # null rows in the sample */
1353  int histogram_features = 0; /* # rows that actually got counted in the histogram */
1354 
1355  ND_STATS *nd_stats; /* Our histogram */
1356  size_t nd_stats_size; /* Size to allocate */
1357 
1358  double total_width = 0; /* # of bytes used by sample */
1359  double total_sample_volume = 0; /* Area/volume coverage of the sample */
1360  double total_cell_count = 0; /* # of cells in histogram affected by sample */
1361 
1362  ND_BOX sum; /* Sum of extents of sample boxes */
1363  ND_BOX avg; /* Avg of extents of sample boxes */
1364  ND_BOX stddev; /* StdDev of extents of sample boxes */
1365 
1366  const ND_BOX **sample_boxes; /* ND_BOXes for each of the sample features */
1367  ND_BOX sample_extent; /* Extent of the raw sample */
1368  int histo_size[ND_DIMS]; /* histogram nrows, ncols, etc */
1369  ND_BOX histo_extent; /* Spatial extent of the histogram */
1370  ND_BOX histo_extent_new; /* Temporary variable */
1371  int histo_cells_target; /* Number of cells we will shoot for, given the stats target */
1372  int histo_cells; /* Number of cells in the histogram */
1373  int histo_cells_new = 1; /* Temporary variable */
1374 
1375  int ndims = 2; /* Dimensionality of the sample */
1376  int histo_ndims = 0; /* Dimensionality of the histogram */
1377  double sample_distribution[ND_DIMS]; /* How homogeneous is distribution of sample in each axis? */
1378  double total_distribution; /* Total of sample_distribution */
1379 
1380  int stats_slot; /* What slot is this data going into? (2D vs ND) */
1381  int stats_kind; /* And this is what? (2D vs ND) */
1382 
1383  /* Initialize sum and stddev */
1384  nd_box_init(&sum);
1385  nd_box_init(&stddev);
1386 
1387  /*
1388  * This is where gserialized_analyze_nd
1389  * should put its' custom parameters.
1390  */
1391  /* void *mystats = stats->extra_data; */
1392 
1393  POSTGIS_DEBUG(2, "compute_gserialized_stats called");
1394  POSTGIS_DEBUGF(3, " # sample_rows: %d", sample_rows);
1395  POSTGIS_DEBUGF(3, " estimate of total_rows: %.6g", total_rows);
1396 
1397  /*
1398  * We might need less space, but don't think
1399  * its worth saving...
1400  */
1401  sample_boxes = palloc(sizeof(ND_BOX*) * sample_rows);
1402 
1403  /*
1404  * First scan:
1405  * o read boxes
1406  * o find dimensionality of the sample
1407  * o find extent of the sample
1408  * o count null-infinite/not-null values
1409  * o compute total_width
1410  * o compute total features's box area (for avgFeatureArea)
1411  * o sum features box coordinates (for standard deviation)
1412  */
1413  for ( i = 0; i < sample_rows; i++ )
1414  {
1415  Datum datum;
1416  GSERIALIZED *geom;
1417  GBOX gbox;
1418  ND_BOX *nd_box;
1419  bool is_null;
1420  bool is_copy;
1421 
1422  datum = fetchfunc(stats, i, &is_null);
1423 
1424  /* Skip all NULLs. */
1425  if ( is_null )
1426  {
1427  POSTGIS_DEBUGF(4, " skipped null geometry %d", i);
1428  null_cnt++;
1429  continue;
1430  }
1431 
1432  /* Read the bounds from the gserialized. */
1433  geom = (GSERIALIZED *)PG_DETOAST_DATUM(datum);
1434  is_copy = VARATT_IS_EXTENDED(datum);
1435  if ( LW_FAILURE == gserialized_get_gbox_p(geom, &gbox) )
1436  {
1437  /* Skip empties too. */
1438  POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
1439  continue;
1440  }
1441 
1442  /* If we're in 2D mode, zero out the higher dimensions for "safety" */
1443  if ( mode == 2 )
1444  gbox.zmin = gbox.zmax = gbox.mmin = gbox.mmax = 0.0;
1445 
1446  /* Check bounds for validity (finite and not NaN) */
1447  if ( ! gbox_is_valid(&gbox) )
1448  {
1449  POSTGIS_DEBUGF(3, " skipped infinite/nan geometry %d", i);
1450  continue;
1451  }
1452 
1453  /*
1454  * In N-D mode, set the ndims to the maximum dimensionality found
1455  * in the sample. Otherwise, leave at ndims == 2.
1456  */
1457  if ( mode != 2 )
1458  ndims = Max(gbox_ndims(&gbox), ndims);
1459 
1460  /* Convert gbox to n-d box */
1461  nd_box = palloc(sizeof(ND_BOX));
1462  nd_box_from_gbox(&gbox, nd_box);
1463 
1464  /* Cache n-d bounding box */
1465  sample_boxes[notnull_cnt] = nd_box;
1466 
1467  /* Initialize sample extent before merging first entry */
1468  if ( ! notnull_cnt )
1469  nd_box_init_bounds(&sample_extent);
1470 
1471  /* Add current sample to overall sample extent */
1472  nd_box_merge(nd_box, &sample_extent);
1473 
1474  /* How many bytes does this sample use? */
1475  total_width += VARSIZE(geom);
1476 
1477  /* Add bounds coordinates to sums for stddev calculation */
1478  for ( d = 0; d < ndims; d++ )
1479  {
1480  sum.min[d] += nd_box->min[d];
1481  sum.max[d] += nd_box->max[d];
1482  }
1483 
1484  /* Increment our "good feature" count */
1485  notnull_cnt++;
1486 
1487  /* Free up memory if our sample geometry was copied */
1488  if ( is_copy )
1489  pfree(geom);
1490 
1491  /* Give backend a chance of interrupting us */
1492  vacuum_delay_point();
1493  }
1494 
1495  /*
1496  * We'll build a histogram having stats->attr->attstattarget cells
1497  * on each side, within reason... we'll use ndims*10000 as the
1498  * maximum number of cells.
1499  * Also, if we're sampling a relatively small table, we'll try to ensure that
1500  * we have an average of 5 features for each cell so the histogram isn't
1501  * so sparse.
1502  */
1503  histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
1504  histo_cells_target = Min(histo_cells_target, ndims * 10000);
1505  histo_cells_target = Min(histo_cells_target, (int)(total_rows/5));
1506  POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
1507  POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
1508 
1509  /* If there's no useful features, we can't work out stats */
1510  if ( ! notnull_cnt )
1511  {
1512  elog(NOTICE, "no non-null/empty features, unable to compute statistics");
1513  stats->stats_valid = false;
1514  return;
1515  }
1516 
1517  POSTGIS_DEBUGF(3, " sample_extent: %s", nd_box_to_json(&sample_extent, ndims));
1518 
1519  /*
1520  * Second scan:
1521  * o compute standard deviation
1522  */
1523  for ( d = 0; d < ndims; d++ )
1524  {
1525  /* Calculate average bounds values */
1526  avg.min[d] = sum.min[d] / notnull_cnt;
1527  avg.max[d] = sum.max[d] / notnull_cnt;
1528 
1529  /* Calculate standard deviation for this dimension bounds */
1530  for ( i = 0; i < notnull_cnt; i++ )
1531  {
1532  const ND_BOX *ndb = sample_boxes[i];
1533  stddev.min[d] += (ndb->min[d] - avg.min[d]) * (ndb->min[d] - avg.min[d]);
1534  stddev.max[d] += (ndb->max[d] - avg.max[d]) * (ndb->max[d] - avg.max[d]);
1535  }
1536  stddev.min[d] = sqrt(stddev.min[d] / notnull_cnt);
1537  stddev.max[d] = sqrt(stddev.max[d] / notnull_cnt);
1538 
1539  /* Histogram bounds for this dimension bounds is avg +/- SDFACTOR * stdev */
1540  histo_extent.min[d] = Max(avg.min[d] - SDFACTOR * stddev.min[d], sample_extent.min[d]);
1541  histo_extent.max[d] = Min(avg.max[d] + SDFACTOR * stddev.max[d], sample_extent.max[d]);
1542  }
1543 
1544  /*
1545  * Third scan:
1546  * o skip hard deviants
1547  * o compute new histogram box
1548  */
1549  nd_box_init_bounds(&histo_extent_new);
1550  for ( i = 0; i < notnull_cnt; i++ )
1551  {
1552  const ND_BOX *ndb = sample_boxes[i];
1553  /* Skip any hard deviants (boxes entirely outside our histo_extent */
1554  if ( ! nd_box_intersects(&histo_extent, ndb, ndims) )
1555  {
1556  POSTGIS_DEBUGF(4, " feature %d is a hard deviant, skipped", i);
1557  sample_boxes[i] = NULL;
1558  continue;
1559  }
1560  /* Expand our new box to fit all the other features. */
1561  nd_box_merge(ndb, &histo_extent_new);
1562  }
1563  /*
1564  * Expand the box slightly (1%) to avoid edge effects
1565  * with objects that are on the boundary
1566  */
1567  nd_box_expand(&histo_extent_new, 0.01);
1568  histo_extent = histo_extent_new;
1569 
1570  /*
1571  * How should we allocate our histogram cells to the
1572  * different dimensions? We can't do it by raw dimensional width,
1573  * because in x/y/z space, the z can have different units
1574  * from the x/y. Similarly for x/y/t space.
1575  * So, we instead calculate how much features overlap
1576  * each other in their dimension to figure out which
1577  * dimensions have useful selectivity characteristics (more
1578  * variability in density) and therefor would find
1579  * more cells useful (to distinguish between dense places and
1580  * homogeneous places).
1581  */
1582  nd_box_array_distribution(sample_boxes, notnull_cnt, &histo_extent, ndims,
1583  sample_distribution);
1584 
1585  /*
1586  * The sample_distribution array now tells us how spread out the
1587  * data is in each dimension, so we use that data to allocate
1588  * the histogram cells we have available.
1589  * At this point, histo_cells_target is the approximate target number
1590  * of cells.
1591  */
1592 
1593  /*
1594  * Some dimensions have basically a uniform distribution, we want
1595  * to allocate no cells to those dimensions, only to dimensions
1596  * that have some interesting differences in data distribution.
1597  * Here we count up the number of interesting dimensions
1598  */
1599  for ( d = 0; d < ndims; d++ )
1600  {
1601  if ( sample_distribution[d] > 0 )
1602  histo_ndims++;
1603  }
1604 
1605  if ( histo_ndims == 0 )
1606  {
1607  /* Special case: all our dimensions had low variability! */
1608  /* We just divide the cells up evenly */
1609  POSTGIS_DEBUG(3, " special case: no axes have variability");
1610  histo_cells_new = 1;
1611  for ( d = 0; d < ndims; d++ )
1612  {
1613  histo_size[d] = 1 + (int)pow((double)histo_cells_target, 1/(double)ndims);
1614  POSTGIS_DEBUGF(3, " histo_size[d]: %d", histo_size[d]);
1615  histo_cells_new *= histo_size[d];
1616  }
1617  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1618  }
1619  else
1620  {
1621  /*
1622  * We're going to express the amount of variability in each dimension
1623  * as a proportion of the total variability and allocate cells in that
1624  * dimension relative to that proportion.
1625  */
1626  POSTGIS_DEBUG(3, " allocating histogram axes based on axis variability");
1627  total_distribution = total_double(sample_distribution, ndims); /* First get the total */
1628  POSTGIS_DEBUGF(3, " total_distribution: %.8g", total_distribution);
1629  histo_cells_new = 1; /* For the number of cells in the final histogram */
1630  for ( d = 0; d < ndims; d++ )
1631  {
1632  if ( sample_distribution[d] == 0 ) /* Uninteresting dimensions don't get any room */
1633  {
1634  histo_size[d] = 1;
1635  }
1636  else /* Interesting dimension */
1637  {
1638  /* How does this dims variability compare to the total? */
1639  float edge_ratio = (float)sample_distribution[d] / (float)total_distribution;
1640  /*
1641  * Scale the target cells number by the # of dims and ratio,
1642  * then take the appropriate root to get the estimated number of cells
1643  * on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
1644  */
1645  histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
1646  /* If something goes awry, just give this dim one slot */
1647  if ( ! histo_size[d] )
1648  histo_size[d] = 1;
1649  }
1650  histo_cells_new *= histo_size[d];
1651  }
1652  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1653  }
1654 
1655  /* Update histo_cells to the actual number of cells we need to allocate */
1656  histo_cells = histo_cells_new;
1657  POSTGIS_DEBUGF(3, " histo_cells: %d", histo_cells);
1658 
1659  /*
1660  * Create the histogram (ND_STATS) in the stats memory context
1661  */
1662  old_context = MemoryContextSwitchTo(stats->anl_context);
1663  nd_stats_size = sizeof(ND_STATS) + ((histo_cells - 1) * sizeof(float4));
1664  nd_stats = palloc(nd_stats_size);
1665  memset(nd_stats, 0, nd_stats_size); /* Initialize all values to 0 */
1666  MemoryContextSwitchTo(old_context);
1667 
1668  /* Initialize the #ND_STATS objects */
1669  nd_stats->ndims = ndims;
1670  nd_stats->extent = histo_extent;
1671  nd_stats->sample_features = sample_rows;
1672  nd_stats->table_features = total_rows;
1673  nd_stats->not_null_features = notnull_cnt;
1674  /* Copy in the histogram dimensions */
1675  for ( d = 0; d < ndims; d++ )
1676  nd_stats->size[d] = histo_size[d];
1677 
1678  /*
1679  * Fourth scan:
1680  * o fill histogram values with the proportion of
1681  * features' bbox overlaps: a feature's bvol
1682  * can fully overlap (1) or partially overlap
1683  * (fraction of 1) an histogram cell.
1684  *
1685  * Note that we are filling each cell with the "portion of
1686  * the feature's box that overlaps the cell". So, if we sum
1687  * up the values in the histogram, we could get the
1688  * histogram feature count.
1689  *
1690  */
1691  for ( i = 0; i < notnull_cnt; i++ )
1692  {
1693  const ND_BOX *nd_box;
1694  ND_IBOX nd_ibox;
1695  int at[ND_DIMS];
1696  int d;
1697  double num_cells = 0;
1698  double tmp_volume = 1.0;
1699  double min[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1700  double max[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1701  double cellsize[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1702 
1703  nd_box = sample_boxes[i];
1704  if ( ! nd_box ) continue; /* Skip Null'ed out hard deviants */
1705 
1706  /* Give backend a chance of interrupting us */
1707  vacuum_delay_point();
1708 
1709  /* Find the cells that overlap with this box and put them into the ND_IBOX */
1710  nd_box_overlap(nd_stats, nd_box, &nd_ibox);
1711  memset(at, 0, sizeof(int)*ND_DIMS);
1712 
1713  POSTGIS_DEBUGF(3, " feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1714  nd_ibox.min[0], nd_ibox.min[1], nd_ibox.min[2], nd_ibox.min[3],
1715  nd_ibox.max[0], nd_ibox.max[1], nd_ibox.max[2], nd_ibox.max[3]);
1716 
1717  for ( d = 0; d < nd_stats->ndims; d++ )
1718  {
1719  /* Initialize the starting values */
1720  at[d] = nd_ibox.min[d];
1721  min[d] = nd_stats->extent.min[d];
1722  max[d] = nd_stats->extent.max[d];
1723  cellsize[d] = (max[d] - min[d])/(nd_stats->size[d]);
1724 
1725  /* What's the volume (area) of this feature's box? */
1726  tmp_volume *= (nd_box->max[d] - nd_box->min[d]);
1727  }
1728 
1729  /* Add feature volume (area) to our total */
1730  total_sample_volume += tmp_volume;
1731 
1732  /*
1733  * Move through all the overlaped histogram cells values and
1734  * add the box overlap proportion to them.
1735  */
1736  do
1737  {
1738  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1739  double ratio;
1740  /* Create a box for this histogram cell */
1741  for ( d = 0; d < nd_stats->ndims; d++ )
1742  {
1743  nd_cell.min[d] = min[d] + (at[d]+0) * cellsize[d];
1744  nd_cell.max[d] = min[d] + (at[d]+1) * cellsize[d];
1745  }
1746 
1747  /*
1748  * If a feature box is completely inside one cell the ratio will be
1749  * 1.0. If a feature box is 50% in two cells, each cell will get
1750  * 0.5 added on.
1751  */
1752  ratio = nd_box_ratio(&nd_cell, nd_box, nd_stats->ndims);
1753  nd_stats->value[nd_stats_value_index(nd_stats, at)] += ratio;
1754  num_cells += ratio;
1755  POSTGIS_DEBUGF(3, " ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1756  POSTGIS_DEBUGF(3, " at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1757  }
1758  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1759 
1760  /* Keep track of overall number of overlaps counted */
1761  total_cell_count += num_cells;
1762  /* How many features have we added to this histogram? */
1763  histogram_features++;
1764  }
1765 
1766  POSTGIS_DEBUGF(3, " histogram_features: %d", histogram_features);
1767  POSTGIS_DEBUGF(3, " sample_rows: %d", sample_rows);
1768  POSTGIS_DEBUGF(3, " table_rows: %.6g", total_rows);
1769 
1770  /* Error out if we got no sample information */
1771  if ( ! histogram_features )
1772  {
1773  POSTGIS_DEBUG(3, " no stats have been gathered");
1774  elog(NOTICE, " no features lie in the stats histogram, invalid stats");
1775  stats->stats_valid = false;
1776  return;
1777  }
1778 
1779  nd_stats->histogram_features = histogram_features;
1780  nd_stats->histogram_cells = histo_cells;
1781  nd_stats->cells_covered = total_cell_count;
1782 
1783  /* Put this histogram data into the right slot/kind */
1784  if ( mode == 2 )
1785  {
1786  stats_slot = STATISTIC_SLOT_2D;
1787  stats_kind = STATISTIC_KIND_2D;
1788  }
1789  else
1790  {
1791  stats_slot = STATISTIC_SLOT_ND;
1792  stats_kind = STATISTIC_KIND_ND;
1793  }
1794 
1795  /* Write the statistics data */
1796  stats->stakind[stats_slot] = stats_kind;
1797  stats->staop[stats_slot] = InvalidOid;
1798  stats->stanumbers[stats_slot] = (float4*)nd_stats;
1799  stats->numnumbers[stats_slot] = nd_stats_size/sizeof(float4);
1800  stats->stanullfrac = (float4)null_cnt/sample_rows;
1801  stats->stawidth = total_width/notnull_cnt;
1802  stats->stadistinct = -1.0;
1803  stats->stats_valid = true;
1804 
1805  POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1806  POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1807  POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
1808  POSTGIS_DEBUGF(3, " out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1809  POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
1810  POSTGIS_DEBUG (3, " out: distinct values: all (no check done)");
1811  POSTGIS_DEBUGF(3, " out: %s", nd_stats_to_json(nd_stats));
1812  /*
1813  POSTGIS_DEBUGF(3, " out histogram:\n%s", nd_stats_to_grid(nd_stats));
1814  */
1815 
1816  return;
1817 }
1818 
1819 
1837 static void
1838 compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1839  int sample_rows, double total_rows)
1840 {
1841  /* 2D Mode */
1842  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
1843  /* ND Mode */
1844  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 0);
1845 }
1846 
1847 
1876 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
1877 {
1878  VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1879  Form_pg_attribute attr = stats->attr;
1880 
1881  POSTGIS_DEBUG(2, "gserialized_analyze_nd called");
1882 
1883  /* If the attstattarget column is negative, use the default value */
1884  /* NB: it is okay to scribble on stats->attr since it's a copy */
1885  if (attr->attstattarget < 0)
1886  attr->attstattarget = default_statistics_target;
1887 
1888  POSTGIS_DEBUGF(3, " attribute stat target: %d", attr->attstattarget);
1889 
1890  /* Setup the minimum rows and the algorithm function */
1891  stats->minrows = 300 * stats->attr->attstattarget;
1892  stats->compute_stats = compute_gserialized_stats;
1893 
1894  POSTGIS_DEBUGF(3, " minrows: %d", stats->minrows);
1895 
1896  /* Indicate we are done successfully */
1897  PG_RETURN_BOOL(true);
1898 }
1899 
1912 static float8
1913 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
1914 {
1915  int d; /* counter */
1916  float8 selectivity;
1917  ND_BOX nd_box;
1918  ND_IBOX nd_ibox;
1919  int at[ND_DIMS];
1920  double cell_size[ND_DIMS];
1921  double min[ND_DIMS];
1922  double max[ND_DIMS];
1923  double total_count = 0.0;
1924  int ndims_max;
1925 
1926  /* Calculate the overlap of the box on the histogram */
1927  if ( ! nd_stats )
1928  {
1929  elog(NOTICE, " estimate_selectivity called with null input");
1930  return FALLBACK_ND_SEL;
1931  }
1932 
1933  ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1934 
1935  /* Initialize nd_box. */
1936  nd_box_from_gbox(box, &nd_box);
1937 
1938  /*
1939  * To return 2D stats on an ND sample, we need to make the
1940  * 2D box cover the full range of the other dimensions in the
1941  * histogram.
1942  */
1943  POSTGIS_DEBUGF(3, " mode: %d", mode);
1944  if ( mode == 2 )
1945  {
1946  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1947  ndims_max = 2;
1948  }
1949 
1950  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1951  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1952 
1953  /*
1954  * Search box completely misses histogram extent?
1955  * We have to intersect in all N dimensions or else we have
1956  * zero interaction under the &&& operator. It's important
1957  * to short circuit in this case, as some of the tests below
1958  * will return junk results when run on non-intersecting inputs.
1959  */
1960  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1961  {
1962  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1963  return 0.0;
1964  }
1965 
1966  /* Search box completely contains histogram extent! */
1967  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1968  {
1969  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1970  return 1.0;
1971  }
1972 
1973  /* Calculate the overlap of the box on the histogram */
1974  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
1975  {
1976  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
1977  return FALLBACK_ND_SEL;
1978  }
1979 
1980  /* Work out some measurements of the histogram */
1981  for ( d = 0; d < nd_stats->ndims; d++ )
1982  {
1983  /* Cell size in each dim */
1984  min[d] = nd_stats->extent.min[d];
1985  max[d] = nd_stats->extent.max[d];
1986  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
1987  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
1988 
1989  /* Initialize the counter */
1990  at[d] = nd_ibox.min[d];
1991  }
1992 
1993  /* Move through all the overlap values and sum them */
1994  do
1995  {
1996  float cell_count, ratio;
1997  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1998 
1999  /* We have to pro-rate partially overlapped cells. */
2000  for ( d = 0; d < nd_stats->ndims; d++ )
2001  {
2002  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2003  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2004  }
2005 
2006  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2007  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2008 
2009  /* Add the pro-rated count for this cell to the overall total */
2010  total_count += cell_count * ratio;
2011  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2012  }
2013  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2014 
2015  /* Scale by the number of features in our histogram to get the proportion */
2016  selectivity = total_count / nd_stats->histogram_features;
2017 
2018  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2019  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2020  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2021  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2022 
2023  /* Prevent rounding overflows */
2024  if (selectivity > 1.0) selectivity = 1.0;
2025  else if (selectivity < 0.0) selectivity = 0.0;
2026 
2027  return selectivity;
2028 }
2029 
2030 
2031 
2037 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
2038 {
2039  Oid table_oid = PG_GETARG_OID(0);
2040  text *att_text = PG_GETARG_TEXT_P(1);
2041  ND_STATS *nd_stats;
2042  char *str;
2043  text *json;
2044  int mode = 2; /* default to 2D mode */
2045  bool only_parent = false; /* default to whole tree stats */
2046 
2047  /* Check if we've been asked to not use 2d mode */
2048  if ( ! PG_ARGISNULL(2) )
2049  mode = text_p_get_mode(PG_GETARG_TEXT_P(2));
2050 
2051  /* Check if we've been asked to only use stats from parent */
2052  if ( ! PG_ARGISNULL(3) )
2053  only_parent = PG_GETARG_BOOL(3);
2054 
2055  /* Retrieve the stats object */
2056  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, only_parent);
2057  if ( ! nd_stats )
2058  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2059 
2060  /* Convert to JSON */
2061  str = nd_stats_to_json(nd_stats);
2062  json = cstring_to_text(str);
2063  pfree(str);
2064  pfree(nd_stats);
2065  PG_RETURN_TEXT_P(json);
2066 }
2067 
2068 
2074 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
2075 {
2076  Oid table_oid = PG_GETARG_OID(0);
2077  text *att_text = PG_GETARG_TEXT_P(1);
2078  Datum geom_datum = PG_GETARG_DATUM(2);
2079  GBOX gbox; /* search box read from gserialized datum */
2080  float8 selectivity = 0;
2081  ND_STATS *nd_stats;
2082  int mode = 2; /* 2D mode by default */
2083 
2084  /* Check if we've been asked to not use 2d mode */
2085  if ( ! PG_ARGISNULL(3) )
2086  mode = text_p_get_mode(PG_GETARG_TEXT_P(3));
2087 
2088  /* Retrieve the stats object */
2089  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, false);
2090 
2091  if ( ! nd_stats )
2092  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2093 
2094  /* Calculate the gbox */
2095  if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
2096  elog(ERROR, "unable to calculate bounding box from geometry");
2097 
2098  POSTGIS_DEBUGF(3, " %s", gbox_to_string(&gbox));
2099 
2100  /* Do the estimation */
2101  selectivity = estimate_selectivity(&gbox, nd_stats, mode);
2102 
2103  pfree(nd_stats);
2104  PG_RETURN_FLOAT8(selectivity);
2105 }
2106 
2107 
2113 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
2114 {
2115  Oid table_oid1 = PG_GETARG_OID(0);
2116  text *att_text1 = PG_GETARG_TEXT_P(1);
2117  Oid table_oid2 = PG_GETARG_OID(2);
2118  text *att_text2 = PG_GETARG_TEXT_P(3);
2119  ND_STATS *nd_stats1, *nd_stats2;
2120  float8 selectivity = 0;
2121  int mode = 2; /* 2D mode by default */
2122 
2123 
2124  /* Retrieve the stats object */
2125  nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode, false);
2126  nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode, false);
2127 
2128  if ( ! nd_stats1 )
2129  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text_to_cstring(att_text1));
2130 
2131  if ( ! nd_stats2 )
2132  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid2), text_to_cstring(att_text2));
2133 
2134  /* Check if we've been asked to not use 2d mode */
2135  if ( ! PG_ARGISNULL(4) )
2136  {
2137  text *modetxt = PG_GETARG_TEXT_P(4);
2138  char *modestr = text_to_cstring(modetxt);
2139  if ( modestr[0] == 'N' )
2140  mode = 0;
2141  }
2142 
2143  /* Do the estimation */
2144  selectivity = estimate_join_selectivity(nd_stats1, nd_stats2);
2145 
2146  pfree(nd_stats1);
2147  pfree(nd_stats2);
2148  PG_RETURN_FLOAT8(selectivity);
2149 }
2150 
2156 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
2157 {
2158  PG_RETURN_DATUM(DirectFunctionCall5(
2160  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2161  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2162  Int32GetDatum(2) /* 2-D mode */
2163  ));
2164 }
2165 
2171 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
2172 {
2173  PG_RETURN_DATUM(DirectFunctionCall5(
2175  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2176  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2177  Int32GetDatum(0) /* N-D mode */
2178  ));
2179 }
2180 
2195 Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
2196 {
2197  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2198  /* Oid operator_oid = PG_GETARG_OID(1); */
2199  List *args = (List *) PG_GETARG_POINTER(2);
2200  /* int varRelid = PG_GETARG_INT32(3); */
2201  int mode = PG_GETARG_INT32(4);
2202 
2203  VariableStatData vardata;
2204  ND_STATS *nd_stats = NULL;
2205 
2206  Node *other;
2207  Var *self;
2208  GBOX search_box;
2209  float8 selectivity = 0;
2210 
2211  POSTGIS_DEBUG(2, "gserialized_gist_sel called");
2212 
2213  /*
2214  * TODO: This is a big one,
2215  * All this statistics code *only* tries to generate a valid
2216  * selectivity for && and &&&. That leaves all the other
2217  * geometry operators with bad stats! The selectivity
2218  * calculation should take account of the incoming operator
2219  * type and do the right thing.
2220  */
2221 
2222  /* Fail if not a binary opclause (probably shouldn't happen) */
2223  if (list_length(args) != 2)
2224  {
2225  POSTGIS_DEBUG(3, "gserialized_gist_sel: not a binary opclause");
2226  PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
2227  }
2228 
2229  /* Find the constant part */
2230  other = (Node *) linitial(args);
2231  if ( ! IsA(other, Const) )
2232  {
2233  self = (Var *)other;
2234  other = (Node *) lsecond(args);
2235  }
2236  else
2237  {
2238  self = (Var *) lsecond(args);
2239  }
2240 
2241  if ( ! IsA(other, Const) )
2242  {
2243  POSTGIS_DEBUG(3, " no constant arguments - returning a default selectivity");
2244  PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
2245  }
2246 
2247  /* Convert the constant to a BOX */
2248  if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
2249  {
2250  POSTGIS_DEBUG(3, "search box is EMPTY");
2251  PG_RETURN_FLOAT8(0.0);
2252  }
2253  POSTGIS_DEBUGF(4, " requested search box is: %s", gbox_to_string(&search_box));
2254 
2255  /* Get pg_statistic row */
2256  examine_variable(root, (Node*)self, 0, &vardata);
2257  if ( vardata.statsTuple ) {
2258  nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
2259  }
2260  ReleaseVariableStats(vardata);
2261 
2262  if ( ! nd_stats )
2263  {
2264  POSTGIS_DEBUG(3, " unable to load stats from syscache, not analyzed yet?");
2265  PG_RETURN_FLOAT8(FALLBACK_ND_SEL);
2266  }
2267 
2268  POSTGIS_DEBUGF(4, " got stats:\n%s", nd_stats_to_json(nd_stats));
2269 
2270  /* Do the estimation! */
2271  selectivity = estimate_selectivity(&search_box, nd_stats, mode);
2272  POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);
2273 
2274  pfree(nd_stats);
2275  PG_RETURN_FLOAT8(selectivity);
2276 }
2277 
2278 
2279 
2286 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
2287 {
2288  char *nsp = NULL;
2289  char *tbl = NULL;
2290  text *col = NULL;
2291  char *nsp_tbl = NULL;
2292  Oid tbl_oid, idx_oid;
2293  ND_STATS *nd_stats;
2294  GBOX *gbox = NULL;
2295  bool only_parent = false;
2296  int key_type;
2297 
2298  if ( PG_NARGS() == 4 )
2299  {
2300  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2301  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2302  col = PG_GETARG_TEXT_P(2);
2303  only_parent = PG_GETARG_BOOL(3);
2304  nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 6);
2305  sprintf(nsp_tbl, "\"%s\".\"%s\"", nsp, tbl);
2306  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2307  pfree(nsp_tbl);
2308  }
2309  else if ( PG_NARGS() == 3 )
2310  {
2311  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2312  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2313  col = PG_GETARG_TEXT_P(2);
2314  nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 6);
2315  sprintf(nsp_tbl, "\"%s\".\"%s\"", nsp, tbl);
2316  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2317  pfree(nsp_tbl);
2318  }
2319  else if ( PG_NARGS() == 2 )
2320  {
2321  tbl = text_to_cstring(PG_GETARG_TEXT_P(0));
2322  col = PG_GETARG_TEXT_P(1);
2323  nsp_tbl = palloc(strlen(tbl) + 3);
2324  sprintf(nsp_tbl, "\"%s\"", tbl);
2325  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2326  pfree(nsp_tbl);
2327  }
2328  else
2329  {
2330  elog(ERROR, "estimated_extent() called with wrong number of arguments");
2331  PG_RETURN_NULL();
2332  }
2333 
2334 #if 1
2335  /* Read the extent from the head of the spatial index, if there is one */
2336  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type);
2337  if (!idx_oid)
2338  elog(DEBUG2, "index for \"%s.%s\" does not exist", tbl, text_to_cstring(col));
2339  gbox = spatial_index_read_extent(idx_oid, key_type);
2340 #endif
2341 
2342  /* Fall back to reading the stats, if no index answer */
2343  if (!gbox)
2344  {
2345  /* Estimated extent only returns 2D bounds, so use mode 2 */
2346  nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2, only_parent);
2347 
2348  /* Error out on no stats */
2349  if ( ! nd_stats ) {
2350  elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, text_to_cstring(col));
2351  PG_RETURN_NULL();
2352  }
2353 
2354  /* Construct the box */
2355  gbox = palloc(sizeof(GBOX));
2356  FLAGS_SET_GEODETIC(gbox->flags, 0);
2357  FLAGS_SET_Z(gbox->flags, 0);
2358  FLAGS_SET_M(gbox->flags, 0);
2359  gbox->xmin = nd_stats->extent.min[0];
2360  gbox->xmax = nd_stats->extent.max[0];
2361  gbox->ymin = nd_stats->extent.min[1];
2362  gbox->ymax = nd_stats->extent.max[1];
2363  pfree(nd_stats);
2364  }
2365 
2366  PG_RETURN_POINTER(gbox);
2367 }
2368 
2376 Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
2377 {
2378  if ( PG_NARGS() == 3 )
2379  {
2380  PG_RETURN_DATUM(
2381  DirectFunctionCall3(gserialized_estimated_extent,
2382  PG_GETARG_DATUM(0),
2383  PG_GETARG_DATUM(1),
2384  PG_GETARG_DATUM(2)));
2385  }
2386  else if ( PG_NARGS() == 2 )
2387  {
2388  PG_RETURN_DATUM(
2389  DirectFunctionCall2(gserialized_estimated_extent,
2390  PG_GETARG_DATUM(0),
2391  PG_GETARG_DATUM(1)));
2392  }
2393 
2394  elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
2395  PG_RETURN_NULL();
2396 }
2397 
2398 /************************************************************************/
2399 
2400 static Oid
2401 typname_to_oid(const char *typname)
2402 {
2403  Oid typoid = TypenameGetTypid(typname);
2404  if (OidIsValid(typoid) && get_typisdefined(typoid))
2405  return typoid;
2406  else
2407  return InvalidOid;
2408 }
2409 
2410 static Oid
2411 table_get_spatial_index(Oid tbl_oid, text *col, int *key_type)
2412 {
2413  Relation tbl_rel;
2414  ListCell *lc;
2415  List *idx_list;
2416  Oid result = InvalidOid;
2417  char *colname = text_to_cstring(col);
2418 
2419  /* Lookup our spatial index key types */
2420  Oid b2d_oid = typname_to_oid(INDEX_KEY_2D);
2421  Oid gdx_oid = typname_to_oid(INDEX_KEY_ND);
2422 
2423  if (!(b2d_oid && gdx_oid))
2424  return InvalidOid;
2425 
2426  tbl_rel = RelationIdGetRelation(tbl_oid);
2427  idx_list = RelationGetIndexList(tbl_rel);
2428  RelationClose(tbl_rel);
2429 
2430  /* For each index associated with this table... */
2431  foreach(lc, idx_list)
2432  {
2433  Form_pg_class idx_form;
2434  HeapTuple idx_tup;
2435  int idx_relam;
2436  Oid idx_oid = lfirst_oid(lc);
2437 
2438  idx_tup = SearchSysCache1(RELOID, ObjectIdGetDatum(idx_oid));
2439  if (!HeapTupleIsValid(idx_tup))
2440  elog(ERROR, "%s: unable to lookup index %u in syscache", __func__, idx_oid);
2441  idx_form = (Form_pg_class) GETSTRUCT(idx_tup);
2442  idx_relam = idx_form->relam;
2443  ReleaseSysCache(idx_tup);
2444 
2445  /* Does the index use a GIST access method? */
2446  if (idx_relam == GIST_AM_OID)
2447  {
2448  Form_pg_attribute att;
2449  Oid atttypid;
2450  /* Is the index on the column name we are looking for? */
2451  HeapTuple att_tup = SearchSysCache2(ATTNAME,
2452  ObjectIdGetDatum(idx_oid),
2453  PointerGetDatum(colname));
2454  if (!HeapTupleIsValid(att_tup))
2455  continue;
2456 
2457  att = (Form_pg_attribute) GETSTRUCT(att_tup);
2458  atttypid = att->atttypid;
2459  ReleaseSysCache(att_tup);
2460 
2461  /* Is the column actually spatial? */
2462  if (b2d_oid == atttypid || gdx_oid == atttypid)
2463  {
2464  /* Save result, clean up, and break out */
2465  result = idx_oid;
2466  if (key_type)
2467  *key_type = (atttypid == b2d_oid ? STATISTIC_SLOT_2D : STATISTIC_SLOT_ND);
2468  break;
2469  }
2470  }
2471  }
2472  return result;
2473 }
2474 
2475 static GBOX *
2476 spatial_index_read_extent(Oid idx_oid, int key_type)
2477 {
2478  BOX2DF *bounds_2df = NULL;
2479  GIDX *bounds_gidx = NULL;
2480  GBOX *gbox = NULL;
2481  Relation idx_rel;
2482  Buffer buffer;
2483  Page page;
2484  OffsetNumber offset;
2485  unsigned long offset_max;
2486 
2487  if (!idx_oid)
2488  return NULL;
2489 
2490  idx_rel = index_open(idx_oid, AccessExclusiveLock);
2491  buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2492  page = (Page) BufferGetPage(buffer);
2493  offset = FirstOffsetNumber;
2494  offset_max = PageGetMaxOffsetNumber(page);
2495  while (offset <= offset_max)
2496  {
2497  ItemId iid = PageGetItemId(page, offset);
2498  IndexTuple ituple;
2499  if (!iid)
2500  {
2501  ReleaseBuffer(buffer);
2502  index_close(idx_rel, AccessExclusiveLock);
2503  return NULL;
2504  }
2505  ituple = (IndexTuple) PageGetItem(page, iid);
2506  if (!GistTupleIsInvalid(ituple))
2507  {
2508  bool isnull;
2509  Datum idx_attr = index_getattr(ituple, 1, idx_rel->rd_att, &isnull);
2510  if (!isnull)
2511  {
2512  if (key_type == STATISTIC_SLOT_2D)
2513  {
2514  BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2515  if (bounds_2df)
2516  box2df_merge(bounds_2df, b);
2517  else
2518  bounds_2df = box2df_copy(b);
2519  }
2520  else
2521  {
2522  GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2523  if (bounds_gidx)
2524  gidx_merge(&bounds_gidx, b);
2525  else
2526  bounds_gidx = gidx_copy(b);
2527  }
2528  }
2529  }
2530  offset++;
2531  }
2532 
2533  ReleaseBuffer(buffer);
2534  index_close(idx_rel, AccessExclusiveLock);
2535 
2536  if (key_type == STATISTIC_SLOT_2D && bounds_2df)
2537  {
2538  if (box2df_is_empty(bounds_2df))
2539  return NULL;
2540  gbox = gbox_new(0);
2541  box2df_to_gbox_p(bounds_2df, gbox);
2542  }
2543  else if (key_type == STATISTIC_SLOT_ND && bounds_gidx)
2544  {
2545  if (gidx_is_unknown(bounds_gidx))
2546  return NULL;
2547  gbox = gbox_new(0);
2548  gbox_from_gidx(bounds_gidx, gbox, 0);
2549  }
2550  else
2551  return NULL;
2552 
2553  return gbox;
2554 }
2555 
2556 /*
2557 CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
2558  RETURNS box2d
2559  AS '$libdir/postgis-2.5','_postgis_gserialized_index_extent'
2560  LANGUAGE 'c' STABLE STRICT;
2561 */
2562 
2564 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
2565 {
2566  GBOX *gbox = NULL;
2567  int key_type;
2568  Oid tbl_oid = PG_GETARG_DATUM(0);
2569  text *col = PG_GETARG_TEXT_P(1);
2570 
2571  Oid idx_oid = table_get_spatial_index(tbl_oid, col, &key_type);
2572  if (!idx_oid)
2573  PG_RETURN_NULL();
2574 
2575  gbox = spatial_index_read_extent(idx_oid, key_type);
2576  if (!gbox)
2577  PG_RETURN_NULL();
2578  else
2579  PG_RETURN_POINTER(gbox);
2580 }
2581 
args
Definition: ovdump.py:44
int gserialized_get_gbox_p(const GSERIALIZED *g, GBOX *box)
Read the bounding box off a serialization and calculate one if it is not already there.
Definition: g_serialized.c:639
char * text_to_cstring(const text *textptr)
#define INDEX_KEY_ND
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one...
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
GBOX * gbox_new(uint8_t flags)
Create a new gbox with the dimensionality indicated by the flags.
Definition: g_box.c:39
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
#define DEFAULT_ND_JOINSEL
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
Definition: stringbuffer.c:35
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition: g_box.c:204
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
Definition: g_box.c:399
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define FLAGS_GET_GEODETIC(flags)
Definition: liblwgeom.h:142
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string...
Definition: stringbuffer.c:160
double xmax
Definition: liblwgeom.h:295
static Oid typname_to_oid(const char *typname)
#define ND_DIMS
The maximum number of dimensions our code can handle.
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
#define FALLBACK_ND_JOINSEL
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
BOX2DF * box2df_copy(BOX2DF *b)
#define FLAGS_SET_GEODETIC(flags, value)
Definition: liblwgeom.h:148
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
#define LW_FAILURE
Definition: liblwgeom.h:78
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided...
Definition: stringbuffer.c:253
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
#define FLAGS_SET_Z(flags, value)
Definition: liblwgeom.h:145
Datum buffer(PG_FUNCTION_ARGS)
double zmax
Definition: liblwgeom.h:299
double ymin
Definition: liblwgeom.h:296
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension...
double xmin
Definition: liblwgeom.h:294
#define INDEX_KEY_2D
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array...
float4 size[ND_DIMS]
static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type)
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
int min[ND_DIMS]
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
double ymax
Definition: liblwgeom.h:297
N-dimensional box index type.
#define FLAGS_GET_Z(flags)
Macros for manipulating the 'flags' byte.
Definition: liblwgeom.h:139
static int range_quintile(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator...
uint8_t flags
Definition: liblwgeom.h:293
static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type)
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
float4 max[ND_DIMS]
void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
Definition: stringbuffer.c:134
#define STATISTIC_KIND_2D
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
Definition: stringbuffer.c:78
int max[ND_DIMS]
bool gidx_is_unknown(const GIDX *a)
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
float4 min[ND_DIMS]
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Create a printable view of the ND_STATS histogram.
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
double mmin
Definition: liblwgeom.h:300
#define SDFACTOR
double zmin
Definition: liblwgeom.h:298
#define FLAGS_GET_M(flags)
Definition: liblwgeom.h:140
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
double mmax
Definition: liblwgeom.h:301
#define STATISTIC_SLOT_ND
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
int value
Definition: genraster.py:61
N-dimensional statistics structure.
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
GIDX * gidx_copy(GIDX *b)
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
bool box2df_is_empty(const BOX2DF *a)
This library is the generic geometry handling section of PostGIS.
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
#define STATISTIC_KIND_ND
#define FLAGS_SET_M(flags, value)
Definition: liblwgeom.h:146