PostGIS  3.7.0dev-r@@SVN_REVISION@@
gserialized_estimate.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * PostGIS is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * PostGIS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18  *
19  **********************************************************************
20  *
21  * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
22  *
23  **********************************************************************/
24 
25 
26 
27 /**********************************************************************
28  THEORY OF OPERATION
29 
30 The ANALYZE command hooks to a callback (gserialized_analyze_nd) that
31 calculates (compute_gserialized_stats_mode) two histograms of occurrences of
32 features, once for the 2D domain (and the && operator) one for the
33 ND domain (and the &&& operator).
34 
35 Queries in PostgreSQL call into the selectivity sub-system to find out
36 the relative effectiveness of different clauses in sub-setting
37 relations. Queries with constant arguments call gserialized_gist_sel,
38 queries with relations on both sides call gserialized_gist_joinsel.
39 
40 gserialized_gist_sel sums up the values in the histogram that overlap
41 the constant search box.
42 
43 gserialized_gist_joinsel sums up the product of the overlapping
44 cells in each relation's histogram.
45 
46 Depending on the operator and type, the mode of selectivity calculation
47 will be 2D or ND.
48 
49 - geometry && geometry ==> 2D
50 - geometry &&& geometry ==> ND
51 - geography && geography ==> ND
52 
53 The 2D mode is put in effect by retrieving the 2D histogram from the
54 statistics cache and then allowing the generic ND calculations to
55 go to work.
56 
57 TO DO: More testing and examination of the &&& operator and mixed
58 dimensionality cases. (2D geometry) &&& (3D column), etc.
59 
60 **********************************************************************/
61 
62 #include "postgres.h"
63 
64 #include "access/genam.h"
65 #include "access/gin.h"
66 #include "access/gist.h"
67 #include "access/gist_private.h"
68 #include "access/gistscan.h"
69 #if PG_VERSION_NUM < 130000
70 #include "access/tuptoaster.h" /* For toast_raw_datum_size */
71 #else
72 #include "access/detoast.h" /* For toast_raw_datum_size */
73 #endif
74 #include "utils/datum.h"
75 #include "access/heapam.h"
76 #include "catalog/index.h"
77 #include "catalog/pg_am.h"
78 #include "miscadmin.h"
79 #include "storage/lmgr.h"
80 #include "catalog/namespace.h"
81 #include "catalog/indexing.h"
82 
83 #include "utils/regproc.h"
84 #include "utils/varlena.h"
85 
86 #include "utils/builtins.h"
87 #include "utils/datum.h"
88 #include "utils/snapmgr.h"
89 #include "utils/fmgroids.h"
90 #include "funcapi.h"
91 #include "access/heapam.h"
92 #include "catalog/pg_type.h"
93 #include "access/relscan.h"
94 
95 #include "executor/spi.h"
96 #include "fmgr.h"
97 #include "commands/vacuum.h"
98 #include "nodes/pathnodes.h"
99 
100 #include "parser/parsetree.h"
101 #include "utils/array.h"
102 #include "utils/lsyscache.h"
103 #include "utils/builtins.h"
104 #include "utils/syscache.h"
105 #include "utils/rel.h"
106 #include "utils/selfuncs.h"
107 
108 #include "../postgis_config.h"
109 
110 #include "access/htup_details.h"
111 
112 #include "stringbuffer.h"
113 #include "liblwgeom.h"
114 #include "lwgeodetic.h"
115 #include "lwgeom_pg.h" /* For debugging macros. */
116 #include "gserialized_gist.h" /* For index common functions */
117 
118 #include <math.h>
119 #if HAVE_IEEEFP_H
120 #include <ieeefp.h>
121 #endif
122 #include <float.h>
123 #include <string.h>
124 #include <stdio.h>
125 #include <ctype.h>
126 
127 
128 /************************************************************************/
129 
130 
131 /* Prototypes */
132 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS);
133 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS);
134 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS);
135 Datum gserialized_gist_sel(PG_FUNCTION_ARGS);
136 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS);
137 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
138 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
139 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
140 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS);
141 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
142 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
143 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
144 
145 /* Local prototypes */
146 static Oid table_get_spatial_index(Oid tbl_oid, int16 attnum, int *key_type, int16 *idx_attnum);
147 static GBOX * spatial_index_read_extent(Oid idx_oid, int idx_att_num, int key_type);
148 
149 
150 /* Other prototypes */
151 float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
152 float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode);
153 
154 /* Old Prototype */
155 Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
156 
157 /*
158  * Assign a number to the n-dimensional statistics kind
159  *
160  * tgl suggested:
161  *
162  * 1-100: reserved for assignment by the core Postgres project
163  * 100-199: reserved for assignment by PostGIS
164  * 200-9999: reserved for other globally-known stats kinds
165  * 10000-32767: reserved for private site-local use
166  */
167 #define STATISTIC_KIND_ND 102
168 #define STATISTIC_KIND_2D 103
169 
170 /*
171  * Postgres does not pin its slots and uses them as they come.
172  * We need to preserve its Correlation for brin to work
173  * 0 may be MCV
174  * 1 may be Histogram
175  * 2 may be Correlation
176  * We take 3 and 4.
177  */
178 #define STATISTIC_SLOT_ND 3
179 #define STATISTIC_SLOT_2D 4
180 
181 /*
182 * The SD factor restricts the side of the statistics histogram
183 * based on the standard deviation of the extent of the data.
184 * SDFACTOR is the number of standard deviations from the mean
185 * the histogram will extend.
186 */
187 #define SDFACTOR 3.25
188 
194 #define ND_DIMS 4
195 
202 #define MIN_DIMENSION_WIDTH 0.000000001
203 
208 #define MAX_DIMENSION_WIDTH 1.0E+20
209 
213 #define DEFAULT_ND_SEL 0.0001
214 #define DEFAULT_ND_JOINSEL 0.001
215 
219 #define FALLBACK_ND_SEL 0.2
220 #define FALLBACK_ND_JOINSEL 0.3
221 
227 typedef struct ND_BOX_T
228 {
229  float4 min[ND_DIMS];
230  float4 max[ND_DIMS];
232 
236 typedef struct ND_IBOX_T
237 {
238  int min[ND_DIMS];
239  int max[ND_DIMS];
241 
242 
249 typedef struct ND_STATS_T
250 {
251  /* Dimensionality of the histogram. */
252  float4 ndims;
253 
254  /* Size of n-d histogram in each dimension. */
255  float4 size[ND_DIMS];
256 
257  /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
259 
260  /* How many rows in the table itself? */
262 
263  /* How many rows were in the sample that built this histogram? */
265 
266  /* How many not-Null/Empty features were in the sample? */
268 
269  /* How many features actually got sampled in the histogram? */
271 
272  /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
274 
275  /* How many cells did those histogram features cover? */
276  /* Since we are pro-rating coverage, this number should */
277  /* now always equal histogram_features */
279 
280  /* Variable length # of floats for histogram */
281  float4 value[1];
283 
284 typedef struct {
285  /* Saved state from std_typanalyze() */
286  AnalyzeAttrComputeStatsFunc std_compute_stats;
289 
296 static int
297 gbox_ndims(const GBOX* gbox)
298 {
299  int dims = 2;
300  if ( FLAGS_GET_GEODETIC(gbox->flags) )
301  return 3;
302  if ( FLAGS_GET_Z(gbox->flags) )
303  dims++;
304  if ( FLAGS_GET_M(gbox->flags) )
305  dims++;
306  return dims;
307 }
308 
314 static int
315 text_p_get_mode(const text *txt)
316 {
317  int mode = 2;
318  char *modestr;
319  if (VARSIZE_ANY_EXHDR(txt) <= 0)
320  return mode;
321  modestr = (char*)VARDATA(txt);
322  if ( modestr[0] == 'N' )
323  mode = 0;
324  return mode;
325 }
326 
327 
331 static int
332 cmp_int (const void *a, const void *b)
333 {
334  int ia = *((const int*)a);
335  int ib = *((const int*)b);
336 
337  if ( ia == ib )
338  return 0;
339  else if ( ia > ib )
340  return 1;
341  else
342  return -1;
343 }
344 
349 // static int
350 // range_quintile(int *vals, int nvals)
351 // {
352 // qsort(vals, nvals, sizeof(int), cmp_int);
353 // return vals[4*nvals/5] - vals[nvals/5];
354 // }
355 
359 static int
360 range_full(int *vals, int nvals)
361 {
362  qsort(vals, nvals, sizeof(int), cmp_int);
363  return vals[nvals-1] - vals[0];
364 }
365 
369 static double
370 total_double(const double *vals, int nvals)
371 {
372  int i;
373  float total = 0;
374  /* Calculate total */
375  for ( i = 0; i < nvals; i++ )
376  total += vals[i];
377 
378  return total;
379 }
380 
381 #if POSTGIS_DEBUG_LEVEL >= 3
382 
386 static int
387 total_int(const int *vals, int nvals)
388 {
389  int i;
390  int total = 0;
391  /* Calculate total */
392  for ( i = 0; i < nvals; i++ )
393  total += vals[i];
394 
395  return total;
396 }
397 
401 static double
402 avg(const int *vals, int nvals)
403 {
404  int t = total_int(vals, nvals);
405  return (double)t / (double)nvals;
406 }
407 
411 static double
412 stddev(const int *vals, int nvals)
413 {
414  int i;
415  double sigma2 = 0;
416  double mean = avg(vals, nvals);
417 
418  /* Calculate sigma2 */
419  for ( i = 0; i < nvals; i++ )
420  {
421  double v = (double)(vals[i]);
422  sigma2 += (mean - v) * (mean - v);
423  }
424  return sqrt(sigma2 / nvals);
425 }
426 #endif /* POSTGIS_DEBUG_LEVEL >= 3 */
427 
432 static int
433 nd_stats_value_index(const ND_STATS *stats, int *indexes)
434 {
435  int d;
436  int accum = 1, vdx = 0;
437 
438  /* Calculate the index into the 1-d values array that the (i,j,k,l) */
439  /* n-d histogram coordinate implies. */
440  /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
441  for ( d = 0; d < (int)(stats->ndims); d++ )
442  {
443  int size = (int)(stats->size[d]);
444  if ( indexes[d] < 0 || indexes[d] >= size )
445  {
446  POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
447  return -1;
448  }
449  vdx += indexes[d] * accum;
450  accum *= size;
451  }
452  return vdx;
453 }
454 
458 static char*
459 nd_box_to_json(const ND_BOX *nd_box, int ndims)
460 {
461  char *rv;
462  int i;
464 
465  stringbuffer_append(sb, "{\"min\":[");
466  for ( i = 0; i < ndims; i++ )
467  {
468  if ( i ) stringbuffer_append(sb, ",");
469  stringbuffer_aprintf(sb, "%.6g", nd_box->min[i]);
470  }
471  stringbuffer_append(sb, "],\"max\":[");
472  for ( i = 0; i < ndims; i++ )
473  {
474  if ( i ) stringbuffer_append(sb, ",");
475  stringbuffer_aprintf(sb, "%.6g", nd_box->max[i]);
476  }
477  stringbuffer_append(sb, "]}");
478 
481  return rv;
482 }
483 
484 
489 static char*
490 nd_stats_to_json(const ND_STATS *nd_stats)
491 {
492  char *json_extent, *str;
493  int d;
495  int ndims = (int)roundf(nd_stats->ndims);
496 
497  stringbuffer_append(sb, "{");
498  stringbuffer_aprintf(sb, "\"ndims\":%d,", ndims);
499 
500  /* Size */
501  stringbuffer_append(sb, "\"size\":[");
502  for ( d = 0; d < ndims; d++ )
503  {
504  if ( d ) stringbuffer_append(sb, ",");
505  stringbuffer_aprintf(sb, "%d", (int)roundf(nd_stats->size[d]));
506  }
507  stringbuffer_append(sb, "],");
508 
509  /* Extent */
510  json_extent = nd_box_to_json(&(nd_stats->extent), ndims);
511  stringbuffer_aprintf(sb, "\"extent\":%s,", json_extent);
512  pfree(json_extent);
513 
514  stringbuffer_aprintf(sb, "\"table_features\":%d,", (int)roundf(nd_stats->table_features));
515  stringbuffer_aprintf(sb, "\"sample_features\":%d,", (int)roundf(nd_stats->sample_features));
516  stringbuffer_aprintf(sb, "\"not_null_features\":%d,", (int)roundf(nd_stats->not_null_features));
517  stringbuffer_aprintf(sb, "\"histogram_features\":%d,", (int)roundf(nd_stats->histogram_features));
518  stringbuffer_aprintf(sb, "\"histogram_cells\":%d,", (int)roundf(nd_stats->histogram_cells));
519  stringbuffer_aprintf(sb, "\"cells_covered\":%d", (int)roundf(nd_stats->cells_covered));
520  stringbuffer_append(sb, "}");
521 
524  return str;
525 }
526 
527 
533 static char*
535 {
536  char *rv;
537  int j, k;
538  int sizex = (int)roundf(stats->size[0]);
539  int sizey = (int)roundf(stats->size[1]);
541 
542  for ( k = 0; k < sizey; k++ )
543  {
544  for ( j = 0; j < sizex; j++ )
545  {
546  stringbuffer_aprintf(sb, "%3d ", (int)roundf(stats->value[j + k*sizex]));
547  }
548  stringbuffer_append(sb, "\n");
549  }
550 
553  return rv;
554 }
555 
556 
558 static int
559 nd_box_merge(const ND_BOX *source, ND_BOX *target)
560 {
561  int d;
562  for ( d = 0; d < ND_DIMS; d++ )
563  {
564  target->min[d] = Min(target->min[d], source->min[d]);
565  target->max[d] = Max(target->max[d], source->max[d]);
566  }
567  return true;
568 }
569 
571 static int
573 {
574  memset(a, 0, sizeof(ND_BOX));
575  return true;
576 }
577 
583 static int
585 {
586  int d;
587  for ( d = 0; d < ND_DIMS; d++ )
588  {
589  a->min[d] = FLT_MAX;
590  a->max[d] = -1 * FLT_MAX;
591  }
592  return true;
593 }
594 
596 static void
597 nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
598 {
599  volatile int d = 0;
600  POSTGIS_DEBUGF(3, " %s", gbox_to_string(gbox));
601 
602  nd_box_init(nd_box);
603  nd_box->min[d] = gbox->xmin;
604  nd_box->max[d] = gbox->xmax;
605  d++;
606  nd_box->min[d] = gbox->ymin;
607  nd_box->max[d] = gbox->ymax;
608  d++;
609  if ( FLAGS_GET_GEODETIC(gbox->flags) )
610  {
611  nd_box->min[d] = gbox->zmin;
612  nd_box->max[d] = gbox->zmax;
613  return;
614  }
615  if ( FLAGS_GET_Z(gbox->flags) )
616  {
617  nd_box->min[d] = gbox->zmin;
618  nd_box->max[d] = gbox->zmax;
619  d++;
620  }
621  if ( FLAGS_GET_M(gbox->flags) )
622  {
623  nd_box->min[d] = gbox->mmin;
624  nd_box->max[d] = gbox->mmax;
625  d++;
626  }
627  return;
628 }
629 
633 static int
634 nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
635 {
636  int d;
637  for ( d = 0; d < ndims; d++ )
638  {
639  if ( (a->min[d] > b->max[d]) || (a->max[d] < b->min[d]) )
640  return false;
641  }
642  return true;
643 }
644 
648 static int
649 nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
650 {
651  int d;
652  for ( d = 0; d < ndims; d++ )
653  {
654  if ( ! ((a->min[d] < b->min[d]) && (a->max[d] > b->max[d])) )
655  return false;
656  }
657  return true;
658 }
659 
664 static int
665 nd_box_expand(ND_BOX *nd_box, double expansion_factor)
666 {
667  int d;
668  double size;
669  for ( d = 0; d < ND_DIMS; d++ )
670  {
671  size = nd_box->max[d] - nd_box->min[d];
672  /* Avoid expanding boxes that are either too wide or too narrow*/
673  if (size < MIN_DIMENSION_WIDTH || size > MAX_DIMENSION_WIDTH)
674  continue;
675  nd_box->min[d] -= size * expansion_factor / 2;
676  nd_box->max[d] += size * expansion_factor / 2;
677  }
678  return true;
679 }
680 
685 static inline int
686 nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
687 {
688  int d;
689 
690  POSTGIS_DEBUGF(4, " nd_box: %s", nd_box_to_json(nd_box, nd_stats->ndims));
691 
692  /* Initialize ibox */
693  memset(nd_ibox, 0, sizeof(ND_IBOX));
694 
695  /* In each dimension... */
696  for ( d = 0; d < nd_stats->ndims; d++ )
697  {
698  double smin = nd_stats->extent.min[d];
699  double smax = nd_stats->extent.max[d];
700  double width = smax - smin;
701 
702  if (width < MIN_DIMENSION_WIDTH)
703  {
704  nd_ibox->min[d] = nd_ibox->max[d] = nd_stats->extent.min[d];
705  }
706  else
707  {
708  int size = (int)roundf(nd_stats->size[d]);
709 
710  /* ... find cells the box overlaps with in this dimension */
711  nd_ibox->min[d] = floor(size * (nd_box->min[d] - smin) / width);
712  nd_ibox->max[d] = floor(size * (nd_box->max[d] - smin) / width);
713 
714  POSTGIS_DEBUGF(5, " stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
715  POSTGIS_DEBUGF(5, " overlap: dim %d: (%d, %d)", d, nd_ibox->min[d], nd_ibox->max[d]);
716 
717  /* Push any out-of range values into range */
718  nd_ibox->min[d] = Max(nd_ibox->min[d], 0);
719  nd_ibox->max[d] = Min(nd_ibox->max[d], size - 1);
720  }
721  }
722  return true;
723 }
724 
728 static inline double
729 nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
730 {
731  int d;
732  bool covered = true;
733  double ivol = 1.0;
734  double vol2 = 1.0;
735 
736  for ( d = 0 ; d < ndims; d++ )
737  {
738  if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
739  return 0.0; /* Disjoint */
740 
741  if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
742  covered = false;
743  }
744 
745  if ( covered )
746  return 1.0;
747 
748  for ( d = 0; d < ndims; d++ )
749  {
750  double width2 = b2->max[d] - b2->min[d];
751  double imin, imax, iwidth;
752 
753  vol2 *= width2;
754 
755  imin = Max(b1->min[d], b2->min[d]);
756  imax = Min(b1->max[d], b2->max[d]);
757  iwidth = imax - imin;
758  iwidth = Max(0.0, iwidth);
759 
760  ivol *= iwidth;
761  }
762 
763  if ( vol2 == 0.0 )
764  return vol2;
765 
766  return ivol / vol2;
767 }
768 
769 /* How many bins shall we use in figuring out the distribution? */
770 #define MAX_NUM_BINS 50
771 #define BIN_MIN_SIZE 10
772 
788 static int
789 nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
790 {
791  int d, i, k, range;
792  int *counts;
793  double smin, smax; /* Spatial min, spatial max */
794  double swidth; /* Spatial width of dimension */
795 #if POSTGIS_DEBUG_LEVEL >= 3
796  double average, sdev, sdev_ratio;
797 #endif
798  int bmin, bmax; /* Bin min, bin max */
799  const ND_BOX *ndb;
800 
801  int num_bins = Min(Max(2, num_boxes/BIN_MIN_SIZE), MAX_NUM_BINS);
802  counts = palloc0(num_bins * sizeof(int));
803 
804  /* For each dimension... */
805  for ( d = 0; d < ndims; d++ )
806  {
807  /* Initialize counts for this dimension */
808  memset(counts, 0, num_bins * sizeof(int));
809 
810 
811  smin = extent->min[d];
812  smax = extent->max[d];
813  swidth = smax - smin;
814 
815  /* Don't try and calculate distribution of overly narrow */
816  /* or overly wide dimensions. Here we're being pretty geographical, */
817  /* expecting "normal" planar or geographic coordinates. */
818  /* Otherwise we have to "handle" +/- Inf bounded features and */
819  /* the assumptions needed for that are as bad as this hack. */
820  if ( swidth < MIN_DIMENSION_WIDTH || swidth > MAX_DIMENSION_WIDTH )
821  {
822  distribution[d] = 0;
823  continue;
824  }
825 
826  /* Sum up the overlaps of each feature with the dimensional bins */
827  for ( i = 0; i < num_boxes; i++ )
828  {
829  double minoffset, maxoffset;
830 
831  /* Skip null entries */
832  ndb = nd_boxes[i];
833  if ( ! ndb ) continue;
834 
835  /* Where does box fall relative to the working range */
836  minoffset = ndb->min[d] - smin;
837  maxoffset = ndb->max[d] - smin;
838 
839  /* Skip boxes that our outside our working range */
840  if ( minoffset < 0 || minoffset > swidth ||
841  maxoffset < 0 || maxoffset > swidth )
842  {
843  continue;
844  }
845 
846  /* What bins does this range correspond to? */
847  bmin = floor(num_bins * minoffset / swidth);
848  bmax = floor(num_bins * maxoffset / swidth);
849 
850  /* Should only happen when maxoffset==swidth */
851  if (bmax >= num_bins)
852  bmax = num_bins-1;
853 
854  POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
855 
856  /* Increment the counts in all the bins this feature overlaps */
857  for ( k = bmin; k <= bmax; k++ )
858  {
859  counts[k] += 1;
860  }
861 
862  }
863 
864  /* How dispersed is the distribution of features across bins? */
865  // range = range_quintile(counts, num_bins);
866  range = range_full(counts, num_bins);
867 
868 #if POSTGIS_DEBUG_LEVEL >= 3
869  average = avg(counts, num_bins);
870  sdev = stddev(counts, num_bins);
871  sdev_ratio = sdev/average;
872 
873  POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
874  POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
875  POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
876  POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
877 #endif
878 
879  distribution[d] = range;
880  }
881 
882  pfree(counts);
883 
884  return true;
885 }
886 
892 static inline int
893 nd_increment(ND_IBOX *ibox, int ndims, int *counter)
894 {
895  int d = 0;
896 
897  while ( d < ndims )
898  {
899  if ( counter[d] < ibox->max[d] )
900  {
901  counter[d] += 1;
902  break;
903  }
904  counter[d] = ibox->min[d];
905  d++;
906  }
907  /* That's it, cannot increment any more! */
908  if ( d == ndims )
909  return false;
910 
911  /* Increment complete! */
912  return true;
913 }
914 
915 static ND_STATS*
916 pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
917 {
918  int stats_kind = STATISTIC_KIND_ND;
919  int rv;
920  ND_STATS *nd_stats;
921 
922  /* If we're in 2D mode, set the kind appropriately */
923  if ( mode == 2 ) stats_kind = STATISTIC_KIND_2D;
924 
925  /* Then read the geom status histogram from that */
926  {
927  AttStatsSlot sslot;
928  rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
929  ATTSTATSSLOT_NUMBERS);
930  if ( ! rv ) {
931  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
932  return NULL;
933  }
934 
935  /* Clone the stats here so we can release the attstatsslot immediately */
936  nd_stats = palloc(sizeof(float4) * sslot.nnumbers);
937  memcpy(nd_stats, sslot.numbers, sizeof(float4) * sslot.nnumbers);
938 
939  free_attstatsslot(&sslot);
940  }
941  return nd_stats;
942 }
943 
948 static ND_STATS*
949 pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
950 {
951  HeapTuple stats_tuple = NULL;
952  ND_STATS *nd_stats;
953 
954  /* First pull the stats tuple for the whole tree */
955  if ( ! only_parent )
956  {
957  POSTGIS_DEBUGF(2, "searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
958  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(true));
959  if ( stats_tuple )
960  POSTGIS_DEBUGF(2, "found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
961  }
962  /* Fall-back to main table stats only, if not found for whole tree or explicitly ignored */
963  if ( only_parent || ! stats_tuple )
964  {
965  POSTGIS_DEBUGF(2, "searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
966  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(false));
967  if ( stats_tuple )
968  POSTGIS_DEBUGF(2, "found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
969  }
970  if ( ! stats_tuple )
971  {
972  POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
973  return NULL;
974  }
975 
976  nd_stats = pg_nd_stats_from_tuple(stats_tuple, mode);
977  ReleaseSysCache(stats_tuple);
978  if ( ! nd_stats )
979  {
980  POSTGIS_DEBUGF(2,
981  "histogram for attribute %d of table \"%s\" does not exist?",
982  att_num, get_rel_name(table_oid));
983  }
984 
985  return nd_stats;
986 }
987 
996 static ND_STATS*
997 pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
998 {
999  const char *att_name = text_to_cstring(att_text);
1000  AttrNumber att_num;
1001 
1002  /* We know the name? Look up the num */
1003  if ( att_text )
1004  {
1005  /* Get the attribute number */
1006  att_num = get_attnum(table_oid, att_name);
1007  if ( ! att_num ) {
1008  elog(ERROR, "attribute \"%s\" does not exist", att_name);
1009  return NULL;
1010  }
1011  }
1012  else
1013  {
1014  elog(ERROR, "attribute name is null");
1015  return NULL;
1016  }
1017 
1018  return pg_get_nd_stats(table_oid, att_num, mode, only_parent);
1019 }
1020 
1034 static float8
1036 {
1037  int ncells1, ncells2;
1038  int ndims1, ndims2, ndims;
1039  double ntuples_max;
1040  double ntuples_not_null1, ntuples_not_null2;
1041 
1042  ND_BOX extent1, extent2;
1043  ND_IBOX ibox1, ibox2;
1044  int at1[ND_DIMS];
1045  int at2[ND_DIMS];
1046  double min1[ND_DIMS];
1047  double width1[ND_DIMS];
1048  double cellsize1[ND_DIMS];
1049  int size2[ND_DIMS];
1050  double min2[ND_DIMS];
1051  double width2[ND_DIMS];
1052  double cellsize2[ND_DIMS];
1053  int size1[ND_DIMS];
1054  int d;
1055  double val = 0;
1056  float8 selectivity;
1057 
1058  /* Drop out on null inputs */
1059  if ( ! ( s1 && s2 ) )
1060  {
1061  elog(NOTICE, " estimate_join_selectivity called with null inputs");
1062  return FALLBACK_ND_SEL;
1063  }
1064 
1065  /* We need to know how many cells each side has... */
1066  ncells1 = (int)roundf(s1->histogram_cells);
1067  ncells2 = (int)roundf(s2->histogram_cells);
1068 
1069  /* ...so that we can drive the summation loop with the smaller histogram. */
1070  if ( ncells1 > ncells2 )
1071  {
1072  const ND_STATS *stats_tmp = s1;
1073  s1 = s2;
1074  s2 = stats_tmp;
1075  }
1076 
1077  POSTGIS_DEBUGF(3, "s1: %s", nd_stats_to_json(s1));
1078  POSTGIS_DEBUGF(3, "s2: %s", nd_stats_to_json(s2));
1079 
1080  /* Re-read that info after the swap */
1081  ncells1 = (int)roundf(s1->histogram_cells);
1082  ncells2 = (int)roundf(s2->histogram_cells);
1083 
1084  /* Q: What's the largest possible join size these relations can create? */
1085  /* A: The product of the # of non-null rows in each relation. */
1086  ntuples_not_null1 = s1->table_features * ((double)s1->not_null_features / s1->sample_features);
1087  ntuples_not_null2 = s2->table_features * ((double)s2->not_null_features / s2->sample_features);
1088  ntuples_max = ntuples_not_null1 * ntuples_not_null2;
1089 
1090  /* Get the ndims as ints */
1091  ndims1 = (int)roundf(s1->ndims);
1092  ndims2 = (int)roundf(s2->ndims);
1093  ndims = Max(ndims1, ndims2);
1094 
1095  /* Get the extents */
1096  extent1 = s1->extent;
1097  extent2 = s2->extent;
1098 
1099  /* If relation stats do not intersect, join is very very selective. */
1100  if ( ! nd_box_intersects(&extent1, &extent2, ndims) )
1101  {
1102  POSTGIS_DEBUG(3, "relation stats do not intersect, returning 0");
1103  PG_RETURN_FLOAT8(0.0);
1104  }
1105 
1106  /*
1107  * First find the index range of the part of the smaller
1108  * histogram that overlaps the larger one.
1109  */
1110  if ( ! nd_box_overlap(s1, &extent2, &ibox1) )
1111  {
1112  POSTGIS_DEBUG(3, "could not calculate overlap of relations");
1113  PG_RETURN_FLOAT8(FALLBACK_ND_JOINSEL);
1114  }
1115 
1116  /* Initialize counters / constants on s1 */
1117  for ( d = 0; d < ndims1; d++ )
1118  {
1119  at1[d] = ibox1.min[d];
1120  min1[d] = s1->extent.min[d];
1121  width1[d] = s1->extent.max[d] - s1->extent.min[d];
1122  size1[d] = (int)roundf(s1->size[d]);
1123  cellsize1[d] = width1[d] / size1[d];
1124  }
1125 
1126  /* Initialize counters / constants on s2 */
1127  for ( d = 0; d < ndims2; d++ )
1128  {
1129  min2[d] = s2->extent.min[d];
1130  width2[d] = s2->extent.max[d] - s2->extent.min[d];
1131  size2[d] = (int)roundf(s2->size[d]);
1132  cellsize2[d] = width2[d] / size2[d];
1133  }
1134 
1135  /* For each affected cell of s1... */
1136  do
1137  {
1138  double val1;
1139  /* Construct the bounds of this cell */
1140  ND_BOX nd_cell1;
1141  nd_box_init(&nd_cell1);
1142  for ( d = 0; d < ndims1; d++ )
1143  {
1144  nd_cell1.min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1145  nd_cell1.max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1146  }
1147 
1148  /* Find the cells of s2 that cell1 overlaps.. */
1149  nd_box_overlap(s2, &nd_cell1, &ibox2);
1150 
1151  /* Initialize counter */
1152  for ( d = 0; d < ndims2; d++ )
1153  {
1154  at2[d] = ibox2.min[d];
1155  }
1156 
1157  POSTGIS_DEBUGF(3, "at1 %d,%d %s", at1[0], at1[1], nd_box_to_json(&nd_cell1, ndims1));
1158 
1159  /* Get the value at this cell */
1160  val1 = s1->value[nd_stats_value_index(s1, at1)];
1161 
1162  /* For each overlapped cell of s2... */
1163  do
1164  {
1165  double ratio2;
1166  double val2;
1167 
1168  /* Construct the bounds of this cell */
1169  ND_BOX nd_cell2;
1170  nd_box_init(&nd_cell2);
1171  for ( d = 0; d < ndims2; d++ )
1172  {
1173  nd_cell2.min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1174  nd_cell2.max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1175  }
1176 
1177  POSTGIS_DEBUGF(3, " at2 %d,%d %s", at2[0], at2[1], nd_box_to_json(&nd_cell2, ndims2));
1178 
1179  /* Calculate overlap ratio of the cells */
1180  ratio2 = nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1181 
1182  /* Multiply the cell counts, scaled by overlap ratio */
1183  val2 = s2->value[nd_stats_value_index(s2, at2)];
1184  POSTGIS_DEBUGF(3, " val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1185  val += val1 * (val2 * ratio2);
1186  }
1187  while ( nd_increment(&ibox2, ndims2, at2) );
1188 
1189  }
1190  while( nd_increment(&ibox1, ndims1, at1) );
1191 
1192  POSTGIS_DEBUGF(3, "val of histogram = %g", val);
1193 
1194  /*
1195  * In order to compare our total cell count "val" to the
1196  * ntuples_max, we need to scale val up to reflect a full
1197  * table estimate. So, multiply by ratio of table size to
1198  * sample size.
1199  */
1200  val *= (s1->table_features / s1->sample_features);
1201  val *= (s2->table_features / s2->sample_features);
1202 
1203  POSTGIS_DEBUGF(3, "val scaled to full table size = %g", val);
1204 
1205  /*
1206  * Because the cell counts are over-determined due to
1207  * double counting of features that overlap multiple cells
1208  * (see the compute_gserialized_stats routine)
1209  * we also have to scale our cell count "val" *down*
1210  * to adjust for the double counting.
1211  */
1212 // val /= (s1->cells_covered / s1->histogram_features);
1213 // val /= (s2->cells_covered / s2->histogram_features);
1214 
1215  /*
1216  * Finally, the selectivity is the estimated number of
1217  * rows to be returned divided by the maximum possible
1218  * number of rows that can be returned.
1219  */
1220  selectivity = val / ntuples_max;
1221 
1222  /* Guard against over-estimates and crazy numbers :) */
1223  if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1224  {
1225  selectivity = DEFAULT_ND_JOINSEL;
1226  }
1227  else if ( selectivity > 1.0 )
1228  {
1229  selectivity = 1.0;
1230  }
1231 
1232  return selectivity;
1233 }
1234 
1240 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
1241 {
1242  PG_RETURN_DATUM(DirectFunctionCall5(
1244  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1245  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1246  Int32GetDatum(0) /* ND mode */
1247  ));
1248 }
1249 
1255 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
1256 {
1257  PG_RETURN_DATUM(DirectFunctionCall5(
1259  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1260  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1261  Int32GetDatum(2) /* 2D mode */
1262  ));
1263 }
1264 
1265 double
1266 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
1267 {
1268  float8 selectivity;
1269  Oid relid1, relid2;
1270  ND_STATS *stats1, *stats2;
1271  Node *arg1 = (Node*) linitial(args);
1272  Node *arg2 = (Node*) lsecond(args);
1273  Var *var1 = (Var*) arg1;
1274  Var *var2 = (Var*) arg2;
1275 
1276  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1277 
1278  /* We only do column joins right now, no functional joins */
1279  /* TODO: handle g1 && ST_Expand(g2) */
1280  if (!IsA(arg1, Var) || !IsA(arg2, Var))
1281  {
1282  POSTGIS_DEBUGF(1, "%s called with arguments that are not column references", __func__);
1283  return DEFAULT_ND_JOINSEL;
1284  }
1285 
1286  /* What are the Oids of our tables/relations? */
1287  relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
1288  relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
1289 
1290  /* Pull the stats from the stats system. */
1291  stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
1292  stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
1293 
1294  /* If we can't get stats, we have to stop here! */
1295  if (!stats1)
1296  {
1297  POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1298  return DEFAULT_ND_JOINSEL;
1299  }
1300  else if (!stats2)
1301  {
1302  POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1303  return DEFAULT_ND_JOINSEL;
1304  }
1305 
1306  selectivity = estimate_join_selectivity(stats1, stats2);
1307  POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
1308  pfree(stats1);
1309  pfree(stats2);
1310  return selectivity;
1311 }
1312 
1322 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
1323 {
1324  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1325  /* Oid operator = PG_GETARG_OID(1); */
1326  List *args = (List *) PG_GETARG_POINTER(2);
1327  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1328  int mode = PG_GETARG_INT32(4);
1329 
1330  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1331 
1332  /* Check length of args and punt on > 2 */
1333  if (list_length(args) != 2)
1334  {
1335  POSTGIS_DEBUGF(2, "%s: got nargs == %d", __func__, list_length(args));
1336  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1337  }
1338 
1339  /* Only respond to an inner join/unknown context join */
1340  if (jointype != JOIN_INNER)
1341  {
1342  POSTGIS_DEBUGF(1, "%s: jointype %d not supported", __func__, jointype);
1343  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1344  }
1345 
1346  PG_RETURN_FLOAT8(gserialized_joinsel_internal(root, args, jointype, mode));
1347 }
1348 
1367 static void
1368 compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1369  int sample_rows, double total_rows, int mode)
1370 {
1371  MemoryContext old_context;
1372  int d, i; /* Counters */
1373  int notnull_cnt = 0; /* # not null rows in the sample */
1374  int null_cnt = 0; /* # null rows in the sample */
1375  int histogram_features = 0; /* # rows that actually got counted in the histogram */
1376 
1377  ND_STATS *nd_stats; /* Our histogram */
1378  size_t nd_stats_size; /* Size to allocate */
1379 
1380  double total_width = 0; /* # of bytes used by sample */
1381  double total_cell_count = 0; /* # of cells in histogram affected by sample */
1382 
1383  ND_BOX sum; /* Sum of extents of sample boxes */
1384  ND_BOX avg; /* Avg of extents of sample boxes */
1385  ND_BOX stddev; /* StdDev of extents of sample boxes */
1386 
1387  const ND_BOX **sample_boxes; /* ND_BOXes for each of the sample features */
1388  ND_BOX sample_extent; /* Extent of the raw sample */
1389  int histo_size[ND_DIMS]; /* histogram nrows, ncols, etc */
1390  ND_BOX histo_extent; /* Spatial extent of the histogram */
1391  ND_BOX histo_extent_new; /* Temporary variable */
1392  int histo_cells_target; /* Number of cells we will shoot for, given the stats target */
1393  int histo_cells; /* Number of cells in the histogram */
1394  int histo_cells_new = 1; /* Temporary variable */
1395 
1396  int ndims = 2; /* Dimensionality of the sample */
1397  int histo_ndims = 0; /* Dimensionality of the histogram */
1398  double sample_distribution[ND_DIMS]; /* How homogeneous is distribution of sample in each axis? */
1399  double total_distribution; /* Total of sample_distribution */
1400 
1401  int stats_slot; /* What slot is this data going into? (2D vs ND) */
1402  int stats_kind; /* And this is what? (2D vs ND) */
1403 
1404  /* Initialize sum and stddev */
1405  nd_box_init(&sum);
1406  nd_box_init(&stddev);
1407  nd_box_init(&avg);
1408  nd_box_init(&histo_extent);
1409  nd_box_init(&histo_extent_new);
1410 
1411  /*
1412  * This is where gserialized_analyze_nd
1413  * should put its' custom parameters.
1414  */
1415  /* void *mystats = stats->extra_data; */
1416 
1417  POSTGIS_DEBUG(2, "compute_gserialized_stats called");
1418  POSTGIS_DEBUGF(3, " # sample_rows: %d", sample_rows);
1419  POSTGIS_DEBUGF(3, " estimate of total_rows: %.6g", total_rows);
1420 
1421  /*
1422  * We might need less space, but don't think
1423  * its worth saving...
1424  */
1425  sample_boxes = palloc(sizeof(ND_BOX*) * sample_rows);
1426 
1427  /*
1428  * First scan:
1429  * o read boxes
1430  * o find dimensionality of the sample
1431  * o find extent of the sample
1432  * o count null-infinite/not-null values
1433  * o compute total_width
1434  * o compute total features's box area (for avgFeatureArea)
1435  * o sum features box coordinates (for standard deviation)
1436  */
1437  for ( i = 0; i < sample_rows; i++ )
1438  {
1439  Datum datum;
1440  GBOX gbox = {0};
1441  ND_BOX *nd_box;
1442  bool is_null;
1443 
1444  datum = fetchfunc(stats, i, &is_null);
1445 
1446  /* Skip all NULLs. */
1447  if ( is_null )
1448  {
1449  POSTGIS_DEBUGF(4, " skipped null geometry %d", i);
1450  null_cnt++;
1451  continue;
1452  }
1453 
1454  /* Read the bounds from the gserialized. */
1455  if (LW_FAILURE == gserialized_datum_get_gbox_p(datum, &gbox))
1456  {
1457  /* Skip empties too. */
1458  POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
1459  continue;
1460  }
1461 
1462  /* If we're in 2D mode, zero out the higher dimensions for "safety" */
1463  if ( mode == 2 )
1464  gbox.zmin = gbox.zmax = gbox.mmin = gbox.mmax = 0.0;
1465 
1466  /* Check bounds for validity (finite and not NaN) */
1467  if ( ! gbox_is_valid(&gbox) )
1468  {
1469  POSTGIS_DEBUGF(3, " skipped infinite/nan geometry %d", i);
1470  continue;
1471  }
1472 
1473  /*
1474  * In N-D mode, set the ndims to the maximum dimensionality found
1475  * in the sample. Otherwise, leave at ndims == 2.
1476  */
1477  if ( mode != 2 )
1478  ndims = Max(gbox_ndims(&gbox), ndims);
1479 
1480  /* Convert gbox to n-d box */
1481  nd_box = palloc(sizeof(ND_BOX));
1482  nd_box_from_gbox(&gbox, nd_box);
1483 
1484  /* Cache n-d bounding box */
1485  sample_boxes[notnull_cnt] = nd_box;
1486 
1487  /* Initialize sample extent before merging first entry */
1488  if ( ! notnull_cnt )
1489  nd_box_init_bounds(&sample_extent);
1490 
1491  /* Add current sample to overall sample extent */
1492  nd_box_merge(nd_box, &sample_extent);
1493 
1494  /* How many bytes does this sample use? */
1495  total_width += toast_raw_datum_size(datum);
1496 
1497  /* Add bounds coordinates to sums for stddev calculation */
1498  for ( d = 0; d < ndims; d++ )
1499  {
1500  sum.min[d] += nd_box->min[d];
1501  sum.max[d] += nd_box->max[d];
1502  }
1503 
1504  /* Increment our "good feature" count */
1505  notnull_cnt++;
1506 
1507  /* Give backend a chance of interrupting us */
1508 #if POSTGIS_PGSQL_VERSION >= 180
1509  vacuum_delay_point(true);
1510 #else
1511  vacuum_delay_point();
1512 #endif
1513  }
1514 
1515  /*
1516  * We'll build a histogram having stats->attr->attstattarget
1517  * (default 100) cells on each side, within reason...
1518  * we'll use ndims*100000 as the maximum number of cells.
1519  * Also, if we're sampling a relatively small table, we'll try to ensure that
1520  * we have a smaller grid.
1521  */
1522 #if POSTGIS_PGSQL_VERSION >= 170
1523  histo_cells_target = (int)pow((double)(stats->attstattarget), (double)ndims);
1524  POSTGIS_DEBUGF(3, " stats->attstattarget: %d", stats->attstattarget);
1525 #else
1526  histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
1527  POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
1528 #endif
1529  histo_cells_target = Min(histo_cells_target, ndims * 100000);
1530  histo_cells_target = Min(histo_cells_target, (int)(10 * ndims * total_rows));
1531  POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
1532 
1533  /* If there's no useful features, we can't work out stats */
1534  if ( ! notnull_cnt )
1535  {
1536  stats->stats_valid = false;
1537  return;
1538  }
1539 
1540  POSTGIS_DEBUGF(3, " sample_extent: %s", nd_box_to_json(&sample_extent, ndims));
1541 
1542  /*
1543  * Second scan:
1544  * o compute standard deviation
1545  */
1546  for ( d = 0; d < ndims; d++ )
1547  {
1548  /* Calculate average bounds values */
1549  avg.min[d] = sum.min[d] / notnull_cnt;
1550  avg.max[d] = sum.max[d] / notnull_cnt;
1551 
1552  /* Calculate standard deviation for this dimension bounds */
1553  for ( i = 0; i < notnull_cnt; i++ )
1554  {
1555  const ND_BOX *ndb = sample_boxes[i];
1556  stddev.min[d] += (ndb->min[d] - avg.min[d]) * (ndb->min[d] - avg.min[d]);
1557  stddev.max[d] += (ndb->max[d] - avg.max[d]) * (ndb->max[d] - avg.max[d]);
1558  }
1559  stddev.min[d] = sqrt(stddev.min[d] / notnull_cnt);
1560  stddev.max[d] = sqrt(stddev.max[d] / notnull_cnt);
1561 
1562  /* Histogram bounds for this dimension bounds is avg +/- SDFACTOR * stdev */
1563  histo_extent.min[d] = Max(avg.min[d] - SDFACTOR * stddev.min[d], sample_extent.min[d]);
1564  histo_extent.max[d] = Min(avg.max[d] + SDFACTOR * stddev.max[d], sample_extent.max[d]);
1565  }
1566 
1567  /*
1568  * Third scan:
1569  * o skip hard deviants
1570  * o compute new histogram box
1571  */
1572  nd_box_init_bounds(&histo_extent_new);
1573  for ( i = 0; i < notnull_cnt; i++ )
1574  {
1575  const ND_BOX *ndb = sample_boxes[i];
1576  /* Skip any hard deviants (boxes entirely outside our histo_extent */
1577  if ( ! nd_box_intersects(&histo_extent, ndb, ndims) )
1578  {
1579  POSTGIS_DEBUGF(4, " feature %d is a hard deviant, skipped", i);
1580  sample_boxes[i] = NULL;
1581  continue;
1582  }
1583  /* Expand our new box to fit all the other features. */
1584  nd_box_merge(ndb, &histo_extent_new);
1585  }
1586  /*
1587  * Expand the box slightly (1%) to avoid edge effects
1588  * with objects that are on the boundary
1589  */
1590  nd_box_expand(&histo_extent_new, 0.01);
1591  histo_extent = histo_extent_new;
1592 
1593  /*
1594  * How should we allocate our histogram cells to the
1595  * different dimensions? We can't do it by raw dimensional width,
1596  * because in x/y/z space, the z can have different units
1597  * from the x/y. Similarly for x/y/t space.
1598  * So, we instead calculate how much features overlap
1599  * each other in their dimension to figure out which
1600  * dimensions have useful selectivity characteristics (more
1601  * variability in density) and therefore would find
1602  * more cells useful (to distinguish between dense places and
1603  * homogeneous places).
1604  */
1605  nd_box_array_distribution(sample_boxes, notnull_cnt, &histo_extent, ndims,
1606  sample_distribution);
1607 
1608  /*
1609  * The sample_distribution array now tells us how spread out the
1610  * data is in each dimension, so we use that data to allocate
1611  * the histogram cells we have available.
1612  * At this point, histo_cells_target is the approximate target number
1613  * of cells.
1614  */
1615 
1616  /*
1617  * Some dimensions have basically a uniform distribution, we want
1618  * to allocate no cells to those dimensions, only to dimensions
1619  * that have some interesting differences in data distribution.
1620  * Here we count up the number of interesting dimensions
1621  */
1622  for ( d = 0; d < ndims; d++ )
1623  {
1624  if ( sample_distribution[d] > 0 )
1625  histo_ndims++;
1626  }
1627 
1628  if ( histo_ndims == 0 )
1629  {
1630  /* Special case: all our dimensions had low variability! */
1631  /* We just divide the cells up evenly */
1632  POSTGIS_DEBUG(3, " special case: no axes have variability");
1633  histo_cells_new = 1;
1634  for ( d = 0; d < ndims; d++ )
1635  {
1636  histo_size[d] = (int)pow((double)histo_cells_target, 1/(double)ndims);
1637  if ( ! histo_size[d] )
1638  histo_size[d] = 1;
1639  POSTGIS_DEBUGF(3, " histo_size[d]: %d", histo_size[d]);
1640  histo_cells_new *= histo_size[d];
1641  }
1642  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1643  }
1644  else
1645  {
1646  /*
1647  * We're going to express the amount of variability in each dimension
1648  * as a proportion of the total variability and allocate cells in that
1649  * dimension relative to that proportion.
1650  */
1651  POSTGIS_DEBUG(3, " allocating histogram axes based on axis variability");
1652  total_distribution = total_double(sample_distribution, ndims); /* First get the total */
1653  POSTGIS_DEBUGF(3, " total_distribution: %.8g", total_distribution);
1654  histo_cells_new = 1; /* For the number of cells in the final histogram */
1655  for ( d = 0; d < ndims; d++ )
1656  {
1657  if ( sample_distribution[d] == 0 ) /* Uninteresting dimensions don't get any room */
1658  {
1659  histo_size[d] = 1;
1660  }
1661  else /* Interesting dimension */
1662  {
1663  /* How does this dims variability compare to the total? */
1664  float edge_ratio = (float)sample_distribution[d] / (float)total_distribution;
1665  /*
1666  * Scale the target cells number by the # of dims and ratio,
1667  * then take the appropriate root to get the estimated number of cells
1668  * on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
1669  */
1670  histo_size[d] = (int)pow((double)histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
1671  /* If something goes awry, just give this dim one slot */
1672  if ( ! histo_size[d] )
1673  histo_size[d] = 1;
1674  }
1675  histo_cells_new *= histo_size[d];
1676  }
1677  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1678  }
1679 
1680  /* Update histo_cells to the actual number of cells we need to allocate */
1681  histo_cells = histo_cells_new;
1682  POSTGIS_DEBUGF(3, " histo_cells: %d", histo_cells);
1683 
1684  /*
1685  * Create the histogram (ND_STATS) in the stats memory context
1686  */
1687  old_context = MemoryContextSwitchTo(stats->anl_context);
1688  nd_stats_size = sizeof(ND_STATS) + ((histo_cells - 1) * sizeof(float4));
1689  nd_stats = palloc(nd_stats_size);
1690  memset(nd_stats, 0, nd_stats_size); /* Initialize all values to 0 */
1691  MemoryContextSwitchTo(old_context);
1692 
1693  /* Initialize the #ND_STATS objects */
1694  nd_stats->ndims = ndims;
1695  nd_stats->extent = histo_extent;
1696  nd_stats->sample_features = sample_rows;
1697  nd_stats->table_features = total_rows;
1698  nd_stats->not_null_features = notnull_cnt;
1699  /* Copy in the histogram dimensions */
1700  for ( d = 0; d < ndims; d++ )
1701  nd_stats->size[d] = histo_size[d];
1702 
1703  /*
1704  * Fourth scan:
1705  * o fill histogram values with the proportion of
1706  * features' bbox overlaps: a feature's bvol
1707  * can fully overlap (1) or partially overlap
1708  * (fraction of 1) an histogram cell.
1709  *
1710  * Note that we are filling each cell with the "portion of
1711  * the feature's box that overlaps the cell". So, if we sum
1712  * up the values in the histogram, we could get the
1713  * histogram feature count.
1714  *
1715  */
1716  for ( i = 0; i < notnull_cnt; i++ )
1717  {
1718  const ND_BOX *nd_box;
1719  ND_IBOX nd_ibox;
1720  int at[ND_DIMS];
1721  double num_cells = 0;
1722  double min[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1723  double max[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1724  double cellsize[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1725 
1726  nd_box = sample_boxes[i];
1727  if ( ! nd_box ) continue; /* Skip Null'ed out hard deviants */
1728 
1729  /* Give backend a chance of interrupting us */
1730 #if POSTGIS_PGSQL_VERSION >= 180
1731  vacuum_delay_point(true);
1732 #else
1733  vacuum_delay_point();
1734 #endif
1735 
1736  /* Find the cells that overlap with this box and put them into the ND_IBOX */
1737  nd_box_overlap(nd_stats, nd_box, &nd_ibox);
1738  memset(at, 0, sizeof(int)*ND_DIMS);
1739 
1740  POSTGIS_DEBUGF(3, " feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1741  nd_ibox.min[0], nd_ibox.min[1], nd_ibox.min[2], nd_ibox.min[3],
1742  nd_ibox.max[0], nd_ibox.max[1], nd_ibox.max[2], nd_ibox.max[3]);
1743 
1744  for ( d = 0; d < nd_stats->ndims; d++ )
1745  {
1746  /* Initialize the starting values */
1747  at[d] = nd_ibox.min[d];
1748  min[d] = nd_stats->extent.min[d];
1749  max[d] = nd_stats->extent.max[d];
1750  cellsize[d] = (max[d] - min[d])/(nd_stats->size[d]);
1751  }
1752 
1753  /*
1754  * Move through all the overlapped histogram cells values and
1755  * add the box overlap proportion to them.
1756  */
1757  do
1758  {
1759  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1760  double ratio;
1761  /* Create a box for this histogram cell */
1762  for ( d = 0; d < nd_stats->ndims; d++ )
1763  {
1764  nd_cell.min[d] = min[d] + (at[d]+0) * cellsize[d];
1765  nd_cell.max[d] = min[d] + (at[d]+1) * cellsize[d];
1766  }
1767 
1768  /*
1769  * If a feature box is completely inside one cell the ratio will be
1770  * 1.0. If a feature box is 50% in two cells, each cell will get
1771  * 0.5 added on.
1772  */
1773  ratio = nd_box_ratio(&nd_cell, nd_box, nd_stats->ndims);
1774  nd_stats->value[nd_stats_value_index(nd_stats, at)] += ratio;
1775  num_cells += ratio;
1776  POSTGIS_DEBUGF(3, " ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1777  POSTGIS_DEBUGF(3, " at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1778  }
1779  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1780 
1781  /* Keep track of overall number of overlaps counted */
1782  total_cell_count += num_cells;
1783  /* How many features have we added to this histogram? */
1784  histogram_features++;
1785  }
1786 
1787  POSTGIS_DEBUGF(3, " histogram_features: %d", histogram_features);
1788  POSTGIS_DEBUGF(3, " sample_rows: %d", sample_rows);
1789  POSTGIS_DEBUGF(3, " table_rows: %.6g", total_rows);
1790 
1791  /* Error out if we got no sample information */
1792  if ( ! histogram_features )
1793  {
1794  POSTGIS_DEBUG(3, " no stats have been gathered");
1795  elog(NOTICE, " no features lie in the stats histogram, invalid stats");
1796  stats->stats_valid = false;
1797  return;
1798  }
1799 
1800  nd_stats->histogram_features = histogram_features;
1801  nd_stats->histogram_cells = histo_cells;
1802  nd_stats->cells_covered = total_cell_count;
1803 
1804  /* Put this histogram data into the right slot/kind */
1805  if ( mode == 2 )
1806  {
1807  stats_slot = STATISTIC_SLOT_2D;
1808  stats_kind = STATISTIC_KIND_2D;
1809  }
1810  else
1811  {
1812  stats_slot = STATISTIC_SLOT_ND;
1813  stats_kind = STATISTIC_KIND_ND;
1814  }
1815 
1816  /* Write the statistics data */
1817  stats->stakind[stats_slot] = stats_kind;
1818  stats->staop[stats_slot] = InvalidOid;
1819  stats->stanumbers[stats_slot] = (float4*)nd_stats;
1820  stats->numnumbers[stats_slot] = nd_stats_size/sizeof(float4);
1821  stats->stanullfrac = (float4)null_cnt/sample_rows;
1822  stats->stawidth = total_width/notnull_cnt;
1823  stats->stadistinct = -1.0;
1824  stats->stats_valid = true;
1825 
1826  POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1827  POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1828  POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
1829  POSTGIS_DEBUGF(3, " out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1830  POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
1831  POSTGIS_DEBUG (3, " out: distinct values: all (no check done)");
1832  POSTGIS_DEBUGF(3, " out: %s", nd_stats_to_json(nd_stats));
1833  /*
1834  POSTGIS_DEBUGF(3, " out histogram:\n%s", nd_stats_to_grid(nd_stats));
1835  */
1836 
1837  return;
1838 }
1839 
1840 
1858 static void
1859 compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1860  int sample_rows, double total_rows)
1861 {
1862  GserializedAnalyzeExtraData *extra_data = (GserializedAnalyzeExtraData *)stats->extra_data;
1863  /* Call standard statistics calculation routine to fill in correlation for BRIN to work */
1864  stats->extra_data = extra_data->std_extra_data;
1865  extra_data->std_compute_stats(stats, fetchfunc, sample_rows, total_rows);
1866  stats->extra_data = extra_data;
1867 
1868  /* 2D Mode */
1869  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
1870 
1871  if (stats->stats_valid)
1872  {
1873  /* ND Mode: Only computed if 2D was computed too (not NULL and valid) */
1874  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 0);
1875  }
1876 }
1877 
1878 
1906 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
1907 {
1908  VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1909  GserializedAnalyzeExtraData *extra_data =
1911 
1912  /* Ask for standard analyze to fill in as much as possible */
1913  if (!std_typanalyze(stats))
1914  PG_RETURN_BOOL(false);
1915 
1916  /* Save old compute_stats and extra_data for scalar statistics ... */
1917  extra_data->std_compute_stats = stats->compute_stats;
1918  extra_data->std_extra_data = stats->extra_data;
1919  /* ... and replace with our info */
1920  stats->compute_stats = compute_gserialized_stats;
1921  stats->extra_data = extra_data;
1922 
1923  /* Indicate we are done successfully */
1924  PG_RETURN_BOOL(true);
1925 }
1926 
1939 static float8
1940 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
1941 {
1942  int d; /* counter */
1943  float8 selectivity;
1944  ND_BOX nd_box;
1945  ND_IBOX nd_ibox;
1946  int at[ND_DIMS];
1947  double cell_size[ND_DIMS];
1948  double min[ND_DIMS];
1949  double max[ND_DIMS];
1950  double total_count = 0.0;
1951  int ndims_max;
1952 
1953  /* Calculate the overlap of the box on the histogram */
1954  if ( ! nd_stats )
1955  {
1956  elog(NOTICE, " estimate_selectivity called with null input");
1957  return FALLBACK_ND_SEL;
1958  }
1959 
1960  ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1961 
1962  /* Initialize nd_box. */
1963  nd_box_from_gbox(box, &nd_box);
1964 
1965  /*
1966  * To return 2D stats on an ND sample, we need to make the
1967  * 2D box cover the full range of the other dimensions in the
1968  * histogram.
1969  */
1970  POSTGIS_DEBUGF(3, " mode: %d", mode);
1971  if ( mode == 2 )
1972  {
1973  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1974  ndims_max = 2;
1975  }
1976 
1977  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1978  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1979 
1980  // elog(DEBUG1, "out histogram:\n%s", nd_stats_to_grid(nd_stats));
1981 
1982  /*
1983  * Search box completely misses histogram extent?
1984  * We have to intersect in all N dimensions or else we have
1985  * zero interaction under the &&& operator. It's important
1986  * to short circuit in this case, as some of the tests below
1987  * will return junk results when run on non-intersecting inputs.
1988  */
1989  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1990  {
1991  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1992  return 0.0;
1993  }
1994 
1995  /* Search box completely contains histogram extent! */
1996  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1997  {
1998  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1999  return 1.0;
2000  }
2001 
2002  /* Calculate the overlap of the box on the histogram */
2003  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
2004  {
2005  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
2006  return FALLBACK_ND_SEL;
2007  }
2008 
2009  /* Work out some measurements of the histogram */
2010  for ( d = 0; d < nd_stats->ndims; d++ )
2011  {
2012  /* Cell size in each dim */
2013  min[d] = nd_stats->extent.min[d];
2014  max[d] = nd_stats->extent.max[d];
2015  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
2016  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2017 
2018  /* Initialize the counter */
2019  at[d] = nd_ibox.min[d];
2020  }
2021 
2022  /* Move through all the overlap values and sum them */
2023  do
2024  {
2025  float cell_count, ratio;
2026  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2027 
2028  /* We have to pro-rate partially overlapped cells. */
2029  for ( d = 0; d < nd_stats->ndims; d++ )
2030  {
2031  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2032  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2033  }
2034 
2035  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2036  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2037 
2038  /* Add the pro-rated count for this cell to the overall total */
2039  total_count += (double)cell_count * ratio;
2040  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2041  }
2042  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2043 
2044  /* Scale by the number of features in our histogram to get the proportion */
2045  selectivity = total_count / nd_stats->histogram_features;
2046 
2047  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2048  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2049  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2050  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2051 
2052  /* Prevent rounding overflows */
2053  if (selectivity > 1.0) selectivity = 1.0;
2054  else if (selectivity < 0.0) selectivity = 0.0;
2055 
2056  return selectivity;
2057 }
2058 
2059 
2060 
2066 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
2067 {
2068  Oid table_oid = PG_GETARG_OID(0);
2069  text *att_text = PG_GETARG_TEXT_P(1);
2070  ND_STATS *nd_stats;
2071  char *str;
2072  text *json;
2073  int mode = 2; /* default to 2D mode */
2074  bool only_parent = false; /* default to whole tree stats */
2075 
2076  /* Check if we've been asked to not use 2d mode */
2077  if ( ! PG_ARGISNULL(2) )
2078  mode = text_p_get_mode(PG_GETARG_TEXT_P(2));
2079 
2080  /* Retrieve the stats object */
2081  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, only_parent);
2082  if ( ! nd_stats )
2083  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2084 
2085  /* Convert to JSON */
2086  elog(DEBUG1, "stats grid:\n%s", nd_stats_to_grid(nd_stats));
2087  str = nd_stats_to_json(nd_stats);
2088  json = cstring_to_text(str);
2089  pfree(str);
2090  pfree(nd_stats);
2091 
2092  PG_RETURN_TEXT_P(json);
2093 }
2094 
2095 
2101 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
2102 {
2103  Oid table_oid = PG_GETARG_OID(0);
2104  text *att_text = PG_GETARG_TEXT_P(1);
2105  Datum geom_datum = PG_GETARG_DATUM(2);
2106  GBOX gbox; /* search box read from gserialized datum */
2107  float8 selectivity = 0;
2108  ND_STATS *nd_stats;
2109  int mode = 2; /* 2D mode by default */
2110 
2111  /* Check if we've been asked to not use 2d mode */
2112  if ( ! PG_ARGISNULL(3) )
2113  mode = text_p_get_mode(PG_GETARG_TEXT_P(3));
2114 
2115  /* Retrieve the stats object */
2116  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, false);
2117 
2118  if ( ! nd_stats )
2119  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2120 
2121  /* Calculate the gbox */
2122  if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
2123  elog(ERROR, "unable to calculate bounding box from geometry");
2124 
2125  POSTGIS_DEBUGF(3, " %s", gbox_to_string(&gbox));
2126 
2127  /* Do the estimation */
2128  selectivity = estimate_selectivity(&gbox, nd_stats, mode);
2129 
2130  pfree(nd_stats);
2131  PG_RETURN_FLOAT8(selectivity);
2132 }
2133 
2134 
2140 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
2141 {
2142  Oid table_oid1 = PG_GETARG_OID(0);
2143  text *att_text1 = PG_GETARG_TEXT_P(1);
2144  Oid table_oid2 = PG_GETARG_OID(2);
2145  text *att_text2 = PG_GETARG_TEXT_P(3);
2146  ND_STATS *nd_stats1, *nd_stats2;
2147  float8 selectivity = 0;
2148  int mode = 2; /* 2D mode by default */
2149 
2150 
2151  /* Retrieve the stats object */
2152  nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode, false);
2153  nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode, false);
2154 
2155  if ( ! nd_stats1 )
2156  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text_to_cstring(att_text1));
2157 
2158  if ( ! nd_stats2 )
2159  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid2), text_to_cstring(att_text2));
2160 
2161  /* Check if we've been asked to not use 2d mode */
2162  if ( ! PG_ARGISNULL(4) )
2163  {
2164  text *modetxt = PG_GETARG_TEXT_P(4);
2165  char *modestr = text_to_cstring(modetxt);
2166  if ( modestr[0] == 'N' )
2167  mode = 0;
2168  }
2169 
2170  /* Do the estimation */
2171  selectivity = estimate_join_selectivity(nd_stats1, nd_stats2);
2172 
2173  pfree(nd_stats1);
2174  pfree(nd_stats2);
2175  PG_RETURN_FLOAT8(selectivity);
2176 }
2177 
2183 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
2184 {
2185  PG_RETURN_DATUM(DirectFunctionCall5(
2187  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2188  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2189  Int32GetDatum(2) /* 2-D mode */
2190  ));
2191 }
2192 
2198 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
2199 {
2200  PG_RETURN_DATUM(DirectFunctionCall5(
2202  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2203  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2204  Int32GetDatum(0) /* N-D mode */
2205  ));
2206 }
2207 
2208 
2223 float8
2224 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
2225 {
2226  VariableStatData vardata;
2227  Node *other = NULL;
2228  bool varonleft;
2229  ND_STATS *nd_stats = NULL;
2230 
2231  GBOX search_box;
2232  float8 selectivity = 0;
2233  Const *otherConst;
2234 
2235  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
2236 
2237  if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft))
2238  {
2239  POSTGIS_DEBUGF(2, "%s: could not find vardata", __func__);
2240  return DEFAULT_ND_SEL;
2241  }
2242 
2243  if (!IsA(other, Const))
2244  {
2245  ReleaseVariableStats(vardata);
2246  POSTGIS_DEBUGF(2, "%s: no constant argument, returning default selectivity %g", __func__, DEFAULT_ND_SEL);
2247  return DEFAULT_ND_SEL;
2248  }
2249 
2250  otherConst = (Const*)other;
2251  if ((!otherConst) || otherConst->constisnull)
2252  {
2253  ReleaseVariableStats(vardata);
2254  POSTGIS_DEBUGF(2, "%s: constant argument is NULL", __func__);
2255  return DEFAULT_ND_SEL;
2256  }
2257 
2258  if (!gserialized_datum_get_gbox_p(otherConst->constvalue, &search_box))
2259  {
2260  ReleaseVariableStats(vardata);
2261  POSTGIS_DEBUGF(2, "%s: search box is EMPTY", __func__);
2262  return 0.0;
2263  }
2264 
2265  if (!vardata.statsTuple)
2266  {
2267  POSTGIS_DEBUGF(1, "%s: no statistics available on table. Empty? Need to ANALYZE?", __func__);
2268  return DEFAULT_ND_SEL;
2269  }
2270 
2271  nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
2272  ReleaseVariableStats(vardata);
2273  selectivity = estimate_selectivity(&search_box, nd_stats, mode);
2274  if (nd_stats)
2275  pfree(nd_stats);
2276 
2277  return selectivity;
2278 }
2279 
2281 Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
2282 {
2283  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2284  // Oid operator_oid = PG_GETARG_OID(1);
2285  List *args = (List *) PG_GETARG_POINTER(2);
2286  int varRelid = PG_GETARG_INT32(3);
2287  int mode = PG_GETARG_INT32(4);
2288  float8 selectivity = gserialized_sel_internal(root, args, varRelid, mode);
2289  POSTGIS_DEBUGF(2, "%s: selectivity is %g", __func__, selectivity);
2290  PG_RETURN_FLOAT8(selectivity);
2291 }
2292 
2293 /************************************************************************/
2294 
2295 
2296 /*
2297  * Given an index and table column, confirm the
2298  * index was built on that column, and return the
2299  * corresponding index attribute for that column.
2300  */
2301 static int16
2302 index_has_attr(Oid index_oid, Oid table_oid, int16 table_attnum)
2303 {
2304  HeapTuple index_tuple;
2305  Form_pg_index index_form;
2306  int16 index_attnum = InvalidAttrNumber;
2307 
2308  /* Check if the index is on the desired column */
2309  index_tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid));
2310  if (!HeapTupleIsValid(index_tuple))
2311  elog(ERROR, "cache lookup failed for index %u", index_oid);
2312 
2313  index_form = (Form_pg_index) GETSTRUCT(index_tuple);
2314 
2315  /* Something went wrong, this index isn't on our table of interest */
2316  if (index_form->indrelid != table_oid)
2317  elog(ERROR, "table=%u and index=%u are not related", table_oid, index_oid);
2318 
2319  /* Check if the attnum is in the indkey array */
2320  for (int16 i = 0; i < (int16)(index_form->indkey.dim1); i++)
2321  {
2322  if (index_form->indkey.values[i] == table_attnum)
2323  {
2324  index_attnum = i+1;
2325  break;
2326  }
2327  }
2328  ReleaseSysCache(index_tuple);
2329  return index_attnum;
2330 }
2331 
2332 
2333 /*
2334  * Given an index return the access method.
2335  * (We only work with GIST access method.)
2336  */
2337 static int
2338 index_get_am(Oid index_oid)
2339 {
2340  int index_am;
2341  Form_pg_class index_rel_form;
2342  HeapTuple index_rel_tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(index_oid));
2343 
2344  if (!HeapTupleIsValid(index_rel_tuple))
2345  elog(ERROR, "cache lookup failed for index %u", index_oid);
2346 
2347  index_rel_form = (Form_pg_class) GETSTRUCT(index_rel_tuple);
2348  index_am = index_rel_form->relam;
2349  ReleaseSysCache(index_rel_tuple);
2350  return index_am;
2351 }
2352 
2353 
2354 /*
2355  * Given an index and index attribute, lookup the
2356  * key type (box2df or gidx) of that index column.
2357  */
2358 static int
2359 index_get_keytype (Oid index_oid, int16 index_attnum)
2360 {
2361  Oid atttypid = InvalidOid;
2362  Form_pg_attribute att_form;
2363 
2364  /* Get the key type for the index key? */
2365  HeapTuple att_tuple = SearchSysCache2(ATTNUM,
2366  ObjectIdGetDatum(index_oid),
2367  Int16GetDatum(index_attnum));
2368 
2369  if (!HeapTupleIsValid(att_tuple))
2370  elog(ERROR, "cache lookup failed for index %u attribute %d", index_oid, index_attnum);
2371 
2372  att_form = (Form_pg_attribute) GETSTRUCT(att_tuple);
2373  atttypid = att_form->atttypid;
2374  ReleaseSysCache(att_tuple);
2375  return atttypid;
2376 }
2377 
2378 
2379 /*
2380  * Given a table and attribute number, find any
2381  * "spatial index" of that attribute. For our purposes
2382  * a spatial index is one we can read the top page of,
2383  * namely a geometry or geography column, with
2384  * a GIST index having either a gidx or box2df key.
2385  */
2386 static Oid
2387 table_get_spatial_index(Oid table_oid, int16 attnum, int *key_type, int16 *idx_attnum)
2388 {
2389  Relation table_rel;
2390  List *index_list;
2391  ListCell *lc;
2392 
2393  /* Lookup our spatial index key types */
2394  Oid b2d_oid = postgis_oid(BOX2DFOID);
2395  Oid gdx_oid = postgis_oid(GIDXOID);
2396 
2397  if (!(b2d_oid && gdx_oid))
2398  return InvalidOid;
2399 
2400  /* Read a list of all indexes on this table */
2401  table_rel = RelationIdGetRelation(table_oid);
2402  index_list = RelationGetIndexList(table_rel);
2403  RelationClose(table_rel);
2404 
2405  /* For each index associated with this table... */
2406  foreach(lc, index_list)
2407  {
2408  Oid index_oid = lfirst_oid(lc);
2409  Oid atttypid;
2410 
2411  /* Is our attribute indexed by this index? */
2412  *idx_attnum = index_has_attr(index_oid, table_oid, attnum);
2413 
2414  /* No, move on */
2415  if (*idx_attnum == InvalidAttrNumber)
2416  continue;
2417 
2418  /* We only handle GIST spatial indexes */
2419  if (index_get_am(index_oid) != GIST_AM_OID)
2420  continue;
2421 
2422  /* Is the column actually spatial? */
2423  /* Only if it uses our spatial key types */
2424  atttypid = index_get_keytype (index_oid, *idx_attnum);
2425  if (atttypid == b2d_oid || atttypid == gdx_oid)
2426  {
2427  /* Spatial key found in this index! */
2428  *key_type = (atttypid == b2d_oid ? STATISTIC_KIND_2D : STATISTIC_KIND_ND);
2429  return index_oid;
2430  }
2431  }
2432  return InvalidOid;
2433 }
2434 
2435 /*
2436  * Given an index and indexed attribute, look up
2437  * the keys in the top page of the index, and using
2438  * the appropriate key type, return a box that is the
2439  * union of all those keys.
2440  */
2441 static GBOX *
2442 spatial_index_read_extent(Oid idx_oid, int idx_att_num, int key_type)
2443 {
2444  BOX2DF *bounds_2df = NULL;
2445  GIDX *bounds_gidx = NULL;
2446  GBOX *gbox = NULL;
2447  Relation idx_rel;
2448  Buffer buffer;
2449  Page page;
2450  unsigned long offset;
2451  unsigned long offset_max;
2452 
2453  if (!idx_oid)
2454  return NULL;
2455 
2456  idx_rel = index_open(idx_oid, AccessShareLock);
2457  buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2458  page = (Page) BufferGetPage(buffer);
2459  offset = FirstOffsetNumber;
2460  offset_max = PageGetMaxOffsetNumber(page);
2461  while (offset <= offset_max)
2462  {
2463  ItemId iid = PageGetItemId(page, offset);
2464  IndexTuple ituple;
2465  if (!iid)
2466  {
2467  ReleaseBuffer(buffer);
2468  index_close(idx_rel, AccessShareLock);
2469  return NULL;
2470  }
2471  ituple = (IndexTuple) PageGetItem(page, iid);
2472  if (!GistTupleIsInvalid(ituple))
2473  {
2474  bool isnull;
2475  Datum idx_attr = index_getattr(ituple, idx_att_num, idx_rel->rd_att, &isnull);
2476  if (!isnull)
2477  {
2478  if (key_type == STATISTIC_KIND_2D)
2479  {
2480  BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2481  if (bounds_2df)
2482  box2df_merge(bounds_2df, b);
2483  else
2484  bounds_2df = box2df_copy(b);
2485  }
2486  else
2487  {
2488  GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2489  if (bounds_gidx)
2490  gidx_merge(&bounds_gidx, b);
2491  else
2492  bounds_gidx = gidx_copy(b);
2493  }
2494  }
2495  }
2496  offset++;
2497  }
2498 
2499  ReleaseBuffer(buffer);
2500  index_close(idx_rel, AccessShareLock);
2501 
2502  if (key_type == STATISTIC_KIND_2D && bounds_2df)
2503  {
2504  if (box2df_is_empty(bounds_2df))
2505  return NULL;
2506  gbox = gbox_new(0);
2507  box2df_to_gbox_p(bounds_2df, gbox);
2508  }
2509  else if (key_type == STATISTIC_KIND_ND && bounds_gidx)
2510  {
2511  lwflags_t flags = 0;
2512  if (gidx_is_unknown(bounds_gidx))
2513  return NULL;
2514  FLAGS_SET_Z(flags, GIDX_NDIMS(bounds_gidx) > 2);
2515  FLAGS_SET_M(flags, GIDX_NDIMS(bounds_gidx) > 3);
2516  gbox = gbox_new(flags);
2517  gbox_from_gidx(bounds_gidx, gbox, flags);
2518  }
2519  else
2520  return NULL;
2521 
2522  return gbox;
2523 }
2524 
2525 /*
2526 CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
2527  RETURNS box2d
2528  AS '$libdir/postgis-2.5','_postgis_gserialized_index_extent'
2529  LANGUAGE 'c' STABLE STRICT;
2530 */
2531 
2533 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
2534 {
2535  GBOX *gbox = NULL;
2536  int key_type;
2537  int16 att_num, idx_att_num = InvalidAttrNumber;
2538  Oid tbl_oid = PG_GETARG_DATUM(0);
2539  char *col = text_to_cstring(PG_GETARG_TEXT_P(1));
2540  Oid idx_oid;
2541 
2542  if(!tbl_oid)
2543  PG_RETURN_NULL();
2544 
2545  /* We need to initialize the internal cache to access it later via postgis_oid() */
2546  postgis_initialize_cache();
2547 
2548  att_num = get_attnum(tbl_oid, col);
2549  if (att_num == InvalidAttrNumber)
2550  PG_RETURN_NULL();
2551 
2552  idx_oid = table_get_spatial_index(tbl_oid, att_num, &key_type, &idx_att_num);
2553  if (!idx_oid)
2554  PG_RETURN_NULL();
2555 
2556  gbox = spatial_index_read_extent(idx_oid, idx_att_num, key_type);
2557  if (!gbox)
2558  PG_RETURN_NULL();
2559  else
2560  PG_RETURN_POINTER(gbox);
2561 }
2562 
2563 
2564 /*
2565  * Given a table and column name, look up the attribute number
2566  * and type of that column.
2567  */
2568 static bool
2569 get_attnum_attypid(Oid table_oid, const char *col, int16 *attnum, Oid *atttypid)
2570 {
2571  HeapTuple att_tuple;
2572  Form_pg_attribute att;
2573 
2574  if (!attnum || !atttypid)
2575  elog(ERROR, "%s got null input parameters", __func__);
2576 
2577  /* Is the index on the column name we are looking for? */
2578  att_tuple = SearchSysCache2(ATTNAME,
2579  ObjectIdGetDatum(table_oid),
2580  PointerGetDatum(col));
2581 
2582  if (!HeapTupleIsValid(att_tuple))
2583  return false;
2584 
2585  att = (Form_pg_attribute) GETSTRUCT(att_tuple);
2586  *atttypid = att->atttypid;
2587  *attnum = att->attnum;
2588  ReleaseSysCache(att_tuple);
2589  return true;
2590 }
2591 
2592 
2599 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
2600 {
2601  text *coltxt = NULL;
2602  char *col = NULL;
2603  int16 attnum, idx_attnum;
2604  Oid atttypid = InvalidOid;
2605  char nsp_tbl[2*NAMEDATALEN+6];
2606  char *tbl;
2607  Oid tbl_oid, idx_oid = 0;
2608  ND_STATS *nd_stats;
2609  GBOX *gbox = NULL;
2610  bool only_parent = false;
2611  int key_type;
2612  Oid geographyOid = postgis_oid(GEOGRAPHYOID);
2613  Oid geometryOid = postgis_oid(GEOMETRYOID);
2614 
2615  /* We need to initialize the internal cache to access it later via postgis_oid() */
2616  postgis_initialize_cache();
2617 
2618  if (PG_NARGS() < 2 || PG_NARGS() > 4)
2619  elog(ERROR, "ST_EstimatedExtent() called with wrong number of arguments");
2620 
2621  if ( PG_NARGS() == 4 )
2622  {
2623  only_parent = PG_GETARG_BOOL(3);
2624  }
2625  if ( PG_NARGS() >= 3 )
2626  {
2627  char *nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2628  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2629  coltxt = PG_GETARG_TEXT_P(2);
2630  snprintf(nsp_tbl, sizeof(nsp_tbl), "\"%s\".\"%s\"", nsp, tbl);
2631  }
2632  if ( PG_NARGS() == 2 )
2633  {
2634  tbl = text_to_cstring(PG_GETARG_TEXT_P(0));
2635  coltxt = PG_GETARG_TEXT_P(1);
2636  snprintf(nsp_tbl, sizeof(nsp_tbl), "\"%s\"", tbl);
2637  }
2638 
2639  /* Parse the namespace/table strings and lookup in system catalogs */
2640  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2641  if (!tbl_oid)
2642  elog(ERROR, "cannot lookup table %s", nsp_tbl);
2643 
2644  /* Get the attribute number and type from the column name */
2645  col = text_to_cstring(coltxt);
2646  if (!get_attnum_attypid(tbl_oid, col, &attnum, &atttypid))
2647  elog(ERROR, "column %s.\"%s\" does not exist", nsp_tbl, col);
2648 
2649  /* We can only do estimates on geograpy and geometry */
2650  if ((atttypid != geographyOid) && (atttypid != geometryOid))
2651  {
2652  elog(ERROR, "column %s.\"%s\" must be a geometry or geography", nsp_tbl, col);
2653  }
2654 
2655  /* Read the extent from the head of the spatial index */
2656  /* works if there is a spatial index */
2657  idx_oid = table_get_spatial_index(tbl_oid, attnum, &key_type, &idx_attnum);
2658  if (idx_oid != InvalidOid)
2659  {
2660  /* TODO: how about only_parent ? */
2661  gbox = spatial_index_read_extent(idx_oid, idx_attnum, key_type);
2662  elog(DEBUG3, "index for %s.\"%s\" exists, reading gbox from there", nsp_tbl, col);
2663  if (!gbox) PG_RETURN_NULL();
2664  }
2665  /* Read the extent from the stats tables, */
2666  /* works if ANALYZE has been run */
2667  else
2668  {
2669  int stats_mode = 2;
2670  elog(DEBUG3, "index for %s.\"%s\" does not exist", nsp_tbl, col);
2671 
2672  /* For a geography column, we need the XYZ geocentric bounds */
2673  if (atttypid == geographyOid)
2674  stats_mode = 3;
2675 
2676  /* ND stats include an extent for the histogram */
2677  nd_stats = pg_get_nd_stats_by_name(tbl_oid, coltxt, stats_mode, only_parent);
2678 
2679  /* Error out on no stats */
2680  if (!nd_stats)
2681  {
2682  elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, col);
2683  PG_RETURN_NULL();
2684  }
2685 
2686  /* Construct the box */
2687  gbox = gbox_new(0);
2688  gbox->xmin = nd_stats->extent.min[0];
2689  gbox->xmax = nd_stats->extent.max[0];
2690  gbox->ymin = nd_stats->extent.min[1];
2691  gbox->ymax = nd_stats->extent.max[1];
2692  if (stats_mode != 2)
2693  {
2694  FLAGS_SET_Z(gbox->flags, 1);
2695  gbox->zmin = nd_stats->extent.min[2];
2696  gbox->zmax = nd_stats->extent.max[2];
2697  }
2698 
2699  pfree(nd_stats);
2700  }
2701 
2702  /* Convert geocentric geography box into a planar box */
2703  /* that users understand */
2704  if (atttypid == geographyOid)
2705  {
2706  GBOX *gbox_planar = gbox_new(0);
2707  gbox_geocentric_get_gbox_cartesian(gbox, gbox_planar);
2708  PG_RETURN_POINTER(gbox_planar);
2709  }
2710  else
2711  PG_RETURN_POINTER(gbox);
2712 }
2713 
2714 /*
2715  * Legacy prototype for Estimated_Extent()
2716  */
2718 Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
2719 {
2720  if ( PG_NARGS() == 3 )
2721  {
2722  PG_RETURN_DATUM(
2723  DirectFunctionCall3(gserialized_estimated_extent,
2724  PG_GETARG_DATUM(0),
2725  PG_GETARG_DATUM(1),
2726  PG_GETARG_DATUM(2)));
2727  }
2728  else if ( PG_NARGS() == 2 )
2729  {
2730  PG_RETURN_DATUM(
2731  DirectFunctionCall2(gserialized_estimated_extent,
2732  PG_GETARG_DATUM(0),
2733  PG_GETARG_DATUM(1)));
2734  }
2735 
2736  elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
2737  PG_RETURN_NULL();
2738 }
GBOX * gbox_new(lwflags_t flags)
Create a new gbox with the dimensionality indicated by the flags.
Definition: gbox.c:32
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition: gbox.c:197
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
Definition: gbox.c:404
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
static int index_get_keytype(Oid index_oid, int16 index_attnum)
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
#define STATISTIC_SLOT_ND
static char * nd_stats_to_grid(const ND_STATS *stats)
Create a printable view of the ND_STATS histogram.
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
static int range_full(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
#define ND_DIMS
The maximum number of dimensions our code can handle.
#define STATISTIC_KIND_2D
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Expand the bounds of target to include source.
#define DEFAULT_ND_JOINSEL
#define STATISTIC_KIND_ND
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
static bool get_attnum_attypid(Oid table_oid, const char *col, int16 *attnum, Oid *atttypid)
#define SDFACTOR
static GBOX * spatial_index_read_extent(Oid idx_oid, int idx_att_num, int key_type)
#define FALLBACK_ND_JOINSEL
#define MAX_NUM_BINS
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
This function should return an estimation of the number of rows returned by a query involving an over...
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
static Oid table_get_spatial_index(Oid tbl_oid, int16 attnum, int *key_type, int16 *idx_attnum)
static int index_get_am(Oid index_oid)
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
#define BIN_MIN_SIZE
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array.
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator?
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or contentrated within one dimension,...
static int16 index_has_attr(Oid index_oid, Oid table_oid, int16 table_attnum)
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
bool box2df_is_empty(const BOX2DF *a)
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
int gserialized_datum_get_gbox_p(Datum gsdatum, GBOX *gbox)
Given a GSERIALIZED datum, as quickly as possible (peaking into the top of the memory) return the gbo...
BOX2DF * box2df_copy(BOX2DF *b)
bool gidx_is_unknown(const GIDX *a)
GIDX * gidx_copy(GIDX *b)
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define LW_FAILURE
Definition: liblwgeom.h:96
uint16_t lwflags_t
Definition: liblwgeom.h:299
#define FLAGS_GET_Z(flags)
Definition: liblwgeom.h:165
#define FLAGS_GET_M(flags)
Definition: liblwgeom.h:166
#define FLAGS_SET_M(flags, value)
Definition: liblwgeom.h:173
#define FLAGS_SET_Z(flags, value)
Definition: liblwgeom.h:172
#define FLAGS_GET_GEODETIC(flags)
Definition: liblwgeom.h:168
This library is the generic geometry handling section of PostGIS.
int gbox_geocentric_get_gbox_cartesian(const GBOX *gbox_geocentric, GBOX *gbox_planar)
Definition: lwgeodetic.c:3596
#define str(s)
args
Definition: ovdump.py:45
Datum buffer(PG_FUNCTION_ARGS)
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided.
Definition: stringbuffer.c:254
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
Definition: stringbuffer.c:33
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
Definition: stringbuffer.c:85
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string.
Definition: stringbuffer.c:136
static void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
Definition: stringbuffer.h:105
double ymax
Definition: liblwgeom.h:357
double zmax
Definition: liblwgeom.h:359
double xmax
Definition: liblwgeom.h:355
double zmin
Definition: liblwgeom.h:358
double mmax
Definition: liblwgeom.h:361
double ymin
Definition: liblwgeom.h:356
double xmin
Definition: liblwgeom.h:354
double mmin
Definition: liblwgeom.h:360
lwflags_t flags
Definition: liblwgeom.h:353
AnalyzeAttrComputeStatsFunc std_compute_stats
float4 max[ND_DIMS]
float4 min[ND_DIMS]
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
int max[ND_DIMS]
int min[ND_DIMS]
N-dimensional box index type.
float4 size[ND_DIMS]
N-dimensional statistics structure.