PostGIS  3.0.0dev-r@@SVN_REVISION@@
gserialized_estimate.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * PostGIS is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * PostGIS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18  *
19  **********************************************************************
20  *
21  * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
22  *
23  **********************************************************************/
24 
25 
26 
27 /**********************************************************************
28  THEORY OF OPERATION
29 
30 The ANALYZE command hooks to a callback (gserialized_analyze_nd) that
31 calculates (compute_gserialized_stats_mode) two histograms of occurrences of
32 features, one for the 2D domain (and the && operator) and one for the
33 ND domain (and the &&& operator).
34 
35 Queries in PostgreSQL call into the selectivity sub-system to find out
36 the relative effectiveness of different clauses in sub-setting
37 relations. Queries with constant arguments call gserialized_gist_sel,
38 queries with relations on both sides call gserialized_gist_joinsel.
39 
40 gserialized_gist_sel sums up the values in the histogram that overlap
41 the constant search box.
42 
43 gserialized_gist_joinsel sums up the product of the overlapping
44 cells in each relation's histogram.
45 
46 Depending on the operator and type, the mode of selectivity calculation
47 will be 2D or ND.
48 
49 - geometry && geometry ==> 2D
50 - geometry &&& geometry ==> ND
51 - geography && geography ==> ND
52 
53 The 2D mode is put in effect by retrieving the 2D histogram from the
54 statistics cache and then allowing the generic ND calculations to
55 go to work.
56 
57 TO DO: More testing and examination of the &&& operator and mixed
58 dimensionality cases. (2D geometry) &&& (3D column), etc.
59 
60 **********************************************************************/
61 
62 #include "postgres.h"
63 
64 #include "access/genam.h"
65 #include "access/gin.h"
66 #include "access/gist.h"
67 #include "access/gist_private.h"
68 #include "access/gistscan.h"
69 #include "utils/datum.h"
70 #include "access/heapam.h"
71 #include "catalog/index.h"
72 #include "catalog/pg_am.h"
73 #include "miscadmin.h"
74 #include "storage/lmgr.h"
75 #include "catalog/namespace.h"
76 #include "catalog/indexing.h"
77 #if PG_VERSION_NUM >= 100000
78 #include "utils/regproc.h"
79 #include "utils/varlena.h"
80 #endif
81 #include "utils/tqual.h"
82 #include "utils/builtins.h"
83 #include "utils/datum.h"
84 #include "utils/snapmgr.h"
85 #include "utils/fmgroids.h"
86 #include "funcapi.h"
87 #include "access/heapam.h"
88 #include "catalog/pg_type.h"
89 #include "access/relscan.h"
90 
91 #include "executor/spi.h"
92 #include "fmgr.h"
93 #include "commands/vacuum.h"
94 #include "nodes/relation.h"
95 #include "parser/parsetree.h"
96 #include "utils/array.h"
97 #include "utils/lsyscache.h"
98 #include "utils/builtins.h"
99 #include "utils/syscache.h"
100 #include "utils/rel.h"
101 #include "utils/selfuncs.h"
102 
103 #include "../postgis_config.h"
104 
105 #if POSTGIS_PGSQL_VERSION >= 93
106  #include "access/htup_details.h"
107 #endif
108 
109 #include "stringbuffer.h"
110 #include "liblwgeom.h"
111 #include "lwgeom_pg.h" /* For debugging macros. */
112 #include "gserialized_gist.h" /* For index common functions */
113 
114 #include <math.h>
115 #if HAVE_IEEEFP_H
116 #include <ieeefp.h>
117 #endif
118 #include <float.h>
119 #include <string.h>
120 #include <stdio.h>
121 #include <ctype.h>
122 
123 
124 /************************************************************************/
125 
126 
127 /* Fall back to older finite() if necessary */
128 #ifndef HAVE_ISFINITE
129 # ifdef HAVE_GNU_ISFINITE
130 # define _GNU_SOURCE
131 # else
132 # define isfinite finite
133 # endif
134 #endif
135 
136 
137 /* Prototypes */
138 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS);
139 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS);
140 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS);
141 Datum gserialized_gist_sel(PG_FUNCTION_ARGS);
142 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS);
143 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
144 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
145 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
146 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS);
147 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
148 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
149 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
150 
151 /* Local prototypes */
152 static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type);
153 static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type);
154 
155 
156 /* Old Prototype */
157 Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
158 
159 /*
160 * Assign a number to the n-dimensional statistics kind
161 *
162 * tgl suggested:
163 *
164 * 1-100: reserved for assignment by the core Postgres project
165 * 100-199: reserved for assignment by PostGIS
166 * 200-9999: reserved for other globally-known stats kinds
167 * 10000-32767: reserved for private site-local use
168 */
169 #define STATISTIC_KIND_ND 102
170 #define STATISTIC_KIND_2D 103
171 #define STATISTIC_SLOT_ND 0
172 #define STATISTIC_SLOT_2D 1
173 
174 /*
175 * To look-up the spatial index associated with a table we
176 * need to find GIST indexes using our spatial keys.
177 */
178 #define INDEX_KEY_ND "gidx"
179 #define INDEX_KEY_2D "box2df"
180 
181 /*
182 * The SD factor restricts the size of the statistics histogram
183 * based on the standard deviation of the extent of the data.
184 * SDFACTOR is the number of standard deviations from the mean
185 * the histogram will extend.
186 */
187 #define SDFACTOR 3.25
188 
194 #define ND_DIMS 4
195 
202 #define MIN_DIMENSION_WIDTH 0.000000001
203 
208 #define MAX_DIMENSION_WIDTH 1.0E+20
209 
213 #define DEFAULT_ND_SEL 0.0001
214 #define DEFAULT_ND_JOINSEL 0.001
215 
219 #define FALLBACK_ND_SEL 0.2
220 #define FALLBACK_ND_JOINSEL 0.3
221 
227 typedef struct ND_BOX_T
228 {
229  float4 min[ND_DIMS];
230  float4 max[ND_DIMS];
231 } ND_BOX;
232 
236 typedef struct ND_IBOX_T
237 {
238  int min[ND_DIMS];
239  int max[ND_DIMS];
240 } ND_IBOX;
241 
242 
249 typedef struct ND_STATS_T
250 {
251  /* Dimensionality of the histogram. */
252  float4 ndims;
253 
254  /* Size of n-d histogram in each dimension. */
255  float4 size[ND_DIMS];
256 
257  /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
259 
260  /* How many rows in the table itself? */
262 
263  /* How many rows were in the sample that built this histogram? */
265 
266  /* How many not-Null/Empty features were in the sample? */
268 
269  /* How many features actually got sampled in the histogram? */
271 
272  /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
274 
275  /* How many cells did those histogram features cover? */
276  /* Since we are pro-rating coverage, this number should */
277  /* now always equal histogram_features */
279 
280  /* Variable length # of floats for histogram */
281  float4 value[1];
282 } ND_STATS;
283 
284 
285 
286 
293 static int
294 gbox_ndims(const GBOX* gbox)
295 {
296  int dims = 2;
297  if ( FLAGS_GET_GEODETIC(gbox->flags) )
298  return 3;
299  if ( FLAGS_GET_Z(gbox->flags) )
300  dims++;
301  if ( FLAGS_GET_M(gbox->flags) )
302  dims++;
303  return dims;
304 }
305 
311 static int
312 text_p_get_mode(const text *txt)
313 {
314  int mode = 2;
315  char *modestr;
316  if (VARSIZE(txt) - VARHDRSZ <= 0)
317  return mode;
318  modestr = (char*)VARDATA(txt);
319  if ( modestr[0] == 'N' )
320  mode = 0;
321  return mode;
322 }
323 
324 
/**
* qsort() comparator for int values, ascending order.
* Returns -1, 0 or 1 when *a is less than, equal to or greater than *b.
*/
static int
cmp_int (const void *a, const void *b)
{
	const int lhs = *((const int*)a);
	const int rhs = *((const int*)b);

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}
341 
346 static int
347 range_quintile(int *vals, int nvals)
348 {
349  qsort(vals, nvals, sizeof(int), cmp_int);
350  return vals[4*nvals/5] - vals[nvals/5];
351 }
352 
/**
* Sum an array of doubles.
*
* @param vals values to add up
* @param nvals number of entries in vals; zero or less yields 0.0
* @return the total, accumulated in double precision
*/
static double
total_double(const double *vals, int nvals)
{
	int i;
	/* Accumulate in double: a float accumulator would silently round */
	/* every partial sum to single precision. */
	double total = 0.0;

	for ( i = 0; i < nvals; i++ )
		total += vals[i];

	return total;
}
367 
368 #if POSTGIS_DEBUG_LEVEL >= 3
369 
/**
* Sum an array of ints. A count of zero or less yields 0.
*/
static int
total_int(const int *vals, int nvals)
{
	int sum = 0;

	while ( nvals-- > 0 )
		sum += *vals++;

	return sum;
}
384 
/**
* Arithmetic mean of an array of ints: the integer total from
* total_int() divided by the count, in double precision.
*/
static double
avg(const int *vals, int nvals)
{
	const double sum = (double)total_int(vals, nvals);
	return sum / (double)nvals;
}
394 
/**
* Population standard deviation of an array of ints: the square root
* of the mean squared distance from avg().
*/
static double
stddev(const int *vals, int nvals)
{
	const double mean = avg(vals, nvals);
	double sum_sq = 0;
	int i;

	/* Accumulate the squared deviations from the mean */
	for ( i = 0; i < nvals; i++ )
	{
		const double delta = mean - (double)(vals[i]);
		sum_sq += delta * delta;
	}

	return sqrt(sum_sq / nvals);
}
413 #endif /* POSTGIS_DEBUG_LEVEL >= 3 */
414 
419 static int
420 nd_stats_value_index(const ND_STATS *stats, int *indexes)
421 {
422  int d;
423  int accum = 1, vdx = 0;
424 
425  /* Calculate the index into the 1-d values array that the (i,j,k,l) */
426  /* n-d histogram coordinate implies. */
427  /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
428  for ( d = 0; d < (int)(stats->ndims); d++ )
429  {
430  int size = (int)(stats->size[d]);
431  if ( indexes[d] < 0 || indexes[d] >= size )
432  {
433  POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
434  return -1;
435  }
436  vdx += indexes[d] * accum;
437  accum *= size;
438  }
439  return vdx;
440 }
441 
445 static char*
446 nd_box_to_json(const ND_BOX *nd_box, int ndims)
447 {
448  char *rv;
449  int i;
451 
452  stringbuffer_append(sb, "{\"min\":[");
453  for ( i = 0; i < ndims; i++ )
454  {
455  if ( i ) stringbuffer_append(sb, ",");
456  stringbuffer_aprintf(sb, "%.6g", nd_box->min[i]);
457  }
458  stringbuffer_append(sb, "],\"max\":[");
459  for ( i = 0; i < ndims; i++ )
460  {
461  if ( i ) stringbuffer_append(sb, ",");
462  stringbuffer_aprintf(sb, "%.6g", nd_box->max[i]);
463  }
464  stringbuffer_append(sb, "]}");
465 
468  return rv;
469 }
470 
471 
476 static char*
477 nd_stats_to_json(const ND_STATS *nd_stats)
478 {
479  char *json_extent, *str;
480  int d;
482  int ndims = (int)roundf(nd_stats->ndims);
483 
484  stringbuffer_append(sb, "{");
485  stringbuffer_aprintf(sb, "\"ndims\":%d,", ndims);
486 
487  /* Size */
488  stringbuffer_append(sb, "\"size\":[");
489  for ( d = 0; d < ndims; d++ )
490  {
491  if ( d ) stringbuffer_append(sb, ",");
492  stringbuffer_aprintf(sb, "%d", (int)roundf(nd_stats->size[d]));
493  }
494  stringbuffer_append(sb, "],");
495 
496  /* Extent */
497  json_extent = nd_box_to_json(&(nd_stats->extent), ndims);
498  stringbuffer_aprintf(sb, "\"extent\":%s,", json_extent);
499  pfree(json_extent);
500 
501  stringbuffer_aprintf(sb, "\"table_features\":%d,", (int)roundf(nd_stats->table_features));
502  stringbuffer_aprintf(sb, "\"sample_features\":%d,", (int)roundf(nd_stats->sample_features));
503  stringbuffer_aprintf(sb, "\"not_null_features\":%d,", (int)roundf(nd_stats->not_null_features));
504  stringbuffer_aprintf(sb, "\"histogram_features\":%d,", (int)roundf(nd_stats->histogram_features));
505  stringbuffer_aprintf(sb, "\"histogram_cells\":%d,", (int)roundf(nd_stats->histogram_cells));
506  stringbuffer_aprintf(sb, "\"cells_covered\":%d", (int)roundf(nd_stats->cells_covered));
507  stringbuffer_append(sb, "}");
508 
509  str = stringbuffer_getstringcopy(sb);
511  return str;
512 }
513 
514 
520 // static char*
521 // nd_stats_to_grid(const ND_STATS *stats)
522 // {
523 // char *rv;
524 // int j, k;
525 // int sizex = (int)roundf(stats->size[0]);
526 // int sizey = (int)roundf(stats->size[1]);
527 // stringbuffer_t *sb = stringbuffer_create();
528 //
529 // for ( k = 0; k < sizey; k++ )
530 // {
531 // for ( j = 0; j < sizex; j++ )
532 // {
533 // stringbuffer_aprintf(sb, "%3d ", (int)roundf(stats->value[j + k*sizex]));
534 // }
535 // stringbuffer_append(sb, "\n");
536 // }
537 //
538 // rv = stringbuffer_getstringcopy(sb);
539 // stringbuffer_destroy(sb);
540 // return rv;
541 // }
542 
543 
545 static int
546 nd_box_merge(const ND_BOX *source, ND_BOX *target)
547 {
548  int d;
549  for ( d = 0; d < ND_DIMS; d++ )
550  {
551  target->min[d] = Min(target->min[d], source->min[d]);
552  target->max[d] = Max(target->max[d], source->max[d]);
553  }
554  return true;
555 }
556 
558 static int
560 {
561  memset(a, 0, sizeof(ND_BOX));
562  return true;
563 }
564 
570 static int
572 {
573  int d;
574  for ( d = 0; d < ND_DIMS; d++ )
575  {
576  a->min[d] = FLT_MAX;
577  a->max[d] = -1 * FLT_MAX;
578  }
579  return true;
580 }
581 
583 static void
584 nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
585 {
586  int d = 0;
587  POSTGIS_DEBUGF(3, " %s", gbox_to_string(gbox));
588 
589  nd_box_init(nd_box);
590  nd_box->min[d] = gbox->xmin;
591  nd_box->max[d] = gbox->xmax;
592  d++;
593  nd_box->min[d] = gbox->ymin;
594  nd_box->max[d] = gbox->ymax;
595  d++;
596  if ( FLAGS_GET_GEODETIC(gbox->flags) )
597  {
598  nd_box->min[d] = gbox->zmin;
599  nd_box->max[d] = gbox->zmax;
600  return;
601  }
602  if ( FLAGS_GET_Z(gbox->flags) )
603  {
604  nd_box->min[d] = gbox->zmin;
605  nd_box->max[d] = gbox->zmax;
606  d++;
607  }
608  if ( FLAGS_GET_M(gbox->flags) )
609  {
610  nd_box->min[d] = gbox->mmin;
611  nd_box->max[d] = gbox->mmax;
612  d++;
613  }
614  return;
615 }
616 
620 static int
621 nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
622 {
623  int d;
624  for ( d = 0; d < ndims; d++ )
625  {
626  if ( (a->min[d] > b->max[d]) || (a->max[d] < b->min[d]) )
627  return false;
628  }
629  return true;
630 }
631 
635 static int
636 nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
637 {
638  int d;
639  for ( d = 0; d < ndims; d++ )
640  {
641  if ( ! ((a->min[d] < b->min[d]) && (a->max[d] > b->max[d])) )
642  return false;
643  }
644  return true;
645 }
646 
651 static int
652 nd_box_expand(ND_BOX *nd_box, double expansion_factor)
653 {
654  int d;
655  double size;
656  for ( d = 0; d < ND_DIMS; d++ )
657  {
658  size = nd_box->max[d] - nd_box->min[d];
659  if ( size <= 0 ) continue;
660  nd_box->min[d] -= size * expansion_factor / 2;
661  nd_box->max[d] += size * expansion_factor / 2;
662  }
663  return true;
664 }
665 
670 static inline int
671 nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
672 {
673  int d;
674 
675  POSTGIS_DEBUGF(4, " nd_box: %s", nd_box_to_json(nd_box, nd_stats->ndims));
676 
677  /* Initialize ibox */
678  memset(nd_ibox, 0, sizeof(ND_IBOX));
679 
680  /* In each dimension... */
681  for ( d = 0; d < nd_stats->ndims; d++ )
682  {
683  double smin = nd_stats->extent.min[d];
684  double smax = nd_stats->extent.max[d];
685  double width = smax - smin;
686  int size = roundf(nd_stats->size[d]);
687 
688  /* ... find cells the box overlaps with in this dimension */
689  nd_ibox->min[d] = floor(size * (nd_box->min[d] - smin) / width);
690  nd_ibox->max[d] = floor(size * (nd_box->max[d] - smin) / width);
691 
692  POSTGIS_DEBUGF(5, " stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
693  POSTGIS_DEBUGF(5, " overlap: dim %d: (%d, %d)", d, nd_ibox->min[d], nd_ibox->max[d]);
694 
695  /* Push any out-of range values into range */
696  nd_ibox->min[d] = Max(nd_ibox->min[d], 0);
697  nd_ibox->max[d] = Min(nd_ibox->max[d], size-1);
698  }
699  return true;
700 }
701 
705 static inline double
706 nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
707 {
708  int d;
709  bool covered = true;
710  double ivol = 1.0;
711  double vol2 = 1.0;
712  double vol1 = 1.0;
713 
714  for ( d = 0 ; d < ndims; d++ )
715  {
716  if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
717  return 0.0; /* Disjoint */
718 
719  if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
720  covered = false;
721  }
722 
723  if ( covered )
724  return 1.0;
725 
726  for ( d = 0; d < ndims; d++ )
727  {
728  double width1 = b1->max[d] - b1->min[d];
729  double width2 = b2->max[d] - b2->min[d];
730  double imin, imax, iwidth;
731 
732  vol1 *= width1;
733  vol2 *= width2;
734 
735  imin = Max(b1->min[d], b2->min[d]);
736  imax = Min(b1->max[d], b2->max[d]);
737  iwidth = imax - imin;
738  iwidth = Max(0.0, iwidth);
739 
740  ivol *= iwidth;
741  }
742 
743  if ( vol2 == 0.0 )
744  return vol2;
745 
746  return ivol / vol2;
747 }
748 
749 /* How many bins shall we use in figuring out the distribution? */
750 #define NUM_BINS 50
751 
767 static int
768 nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
769 {
770  int d, i, k, range;
771  int counts[NUM_BINS];
772  double smin, smax; /* Spatial min, spatial max */
773  double swidth; /* Spatial width of dimension */
774 #if POSTGIS_DEBUG_LEVEL >= 3
775  double average, sdev, sdev_ratio;
776 #endif
777  int bmin, bmax; /* Bin min, bin max */
778  const ND_BOX *ndb;
779 
780  /* For each dimension... */
781  for ( d = 0; d < ndims; d++ )
782  {
783  /* Initialize counts for this dimension */
784  memset(counts, 0, sizeof(counts));
785 
786  smin = extent->min[d];
787  smax = extent->max[d];
788  swidth = smax - smin;
789 
790  /* Don't try and calculate distribution of overly narrow */
791  /* or overly wide dimensions. Here we're being pretty geographical, */
792  /* expecting "normal" planar or geographic coordinates. */
793  /* Otherwise we have to "handle" +/- Inf bounded features and */
794  /* the assumptions needed for that are as bad as this hack. */
795  if ( swidth < MIN_DIMENSION_WIDTH || swidth > MAX_DIMENSION_WIDTH )
796  {
797  distribution[d] = 0;
798  continue;
799  }
800 
801  /* Sum up the overlaps of each feature with the dimensional bins */
802  for ( i = 0; i < num_boxes; i++ )
803  {
804  double minoffset, maxoffset;
805 
806  /* Skip null entries */
807  ndb = nd_boxes[i];
808  if ( ! ndb ) continue;
809 
810  /* Where does box fall relative to the working range */
811  minoffset = ndb->min[d] - smin;
812  maxoffset = ndb->max[d] - smin;
813 
814  /* Skip boxes that our outside our working range */
815  if ( minoffset < 0 || minoffset > swidth ||
816  maxoffset < 0 || maxoffset > swidth )
817  {
818  continue;
819  }
820 
821  /* What bins does this range correspond to? */
822  bmin = floor(NUM_BINS * minoffset / swidth);
823  bmax = floor(NUM_BINS * maxoffset / swidth);
824 
825  /* Should only happen when maxoffset==swidth */
826  if (bmax >= NUM_BINS)
827  bmax = NUM_BINS-1;
828 
829  POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
830 
831  /* Increment the counts in all the bins this feature overlaps */
832  for ( k = bmin; k <= bmax; k++ )
833  {
834  counts[k] += 1;
835  }
836 
837  }
838 
839  /* How dispersed is the distribution of features across bins? */
840  range = range_quintile(counts, NUM_BINS);
841 
842 #if POSTGIS_DEBUG_LEVEL >= 3
843  average = avg(counts, NUM_BINS);
844  sdev = stddev(counts, NUM_BINS);
845  sdev_ratio = sdev/average;
846 
847  POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
848  POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
849  POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
850  POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
851 #endif
852 
853  distribution[d] = range;
854  }
855 
856  return true;
857 }
858 
864 static inline int
865 nd_increment(ND_IBOX *ibox, int ndims, int *counter)
866 {
867  int d = 0;
868 
869  while ( d < ndims )
870  {
871  if ( counter[d] < ibox->max[d] )
872  {
873  counter[d] += 1;
874  break;
875  }
876  counter[d] = ibox->min[d];
877  d++;
878  }
879  /* That's it, cannot increment any more! */
880  if ( d == ndims )
881  return false;
882 
883  /* Increment complete! */
884  return true;
885 }
886 
887 static ND_STATS*
888 pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
889 {
890  int stats_kind = STATISTIC_KIND_ND;
891  int rv;
892  ND_STATS *nd_stats;
893 
894  /* If we're in 2D mode, set the kind appropriately */
895  if ( mode == 2 ) stats_kind = STATISTIC_KIND_2D;
896 
897  /* Then read the geom status histogram from that */
898 
899 #if POSTGIS_PGSQL_VERSION < 100
900  {
901  float4 *floatptr;
902  int nvalues;
903 
904  rv = get_attstatsslot(stats_tuple, 0, 0, stats_kind, InvalidOid,
905  NULL, NULL, NULL, &floatptr, &nvalues);
906 
907  if ( ! rv ) {
908  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
909  return NULL;
910  }
911 
912  /* Clone the stats here so we can release the attstatsslot immediately */
913  nd_stats = palloc(sizeof(float) * nvalues);
914  memcpy(nd_stats, floatptr, sizeof(float) * nvalues);
915 
916  /* Clean up */
917  free_attstatsslot(0, NULL, 0, floatptr, nvalues);
918  }
919 #else /* PostgreSQL 10 or higher */
920  {
921  AttStatsSlot sslot;
922  rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
923  ATTSTATSSLOT_NUMBERS);
924  if ( ! rv ) {
925  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
926  return NULL;
927  }
928 
929  /* Clone the stats here so we can release the attstatsslot immediately */
930  nd_stats = palloc(sizeof(float4) * sslot.nnumbers);
931  memcpy(nd_stats, sslot.numbers, sizeof(float4) * sslot.nnumbers);
932 
933  free_attstatsslot(&sslot);
934  }
935 #endif
936 
937  return nd_stats;
938 }
939 
944 static ND_STATS*
945 pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
946 {
947  HeapTuple stats_tuple = NULL;
948  ND_STATS *nd_stats;
949 
950  /* First pull the stats tuple for the whole tree */
951  if ( ! only_parent )
952  {
953  POSTGIS_DEBUGF(2, "searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
954  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(true));
955  if ( stats_tuple )
956  POSTGIS_DEBUGF(2, "found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
957  }
958  /* Fall-back to main table stats only, if not found for whole tree or explicitly ignored */
959  if ( only_parent || ! stats_tuple )
960  {
961  POSTGIS_DEBUGF(2, "searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
962  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(false));
963  if ( stats_tuple )
964  POSTGIS_DEBUGF(2, "found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
965  }
966  if ( ! stats_tuple )
967  {
968  POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
969  return NULL;
970  }
971 
972  nd_stats = pg_nd_stats_from_tuple(stats_tuple, mode);
973  ReleaseSysCache(stats_tuple);
974  if ( ! nd_stats )
975  {
976  POSTGIS_DEBUGF(2,
977  "histogram for attribute %d of table \"%s\" does not exist?",
978  att_num, get_rel_name(table_oid));
979  }
980 
981  return nd_stats;
982 }
983 
992 static ND_STATS*
993 pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
994 {
995  const char *att_name = text_to_cstring(att_text);
996  AttrNumber att_num;
997 
998  /* We know the name? Look up the num */
999  if ( att_text )
1000  {
1001  /* Get the attribute number */
1002  att_num = get_attnum(table_oid, att_name);
1003  if ( ! att_num ) {
1004  elog(ERROR, "attribute \"%s\" does not exist", att_name);
1005  return NULL;
1006  }
1007  }
1008  else
1009  {
1010  elog(ERROR, "attribute name is null");
1011  return NULL;
1012  }
1013 
1014  return pg_get_nd_stats(table_oid, att_num, mode, only_parent);
1015 }
1016 
1030 static float8
1032 {
1033  int ncells1, ncells2;
1034  int ndims1, ndims2, ndims;
1035  double ntuples_max;
1036  double ntuples_not_null1, ntuples_not_null2;
1037 
1038  ND_BOX extent1, extent2;
1039  ND_IBOX ibox1, ibox2;
1040  int at1[ND_DIMS];
1041  int at2[ND_DIMS];
1042  double min1[ND_DIMS];
1043  double width1[ND_DIMS];
1044  double cellsize1[ND_DIMS];
1045  int size2[ND_DIMS];
1046  double min2[ND_DIMS];
1047  double width2[ND_DIMS];
1048  double cellsize2[ND_DIMS];
1049  int size1[ND_DIMS];
1050  int d;
1051  double val = 0;
1052  float8 selectivity;
1053 
1054  /* Drop out on null inputs */
1055  if ( ! ( s1 && s2 ) )
1056  {
1057  elog(NOTICE, " estimate_join_selectivity called with null inputs");
1058  return FALLBACK_ND_SEL;
1059  }
1060 
1061  /* We need to know how many cells each side has... */
1062  ncells1 = (int)roundf(s1->histogram_cells);
1063  ncells2 = (int)roundf(s2->histogram_cells);
1064 
1065  /* ...so that we can drive the summation loop with the smaller histogram. */
1066  if ( ncells1 > ncells2 )
1067  {
1068  const ND_STATS *stats_tmp = s1;
1069  s1 = s2;
1070  s2 = stats_tmp;
1071  }
1072 
1073  POSTGIS_DEBUGF(3, "s1: %s", nd_stats_to_json(s1));
1074  POSTGIS_DEBUGF(3, "s2: %s", nd_stats_to_json(s2));
1075 
1076  /* Re-read that info after the swap */
1077  ncells1 = (int)roundf(s1->histogram_cells);
1078  ncells2 = (int)roundf(s2->histogram_cells);
1079 
1080  /* Q: What's the largest possible join size these relations can create? */
1081  /* A: The product of the # of non-null rows in each relation. */
1082  ntuples_not_null1 = s1->table_features * (s1->not_null_features / s1->sample_features);
1083  ntuples_not_null2 = s2->table_features * (s2->not_null_features / s2->sample_features);
1084  ntuples_max = ntuples_not_null1 * ntuples_not_null2;
1085 
1086  /* Get the ndims as ints */
1087  ndims1 = (int)roundf(s1->ndims);
1088  ndims2 = (int)roundf(s2->ndims);
1089  ndims = Max(ndims1, ndims2);
1090 
1091  /* Get the extents */
1092  extent1 = s1->extent;
1093  extent2 = s2->extent;
1094 
1095  /* If relation stats do not intersect, join is very very selective. */
1096  if ( ! nd_box_intersects(&extent1, &extent2, ndims) )
1097  {
1098  POSTGIS_DEBUG(3, "relation stats do not intersect, returning 0");
1099  PG_RETURN_FLOAT8(0.0);
1100  }
1101 
1102  /*
1103  * First find the index range of the part of the smaller
1104  * histogram that overlaps the larger one.
1105  */
1106  if ( ! nd_box_overlap(s1, &extent2, &ibox1) )
1107  {
1108  POSTGIS_DEBUG(3, "could not calculate overlap of relations");
1109  PG_RETURN_FLOAT8(FALLBACK_ND_JOINSEL);
1110  }
1111 
1112  /* Initialize counters / constants on s1 */
1113  for ( d = 0; d < ndims1; d++ )
1114  {
1115  at1[d] = ibox1.min[d];
1116  min1[d] = s1->extent.min[d];
1117  width1[d] = s1->extent.max[d] - s1->extent.min[d];
1118  size1[d] = (int)roundf(s1->size[d]);
1119  cellsize1[d] = width1[d] / size1[d];
1120  }
1121 
1122  /* Initialize counters / constants on s2 */
1123  for ( d = 0; d < ndims2; d++ )
1124  {
1125  min2[d] = s2->extent.min[d];
1126  width2[d] = s2->extent.max[d] - s2->extent.min[d];
1127  size2[d] = (int)roundf(s2->size[d]);
1128  cellsize2[d] = width2[d] / size2[d];
1129  }
1130 
1131  /* For each affected cell of s1... */
1132  do
1133  {
1134  double val1;
1135  /* Construct the bounds of this cell */
1136  ND_BOX nd_cell1;
1137  nd_box_init(&nd_cell1);
1138  for ( d = 0; d < ndims1; d++ )
1139  {
1140  nd_cell1.min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1141  nd_cell1.max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1142  }
1143 
1144  /* Find the cells of s2 that cell1 overlaps.. */
1145  nd_box_overlap(s2, &nd_cell1, &ibox2);
1146 
1147  /* Initialize counter */
1148  for ( d = 0; d < ndims2; d++ )
1149  {
1150  at2[d] = ibox2.min[d];
1151  }
1152 
1153  POSTGIS_DEBUGF(3, "at1 %d,%d %s", at1[0], at1[1], nd_box_to_json(&nd_cell1, ndims1));
1154 
1155  /* Get the value at this cell */
1156  val1 = s1->value[nd_stats_value_index(s1, at1)];
1157 
1158  /* For each overlapped cell of s2... */
1159  do
1160  {
1161  double ratio2;
1162  double val2;
1163 
1164  /* Construct the bounds of this cell */
1165  ND_BOX nd_cell2;
1166  nd_box_init(&nd_cell2);
1167  for ( d = 0; d < ndims2; d++ )
1168  {
1169  nd_cell2.min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1170  nd_cell2.max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1171  }
1172 
1173  POSTGIS_DEBUGF(3, " at2 %d,%d %s", at2[0], at2[1], nd_box_to_json(&nd_cell2, ndims2));
1174 
1175  /* Calculate overlap ratio of the cells */
1176  ratio2 = nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1177 
1178  /* Multiply the cell counts, scaled by overlap ratio */
1179  val2 = s2->value[nd_stats_value_index(s2, at2)];
1180  POSTGIS_DEBUGF(3, " val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1181  val += val1 * (val2 * ratio2);
1182  }
1183  while ( nd_increment(&ibox2, ndims2, at2) );
1184 
1185  }
1186  while( nd_increment(&ibox1, ndims1, at1) );
1187 
1188  POSTGIS_DEBUGF(3, "val of histogram = %g", val);
1189 
1190  /*
1191  * In order to compare our total cell count "val" to the
1192  * ntuples_max, we need to scale val up to reflect a full
1193  * table estimate. So, multiply by ratio of table size to
1194  * sample size.
1195  */
1196  val *= (s1->table_features / s1->sample_features);
1197  val *= (s2->table_features / s2->sample_features);
1198 
1199  POSTGIS_DEBUGF(3, "val scaled to full table size = %g", val);
1200 
1201  /*
1202  * Because the cell counts are over-determined due to
1203  * double counting of features that overlap multiple cells
1204  * (see the compute_gserialized_stats routine)
1205  * we also have to scale our cell count "val" *down*
1206  * to adjust for the double counting.
1207  */
1208 // val /= (s1->cells_covered / s1->histogram_features);
1209 // val /= (s2->cells_covered / s2->histogram_features);
1210 
1211  /*
1212  * Finally, the selectivity is the estimated number of
1213  * rows to be returned divided by the maximum possible
1214  * number of rows that can be returned.
1215  */
1216  selectivity = val / ntuples_max;
1217 
1218  /* Guard against over-estimates and crazy numbers :) */
1219  if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1220  {
1221  selectivity = DEFAULT_ND_JOINSEL;
1222  }
1223  else if ( selectivity > 1.0 )
1224  {
1225  selectivity = 1.0;
1226  }
1227 
1228  return selectivity;
1229 }
1230 
1236 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
1237 {
1238  PG_RETURN_DATUM(DirectFunctionCall5(
1240  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1241  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1242  Int32GetDatum(0) /* ND mode */
1243  ));
1244 }
1245 
1251 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
1252 {
1253  PG_RETURN_DATUM(DirectFunctionCall5(
1255  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1256  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1257  Int32GetDatum(2) /* 2D mode */
1258  ));
1259 }
1260 
1270 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
1271 {
1272  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1273  /* Oid operator = PG_GETARG_OID(1); */
1274  List *args = (List *) PG_GETARG_POINTER(2);
1275  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1276  int mode = PG_GETARG_INT32(4);
1277 
1278  Node *arg1, *arg2;
1279  Var *var1, *var2;
1280  Oid relid1, relid2;
1281 
1282  ND_STATS *stats1, *stats2;
1283  float8 selectivity;
1284 
1285  /* Only respond to an inner join/unknown context join */
1286  if (jointype != JOIN_INNER)
1287  {
1288  elog(DEBUG1, "%s: jointype %d not supported", __func__, jointype);
1289  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1290  }
1291 
1292  /* Find Oids of the geometry columns we are working with */
1293  arg1 = (Node*) linitial(args);
1294  arg2 = (Node*) lsecond(args);
1295  var1 = (Var*) arg1;
1296  var2 = (Var*) arg2;
1297 
1298  /* We only do column joins right now, no functional joins */
1299  /* TODO: handle g1 && ST_Expand(g2) */
1300  if (!IsA(arg1, Var) || !IsA(arg2, Var))
1301  {
1302  elog(DEBUG1, "%s called with arguments that are not column references", __func__);
1303  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1304  }
1305 
1306  /* What are the Oids of our tables/relations? */
1307  relid1 = getrelid(var1->varno, root->parse->rtable);
1308  relid2 = getrelid(var2->varno, root->parse->rtable);
1309 
1310  POSTGIS_DEBUGF(3, "using relations \"%s\" Oid(%d), \"%s\" Oid(%d)",
1311  get_rel_name(relid1) ? get_rel_name(relid1) : "NULL", relid1, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2);
1312 
1313  /* Pull the stats from the stats system. */
1314  stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
1315  stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
1316 
1317  /* If we can't get stats, we have to stop here! */
1318  if ( ! stats1 )
1319  {
1320  POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid1) ? get_rel_name(relid1) : "NULL" , relid1);
1321  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1322  }
1323  else if ( ! stats2 )
1324  {
1325  POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2);
1326  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1327  }
1328 
1329  selectivity = estimate_join_selectivity(stats1, stats2);
1330  POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
1331 
1332  pfree(stats1);
1333  pfree(stats2);
1334  PG_RETURN_FLOAT8(selectivity);
1335 }
1336 
1337 
1338 
1339 
1358 static void
1359 compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1360  int sample_rows, double total_rows, int mode)
1361 {
1362  MemoryContext old_context;
1363  int d, i; /* Counters */
1364  int notnull_cnt = 0; /* # not null rows in the sample */
1365  int null_cnt = 0; /* # null rows in the sample */
1366  int histogram_features = 0; /* # rows that actually got counted in the histogram */
1367 
1368  ND_STATS *nd_stats; /* Our histogram */
1369  size_t nd_stats_size; /* Size to allocate */
1370 
1371  double total_width = 0; /* # of bytes used by sample */
1372  double total_sample_volume = 0; /* Area/volume coverage of the sample */
1373  double total_cell_count = 0; /* # of cells in histogram affected by sample */
1374 
1375  ND_BOX sum; /* Sum of extents of sample boxes */
1376  ND_BOX avg; /* Avg of extents of sample boxes */
1377  ND_BOX stddev; /* StdDev of extents of sample boxes */
1378 
1379  const ND_BOX **sample_boxes; /* ND_BOXes for each of the sample features */
1380  ND_BOX sample_extent; /* Extent of the raw sample */
1381  int histo_size[ND_DIMS]; /* histogram nrows, ncols, etc */
1382  ND_BOX histo_extent; /* Spatial extent of the histogram */
1383  ND_BOX histo_extent_new; /* Temporary variable */
1384  int histo_cells_target; /* Number of cells we will shoot for, given the stats target */
1385  int histo_cells; /* Number of cells in the histogram */
1386  int histo_cells_new = 1; /* Temporary variable */
1387 
1388  int ndims = 2; /* Dimensionality of the sample */
1389  int histo_ndims = 0; /* Dimensionality of the histogram */
1390  double sample_distribution[ND_DIMS]; /* How homogeneous is distribution of sample in each axis? */
1391  double total_distribution; /* Total of sample_distribution */
1392 
1393  int stats_slot; /* What slot is this data going into? (2D vs ND) */
1394  int stats_kind; /* And this is what? (2D vs ND) */
1395 
1396  /* Initialize sum and stddev */
1397  nd_box_init(&sum);
1398  nd_box_init(&stddev);
1399  nd_box_init(&avg);
1400  nd_box_init(&histo_extent);
1401  nd_box_init(&histo_extent_new);
1402 
1403  /*
1404  * This is where gserialized_analyze_nd
1405  * should put its' custom parameters.
1406  */
1407  /* void *mystats = stats->extra_data; */
1408 
1409  POSTGIS_DEBUG(2, "compute_gserialized_stats called");
1410  POSTGIS_DEBUGF(3, " # sample_rows: %d", sample_rows);
1411  POSTGIS_DEBUGF(3, " estimate of total_rows: %.6g", total_rows);
1412 
1413  /*
1414  * We might need less space, but don't think
1415  * its worth saving...
1416  */
1417  sample_boxes = palloc(sizeof(ND_BOX*) * sample_rows);
1418 
1419  /*
1420  * First scan:
1421  * o read boxes
1422  * o find dimensionality of the sample
1423  * o find extent of the sample
1424  * o count null-infinite/not-null values
1425  * o compute total_width
1426  * o compute total features's box area (for avgFeatureArea)
1427  * o sum features box coordinates (for standard deviation)
1428  */
1429  for ( i = 0; i < sample_rows; i++ )
1430  {
1431  Datum datum;
1432  GSERIALIZED *geom;
1433  GBOX gbox;
1434  ND_BOX *nd_box;
1435  bool is_null;
1436  bool is_copy;
1437 
1438  datum = fetchfunc(stats, i, &is_null);
1439 
1440  /* Skip all NULLs. */
1441  if ( is_null )
1442  {
1443  POSTGIS_DEBUGF(4, " skipped null geometry %d", i);
1444  null_cnt++;
1445  continue;
1446  }
1447 
1448  /* Read the bounds from the gserialized. */
1449  geom = (GSERIALIZED *)PG_DETOAST_DATUM(datum);
1450  is_copy = VARATT_IS_EXTENDED(datum);
1451  if ( LW_FAILURE == gserialized_get_gbox_p(geom, &gbox) )
1452  {
1453  /* Skip empties too. */
1454  POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
1455  continue;
1456  }
1457 
1458  /* If we're in 2D mode, zero out the higher dimensions for "safety" */
1459  if ( mode == 2 )
1460  gbox.zmin = gbox.zmax = gbox.mmin = gbox.mmax = 0.0;
1461 
1462  /* Check bounds for validity (finite and not NaN) */
1463  if ( ! gbox_is_valid(&gbox) )
1464  {
1465  POSTGIS_DEBUGF(3, " skipped infinite/nan geometry %d", i);
1466  continue;
1467  }
1468 
1469  /*
1470  * In N-D mode, set the ndims to the maximum dimensionality found
1471  * in the sample. Otherwise, leave at ndims == 2.
1472  */
1473  if ( mode != 2 )
1474  ndims = Max(gbox_ndims(&gbox), ndims);
1475 
1476  /* Convert gbox to n-d box */
1477  nd_box = palloc(sizeof(ND_BOX));
1478  nd_box_from_gbox(&gbox, nd_box);
1479 
1480  /* Cache n-d bounding box */
1481  sample_boxes[notnull_cnt] = nd_box;
1482 
1483  /* Initialize sample extent before merging first entry */
1484  if ( ! notnull_cnt )
1485  nd_box_init_bounds(&sample_extent);
1486 
1487  /* Add current sample to overall sample extent */
1488  nd_box_merge(nd_box, &sample_extent);
1489 
1490  /* How many bytes does this sample use? */
1491  total_width += VARSIZE(geom);
1492 
1493  /* Add bounds coordinates to sums for stddev calculation */
1494  for ( d = 0; d < ndims; d++ )
1495  {
1496  sum.min[d] += nd_box->min[d];
1497  sum.max[d] += nd_box->max[d];
1498  }
1499 
1500  /* Increment our "good feature" count */
1501  notnull_cnt++;
1502 
1503  /* Free up memory if our sample geometry was copied */
1504  if ( is_copy )
1505  pfree(geom);
1506 
1507  /* Give backend a chance of interrupting us */
1508  vacuum_delay_point();
1509  }
1510 
1511  /*
1512  * We'll build a histogram having stats->attr->attstattarget cells
1513  * on each side, within reason... we'll use ndims*10000 as the
1514  * maximum number of cells.
1515  * Also, if we're sampling a relatively small table, we'll try to ensure that
1516  * we have an average of 5 features for each cell so the histogram isn't
1517  * so sparse.
1518  */
1519  histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
1520  histo_cells_target = Min(histo_cells_target, ndims * 10000);
1521  histo_cells_target = Min(histo_cells_target, (int)(total_rows/5));
1522  POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
1523  POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
1524 
1525  /* If there's no useful features, we can't work out stats */
1526  if ( ! notnull_cnt )
1527  {
1528  elog(NOTICE, "no non-null/empty features, unable to compute statistics");
1529  stats->stats_valid = false;
1530  return;
1531  }
1532 
1533  POSTGIS_DEBUGF(3, " sample_extent: %s", nd_box_to_json(&sample_extent, ndims));
1534 
1535  /*
1536  * Second scan:
1537  * o compute standard deviation
1538  */
1539  for ( d = 0; d < ndims; d++ )
1540  {
1541  /* Calculate average bounds values */
1542  avg.min[d] = sum.min[d] / notnull_cnt;
1543  avg.max[d] = sum.max[d] / notnull_cnt;
1544 
1545  /* Calculate standard deviation for this dimension bounds */
1546  for ( i = 0; i < notnull_cnt; i++ )
1547  {
1548  const ND_BOX *ndb = sample_boxes[i];
1549  stddev.min[d] += (ndb->min[d] - avg.min[d]) * (ndb->min[d] - avg.min[d]);
1550  stddev.max[d] += (ndb->max[d] - avg.max[d]) * (ndb->max[d] - avg.max[d]);
1551  }
1552  stddev.min[d] = sqrt(stddev.min[d] / notnull_cnt);
1553  stddev.max[d] = sqrt(stddev.max[d] / notnull_cnt);
1554 
1555  /* Histogram bounds for this dimension bounds is avg +/- SDFACTOR * stdev */
1556  histo_extent.min[d] = Max(avg.min[d] - SDFACTOR * stddev.min[d], sample_extent.min[d]);
1557  histo_extent.max[d] = Min(avg.max[d] + SDFACTOR * stddev.max[d], sample_extent.max[d]);
1558  }
1559 
1560  /*
1561  * Third scan:
1562  * o skip hard deviants
1563  * o compute new histogram box
1564  */
1565  nd_box_init_bounds(&histo_extent_new);
1566  for ( i = 0; i < notnull_cnt; i++ )
1567  {
1568  const ND_BOX *ndb = sample_boxes[i];
1569  /* Skip any hard deviants (boxes entirely outside our histo_extent */
1570  if ( ! nd_box_intersects(&histo_extent, ndb, ndims) )
1571  {
1572  POSTGIS_DEBUGF(4, " feature %d is a hard deviant, skipped", i);
1573  sample_boxes[i] = NULL;
1574  continue;
1575  }
1576  /* Expand our new box to fit all the other features. */
1577  nd_box_merge(ndb, &histo_extent_new);
1578  }
1579  /*
1580  * Expand the box slightly (1%) to avoid edge effects
1581  * with objects that are on the boundary
1582  */
1583  nd_box_expand(&histo_extent_new, 0.01);
1584  histo_extent = histo_extent_new;
1585 
1586  /*
1587  * How should we allocate our histogram cells to the
1588  * different dimensions? We can't do it by raw dimensional width,
1589  * because in x/y/z space, the z can have different units
1590  * from the x/y. Similarly for x/y/t space.
1591  * So, we instead calculate how much features overlap
1592  * each other in their dimension to figure out which
1593  * dimensions have useful selectivity characteristics (more
1594  * variability in density) and therefor would find
1595  * more cells useful (to distinguish between dense places and
1596  * homogeneous places).
1597  */
1598  nd_box_array_distribution(sample_boxes, notnull_cnt, &histo_extent, ndims,
1599  sample_distribution);
1600 
1601  /*
1602  * The sample_distribution array now tells us how spread out the
1603  * data is in each dimension, so we use that data to allocate
1604  * the histogram cells we have available.
1605  * At this point, histo_cells_target is the approximate target number
1606  * of cells.
1607  */
1608 
1609  /*
1610  * Some dimensions have basically a uniform distribution, we want
1611  * to allocate no cells to those dimensions, only to dimensions
1612  * that have some interesting differences in data distribution.
1613  * Here we count up the number of interesting dimensions
1614  */
1615  for ( d = 0; d < ndims; d++ )
1616  {
1617  if ( sample_distribution[d] > 0 )
1618  histo_ndims++;
1619  }
1620 
1621  if ( histo_ndims == 0 )
1622  {
1623  /* Special case: all our dimensions had low variability! */
1624  /* We just divide the cells up evenly */
1625  POSTGIS_DEBUG(3, " special case: no axes have variability");
1626  histo_cells_new = 1;
1627  for ( d = 0; d < ndims; d++ )
1628  {
1629  histo_size[d] = 1 + (int)pow((double)histo_cells_target, 1/(double)ndims);
1630  POSTGIS_DEBUGF(3, " histo_size[d]: %d", histo_size[d]);
1631  histo_cells_new *= histo_size[d];
1632  }
1633  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1634  }
1635  else
1636  {
1637  /*
1638  * We're going to express the amount of variability in each dimension
1639  * as a proportion of the total variability and allocate cells in that
1640  * dimension relative to that proportion.
1641  */
1642  POSTGIS_DEBUG(3, " allocating histogram axes based on axis variability");
1643  total_distribution = total_double(sample_distribution, ndims); /* First get the total */
1644  POSTGIS_DEBUGF(3, " total_distribution: %.8g", total_distribution);
1645  histo_cells_new = 1; /* For the number of cells in the final histogram */
1646  for ( d = 0; d < ndims; d++ )
1647  {
1648  if ( sample_distribution[d] == 0 ) /* Uninteresting dimensions don't get any room */
1649  {
1650  histo_size[d] = 1;
1651  }
1652  else /* Interesting dimension */
1653  {
1654  /* How does this dims variability compare to the total? */
1655  float edge_ratio = (float)sample_distribution[d] / (float)total_distribution;
1656  /*
1657  * Scale the target cells number by the # of dims and ratio,
1658  * then take the appropriate root to get the estimated number of cells
1659  * on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
1660  */
1661  histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
1662  /* If something goes awry, just give this dim one slot */
1663  if ( ! histo_size[d] )
1664  histo_size[d] = 1;
1665  }
1666  histo_cells_new *= histo_size[d];
1667  }
1668  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1669  }
1670 
1671  /* Update histo_cells to the actual number of cells we need to allocate */
1672  histo_cells = histo_cells_new;
1673  POSTGIS_DEBUGF(3, " histo_cells: %d", histo_cells);
1674 
1675  /*
1676  * Create the histogram (ND_STATS) in the stats memory context
1677  */
1678  old_context = MemoryContextSwitchTo(stats->anl_context);
1679  nd_stats_size = sizeof(ND_STATS) + ((histo_cells - 1) * sizeof(float4));
1680  nd_stats = palloc(nd_stats_size);
1681  memset(nd_stats, 0, nd_stats_size); /* Initialize all values to 0 */
1682  MemoryContextSwitchTo(old_context);
1683 
1684  /* Initialize the #ND_STATS objects */
1685  nd_stats->ndims = ndims;
1686  nd_stats->extent = histo_extent;
1687  nd_stats->sample_features = sample_rows;
1688  nd_stats->table_features = total_rows;
1689  nd_stats->not_null_features = notnull_cnt;
1690  /* Copy in the histogram dimensions */
1691  for ( d = 0; d < ndims; d++ )
1692  nd_stats->size[d] = histo_size[d];
1693 
1694  /*
1695  * Fourth scan:
1696  * o fill histogram values with the proportion of
1697  * features' bbox overlaps: a feature's bvol
1698  * can fully overlap (1) or partially overlap
1699  * (fraction of 1) an histogram cell.
1700  *
1701  * Note that we are filling each cell with the "portion of
1702  * the feature's box that overlaps the cell". So, if we sum
1703  * up the values in the histogram, we could get the
1704  * histogram feature count.
1705  *
1706  */
1707  for ( i = 0; i < notnull_cnt; i++ )
1708  {
1709  const ND_BOX *nd_box;
1710  ND_IBOX nd_ibox;
1711  int at[ND_DIMS];
1712  int d;
1713  double num_cells = 0;
1714  double tmp_volume = 1.0;
1715  double min[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1716  double max[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1717  double cellsize[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1718 
1719  nd_box = sample_boxes[i];
1720  if ( ! nd_box ) continue; /* Skip Null'ed out hard deviants */
1721 
1722  /* Give backend a chance of interrupting us */
1723  vacuum_delay_point();
1724 
1725  /* Find the cells that overlap with this box and put them into the ND_IBOX */
1726  nd_box_overlap(nd_stats, nd_box, &nd_ibox);
1727  memset(at, 0, sizeof(int)*ND_DIMS);
1728 
1729  POSTGIS_DEBUGF(3, " feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1730  nd_ibox.min[0], nd_ibox.min[1], nd_ibox.min[2], nd_ibox.min[3],
1731  nd_ibox.max[0], nd_ibox.max[1], nd_ibox.max[2], nd_ibox.max[3]);
1732 
1733  for ( d = 0; d < nd_stats->ndims; d++ )
1734  {
1735  /* Initialize the starting values */
1736  at[d] = nd_ibox.min[d];
1737  min[d] = nd_stats->extent.min[d];
1738  max[d] = nd_stats->extent.max[d];
1739  cellsize[d] = (max[d] - min[d])/(nd_stats->size[d]);
1740 
1741  /* What's the volume (area) of this feature's box? */
1742  tmp_volume *= (nd_box->max[d] - nd_box->min[d]);
1743  }
1744 
1745  /* Add feature volume (area) to our total */
1746  total_sample_volume += tmp_volume;
1747 
1748  /*
1749  * Move through all the overlaped histogram cells values and
1750  * add the box overlap proportion to them.
1751  */
1752  do
1753  {
1754  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1755  double ratio;
1756  /* Create a box for this histogram cell */
1757  for ( d = 0; d < nd_stats->ndims; d++ )
1758  {
1759  nd_cell.min[d] = min[d] + (at[d]+0) * cellsize[d];
1760  nd_cell.max[d] = min[d] + (at[d]+1) * cellsize[d];
1761  }
1762 
1763  /*
1764  * If a feature box is completely inside one cell the ratio will be
1765  * 1.0. If a feature box is 50% in two cells, each cell will get
1766  * 0.5 added on.
1767  */
1768  ratio = nd_box_ratio(&nd_cell, nd_box, nd_stats->ndims);
1769  nd_stats->value[nd_stats_value_index(nd_stats, at)] += ratio;
1770  num_cells += ratio;
1771  POSTGIS_DEBUGF(3, " ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1772  POSTGIS_DEBUGF(3, " at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1773  }
1774  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1775 
1776  /* Keep track of overall number of overlaps counted */
1777  total_cell_count += num_cells;
1778  /* How many features have we added to this histogram? */
1779  histogram_features++;
1780  }
1781 
1782  POSTGIS_DEBUGF(3, " histogram_features: %d", histogram_features);
1783  POSTGIS_DEBUGF(3, " sample_rows: %d", sample_rows);
1784  POSTGIS_DEBUGF(3, " table_rows: %.6g", total_rows);
1785 
1786  /* Error out if we got no sample information */
1787  if ( ! histogram_features )
1788  {
1789  POSTGIS_DEBUG(3, " no stats have been gathered");
1790  elog(NOTICE, " no features lie in the stats histogram, invalid stats");
1791  stats->stats_valid = false;
1792  return;
1793  }
1794 
1795  nd_stats->histogram_features = histogram_features;
1796  nd_stats->histogram_cells = histo_cells;
1797  nd_stats->cells_covered = total_cell_count;
1798 
1799  /* Put this histogram data into the right slot/kind */
1800  if ( mode == 2 )
1801  {
1802  stats_slot = STATISTIC_SLOT_2D;
1803  stats_kind = STATISTIC_KIND_2D;
1804  }
1805  else
1806  {
1807  stats_slot = STATISTIC_SLOT_ND;
1808  stats_kind = STATISTIC_KIND_ND;
1809  }
1810 
1811  /* Write the statistics data */
1812  stats->stakind[stats_slot] = stats_kind;
1813  stats->staop[stats_slot] = InvalidOid;
1814  stats->stanumbers[stats_slot] = (float4*)nd_stats;
1815  stats->numnumbers[stats_slot] = nd_stats_size/sizeof(float4);
1816  stats->stanullfrac = (float4)null_cnt/sample_rows;
1817  stats->stawidth = total_width/notnull_cnt;
1818  stats->stadistinct = -1.0;
1819  stats->stats_valid = true;
1820 
1821  POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1822  POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1823  POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
1824  POSTGIS_DEBUGF(3, " out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1825  POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
1826  POSTGIS_DEBUG (3, " out: distinct values: all (no check done)");
1827  POSTGIS_DEBUGF(3, " out: %s", nd_stats_to_json(nd_stats));
1828  /*
1829  POSTGIS_DEBUGF(3, " out histogram:\n%s", nd_stats_to_grid(nd_stats));
1830  */
1831 
1832  return;
1833 }
1834 
1835 
1853 static void
1854 compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1855  int sample_rows, double total_rows)
1856 {
1857  /* 2D Mode */
1858  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
1859  /* ND Mode */
1860  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 0);
1861 }
1862 
1863 
1892 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
1893 {
1894  VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1895  Form_pg_attribute attr = stats->attr;
1896 
1897  POSTGIS_DEBUG(2, "gserialized_analyze_nd called");
1898 
1899  /* If the attstattarget column is negative, use the default value */
1900  /* NB: it is okay to scribble on stats->attr since it's a copy */
1901  if (attr->attstattarget < 0)
1902  attr->attstattarget = default_statistics_target;
1903 
1904  POSTGIS_DEBUGF(3, " attribute stat target: %d", attr->attstattarget);
1905 
1906  /* Setup the minimum rows and the algorithm function */
1907  stats->minrows = 300 * stats->attr->attstattarget;
1908  stats->compute_stats = compute_gserialized_stats;
1909 
1910  POSTGIS_DEBUGF(3, " minrows: %d", stats->minrows);
1911 
1912  /* Indicate we are done successfully */
1913  PG_RETURN_BOOL(true);
1914 }
1915 
1928 static float8
1929 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
1930 {
1931  int d; /* counter */
1932  float8 selectivity;
1933  ND_BOX nd_box;
1934  ND_IBOX nd_ibox;
1935  int at[ND_DIMS];
1936  double cell_size[ND_DIMS];
1937  double min[ND_DIMS];
1938  double max[ND_DIMS];
1939  double total_count = 0.0;
1940  int ndims_max;
1941 
1942  /* Calculate the overlap of the box on the histogram */
1943  if ( ! nd_stats )
1944  {
1945  elog(NOTICE, " estimate_selectivity called with null input");
1946  return FALLBACK_ND_SEL;
1947  }
1948 
1949  ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1950 
1951  /* Initialize nd_box. */
1952  nd_box_from_gbox(box, &nd_box);
1953 
1954  /*
1955  * To return 2D stats on an ND sample, we need to make the
1956  * 2D box cover the full range of the other dimensions in the
1957  * histogram.
1958  */
1959  POSTGIS_DEBUGF(3, " mode: %d", mode);
1960  if ( mode == 2 )
1961  {
1962  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1963  ndims_max = 2;
1964  }
1965 
1966  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1967  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1968 
1969  /*
1970  * Search box completely misses histogram extent?
1971  * We have to intersect in all N dimensions or else we have
1972  * zero interaction under the &&& operator. It's important
1973  * to short circuit in this case, as some of the tests below
1974  * will return junk results when run on non-intersecting inputs.
1975  */
1976  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1977  {
1978  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1979  return 0.0;
1980  }
1981 
1982  /* Search box completely contains histogram extent! */
1983  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1984  {
1985  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1986  return 1.0;
1987  }
1988 
1989  /* Calculate the overlap of the box on the histogram */
1990  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
1991  {
1992  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
1993  return FALLBACK_ND_SEL;
1994  }
1995 
1996  /* Work out some measurements of the histogram */
1997  for ( d = 0; d < nd_stats->ndims; d++ )
1998  {
1999  /* Cell size in each dim */
2000  min[d] = nd_stats->extent.min[d];
2001  max[d] = nd_stats->extent.max[d];
2002  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
2003  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2004 
2005  /* Initialize the counter */
2006  at[d] = nd_ibox.min[d];
2007  }
2008 
2009  /* Move through all the overlap values and sum them */
2010  do
2011  {
2012  float cell_count, ratio;
2013  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2014 
2015  /* We have to pro-rate partially overlapped cells. */
2016  for ( d = 0; d < nd_stats->ndims; d++ )
2017  {
2018  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2019  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2020  }
2021 
2022  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2023  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2024 
2025  /* Add the pro-rated count for this cell to the overall total */
2026  total_count += cell_count * ratio;
2027  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2028  }
2029  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2030 
2031  /* Scale by the number of features in our histogram to get the proportion */
2032  selectivity = total_count / nd_stats->histogram_features;
2033 
2034  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2035  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2036  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2037  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2038 
2039  /* Prevent rounding overflows */
2040  if (selectivity > 1.0) selectivity = 1.0;
2041  else if (selectivity < 0.0) selectivity = 0.0;
2042 
2043  return selectivity;
2044 }
2045 
2046 
2047 
2053 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
2054 {
2055  Oid table_oid = PG_GETARG_OID(0);
2056  text *att_text = PG_GETARG_TEXT_P(1);
2057  ND_STATS *nd_stats;
2058  char *str;
2059  text *json;
2060  int mode = 2; /* default to 2D mode */
2061  bool only_parent = false; /* default to whole tree stats */
2062 
2063  /* Check if we've been asked to not use 2d mode */
2064  if ( ! PG_ARGISNULL(2) )
2065  mode = text_p_get_mode(PG_GETARG_TEXT_P(2));
2066 
2067  /* Check if we've been asked to only use stats from parent */
2068  if ( ! PG_ARGISNULL(3) )
2069  only_parent = PG_GETARG_BOOL(3);
2070 
2071  /* Retrieve the stats object */
2072  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, only_parent);
2073  if ( ! nd_stats )
2074  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2075 
2076  /* Convert to JSON */
2077  str = nd_stats_to_json(nd_stats);
2078  json = cstring_to_text(str);
2079  pfree(str);
2080  pfree(nd_stats);
2081  PG_RETURN_TEXT_P(json);
2082 }
2083 
2084 
2090 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
2091 {
2092  Oid table_oid = PG_GETARG_OID(0);
2093  text *att_text = PG_GETARG_TEXT_P(1);
2094  Datum geom_datum = PG_GETARG_DATUM(2);
2095  GBOX gbox; /* search box read from gserialized datum */
2096  float8 selectivity = 0;
2097  ND_STATS *nd_stats;
2098  int mode = 2; /* 2D mode by default */
2099 
2100  /* Check if we've been asked to not use 2d mode */
2101  if ( ! PG_ARGISNULL(3) )
2102  mode = text_p_get_mode(PG_GETARG_TEXT_P(3));
2103 
2104  /* Retrieve the stats object */
2105  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, false);
2106 
2107  if ( ! nd_stats )
2108  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2109 
2110  /* Calculate the gbox */
2111  if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
2112  elog(ERROR, "unable to calculate bounding box from geometry");
2113 
2114  POSTGIS_DEBUGF(3, " %s", gbox_to_string(&gbox));
2115 
2116  /* Do the estimation */
2117  selectivity = estimate_selectivity(&gbox, nd_stats, mode);
2118 
2119  pfree(nd_stats);
2120  PG_RETURN_FLOAT8(selectivity);
2121 }
2122 
2123 
2129 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
2130 {
2131  Oid table_oid1 = PG_GETARG_OID(0);
2132  text *att_text1 = PG_GETARG_TEXT_P(1);
2133  Oid table_oid2 = PG_GETARG_OID(2);
2134  text *att_text2 = PG_GETARG_TEXT_P(3);
2135  ND_STATS *nd_stats1, *nd_stats2;
2136  float8 selectivity = 0;
2137  int mode = 2; /* 2D mode by default */
2138 
2139 
2140  /* Retrieve the stats object */
2141  nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode, false);
2142  nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode, false);
2143 
2144  if ( ! nd_stats1 )
2145  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text_to_cstring(att_text1));
2146 
2147  if ( ! nd_stats2 )
2148  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid2), text_to_cstring(att_text2));
2149 
2150  /* Check if we've been asked to not use 2d mode */
2151  if ( ! PG_ARGISNULL(4) )
2152  {
2153  text *modetxt = PG_GETARG_TEXT_P(4);
2154  char *modestr = text_to_cstring(modetxt);
2155  if ( modestr[0] == 'N' )
2156  mode = 0;
2157  }
2158 
2159  /* Do the estimation */
2160  selectivity = estimate_join_selectivity(nd_stats1, nd_stats2);
2161 
2162  pfree(nd_stats1);
2163  pfree(nd_stats2);
2164  PG_RETURN_FLOAT8(selectivity);
2165 }
2166 
2172 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
2173 {
2174  PG_RETURN_DATUM(DirectFunctionCall5(
2176  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2177  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2178  Int32GetDatum(2) /* 2-D mode */
2179  ));
2180 }
2181 
2187 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
2188 {
2189  PG_RETURN_DATUM(DirectFunctionCall5(
2191  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2192  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2193  Int32GetDatum(0) /* N-D mode */
2194  ));
2195 }
2196 
2211 Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
2212 {
2213  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2214  /* Oid operator_oid = PG_GETARG_OID(1); */
2215  List *args = (List *) PG_GETARG_POINTER(2);
2216  /* int varRelid = PG_GETARG_INT32(3); */
2217  int mode = PG_GETARG_INT32(4);
2218 
2219  VariableStatData vardata;
2220  ND_STATS *nd_stats = NULL;
2221 
2222  Node *other;
2223  Var *self;
2224  GBOX search_box;
2225  float8 selectivity = 0;
2226 
2227  POSTGIS_DEBUG(2, "gserialized_gist_sel called");
2228 
2229  /*
2230  * TODO: This is a big one,
2231  * All this statistics code *only* tries to generate a valid
2232  * selectivity for && and &&&. That leaves all the other
2233  * geometry operators with bad stats! The selectivity
2234  * calculation should take account of the incoming operator
2235  * type and do the right thing.
2236  */
2237 
2238  /* Fail if not a binary opclause (probably shouldn't happen) */
2239  if (list_length(args) != 2)
2240  {
2241  POSTGIS_DEBUG(3, "gserialized_gist_sel: not a binary opclause");
2242  PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
2243  }
2244 
2245  /* Find the constant part */
2246  other = (Node *) linitial(args);
2247  if ( ! IsA(other, Const) )
2248  {
2249  self = (Var *)other;
2250  other = (Node *) lsecond(args);
2251  }
2252  else
2253  {
2254  self = (Var *) lsecond(args);
2255  }
2256 
2257  if ( ! IsA(other, Const) )
2258  {
2259  POSTGIS_DEBUG(3, " no constant arguments - returning a default selectivity");
2260  PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
2261  }
2262 
2263  /* Convert the constant to a BOX */
2264  if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
2265  {
2266  POSTGIS_DEBUG(3, "search box is EMPTY");
2267  PG_RETURN_FLOAT8(0.0);
2268  }
2269  POSTGIS_DEBUGF(4, " requested search box is: %s", gbox_to_string(&search_box));
2270 
2271  /* Get pg_statistic row */
2272  examine_variable(root, (Node*)self, 0, &vardata);
2273  if ( vardata.statsTuple ) {
2274  nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
2275  }
2276  ReleaseVariableStats(vardata);
2277 
2278  if ( ! nd_stats )
2279  {
2280  POSTGIS_DEBUG(3, " unable to load stats from syscache, not analyzed yet?");
2281  PG_RETURN_FLOAT8(FALLBACK_ND_SEL);
2282  }
2283 
2284  POSTGIS_DEBUGF(4, " got stats:\n%s", nd_stats_to_json(nd_stats));
2285 
2286  /* Do the estimation! */
2287  selectivity = estimate_selectivity(&search_box, nd_stats, mode);
2288  POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);
2289 
2290  pfree(nd_stats);
2291  PG_RETURN_FLOAT8(selectivity);
2292 }
2293 
2294 
2295 
2302 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
2303 {
2304  char *nsp = NULL;
2305  char *tbl = NULL;
2306  text *col = NULL;
2307  char *nsp_tbl = NULL;
2308  Oid tbl_oid, idx_oid;
2309  ND_STATS *nd_stats;
2310  GBOX *gbox = NULL;
2311  bool only_parent = false;
2312  int key_type;
2313 
2314  if ( PG_NARGS() == 4 )
2315  {
2316  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2317  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2318  col = PG_GETARG_TEXT_P(2);
2319  only_parent = PG_GETARG_BOOL(3);
2320  nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 6);
2321  sprintf(nsp_tbl, "\"%s\".\"%s\"", nsp, tbl);
2322  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2323  pfree(nsp_tbl);
2324  }
2325  else if ( PG_NARGS() == 3 )
2326  {
2327  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2328  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2329  col = PG_GETARG_TEXT_P(2);
2330  nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 6);
2331  sprintf(nsp_tbl, "\"%s\".\"%s\"", nsp, tbl);
2332  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2333  pfree(nsp_tbl);
2334  }
2335  else if ( PG_NARGS() == 2 )
2336  {
2337  tbl = text_to_cstring(PG_GETARG_TEXT_P(0));
2338  col = PG_GETARG_TEXT_P(1);
2339  nsp_tbl = palloc(strlen(tbl) + 3);
2340  sprintf(nsp_tbl, "\"%s\"", tbl);
2341  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2342  pfree(nsp_tbl);
2343  }
2344  else
2345  {
2346  elog(ERROR, "estimated_extent() called with wrong number of arguments");
2347  PG_RETURN_NULL();
2348  }
2349 
2350 #if 1
2351  /* Read the extent from the head of the spatial index, if there is one */
2352  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type);
2353  if (!idx_oid)
2354  elog(DEBUG2, "index for \"%s.%s\" does not exist", tbl, text_to_cstring(col));
2355  gbox = spatial_index_read_extent(idx_oid, key_type);
2356 #endif
2357 
2358  /* Fall back to reading the stats, if no index answer */
2359  if (!gbox)
2360  {
2361  /* Estimated extent only returns 2D bounds, so use mode 2 */
2362  nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2, only_parent);
2363 
2364  /* Error out on no stats */
2365  if ( ! nd_stats ) {
2366  elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, text_to_cstring(col));
2367  PG_RETURN_NULL();
2368  }
2369 
2370  /* Construct the box */
2371  gbox = palloc(sizeof(GBOX));
2372  FLAGS_SET_GEODETIC(gbox->flags, 0);
2373  FLAGS_SET_Z(gbox->flags, 0);
2374  FLAGS_SET_M(gbox->flags, 0);
2375  gbox->xmin = nd_stats->extent.min[0];
2376  gbox->xmax = nd_stats->extent.max[0];
2377  gbox->ymin = nd_stats->extent.min[1];
2378  gbox->ymax = nd_stats->extent.max[1];
2379  pfree(nd_stats);
2380  }
2381 
2382  PG_RETURN_POINTER(gbox);
2383 }
2384 
2392 Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
2393 {
2394  if ( PG_NARGS() == 3 )
2395  {
2396  PG_RETURN_DATUM(
2397  DirectFunctionCall3(gserialized_estimated_extent,
2398  PG_GETARG_DATUM(0),
2399  PG_GETARG_DATUM(1),
2400  PG_GETARG_DATUM(2)));
2401  }
2402  else if ( PG_NARGS() == 2 )
2403  {
2404  PG_RETURN_DATUM(
2405  DirectFunctionCall2(gserialized_estimated_extent,
2406  PG_GETARG_DATUM(0),
2407  PG_GETARG_DATUM(1)));
2408  }
2409 
2410  elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
2411  PG_RETURN_NULL();
2412 }
2413 
2414 /************************************************************************/
2415 
2416 static Oid
2417 typname_to_oid(const char *typname)
2418 {
2419  Oid typoid = TypenameGetTypid(typname);
2420  if (OidIsValid(typoid) && get_typisdefined(typoid))
2421  return typoid;
2422  else
2423  return InvalidOid;
2424 }
2425 
2426 static Oid
2427 table_get_spatial_index(Oid tbl_oid, text *col, int *key_type)
2428 {
2429  Relation tbl_rel;
2430  ListCell *lc;
2431  List *idx_list;
2432  Oid result = InvalidOid;
2433  char *colname = text_to_cstring(col);
2434 
2435  /* Lookup our spatial index key types */
2436  Oid b2d_oid = typname_to_oid(INDEX_KEY_2D);
2437  Oid gdx_oid = typname_to_oid(INDEX_KEY_ND);
2438 
2439  if (!(b2d_oid && gdx_oid))
2440  return InvalidOid;
2441 
2442  tbl_rel = RelationIdGetRelation(tbl_oid);
2443  idx_list = RelationGetIndexList(tbl_rel);
2444  RelationClose(tbl_rel);
2445 
2446  /* For each index associated with this table... */
2447  foreach(lc, idx_list)
2448  {
2449  Form_pg_class idx_form;
2450  HeapTuple idx_tup;
2451  int idx_relam;
2452  Oid idx_oid = lfirst_oid(lc);
2453 
2454  idx_tup = SearchSysCache1(RELOID, ObjectIdGetDatum(idx_oid));
2455  if (!HeapTupleIsValid(idx_tup))
2456  elog(ERROR, "%s: unable to lookup index %u in syscache", __func__, idx_oid);
2457  idx_form = (Form_pg_class) GETSTRUCT(idx_tup);
2458  idx_relam = idx_form->relam;
2459  ReleaseSysCache(idx_tup);
2460 
2461  /* Does the index use a GIST access method? */
2462  if (idx_relam == GIST_AM_OID)
2463  {
2464  Form_pg_attribute att;
2465  Oid atttypid;
2466  /* Is the index on the column name we are looking for? */
2467  HeapTuple att_tup = SearchSysCache2(ATTNAME,
2468  ObjectIdGetDatum(idx_oid),
2469  PointerGetDatum(colname));
2470  if (!HeapTupleIsValid(att_tup))
2471  continue;
2472 
2473  att = (Form_pg_attribute) GETSTRUCT(att_tup);
2474  atttypid = att->atttypid;
2475  ReleaseSysCache(att_tup);
2476 
2477  /* Is the column actually spatial? */
2478  if (b2d_oid == atttypid || gdx_oid == atttypid)
2479  {
2480  /* Save result, clean up, and break out */
2481  result = idx_oid;
2482  if (key_type)
2483  *key_type = (atttypid == b2d_oid ? STATISTIC_SLOT_2D : STATISTIC_SLOT_ND);
2484  break;
2485  }
2486  }
2487  }
2488  return result;
2489 }
2490 
2491 static GBOX *
2492 spatial_index_read_extent(Oid idx_oid, int key_type)
2493 {
2494  BOX2DF *bounds_2df = NULL;
2495  GIDX *bounds_gidx = NULL;
2496  GBOX *gbox = NULL;
2497  Relation idx_rel;
2498  Buffer buffer;
2499  Page page;
2500  OffsetNumber offset;
2501  unsigned long offset_max;
2502 
2503  if (!idx_oid)
2504  return NULL;
2505 
2506  idx_rel = index_open(idx_oid, AccessExclusiveLock);
2507  buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2508  page = (Page) BufferGetPage(buffer);
2509  offset = FirstOffsetNumber;
2510  offset_max = PageGetMaxOffsetNumber(page);
2511  while (offset <= offset_max)
2512  {
2513  ItemId iid = PageGetItemId(page, offset);
2514  IndexTuple ituple;
2515  if (!iid)
2516  {
2517  ReleaseBuffer(buffer);
2518  index_close(idx_rel, AccessExclusiveLock);
2519  return NULL;
2520  }
2521  ituple = (IndexTuple) PageGetItem(page, iid);
2522  if (!GistTupleIsInvalid(ituple))
2523  {
2524  bool isnull;
2525  Datum idx_attr = index_getattr(ituple, 1, idx_rel->rd_att, &isnull);
2526  if (!isnull)
2527  {
2528  if (key_type == STATISTIC_SLOT_2D)
2529  {
2530  BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2531  if (bounds_2df)
2532  box2df_merge(bounds_2df, b);
2533  else
2534  bounds_2df = box2df_copy(b);
2535  }
2536  else
2537  {
2538  GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2539  if (bounds_gidx)
2540  gidx_merge(&bounds_gidx, b);
2541  else
2542  bounds_gidx = gidx_copy(b);
2543  }
2544  }
2545  }
2546  offset++;
2547  }
2548 
2549  ReleaseBuffer(buffer);
2550  index_close(idx_rel, AccessExclusiveLock);
2551 
2552  if (key_type == STATISTIC_SLOT_2D && bounds_2df)
2553  {
2554  if (box2df_is_empty(bounds_2df))
2555  return NULL;
2556  gbox = gbox_new(0);
2557  box2df_to_gbox_p(bounds_2df, gbox);
2558  }
2559  else if (key_type == STATISTIC_SLOT_ND && bounds_gidx)
2560  {
2561  if (gidx_is_unknown(bounds_gidx))
2562  return NULL;
2563  gbox = gbox_new(0);
2564  gbox_from_gidx(bounds_gidx, gbox, 0);
2565  }
2566  else
2567  return NULL;
2568 
2569  return gbox;
2570 }
2571 
2572 /*
2573 CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
2574  RETURNS box2d
2575  AS '$libdir/postgis-2.5','_postgis_gserialized_index_extent'
2576  LANGUAGE 'c' STABLE STRICT;
2577 */
2578 
2580 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
2581 {
2582  GBOX *gbox = NULL;
2583  int key_type;
2584  Oid tbl_oid = PG_GETARG_DATUM(0);
2585  text *col = PG_GETARG_TEXT_P(1);
2586 
2587  Oid idx_oid = table_get_spatial_index(tbl_oid, col, &key_type);
2588  if (!idx_oid)
2589  PG_RETURN_NULL();
2590 
2591  gbox = spatial_index_read_extent(idx_oid, key_type);
2592  if (!gbox)
2593  PG_RETURN_NULL();
2594  else
2595  PG_RETURN_POINTER(gbox);
2596 }
2597 
args
Definition: ovdump.py:44
int gserialized_get_gbox_p(const GSERIALIZED *g, GBOX *box)
Read the bounding box off a serialization and calculate one if it is not already there.
Definition: g_serialized.c:639
char * text_to_cstring(const text *textptr)
#define INDEX_KEY_ND
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one...
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
GBOX * gbox_new(uint8_t flags)
Create a new gbox with the dimensionality indicated by the flags.
Definition: g_box.c:39
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
#define DEFAULT_ND_JOINSEL
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
Definition: stringbuffer.c:33
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
#define NUM_BINS
static void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
Definition: stringbuffer.h:88
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition: g_box.c:204
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
Definition: g_box.c:399
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define FLAGS_GET_GEODETIC(flags)
Definition: liblwgeom.h:142
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string...
Definition: stringbuffer.c:124
double xmax
Definition: liblwgeom.h:295
static Oid typname_to_oid(const char *typname)
#define ND_DIMS
The maximum number of dimensions our code can handle.
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
#define FALLBACK_ND_JOINSEL
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
BOX2DF * box2df_copy(BOX2DF *b)
#define FLAGS_SET_GEODETIC(flags, value)
Definition: liblwgeom.h:148
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
#define LW_FAILURE
Definition: liblwgeom.h:78
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided...
Definition: stringbuffer.c:217
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
#define FLAGS_SET_Z(flags, value)
Definition: liblwgeom.h:145
Datum buffer(PG_FUNCTION_ARGS)
double zmax
Definition: liblwgeom.h:299
double ymin
Definition: liblwgeom.h:296
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension...
double xmin
Definition: liblwgeom.h:294
#define INDEX_KEY_2D
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array...
float4 size[ND_DIMS]
static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type)
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
int min[ND_DIMS]
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
double ymax
Definition: liblwgeom.h:297
N-dimensional box index type.
#define FLAGS_GET_Z(flags)
Macros for manipulating the 'flags' byte.
Definition: liblwgeom.h:139
static int range_quintile(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator...
uint8_t flags
Definition: liblwgeom.h:293
static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type)
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
float4 max[ND_DIMS]
#define STATISTIC_KIND_2D
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
Definition: stringbuffer.c:76
int max[ND_DIMS]
bool gidx_is_unknown(const GIDX *a)
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
float4 min[ND_DIMS]
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Create a printable view of the ND_STATS histogram.
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
double mmin
Definition: liblwgeom.h:300
#define SDFACTOR
double zmin
Definition: liblwgeom.h:298
#define FLAGS_GET_M(flags)
Definition: liblwgeom.h:140
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
double mmax
Definition: liblwgeom.h:301
#define STATISTIC_SLOT_ND
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
int value
Definition: genraster.py:61
N-dimensional statistics structure.
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
GIDX * gidx_copy(GIDX *b)
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
bool box2df_is_empty(const BOX2DF *a)
This library is the generic geometry handling section of PostGIS.
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
#define STATISTIC_KIND_ND
#define FLAGS_SET_M(flags, value)
Definition: liblwgeom.h:146