PostGIS  2.5.2dev-r@@SVN_REVISION@@
gserialized_estimate.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * PostGIS is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * PostGIS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18  *
19  **********************************************************************
20  *
21  * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
22  *
23  **********************************************************************/
24 
25 
26 
27 /**********************************************************************
28  THEORY OF OPERATION
29 
30 The ANALYZE command hooks to a callback (gserialized_analyze_nd) that
31 calculates (compute_gserialized_stats_mode) two histograms of occurrences of
32 features, one for the 2D domain (and the && operator) and one for the
33 ND domain (and the &&& operator).
34 
35 Queries in PostgreSQL call into the selectivity sub-system to find out
36 the relative effectiveness of different clauses in sub-setting
37 relations. Queries with constant arguments call gserialized_gist_sel,
38 queries with relations on both sides call gserialized_gist_joinsel.
39 
40 gserialized_gist_sel sums up the values in the histogram that overlap
41 the constant search box.
42 
43 gserialized_gist_joinsel sums up the product of the overlapping
44 cells in each relation's histogram.
45 
46 Depending on the operator and type, the mode of selectivity calculation
47 will be 2D or ND.
48 
49 - geometry && geometry ==> 2D
50 - geometry &&& geometry ==> ND
51 - geography && geography ==> ND
52 
53 The 2D mode is put in effect by retrieving the 2D histogram from the
54 statistics cache and then allowing the generic ND calculations to
55 go to work.
56 
57 TO DO: More testing and examination of the &&& operator and mixed
58 dimensionality cases. (2D geometry) &&& (3D column), etc.
59 
60 **********************************************************************/
61 
62 #include "postgres.h"
63 
64 #include "access/genam.h"
65 #include "access/gin.h"
66 #include "access/gist.h"
67 #include "access/gist_private.h"
68 #include "access/gistscan.h"
69 #include "utils/datum.h"
70 #include "access/heapam.h"
71 #include "catalog/index.h"
72 #include "catalog/pg_am.h"
73 #include "miscadmin.h"
74 #include "storage/lmgr.h"
75 #include "catalog/namespace.h"
76 #include "catalog/indexing.h"
77 #if PG_VERSION_NUM >= 100000
78 #include "utils/regproc.h"
79 #include "utils/varlena.h"
80 #endif
81 #include "utils/tqual.h"
82 #include "utils/builtins.h"
83 #include "utils/datum.h"
84 #include "utils/snapmgr.h"
85 #include "utils/fmgroids.h"
86 #include "funcapi.h"
87 #include "access/heapam.h"
88 #include "catalog/pg_type.h"
89 #include "access/relscan.h"
90 
91 #include "executor/spi.h"
92 #include "fmgr.h"
93 #include "commands/vacuum.h"
94 #include "nodes/relation.h"
95 #include "parser/parsetree.h"
96 #include "utils/array.h"
97 #include "utils/lsyscache.h"
98 #include "utils/builtins.h"
99 #include "utils/syscache.h"
100 #include "utils/rel.h"
101 #include "utils/selfuncs.h"
102 
103 #include "../postgis_config.h"
104 
105 #if POSTGIS_PGSQL_VERSION >= 93
106  #include "access/htup_details.h"
107 #endif
108 
109 #include "stringbuffer.h"
110 #include "liblwgeom.h"
111 #include "lwgeom_pg.h" /* For debugging macros. */
112 #include "gserialized_gist.h" /* For index common functions */
113 
114 #include <math.h>
115 #if HAVE_IEEEFP_H
116 #include <ieeefp.h>
117 #endif
118 #include <float.h>
119 #include <string.h>
120 #include <stdio.h>
121 #include <errno.h>
122 #include <ctype.h>
123 
124 
125 /************************************************************************/
126 
127 
128 /* Fall back to older finite() if necessary */
129 #ifndef HAVE_ISFINITE
130 # ifdef HAVE_GNU_ISFINITE
131 # define _GNU_SOURCE
132 # else
133 # define isfinite finite
134 # endif
135 #endif
136 
137 
138 /* Prototypes */
139 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS);
140 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS);
141 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS);
142 Datum gserialized_gist_sel(PG_FUNCTION_ARGS);
143 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS);
144 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
145 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
146 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
147 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS);
148 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
149 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
150 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
151 
152 /* Local prototypes */
153 static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type);
154 static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type);
155 
156 
157 /* Old Prototype */
158 Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
159 
160 /*
161 * Assign a number to the n-dimensional statistics kind
162 *
163 * tgl suggested:
164 *
165 * 1-100: reserved for assignment by the core Postgres project
166 * 100-199: reserved for assignment by PostGIS
167 * 200-9999: reserved for other globally-known stats kinds
168 * 10000-32767: reserved for private site-local use
169 */
170 #define STATISTIC_KIND_ND 102
171 #define STATISTIC_KIND_2D 103
172 #define STATISTIC_SLOT_ND 0
173 #define STATISTIC_SLOT_2D 1
174 
175 /*
176 * To look-up the spatial index associated with a table we
177 * need to find GIST indexes using our spatial keys.
178 */
179 #define INDEX_KEY_ND "gidx"
180 #define INDEX_KEY_2D "box2df"
181 
182 /*
183 * The SD factor restricts the side of the statistics histogram
184 * based on the standard deviation of the extent of the data.
185 * SDFACTOR is the number of standard deviations from the mean
186 * the histogram will extend.
187 */
188 #define SDFACTOR 3.25
189 
/* Number of slots in every per-dimension array in this file (ND_BOX, ND_IBOX, */
/* ND_STATS.size and the loops that walk them). */
195 #define ND_DIMS 4
196 
/* A dimension narrower than this is treated as degenerate: nd_box_expand leaves */
/* it alone and nd_box_array_distribution reports a distribution of 0 for it. */
203 #define MIN_DIMENSION_WIDTH 0.000000001
204 
/* A dimension wider than this is likewise skipped by nd_box_expand and */
/* nd_box_array_distribution. */
209 #define MAX_DIMENSION_WIDTH 1.0E+20
210 
/* Selectivities handed back when statistics are unavailable or the computed */
/* estimate is unusable; DEFAULT_ND_JOINSEL is what the join estimators in this */
/* file return in those cases. */
/* NOTE(review): DEFAULT_ND_SEL is presumably the single-relation counterpart; */
/* its use is not visible in this part of the file - confirm. */
214 #define DEFAULT_ND_SEL 0.0001
215 #define DEFAULT_ND_JOINSEL 0.001
216 
/* Selectivities used when an estimate cannot be computed at all, e.g. */
/* estimate_join_selectivity returns FALLBACK_ND_JOINSEL when the histogram */
/* overlap cannot be determined. */
220 #define FALLBACK_ND_SEL 0.2
221 #define FALLBACK_ND_JOINSEL 0.3
222 
228 typedef struct ND_BOX_T
229 {
230  float4 min[ND_DIMS];
231  float4 max[ND_DIMS];
232 } ND_BOX;
233 
237 typedef struct ND_IBOX_T
238 {
239  int min[ND_DIMS];
240  int max[ND_DIMS];
241 } ND_IBOX;
242 
243 
/*
 * N-dimensional histogram statistics for one column.
 *
 * pg_nd_stats_from_tuple copies the float4 array read from the statistics
 * slot straight into a structure of this type, so the record is laid out as
 * a run of float4 values with the histogram cells in the trailing value[]
 * array.
 *
 * NOTE(review): only ndims, size[] and value[] are declared in the visible
 * text, yet code in this file reads members named extent, table_features,
 * sample_features, not_null_features, histogram_features, histogram_cells
 * and cells_covered. The declarations that should follow the comments below
 * appear to be missing here - confirm against the upstream source before
 * relying on this definition.
 */
250 typedef struct ND_STATS_T
251 {
252  /* Dimensionality of the histogram. */
253  float4 ndims;
254 
255  /* Size of n-d histogram in each dimension. */
256  float4 size[ND_DIMS];
257 
258  /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
260 
261  /* How many rows in the table itself? */
263 
264  /* How many rows were in the sample that built this histogram? */
266 
267  /* How many not-Null/Empty features were in the sample? */
269 
270  /* How many features actually got sampled in the histogram? */
272 
273  /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
275 
276  /* How many cells did those histogram features cover? */
277  /* Since we are pro-rating coverage, this number should */
278  /* now always equal histogram_features */
280 
281  /* Variable length # of floats for histogram */
282  float4 value[1];
283 } ND_STATS;
284 
285 
286 
287 
294 static int
295 gbox_ndims(const GBOX* gbox)
296 {
297  int dims = 2;
298  if ( FLAGS_GET_GEODETIC(gbox->flags) )
299  return 3;
300  if ( FLAGS_GET_Z(gbox->flags) )
301  dims++;
302  if ( FLAGS_GET_M(gbox->flags) )
303  dims++;
304  return dims;
305 }
306 
312 static int
313 text_p_get_mode(const text *txt)
314 {
315  int mode = 2;
316  char *modestr;
317  if (VARSIZE(txt) - VARHDRSZ <= 0)
318  return mode;
319  modestr = (char*)VARDATA(txt);
320  if ( modestr[0] == 'N' )
321  mode = 0;
322  return mode;
323 }
324 
325 
/*
 * qsort() comparator for int values, ascending order.
 *
 * Returns -1, 0 or 1 when *a is respectively smaller than, equal to or
 * larger than *b.
 */
static int
cmp_int (const void *a, const void *b)
{
	const int lhs = *((const int*)a);
	const int rhs = *((const int*)b);

	/* Comparison results are 0/1, so the difference is exactly -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}
342 
347 static int
348 range_quintile(int *vals, int nvals)
349 {
350  qsort(vals, nvals, sizeof(int), cmp_int);
351  return vals[4*nvals/5] - vals[nvals/5];
352 }
353 
/*
 * Sum of the first nvals entries of an array of doubles.
 *
 * The running total is kept in a double: accumulating in a float would
 * round every partial sum to single precision and silently drop small
 * addends once the total grows large.
 *
 * Returns 0 when nvals is zero or negative.
 */
static double
total_double(const double *vals, int nvals)
{
	int i;
	double total = 0.0;

	for ( i = 0; i < nvals; i++ )
		total += vals[i];

	return total;
}
368 
369 #if POSTGIS_DEBUG_LEVEL >= 3
370 
/*
 * Sum of the first nvals entries of an int array (debug builds only).
 * Returns 0 when nvals is zero or negative.
 */
static int
total_int(const int *vals, int nvals)
{
	int sum = 0;
	int pos = 0;

	while ( pos < nvals )
	{
		sum += vals[pos];
		pos++;
	}

	return sum;
}
385 
/*
 * Arithmetic mean of the first nvals entries of an int array
 * (debug builds only). The division is carried out in double precision.
 */
static double
avg(const int *vals, int nvals)
{
	const double sum = (double)total_int(vals, nvals);
	const double count = (double)nvals;

	return sum / count;
}
395 
/*
 * Population standard deviation of the first nvals entries of an int
 * array (debug builds only): square root of the mean squared distance
 * from the average.
 */
static double
stddev(const int *vals, int nvals)
{
	const double mean = avg(vals, nvals);
	double squares = 0;
	int pos;

	/* Accumulate the squared deviations from the mean */
	for ( pos = 0; pos < nvals; pos++ )
	{
		const double sample = (double)(vals[pos]);
		squares += (mean - sample) * (mean - sample);
	}

	return sqrt(squares / nvals);
}
414 #endif /* POSTGIS_DEBUG_LEVEL >= 3 */
415 
420 static int
421 nd_stats_value_index(const ND_STATS *stats, int *indexes)
422 {
423  int d;
424  int accum = 1, vdx = 0;
425 
426  /* Calculate the index into the 1-d values array that the (i,j,k,l) */
427  /* n-d histogram coordinate implies. */
428  /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
429  for ( d = 0; d < (int)(stats->ndims); d++ )
430  {
431  int size = (int)(stats->size[d]);
432  if ( indexes[d] < 0 || indexes[d] >= size )
433  {
434  POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
435  return -1;
436  }
437  vdx += indexes[d] * accum;
438  accum *= size;
439  }
440  return vdx;
441 }
442 
/*
 * Render the first ndims dimensions of an ND_BOX as a JSON object of the
 * form {"min":[...],"max":[...]}, each number printed with "%.6g".
 *
 * A caller in this file releases the returned string with pfree().
 *
 * NOTE(review): the declaration/creation of the string buffer `sb`, the
 * assignment of `rv` and the release of the buffer are not present in the
 * visible text - confirm against the upstream source.
 */
446 static char*
447 nd_box_to_json(const ND_BOX *nd_box, int ndims)
448 {
449  char *rv;
450  int i;
452 
453  stringbuffer_append(sb, "{\"min\":[");
454  for ( i = 0; i < ndims; i++ )
455  {
 /* Comma before every element except the first */
456  if ( i ) stringbuffer_append(sb, ",");
457  stringbuffer_aprintf(sb, "%.6g", nd_box->min[i]);
458  }
459  stringbuffer_append(sb, "],\"max\":[");
460  for ( i = 0; i < ndims; i++ )
461  {
462  if ( i ) stringbuffer_append(sb, ",");
463  stringbuffer_aprintf(sb, "%.6g", nd_box->max[i]);
464  }
465  stringbuffer_append(sb, "]}");
466 
469  return rv;
470 }
471 
472 
/*
 * Render the header fields of an ND_STATS record as a JSON object: ndims,
 * the per-dimension sizes, the extent (via nd_box_to_json) and the feature
 * and cell counters. Every float4 counter is rounded to the nearest int
 * before printing. The histogram cell values themselves are not included.
 *
 * Returns the copy produced by stringbuffer_getstringcopy().
 *
 * NOTE(review): the declaration/creation of the string buffer `sb` and its
 * release are not present in the visible text - confirm against the
 * upstream source.
 */
477 static char*
478 nd_stats_to_json(const ND_STATS *nd_stats)
479 {
480  char *json_extent, *str;
481  int d;
483  int ndims = (int)roundf(nd_stats->ndims);
484 
485  stringbuffer_append(sb, "{");
486  stringbuffer_aprintf(sb, "\"ndims\":%d,", ndims);
487 
488  /* Size */
489  stringbuffer_append(sb, "\"size\":[");
490  for ( d = 0; d < ndims; d++ )
491  {
492  if ( d ) stringbuffer_append(sb, ",");
493  stringbuffer_aprintf(sb, "%d", (int)roundf(nd_stats->size[d]));
494  }
495  stringbuffer_append(sb, "],");
496 
497  /* Extent */
498  json_extent = nd_box_to_json(&(nd_stats->extent), ndims);
499  stringbuffer_aprintf(sb, "\"extent\":%s,", json_extent);
 /* The extent text has been copied into the buffer; drop the temporary */
500  pfree(json_extent);
501 
502  stringbuffer_aprintf(sb, "\"table_features\":%d,", (int)roundf(nd_stats->table_features));
503  stringbuffer_aprintf(sb, "\"sample_features\":%d,", (int)roundf(nd_stats->sample_features));
504  stringbuffer_aprintf(sb, "\"not_null_features\":%d,", (int)roundf(nd_stats->not_null_features));
505  stringbuffer_aprintf(sb, "\"histogram_features\":%d,", (int)roundf(nd_stats->histogram_features));
506  stringbuffer_aprintf(sb, "\"histogram_cells\":%d,", (int)roundf(nd_stats->histogram_cells));
507  stringbuffer_aprintf(sb, "\"cells_covered\":%d", (int)roundf(nd_stats->cells_covered));
508  stringbuffer_append(sb, "}");
509 
510  str = stringbuffer_getstringcopy(sb);
512  return str;
513 }
514 
515 
521 // static char*
522 // nd_stats_to_grid(const ND_STATS *stats)
523 // {
524 // char *rv;
525 // int j, k;
526 // int sizex = (int)roundf(stats->size[0]);
527 // int sizey = (int)roundf(stats->size[1]);
528 // stringbuffer_t *sb = stringbuffer_create();
529 //
530 // for ( k = 0; k < sizey; k++ )
531 // {
532 // for ( j = 0; j < sizex; j++ )
533 // {
534 // stringbuffer_aprintf(sb, "%3d ", (int)roundf(stats->value[j + k*sizex]));
535 // }
536 // stringbuffer_append(sb, "\n");
537 // }
538 //
539 // rv = stringbuffer_getstringcopy(sb);
540 // stringbuffer_destroy(sb);
541 // return rv;
542 // }
543 
544 
546 static int
547 nd_box_merge(const ND_BOX *source, ND_BOX *target)
548 {
549  int d;
550  for ( d = 0; d < ND_DIMS; d++ )
551  {
552  target->min[d] = Min(target->min[d], source->min[d]);
553  target->max[d] = Max(target->max[d], source->max[d]);
554  }
555  return true;
556 }
557 
/*
 * Set every byte of the ND_BOX pointed to by `a` to zero. Always returns true.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from the visible text; this is presumably the nd_box_init() that
 * other functions in this file call - confirm against the upstream source.
 */
559 static int
561 {
562  memset(a, 0, sizeof(ND_BOX));
563  return true;
564 }
565 
/*
 * Put the ND_BOX pointed to by `a` into an "inverted" state: every minimum
 * is FLT_MAX and every maximum is -FLT_MAX, so that any later Min/Max merge
 * with real bounds replaces both values. Always returns true.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from the visible text (presumably nd_box_init_bounds) - confirm
 * against the upstream source.
 */
571 static int
573 {
574  int d;
575  for ( d = 0; d < ND_DIMS; d++ )
576  {
577  a->min[d] = FLT_MAX;
578  a->max[d] = -1 * FLT_MAX;
579  }
580  return true;
581 }
582 
584 static void
585 nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
586 {
587  int d = 0;
588  POSTGIS_DEBUGF(3, " %s", gbox_to_string(gbox));
589 
590  nd_box_init(nd_box);
591  nd_box->min[d] = gbox->xmin;
592  nd_box->max[d] = gbox->xmax;
593  d++;
594  nd_box->min[d] = gbox->ymin;
595  nd_box->max[d] = gbox->ymax;
596  d++;
597  if ( FLAGS_GET_GEODETIC(gbox->flags) )
598  {
599  nd_box->min[d] = gbox->zmin;
600  nd_box->max[d] = gbox->zmax;
601  return;
602  }
603  if ( FLAGS_GET_Z(gbox->flags) )
604  {
605  nd_box->min[d] = gbox->zmin;
606  nd_box->max[d] = gbox->zmax;
607  d++;
608  }
609  if ( FLAGS_GET_M(gbox->flags) )
610  {
611  nd_box->min[d] = gbox->mmin;
612  nd_box->max[d] = gbox->mmax;
613  d++;
614  }
615  return;
616 }
617 
621 static int
622 nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
623 {
624  int d;
625  for ( d = 0; d < ndims; d++ )
626  {
627  if ( (a->min[d] > b->max[d]) || (a->max[d] < b->min[d]) )
628  return false;
629  }
630  return true;
631 }
632 
636 static int
637 nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
638 {
639  int d;
640  for ( d = 0; d < ndims; d++ )
641  {
642  if ( ! ((a->min[d] < b->min[d]) && (a->max[d] > b->max[d])) )
643  return false;
644  }
645  return true;
646 }
647 
652 static int
653 nd_box_expand(ND_BOX *nd_box, double expansion_factor)
654 {
655  int d;
656  double size;
657  for ( d = 0; d < ND_DIMS; d++ )
658  {
659  size = nd_box->max[d] - nd_box->min[d];
660  /* Avoid expanding boxes that are either too wide or too narrow*/
661  if (size < MIN_DIMENSION_WIDTH || size > MAX_DIMENSION_WIDTH)
662  continue;
663  nd_box->min[d] -= size * expansion_factor / 2;
664  nd_box->max[d] += size * expansion_factor / 2;
665  }
666  return true;
667 }
668 
673 static inline int
674 nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
675 {
676  int d;
677 
678  POSTGIS_DEBUGF(4, " nd_box: %s", nd_box_to_json(nd_box, nd_stats->ndims));
679 
680  /* Initialize ibox */
681  memset(nd_ibox, 0, sizeof(ND_IBOX));
682 
683  /* In each dimension... */
684  for ( d = 0; d < nd_stats->ndims; d++ )
685  {
686  double smin = nd_stats->extent.min[d];
687  double smax = nd_stats->extent.max[d];
688  double width = smax - smin;
689 
690  if (width < MIN_DIMENSION_WIDTH)
691  {
692  nd_ibox->min[d] = nd_ibox->max[d] = nd_stats->extent.min[d];
693  }
694  else
695  {
696  int size = (int)roundf(nd_stats->size[d]);
697 
698  /* ... find cells the box overlaps with in this dimension */
699  nd_ibox->min[d] = floor(size * (nd_box->min[d] - smin) / width);
700  nd_ibox->max[d] = floor(size * (nd_box->max[d] - smin) / width);
701 
702  POSTGIS_DEBUGF(5, " stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
703  POSTGIS_DEBUGF(5, " overlap: dim %d: (%d, %d)", d, nd_ibox->min[d], nd_ibox->max[d]);
704 
705  /* Push any out-of range values into range */
706  nd_ibox->min[d] = Max(nd_ibox->min[d], 0);
707  nd_ibox->max[d] = Min(nd_ibox->max[d], size - 1);
708  }
709  }
710  return true;
711 }
712 
716 static inline double
717 nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
718 {
719  int d;
720  bool covered = true;
721  double ivol = 1.0;
722  double vol2 = 1.0;
723  double vol1 = 1.0;
724 
725  for ( d = 0 ; d < ndims; d++ )
726  {
727  if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
728  return 0.0; /* Disjoint */
729 
730  if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
731  covered = false;
732  }
733 
734  if ( covered )
735  return 1.0;
736 
737  for ( d = 0; d < ndims; d++ )
738  {
739  double width1 = b1->max[d] - b1->min[d];
740  double width2 = b2->max[d] - b2->min[d];
741  double imin, imax, iwidth;
742 
743  vol1 *= width1;
744  vol2 *= width2;
745 
746  imin = Max(b1->min[d], b2->min[d]);
747  imax = Min(b1->max[d], b2->max[d]);
748  iwidth = imax - imin;
749  iwidth = Max(0.0, iwidth);
750 
751  ivol *= iwidth;
752  }
753 
754  if ( vol2 == 0.0 )
755  return vol2;
756 
757  return ivol / vol2;
758 }
759 
760 /* How many bins shall we use in figuring out the distribution? */
761 #define NUM_BINS 50
762 
778 static int
779 nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
780 {
781  int d, i, k, range;
782  int counts[NUM_BINS];
783  double smin, smax; /* Spatial min, spatial max */
784  double swidth; /* Spatial width of dimension */
785 #if POSTGIS_DEBUG_LEVEL >= 3
786  double average, sdev, sdev_ratio;
787 #endif
788  int bmin, bmax; /* Bin min, bin max */
789  const ND_BOX *ndb;
790 
791  /* For each dimension... */
792  for ( d = 0; d < ndims; d++ )
793  {
794  /* Initialize counts for this dimension */
795  memset(counts, 0, sizeof(counts));
796 
797  smin = extent->min[d];
798  smax = extent->max[d];
799  swidth = smax - smin;
800 
801  /* Don't try and calculate distribution of overly narrow */
802  /* or overly wide dimensions. Here we're being pretty geographical, */
803  /* expecting "normal" planar or geographic coordinates. */
804  /* Otherwise we have to "handle" +/- Inf bounded features and */
805  /* the assumptions needed for that are as bad as this hack. */
806  if ( swidth < MIN_DIMENSION_WIDTH || swidth > MAX_DIMENSION_WIDTH )
807  {
808  distribution[d] = 0;
809  continue;
810  }
811 
812  /* Sum up the overlaps of each feature with the dimensional bins */
813  for ( i = 0; i < num_boxes; i++ )
814  {
815  double minoffset, maxoffset;
816 
817  /* Skip null entries */
818  ndb = nd_boxes[i];
819  if ( ! ndb ) continue;
820 
821  /* Where does box fall relative to the working range */
822  minoffset = ndb->min[d] - smin;
823  maxoffset = ndb->max[d] - smin;
824 
825  /* Skip boxes that are outside our working range */
826  if ( minoffset < 0 || minoffset > swidth ||
827  maxoffset < 0 || maxoffset > swidth )
828  {
829  continue;
830  }
831 
832  /* What bins does this range correspond to? */
833  bmin = floor(NUM_BINS * minoffset / swidth);
834  bmax = floor(NUM_BINS * maxoffset / swidth);
835 
836  /* Should only happen when maxoffset==swidth */
837  bmax = bmax >= NUM_BINS ? NUM_BINS-1 : bmax;
838 
839  POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
840 
841  /* Increment the counts in all the bins this feature overlaps */
842  for ( k = bmin; k <= bmax; k++ )
843  {
844  counts[k] += 1;
845  }
846 
847  }
848 
849  /* How dispersed is the distribution of features across bins? */
850  range = range_quintile(counts, NUM_BINS);
851 
852 #if POSTGIS_DEBUG_LEVEL >= 3
853  average = avg(counts, NUM_BINS);
854  sdev = stddev(counts, NUM_BINS);
855  sdev_ratio = sdev/average;
856 
857  POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
858  POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
859  POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
860  POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
861 #endif
862 
863  distribution[d] = range;
864  }
865 
866  return true;
867 }
868 
874 static inline int
875 nd_increment(ND_IBOX *ibox, int ndims, int *counter)
876 {
877  int d = 0;
878 
879  while ( d < ndims )
880  {
881  if ( counter[d] < ibox->max[d] )
882  {
883  counter[d] += 1;
884  break;
885  }
886  counter[d] = ibox->min[d];
887  d++;
888  }
889  /* That's it, cannot increment any more! */
890  if ( d == ndims )
891  return false;
892 
893  /* Increment complete! */
894  return true;
895 }
896 
897 static ND_STATS*
898 pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
899 {
900  int stats_kind = STATISTIC_KIND_ND;
901  int rv;
902  ND_STATS *nd_stats;
903 
904  /* If we're in 2D mode, set the kind appropriately */
905  if ( mode == 2 ) stats_kind = STATISTIC_KIND_2D;
906 
907  /* Then read the geom status histogram from that */
908 
909 #if POSTGIS_PGSQL_VERSION < 100
910  {
911  float4 *floatptr;
912  int nvalues;
913 
914  rv = get_attstatsslot(stats_tuple, 0, 0, stats_kind, InvalidOid,
915  NULL, NULL, NULL, &floatptr, &nvalues);
916 
917  if ( ! rv ) {
918  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
919  return NULL;
920  }
921 
922  /* Clone the stats here so we can release the attstatsslot immediately */
923  nd_stats = palloc(sizeof(float) * nvalues);
924  memcpy(nd_stats, floatptr, sizeof(float) * nvalues);
925 
926  /* Clean up */
927  free_attstatsslot(0, NULL, 0, floatptr, nvalues);
928  }
929 #else /* PostgreSQL 10 or higher */
930  {
931  AttStatsSlot sslot;
932  rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
933  ATTSTATSSLOT_NUMBERS);
934  if ( ! rv ) {
935  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
936  return NULL;
937  }
938 
939  /* Clone the stats here so we can release the attstatsslot immediately */
940  nd_stats = palloc(sizeof(float4) * sslot.nnumbers);
941  memcpy(nd_stats, sslot.numbers, sizeof(float4) * sslot.nnumbers);
942 
943  free_attstatsslot(&sslot);
944  }
945 #endif
946 
947  return nd_stats;
948 }
949 
954 static ND_STATS*
955 pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
956 {
957  HeapTuple stats_tuple = NULL;
958  ND_STATS *nd_stats;
959 
960  /* First pull the stats tuple for the whole tree */
961  if ( ! only_parent )
962  {
963  POSTGIS_DEBUGF(2, "searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
964  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(true));
965  if ( stats_tuple )
966  POSTGIS_DEBUGF(2, "found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
967  }
968  /* Fall-back to main table stats only, if not found for whole tree or explicitly ignored */
969  if ( only_parent || ! stats_tuple )
970  {
971  POSTGIS_DEBUGF(2, "searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
972  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(false));
973  if ( stats_tuple )
974  POSTGIS_DEBUGF(2, "found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
975  }
976  if ( ! stats_tuple )
977  {
978  POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
979  return NULL;
980  }
981 
982  nd_stats = pg_nd_stats_from_tuple(stats_tuple, mode);
983  ReleaseSysCache(stats_tuple);
984  if ( ! nd_stats )
985  {
986  POSTGIS_DEBUGF(2,
987  "histogram for attribute %d of table \"%s\" does not exist?",
988  att_num, get_rel_name(table_oid));
989  }
990 
991  return nd_stats;
992 }
993 
1002 static ND_STATS*
1003 pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
1004 {
1005  const char *att_name = text_to_cstring(att_text);
1006  AttrNumber att_num;
1007 
1008  /* We know the name? Look up the num */
1009  if ( att_text )
1010  {
1011  /* Get the attribute number */
1012  att_num = get_attnum(table_oid, att_name);
1013  if ( ! att_num ) {
1014  elog(ERROR, "attribute \"%s\" does not exist", att_name);
1015  return NULL;
1016  }
1017  }
1018  else
1019  {
1020  elog(ERROR, "attribute name is null");
1021  return NULL;
1022  }
1023 
1024  return pg_get_nd_stats(table_oid, att_num, mode, only_parent);
1025 }
1026 
1040 static float8
1042 {
1043  int ncells1, ncells2;
1044  int ndims1, ndims2, ndims;
1045  double ntuples_max;
1046  double ntuples_not_null1, ntuples_not_null2;
1047 
1048  ND_BOX extent1, extent2;
1049  ND_IBOX ibox1, ibox2;
1050  int at1[ND_DIMS];
1051  int at2[ND_DIMS];
1052  double min1[ND_DIMS];
1053  double width1[ND_DIMS];
1054  double cellsize1[ND_DIMS];
1055  int size2[ND_DIMS];
1056  double min2[ND_DIMS];
1057  double width2[ND_DIMS];
1058  double cellsize2[ND_DIMS];
1059  int size1[ND_DIMS];
1060  int d;
1061  double val = 0;
1062  float8 selectivity;
1063 
1064  /* Drop out on null inputs */
1065  if ( ! ( s1 && s2 ) )
1066  {
1067  elog(NOTICE, " estimate_join_selectivity called with null inputs");
1068  return FALLBACK_ND_SEL;
1069  }
1070 
1071  /* We need to know how many cells each side has... */
1072  ncells1 = (int)roundf(s1->histogram_cells);
1073  ncells2 = (int)roundf(s2->histogram_cells);
1074 
1075  /* ...so that we can drive the summation loop with the smaller histogram. */
1076  if ( ncells1 > ncells2 )
1077  {
1078  const ND_STATS *stats_tmp = s1;
1079  s1 = s2;
1080  s2 = stats_tmp;
1081  }
1082 
1083  POSTGIS_DEBUGF(3, "s1: %s", nd_stats_to_json(s1));
1084  POSTGIS_DEBUGF(3, "s2: %s", nd_stats_to_json(s2));
1085 
1086  /* Re-read that info after the swap */
1087  ncells1 = (int)roundf(s1->histogram_cells);
1088  ncells2 = (int)roundf(s2->histogram_cells);
1089 
1090  /* Q: What's the largest possible join size these relations can create? */
1091  /* A: The product of the # of non-null rows in each relation. */
1092  ntuples_not_null1 = s1->table_features * (s1->not_null_features / s1->sample_features);
1093  ntuples_not_null2 = s2->table_features * (s2->not_null_features / s2->sample_features);
1094  ntuples_max = ntuples_not_null1 * ntuples_not_null2;
1095 
1096  /* Get the ndims as ints */
1097  ndims1 = (int)roundf(s1->ndims);
1098  ndims2 = (int)roundf(s2->ndims);
1099  ndims = Max(ndims1, ndims2);
1100 
1101  /* Get the extents */
1102  extent1 = s1->extent;
1103  extent2 = s2->extent;
1104 
1105  /* If relation stats do not intersect, join is very very selective. */
1106  if ( ! nd_box_intersects(&extent1, &extent2, ndims) )
1107  {
1108  POSTGIS_DEBUG(3, "relation stats do not intersect, returning 0");
1109  PG_RETURN_FLOAT8(0.0);
1110  }
1111 
1112  /*
1113  * First find the index range of the part of the smaller
1114  * histogram that overlaps the larger one.
1115  */
1116  if ( ! nd_box_overlap(s1, &extent2, &ibox1) )
1117  {
1118  POSTGIS_DEBUG(3, "could not calculate overlap of relations");
1119  PG_RETURN_FLOAT8(FALLBACK_ND_JOINSEL);
1120  }
1121 
1122  /* Initialize counters / constants on s1 */
1123  for ( d = 0; d < ndims1; d++ )
1124  {
1125  at1[d] = ibox1.min[d];
1126  min1[d] = s1->extent.min[d];
1127  width1[d] = s1->extent.max[d] - s1->extent.min[d];
1128  size1[d] = (int)roundf(s1->size[d]);
1129  cellsize1[d] = width1[d] / size1[d];
1130  }
1131 
1132  /* Initialize counters / constants on s2 */
1133  for ( d = 0; d < ndims2; d++ )
1134  {
1135  min2[d] = s2->extent.min[d];
1136  width2[d] = s2->extent.max[d] - s2->extent.min[d];
1137  size2[d] = (int)roundf(s2->size[d]);
1138  cellsize2[d] = width2[d] / size2[d];
1139  }
1140 
1141  /* For each affected cell of s1... */
1142  do
1143  {
1144  double val1;
1145  /* Construct the bounds of this cell */
1146  ND_BOX nd_cell1;
1147  nd_box_init(&nd_cell1);
1148  for ( d = 0; d < ndims1; d++ )
1149  {
1150  nd_cell1.min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1151  nd_cell1.max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1152  }
1153 
1154  /* Find the cells of s2 that cell1 overlaps.. */
1155  nd_box_overlap(s2, &nd_cell1, &ibox2);
1156 
1157  /* Initialize counter */
1158  for ( d = 0; d < ndims2; d++ )
1159  {
1160  at2[d] = ibox2.min[d];
1161  }
1162 
1163  POSTGIS_DEBUGF(3, "at1 %d,%d %s", at1[0], at1[1], nd_box_to_json(&nd_cell1, ndims1));
1164 
1165  /* Get the value at this cell */
1166  val1 = s1->value[nd_stats_value_index(s1, at1)];
1167 
1168  /* For each overlapped cell of s2... */
1169  do
1170  {
1171  double ratio2;
1172  double val2;
1173 
1174  /* Construct the bounds of this cell */
1175  ND_BOX nd_cell2;
1176  nd_box_init(&nd_cell2);
1177  for ( d = 0; d < ndims2; d++ )
1178  {
1179  nd_cell2.min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1180  nd_cell2.max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1181  }
1182 
1183  POSTGIS_DEBUGF(3, " at2 %d,%d %s", at2[0], at2[1], nd_box_to_json(&nd_cell2, ndims2));
1184 
1185  /* Calculate overlap ratio of the cells */
1186  ratio2 = nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1187 
1188  /* Multiply the cell counts, scaled by overlap ratio */
1189  val2 = s2->value[nd_stats_value_index(s2, at2)];
1190  POSTGIS_DEBUGF(3, " val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1191  val += val1 * (val2 * ratio2);
1192  }
1193  while ( nd_increment(&ibox2, ndims2, at2) );
1194 
1195  }
1196  while( nd_increment(&ibox1, ndims1, at1) );
1197 
1198  POSTGIS_DEBUGF(3, "val of histogram = %g", val);
1199 
1200  /*
1201  * In order to compare our total cell count "val" to the
1202  * ntuples_max, we need to scale val up to reflect a full
1203  * table estimate. So, multiply by ratio of table size to
1204  * sample size.
1205  */
1206  val *= (s1->table_features / s1->sample_features);
1207  val *= (s2->table_features / s2->sample_features);
1208 
1209  POSTGIS_DEBUGF(3, "val scaled to full table size = %g", val);
1210 
1211  /*
1212  * Because the cell counts are over-determined due to
1213  * double counting of features that overlap multiple cells
1214  * (see the compute_gserialized_stats routine)
1215  * we also have to scale our cell count "val" *down*
1216  * to adjust for the double counting.
1217  */
1218 // val /= (s1->cells_covered / s1->histogram_features);
1219 // val /= (s2->cells_covered / s2->histogram_features);
1220 
1221  /*
1222  * Finally, the selectivity is the estimated number of
1223  * rows to be returned divided by the maximum possible
1224  * number of rows that can be returned.
1225  */
1226  selectivity = val / ntuples_max;
1227 
1228  /* Guard against over-estimates and crazy numbers :) */
1229  if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1230  {
1231  selectivity = DEFAULT_ND_JOINSEL;
1232  }
1233  else if ( selectivity > 1.0 )
1234  {
1235  selectivity = 1.0;
1236  }
1237 
1238  return selectivity;
1239 }
1240 
/*
 * ND-mode join selectivity entry point: forwards its four incoming
 * arguments unchanged and appends the integer 0 as a fifth argument to
 * select ND mode, then returns the callee's Datum.
 *
 * NOTE(review): the first argument of DirectFunctionCall5 (the function
 * being called) is missing from the visible text; presumably it is
 * gserialized_gist_joinsel - confirm against the upstream source.
 */
1246 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
1247 {
1248  PG_RETURN_DATUM(DirectFunctionCall5(
1250  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1251  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1252  Int32GetDatum(0) /* ND mode */
1253  ));
1254 }
1255 
/*
 * 2D-mode join selectivity entry point: forwards its four incoming
 * arguments unchanged and appends the integer 2 as a fifth argument to
 * select 2D mode, then returns the callee's Datum.
 *
 * NOTE(review): the first argument of DirectFunctionCall5 (the function
 * being called) is missing from the visible text; presumably it is
 * gserialized_gist_joinsel - confirm against the upstream source.
 */
1261 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
1262 {
1263  PG_RETURN_DATUM(DirectFunctionCall5(
1265  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1266  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1267  Int32GetDatum(2) /* 2D mode */
1268  ));
1269 }
1270 
1280 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
1281 {
1282  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1283  /* Oid operator = PG_GETARG_OID(1); */
1284  List *args = (List *) PG_GETARG_POINTER(2);
1285  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1286  int mode = PG_GETARG_INT32(4);
1287 
1288  Node *arg1, *arg2;
1289  Var *var1, *var2;
1290  Oid relid1, relid2;
1291 
1292  ND_STATS *stats1, *stats2;
1293  float8 selectivity;
1294 
1295  /* Only respond to an inner join/unknown context join */
1296  if (jointype != JOIN_INNER)
1297  {
1298  elog(DEBUG1, "%s: jointype %d not supported", __func__, jointype);
1299  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1300  }
1301 
1302  /* Find Oids of the geometry columns we are working with */
1303  arg1 = (Node*) linitial(args);
1304  arg2 = (Node*) lsecond(args);
1305  var1 = (Var*) arg1;
1306  var2 = (Var*) arg2;
1307 
1308  /* We only do column joins right now, no functional joins */
1309  /* TODO: handle g1 && ST_Expand(g2) */
1310  if (!IsA(arg1, Var) || !IsA(arg2, Var))
1311  {
1312  elog(DEBUG1, "%s called with arguments that are not column references", __func__);
1313  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1314  }
1315 
1316  /* What are the Oids of our tables/relations? */
1317  relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
1318  relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
1319 
1320  POSTGIS_DEBUGF(3, "using relations \"%s\" Oid(%d), \"%s\" Oid(%d)",
1321  get_rel_name(relid1) ? get_rel_name(relid1) : "NULL", relid1, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2);
1322 
1323  /* Pull the stats from the stats system. */
1324  stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
1325  stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
1326 
1327  /* If we can't get stats, we have to stop here! */
1328  if ( ! stats1 )
1329  {
1330  POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid1) ? get_rel_name(relid1) : "NULL" , relid1);
1331  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1332  }
1333  else if ( ! stats2 )
1334  {
1335  POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2);
1336  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1337  }
1338 
1339  selectivity = estimate_join_selectivity(stats1, stats2);
1340  POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
1341 
1342  pfree(stats1);
1343  pfree(stats2);
1344  PG_RETURN_FLOAT8(selectivity);
1345 }
1346 
1347 
1348 
1349 
1368 static void
1369 compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1370  int sample_rows, double total_rows, int mode)
1371 {
1372  MemoryContext old_context;
1373  int d, i; /* Counters */
1374  int notnull_cnt = 0; /* # not null rows in the sample */
1375  int null_cnt = 0; /* # null rows in the sample */
1376  int histogram_features = 0; /* # rows that actually got counted in the histogram */
1377 
1378  ND_STATS *nd_stats; /* Our histogram */
1379  size_t nd_stats_size; /* Size to allocate */
1380 
1381  double total_width = 0; /* # of bytes used by sample */
1382  double total_sample_volume = 0; /* Area/volume coverage of the sample */
1383  double total_cell_count = 0; /* # of cells in histogram affected by sample */
1384 
1385  ND_BOX sum; /* Sum of extents of sample boxes */
1386  ND_BOX avg; /* Avg of extents of sample boxes */
1387  ND_BOX stddev; /* StdDev of extents of sample boxes */
1388 
1389  const ND_BOX **sample_boxes; /* ND_BOXes for each of the sample features */
1390  ND_BOX sample_extent; /* Extent of the raw sample */
1391  int histo_size[ND_DIMS]; /* histogram nrows, ncols, etc */
1392  ND_BOX histo_extent; /* Spatial extent of the histogram */
1393  ND_BOX histo_extent_new; /* Temporary variable */
1394  int histo_cells_target; /* Number of cells we will shoot for, given the stats target */
1395  int histo_cells; /* Number of cells in the histogram */
1396  int histo_cells_new = 1; /* Temporary variable */
1397 
1398  int ndims = 2; /* Dimensionality of the sample */
1399  int histo_ndims = 0; /* Dimensionality of the histogram */
1400  double sample_distribution[ND_DIMS]; /* How homogeneous is distribution of sample in each axis? */
1401  double total_distribution; /* Total of sample_distribution */
1402 
1403  int stats_slot; /* What slot is this data going into? (2D vs ND) */
1404  int stats_kind; /* And this is what? (2D vs ND) */
1405 
1406  /* Initialize sum and stddev */
1407  nd_box_init(&sum);
1408  nd_box_init(&stddev);
1409  nd_box_init(&avg);
1410  nd_box_init(&histo_extent);
1411  nd_box_init(&histo_extent_new);
1412 
1413  /*
1414  * This is where gserialized_analyze_nd
1415  * should put its' custom parameters.
1416  */
1417  /* void *mystats = stats->extra_data; */
1418 
1419  POSTGIS_DEBUG(2, "compute_gserialized_stats called");
1420  POSTGIS_DEBUGF(3, " # sample_rows: %d", sample_rows);
1421  POSTGIS_DEBUGF(3, " estimate of total_rows: %.6g", total_rows);
1422 
1423  /*
1424  * We might need less space, but don't think
1425  * its worth saving...
1426  */
1427  sample_boxes = palloc(sizeof(ND_BOX*) * sample_rows);
1428 
1429  /*
1430  * First scan:
1431  * o read boxes
1432  * o find dimensionality of the sample
1433  * o find extent of the sample
1434  * o count null-infinite/not-null values
1435  * o compute total_width
1436  * o compute total features's box area (for avgFeatureArea)
1437  * o sum features box coordinates (for standard deviation)
1438  */
1439  for ( i = 0; i < sample_rows; i++ )
1440  {
1441  Datum datum;
1442  GSERIALIZED *geom;
1443  GBOX gbox;
1444  ND_BOX *nd_box;
1445  bool is_null;
1446  bool is_copy;
1447 
1448  datum = fetchfunc(stats, i, &is_null);
1449 
1450  /* Skip all NULLs. */
1451  if ( is_null )
1452  {
1453  POSTGIS_DEBUGF(4, " skipped null geometry %d", i);
1454  null_cnt++;
1455  continue;
1456  }
1457 
1458  /* Read the bounds from the gserialized. */
1459  geom = (GSERIALIZED *)PG_DETOAST_DATUM(datum);
1460  is_copy = VARATT_IS_EXTENDED(datum);
1461  if ( LW_FAILURE == gserialized_get_gbox_p(geom, &gbox) )
1462  {
1463  /* Skip empties too. */
1464  POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
1465  continue;
1466  }
1467 
1468  /* If we're in 2D mode, zero out the higher dimensions for "safety" */
1469  if ( mode == 2 )
1470  gbox.zmin = gbox.zmax = gbox.mmin = gbox.mmax = 0.0;
1471 
1472  /* Check bounds for validity (finite and not NaN) */
1473  if ( ! gbox_is_valid(&gbox) )
1474  {
1475  POSTGIS_DEBUGF(3, " skipped infinite/nan geometry %d", i);
1476  continue;
1477  }
1478 
1479  /*
1480  * In N-D mode, set the ndims to the maximum dimensionality found
1481  * in the sample. Otherwise, leave at ndims == 2.
1482  */
1483  if ( mode != 2 )
1484  ndims = Max(gbox_ndims(&gbox), ndims);
1485 
1486  /* Convert gbox to n-d box */
1487  nd_box = palloc(sizeof(ND_BOX));
1488  nd_box_from_gbox(&gbox, nd_box);
1489 
1490  /* Cache n-d bounding box */
1491  sample_boxes[notnull_cnt] = nd_box;
1492 
1493  /* Initialize sample extent before merging first entry */
1494  if ( ! notnull_cnt )
1495  nd_box_init_bounds(&sample_extent);
1496 
1497  /* Add current sample to overall sample extent */
1498  nd_box_merge(nd_box, &sample_extent);
1499 
1500  /* How many bytes does this sample use? */
1501  total_width += VARSIZE(geom);
1502 
1503  /* Add bounds coordinates to sums for stddev calculation */
1504  for ( d = 0; d < ndims; d++ )
1505  {
1506  sum.min[d] += nd_box->min[d];
1507  sum.max[d] += nd_box->max[d];
1508  }
1509 
1510  /* Increment our "good feature" count */
1511  notnull_cnt++;
1512 
1513  /* Free up memory if our sample geometry was copied */
1514  if ( is_copy )
1515  pfree(geom);
1516 
1517  /* Give backend a chance of interrupting us */
1518  vacuum_delay_point();
1519  }
1520 
1521  /*
1522  * We'll build a histogram having stats->attr->attstattarget cells
1523  * on each side, within reason... we'll use ndims*10000 as the
1524  * maximum number of cells.
1525  * Also, if we're sampling a relatively small table, we'll try to ensure that
1526  * we have an average of 5 features for each cell so the histogram isn't
1527  * so sparse.
1528  */
1529  histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
1530  histo_cells_target = Min(histo_cells_target, ndims * 10000);
1531  histo_cells_target = Min(histo_cells_target, (int)(total_rows/5));
1532  POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
1533  POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
1534 
1535  /* If there's no useful features, we can't work out stats */
1536  if ( ! notnull_cnt )
1537  {
1538  elog(NOTICE, "no non-null/empty features, unable to compute statistics");
1539  stats->stats_valid = false;
1540  return;
1541  }
1542 
1543  POSTGIS_DEBUGF(3, " sample_extent: %s", nd_box_to_json(&sample_extent, ndims));
1544 
1545  /*
1546  * Second scan:
1547  * o compute standard deviation
1548  */
1549  for ( d = 0; d < ndims; d++ )
1550  {
1551  /* Calculate average bounds values */
1552  avg.min[d] = sum.min[d] / notnull_cnt;
1553  avg.max[d] = sum.max[d] / notnull_cnt;
1554 
1555  /* Calculate standard deviation for this dimension bounds */
1556  for ( i = 0; i < notnull_cnt; i++ )
1557  {
1558  const ND_BOX *ndb = sample_boxes[i];
1559  stddev.min[d] += (ndb->min[d] - avg.min[d]) * (ndb->min[d] - avg.min[d]);
1560  stddev.max[d] += (ndb->max[d] - avg.max[d]) * (ndb->max[d] - avg.max[d]);
1561  }
1562  stddev.min[d] = sqrt(stddev.min[d] / notnull_cnt);
1563  stddev.max[d] = sqrt(stddev.max[d] / notnull_cnt);
1564 
1565  /* Histogram bounds for this dimension bounds is avg +/- SDFACTOR * stdev */
1566  histo_extent.min[d] = Max(avg.min[d] - SDFACTOR * stddev.min[d], sample_extent.min[d]);
1567  histo_extent.max[d] = Min(avg.max[d] + SDFACTOR * stddev.max[d], sample_extent.max[d]);
1568  }
1569 
1570  /*
1571  * Third scan:
1572  * o skip hard deviants
1573  * o compute new histogram box
1574  */
1575  nd_box_init_bounds(&histo_extent_new);
1576  for ( i = 0; i < notnull_cnt; i++ )
1577  {
1578  const ND_BOX *ndb = sample_boxes[i];
1579  /* Skip any hard deviants (boxes entirely outside our histo_extent */
1580  if ( ! nd_box_intersects(&histo_extent, ndb, ndims) )
1581  {
1582  POSTGIS_DEBUGF(4, " feature %d is a hard deviant, skipped", i);
1583  sample_boxes[i] = NULL;
1584  continue;
1585  }
1586  /* Expand our new box to fit all the other features. */
1587  nd_box_merge(ndb, &histo_extent_new);
1588  }
1589  /*
1590  * Expand the box slightly (1%) to avoid edge effects
1591  * with objects that are on the boundary
1592  */
1593  nd_box_expand(&histo_extent_new, 0.01);
1594  histo_extent = histo_extent_new;
1595 
1596  /*
1597  * How should we allocate our histogram cells to the
1598  * different dimensions? We can't do it by raw dimensional width,
1599  * because in x/y/z space, the z can have different units
1600  * from the x/y. Similarly for x/y/t space.
1601  * So, we instead calculate how much features overlap
1602  * each other in their dimension to figure out which
1603  * dimensions have useful selectivity characteristics (more
1604  * variability in density) and therefor would find
1605  * more cells useful (to distinguish between dense places and
1606  * homogeneous places).
1607  */
1608  nd_box_array_distribution(sample_boxes, notnull_cnt, &histo_extent, ndims,
1609  sample_distribution);
1610 
1611  /*
1612  * The sample_distribution array now tells us how spread out the
1613  * data is in each dimension, so we use that data to allocate
1614  * the histogram cells we have available.
1615  * At this point, histo_cells_target is the approximate target number
1616  * of cells.
1617  */
1618 
1619  /*
1620  * Some dimensions have basically a uniform distribution, we want
1621  * to allocate no cells to those dimensions, only to dimensions
1622  * that have some interesting differences in data distribution.
1623  * Here we count up the number of interesting dimensions
1624  */
1625  for ( d = 0; d < ndims; d++ )
1626  {
1627  if ( sample_distribution[d] > 0 )
1628  histo_ndims++;
1629  }
1630 
1631  if ( histo_ndims == 0 )
1632  {
1633  /* Special case: all our dimensions had low variability! */
1634  /* We just divide the cells up evenly */
1635  POSTGIS_DEBUG(3, " special case: no axes have variability");
1636  histo_cells_new = 1;
1637  for ( d = 0; d < ndims; d++ )
1638  {
1639  histo_size[d] = 1 + (int)pow((double)histo_cells_target, 1/(double)ndims);
1640  POSTGIS_DEBUGF(3, " histo_size[d]: %d", histo_size[d]);
1641  histo_cells_new *= histo_size[d];
1642  }
1643  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1644  }
1645  else
1646  {
1647  /*
1648  * We're going to express the amount of variability in each dimension
1649  * as a proportion of the total variability and allocate cells in that
1650  * dimension relative to that proportion.
1651  */
1652  POSTGIS_DEBUG(3, " allocating histogram axes based on axis variability");
1653  total_distribution = total_double(sample_distribution, ndims); /* First get the total */
1654  POSTGIS_DEBUGF(3, " total_distribution: %.8g", total_distribution);
1655  histo_cells_new = 1; /* For the number of cells in the final histogram */
1656  for ( d = 0; d < ndims; d++ )
1657  {
1658  if ( sample_distribution[d] == 0 ) /* Uninteresting dimensions don't get any room */
1659  {
1660  histo_size[d] = 1;
1661  }
1662  else /* Interesting dimension */
1663  {
1664  /* How does this dims variability compare to the total? */
1665  float edge_ratio = (float)sample_distribution[d] / (float)total_distribution;
1666  /*
1667  * Scale the target cells number by the # of dims and ratio,
1668  * then take the appropriate root to get the estimated number of cells
1669  * on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
1670  */
1671  histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
1672  /* If something goes awry, just give this dim one slot */
1673  if ( ! histo_size[d] )
1674  histo_size[d] = 1;
1675  }
1676  histo_cells_new *= histo_size[d];
1677  }
1678  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1679  }
1680 
1681  /* Update histo_cells to the actual number of cells we need to allocate */
1682  histo_cells = histo_cells_new;
1683  POSTGIS_DEBUGF(3, " histo_cells: %d", histo_cells);
1684 
1685  /*
1686  * Create the histogram (ND_STATS) in the stats memory context
1687  */
1688  old_context = MemoryContextSwitchTo(stats->anl_context);
1689  nd_stats_size = sizeof(ND_STATS) + ((histo_cells - 1) * sizeof(float4));
1690  nd_stats = palloc(nd_stats_size);
1691  memset(nd_stats, 0, nd_stats_size); /* Initialize all values to 0 */
1692  MemoryContextSwitchTo(old_context);
1693 
1694  /* Initialize the #ND_STATS objects */
1695  nd_stats->ndims = ndims;
1696  nd_stats->extent = histo_extent;
1697  nd_stats->sample_features = sample_rows;
1698  nd_stats->table_features = total_rows;
1699  nd_stats->not_null_features = notnull_cnt;
1700  /* Copy in the histogram dimensions */
1701  for ( d = 0; d < ndims; d++ )
1702  nd_stats->size[d] = histo_size[d];
1703 
1704  /*
1705  * Fourth scan:
1706  * o fill histogram values with the proportion of
1707  * features' bbox overlaps: a feature's bvol
1708  * can fully overlap (1) or partially overlap
1709  * (fraction of 1) an histogram cell.
1710  *
1711  * Note that we are filling each cell with the "portion of
1712  * the feature's box that overlaps the cell". So, if we sum
1713  * up the values in the histogram, we could get the
1714  * histogram feature count.
1715  *
1716  */
1717  for ( i = 0; i < notnull_cnt; i++ )
1718  {
1719  const ND_BOX *nd_box;
1720  ND_IBOX nd_ibox;
1721  int at[ND_DIMS];
1722  int d;
1723  double num_cells = 0;
1724  double tmp_volume = 1.0;
1725  double min[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1726  double max[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1727  double cellsize[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1728 
1729  nd_box = sample_boxes[i];
1730  if ( ! nd_box ) continue; /* Skip Null'ed out hard deviants */
1731 
1732  /* Give backend a chance of interrupting us */
1733  vacuum_delay_point();
1734 
1735  /* Find the cells that overlap with this box and put them into the ND_IBOX */
1736  nd_box_overlap(nd_stats, nd_box, &nd_ibox);
1737  memset(at, 0, sizeof(int)*ND_DIMS);
1738 
1739  POSTGIS_DEBUGF(3, " feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1740  nd_ibox.min[0], nd_ibox.min[1], nd_ibox.min[2], nd_ibox.min[3],
1741  nd_ibox.max[0], nd_ibox.max[1], nd_ibox.max[2], nd_ibox.max[3]);
1742 
1743  for ( d = 0; d < nd_stats->ndims; d++ )
1744  {
1745  /* Initialize the starting values */
1746  at[d] = nd_ibox.min[d];
1747  min[d] = nd_stats->extent.min[d];
1748  max[d] = nd_stats->extent.max[d];
1749  cellsize[d] = (max[d] - min[d])/(nd_stats->size[d]);
1750 
1751  /* What's the volume (area) of this feature's box? */
1752  tmp_volume *= (nd_box->max[d] - nd_box->min[d]);
1753  }
1754 
1755  /* Add feature volume (area) to our total */
1756  total_sample_volume += tmp_volume;
1757 
1758  /*
1759  * Move through all the overlaped histogram cells values and
1760  * add the box overlap proportion to them.
1761  */
1762  do
1763  {
1764  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1765  double ratio;
1766  /* Create a box for this histogram cell */
1767  for ( d = 0; d < nd_stats->ndims; d++ )
1768  {
1769  nd_cell.min[d] = min[d] + (at[d]+0) * cellsize[d];
1770  nd_cell.max[d] = min[d] + (at[d]+1) * cellsize[d];
1771  }
1772 
1773  /*
1774  * If a feature box is completely inside one cell the ratio will be
1775  * 1.0. If a feature box is 50% in two cells, each cell will get
1776  * 0.5 added on.
1777  */
1778  ratio = nd_box_ratio(&nd_cell, nd_box, nd_stats->ndims);
1779  nd_stats->value[nd_stats_value_index(nd_stats, at)] += ratio;
1780  num_cells += ratio;
1781  POSTGIS_DEBUGF(3, " ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1782  POSTGIS_DEBUGF(3, " at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1783  }
1784  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1785 
1786  /* Keep track of overall number of overlaps counted */
1787  total_cell_count += num_cells;
1788  /* How many features have we added to this histogram? */
1789  histogram_features++;
1790  }
1791 
1792  POSTGIS_DEBUGF(3, " histogram_features: %d", histogram_features);
1793  POSTGIS_DEBUGF(3, " sample_rows: %d", sample_rows);
1794  POSTGIS_DEBUGF(3, " table_rows: %.6g", total_rows);
1795 
1796  /* Error out if we got no sample information */
1797  if ( ! histogram_features )
1798  {
1799  POSTGIS_DEBUG(3, " no stats have been gathered");
1800  elog(NOTICE, " no features lie in the stats histogram, invalid stats");
1801  stats->stats_valid = false;
1802  return;
1803  }
1804 
1805  nd_stats->histogram_features = histogram_features;
1806  nd_stats->histogram_cells = histo_cells;
1807  nd_stats->cells_covered = total_cell_count;
1808 
1809  /* Put this histogram data into the right slot/kind */
1810  if ( mode == 2 )
1811  {
1812  stats_slot = STATISTIC_SLOT_2D;
1813  stats_kind = STATISTIC_KIND_2D;
1814  }
1815  else
1816  {
1817  stats_slot = STATISTIC_SLOT_ND;
1818  stats_kind = STATISTIC_KIND_ND;
1819  }
1820 
1821  /* Write the statistics data */
1822  stats->stakind[stats_slot] = stats_kind;
1823  stats->staop[stats_slot] = InvalidOid;
1824  stats->stanumbers[stats_slot] = (float4*)nd_stats;
1825  stats->numnumbers[stats_slot] = nd_stats_size/sizeof(float4);
1826  stats->stanullfrac = (float4)null_cnt/sample_rows;
1827  stats->stawidth = total_width/notnull_cnt;
1828  stats->stadistinct = -1.0;
1829  stats->stats_valid = true;
1830 
1831  POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1832  POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1833  POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
1834  POSTGIS_DEBUGF(3, " out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1835  POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
1836  POSTGIS_DEBUG (3, " out: distinct values: all (no check done)");
1837  POSTGIS_DEBUGF(3, " out: %s", nd_stats_to_json(nd_stats));
1838  /*
1839  POSTGIS_DEBUGF(3, " out histogram:\n%s", nd_stats_to_grid(nd_stats));
1840  */
1841 
1842  return;
1843 }
1844 
1845 
1863 static void
1864 compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1865  int sample_rows, double total_rows)
1866 {
1867  /* 2D Mode */
1868  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
1869  /* ND Mode */
1870  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 0);
1871 }
1872 
1873 
1902 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
1903 {
1904  VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1905  Form_pg_attribute attr = stats->attr;
1906 
1907  POSTGIS_DEBUG(2, "gserialized_analyze_nd called");
1908 
1909  /* If the attstattarget column is negative, use the default value */
1910  /* NB: it is okay to scribble on stats->attr since it's a copy */
1911  if (attr->attstattarget < 0)
1912  attr->attstattarget = default_statistics_target;
1913 
1914  POSTGIS_DEBUGF(3, " attribute stat target: %d", attr->attstattarget);
1915 
1916  /* Setup the minimum rows and the algorithm function.
1917  * 300 matches the default value set in
1918  * postgresql/src/backend/commands/analyze.c */
1919  stats->minrows = 300 * stats->attr->attstattarget;
1920  stats->compute_stats = compute_gserialized_stats;
1921 
1922  POSTGIS_DEBUGF(3, " minrows: %d", stats->minrows);
1923 
1924  /* Indicate we are done successfully */
1925  PG_RETURN_BOOL(true);
1926 }
1927 
1940 static float8
1941 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
1942 {
1943  int d; /* counter */
1944  float8 selectivity;
1945  ND_BOX nd_box;
1946  ND_IBOX nd_ibox;
1947  int at[ND_DIMS];
1948  double cell_size[ND_DIMS];
1949  double min[ND_DIMS];
1950  double max[ND_DIMS];
1951  double total_count = 0.0;
1952  int ndims_max;
1953 
1954  /* Calculate the overlap of the box on the histogram */
1955  if ( ! nd_stats )
1956  {
1957  elog(NOTICE, " estimate_selectivity called with null input");
1958  return FALLBACK_ND_SEL;
1959  }
1960 
1961  ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1962 
1963  /* Initialize nd_box. */
1964  nd_box_from_gbox(box, &nd_box);
1965 
1966  /*
1967  * To return 2D stats on an ND sample, we need to make the
1968  * 2D box cover the full range of the other dimensions in the
1969  * histogram.
1970  */
1971  POSTGIS_DEBUGF(3, " mode: %d", mode);
1972  if ( mode == 2 )
1973  {
1974  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1975  ndims_max = 2;
1976  }
1977 
1978  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1979  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1980 
1981  /*
1982  * Search box completely misses histogram extent?
1983  * We have to intersect in all N dimensions or else we have
1984  * zero interaction under the &&& operator. It's important
1985  * to short circuit in this case, as some of the tests below
1986  * will return junk results when run on non-intersecting inputs.
1987  */
1988  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1989  {
1990  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1991  return 0.0;
1992  }
1993 
1994  /* Search box completely contains histogram extent! */
1995  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1996  {
1997  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1998  return 1.0;
1999  }
2000 
2001  /* Calculate the overlap of the box on the histogram */
2002  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
2003  {
2004  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
2005  return FALLBACK_ND_SEL;
2006  }
2007 
2008  /* Work out some measurements of the histogram */
2009  for ( d = 0; d < nd_stats->ndims; d++ )
2010  {
2011  /* Cell size in each dim */
2012  min[d] = nd_stats->extent.min[d];
2013  max[d] = nd_stats->extent.max[d];
2014  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
2015  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2016 
2017  /* Initialize the counter */
2018  at[d] = nd_ibox.min[d];
2019  }
2020 
2021  /* Move through all the overlap values and sum them */
2022  do
2023  {
2024  float cell_count, ratio;
2025  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2026 
2027  /* We have to pro-rate partially overlapped cells. */
2028  for ( d = 0; d < nd_stats->ndims; d++ )
2029  {
2030  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2031  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2032  }
2033 
2034  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2035  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2036 
2037  /* Add the pro-rated count for this cell to the overall total */
2038  total_count += cell_count * ratio;
2039  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2040  }
2041  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2042 
2043  /* Scale by the number of features in our histogram to get the proportion */
2044  selectivity = total_count / nd_stats->histogram_features;
2045 
2046  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2047  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2048  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2049  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2050 
2051  /* Prevent rounding overflows */
2052  if (selectivity > 1.0) selectivity = 1.0;
2053  else if (selectivity < 0.0) selectivity = 0.0;
2054 
2055  return selectivity;
2056 }
2057 
2058 
2059 
2065 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
2066 {
2067  Oid table_oid = PG_GETARG_OID(0);
2068  text *att_text = PG_GETARG_TEXT_P(1);
2069  ND_STATS *nd_stats;
2070  char *str;
2071  text *json;
2072  int mode = 2; /* default to 2D mode */
2073  bool only_parent = false; /* default to whole tree stats */
2074 
2075  /* Check if we've been asked to not use 2d mode */
2076  if ( ! PG_ARGISNULL(2) )
2077  mode = text_p_get_mode(PG_GETARG_TEXT_P(2));
2078 
2079  /* Check if we've been asked to only use stats from parent */
2080  if ( ! PG_ARGISNULL(3) )
2081  only_parent = PG_GETARG_BOOL(3);
2082 
2083  /* Retrieve the stats object */
2084  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, only_parent);
2085  if ( ! nd_stats )
2086  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2087 
2088  /* Convert to JSON */
2089  str = nd_stats_to_json(nd_stats);
2090  json = cstring_to_text(str);
2091  pfree(str);
2092  pfree(nd_stats);
2093  PG_RETURN_TEXT_P(json);
2094 }
2095 
2096 
2102 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
2103 {
2104  Oid table_oid = PG_GETARG_OID(0);
2105  text *att_text = PG_GETARG_TEXT_P(1);
2106  Datum geom_datum = PG_GETARG_DATUM(2);
2107  GBOX gbox; /* search box read from gserialized datum */
2108  float8 selectivity = 0;
2109  ND_STATS *nd_stats;
2110  int mode = 2; /* 2D mode by default */
2111 
2112  /* Check if we've been asked to not use 2d mode */
2113  if ( ! PG_ARGISNULL(3) )
2114  mode = text_p_get_mode(PG_GETARG_TEXT_P(3));
2115 
2116  /* Retrieve the stats object */
2117  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, false);
2118 
2119  if ( ! nd_stats )
2120  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2121 
2122  /* Calculate the gbox */
2123  if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
2124  elog(ERROR, "unable to calculate bounding box from geometry");
2125 
2126  POSTGIS_DEBUGF(3, " %s", gbox_to_string(&gbox));
2127 
2128  /* Do the estimation */
2129  selectivity = estimate_selectivity(&gbox, nd_stats, mode);
2130 
2131  pfree(nd_stats);
2132  PG_RETURN_FLOAT8(selectivity);
2133 }
2134 
2135 
2141 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
2142 {
2143  Oid table_oid1 = PG_GETARG_OID(0);
2144  text *att_text1 = PG_GETARG_TEXT_P(1);
2145  Oid table_oid2 = PG_GETARG_OID(2);
2146  text *att_text2 = PG_GETARG_TEXT_P(3);
2147  ND_STATS *nd_stats1, *nd_stats2;
2148  float8 selectivity = 0;
2149  int mode = 2; /* 2D mode by default */
2150 
2151 
2152  /* Retrieve the stats object */
2153  nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode, false);
2154  nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode, false);
2155 
2156  if ( ! nd_stats1 )
2157  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text_to_cstring(att_text1));
2158 
2159  if ( ! nd_stats2 )
2160  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid2), text_to_cstring(att_text2));
2161 
2162  /* Check if we've been asked to not use 2d mode */
2163  if ( ! PG_ARGISNULL(4) )
2164  {
2165  text *modetxt = PG_GETARG_TEXT_P(4);
2166  char *modestr = text_to_cstring(modetxt);
2167  if ( modestr[0] == 'N' )
2168  mode = 0;
2169  }
2170 
2171  /* Do the estimation */
2172  selectivity = estimate_join_selectivity(nd_stats1, nd_stats2);
2173 
2174  pfree(nd_stats1);
2175  pfree(nd_stats2);
2176  PG_RETURN_FLOAT8(selectivity);
2177 }
2178 
2184 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
2185 {
2186  PG_RETURN_DATUM(DirectFunctionCall5(
2188  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2189  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2190  Int32GetDatum(2) /* 2-D mode */
2191  ));
2192 }
2193 
2199 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
2200 {
2201  PG_RETURN_DATUM(DirectFunctionCall5(
2203  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2204  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2205  Int32GetDatum(0) /* N-D mode */
2206  ));
2207 }
2208 
2223 Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
2224 {
2225  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2226  /* Oid operator_oid = PG_GETARG_OID(1); */
2227  List *args = (List *) PG_GETARG_POINTER(2);
2228  /* int varRelid = PG_GETARG_INT32(3); */
2229  int mode = PG_GETARG_INT32(4);
2230 
2231  VariableStatData vardata;
2232  ND_STATS *nd_stats = NULL;
2233 
2234  Node *other;
2235  Var *self;
2236  GBOX search_box;
2237  float8 selectivity = 0;
2238 
2239  POSTGIS_DEBUG(2, "gserialized_gist_sel called");
2240 
2241  /*
2242  * TODO: This is a big one,
2243  * All this statistics code *only* tries to generate a valid
2244  * selectivity for && and &&&. That leaves all the other
2245  * geometry operators with bad stats! The selectivity
2246  * calculation should take account of the incoming operator
2247  * type and do the right thing.
2248  */
2249 
2250  /* Fail if not a binary opclause (probably shouldn't happen) */
2251  if (list_length(args) != 2)
2252  {
2253  POSTGIS_DEBUG(3, "gserialized_gist_sel: not a binary opclause");
2254  PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
2255  }
2256 
2257  /* Find the constant part */
2258  other = (Node *) linitial(args);
2259  if ( ! IsA(other, Const) )
2260  {
2261  self = (Var *)other;
2262  other = (Node *) lsecond(args);
2263  }
2264  else
2265  {
2266  self = (Var *) lsecond(args);
2267  }
2268 
2269  if ( ! IsA(other, Const) )
2270  {
2271  POSTGIS_DEBUG(3, " no constant arguments - returning a default selectivity");
2272  PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
2273  }
2274 
2275  /* Convert the constant to a BOX */
2276  if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
2277  {
2278  POSTGIS_DEBUG(3, "search box is EMPTY");
2279  PG_RETURN_FLOAT8(0.0);
2280  }
2281  POSTGIS_DEBUGF(4, " requested search box is: %s", gbox_to_string(&search_box));
2282 
2283  /* Get pg_statistic row */
2284  examine_variable(root, (Node*)self, 0, &vardata);
2285  if ( vardata.statsTuple ) {
2286  nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
2287  }
2288  ReleaseVariableStats(vardata);
2289 
2290  if ( ! nd_stats )
2291  {
2292  POSTGIS_DEBUG(3, " unable to load stats from syscache, not analyzed yet?");
2293  PG_RETURN_FLOAT8(FALLBACK_ND_SEL);
2294  }
2295 
2296  POSTGIS_DEBUGF(4, " got stats:\n%s", nd_stats_to_json(nd_stats));
2297 
2298  /* Do the estimation! */
2299  selectivity = estimate_selectivity(&search_box, nd_stats, mode);
2300  POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);
2301 
2302  pfree(nd_stats);
2303  PG_RETURN_FLOAT8(selectivity);
2304 }
2305 
2306 
2307 
2314 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
2315 {
2316  char *nsp = NULL;
2317  char *tbl = NULL;
2318  text *col = NULL;
2319  char *nsp_tbl = NULL;
2320  Oid tbl_oid, idx_oid;
2321  ND_STATS *nd_stats;
2322  GBOX *gbox = NULL;
2323  bool only_parent = false;
2324  int key_type;
2325 
2326  if ( PG_NARGS() == 4 )
2327  {
2328  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2329  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2330  col = PG_GETARG_TEXT_P(2);
2331  only_parent = PG_GETARG_BOOL(3);
2332  nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 6);
2333  sprintf(nsp_tbl, "\"%s\".\"%s\"", nsp, tbl);
2334  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2335  pfree(nsp_tbl);
2336  }
2337  else if ( PG_NARGS() == 3 )
2338  {
2339  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2340  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2341  col = PG_GETARG_TEXT_P(2);
2342  nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 6);
2343  sprintf(nsp_tbl, "\"%s\".\"%s\"", nsp, tbl);
2344  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2345  pfree(nsp_tbl);
2346  }
2347  else if ( PG_NARGS() == 2 )
2348  {
2349  tbl = text_to_cstring(PG_GETARG_TEXT_P(0));
2350  col = PG_GETARG_TEXT_P(1);
2351  nsp_tbl = palloc(strlen(tbl) + 3);
2352  sprintf(nsp_tbl, "\"%s\"", tbl);
2353  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2354  pfree(nsp_tbl);
2355  }
2356  else
2357  {
2358  elog(ERROR, "estimated_extent() called with wrong number of arguments");
2359  PG_RETURN_NULL();
2360  }
2361 
2362 #if 1
2363  /* Read the extent from the head of the spatial index, if there is one */
2364  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type);
2365  if (!idx_oid)
2366  elog(DEBUG2, "index for \"%s.%s\" does not exist", tbl, text_to_cstring(col));
2367  gbox = spatial_index_read_extent(idx_oid, key_type);
2368 #endif
2369 
2370  /* Fall back to reading the stats, if no index answer */
2371  if (!gbox)
2372  {
2373  /* Estimated extent only returns 2D bounds, so use mode 2 */
2374  nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2, only_parent);
2375 
2376  /* Error out on no stats */
2377  if ( ! nd_stats ) {
2378  elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, text_to_cstring(col));
2379  PG_RETURN_NULL();
2380  }
2381 
2382  /* Construct the box */
2383  gbox = palloc(sizeof(GBOX));
2384  FLAGS_SET_GEODETIC(gbox->flags, 0);
2385  FLAGS_SET_Z(gbox->flags, 0);
2386  FLAGS_SET_M(gbox->flags, 0);
2387  gbox->xmin = nd_stats->extent.min[0];
2388  gbox->xmax = nd_stats->extent.max[0];
2389  gbox->ymin = nd_stats->extent.min[1];
2390  gbox->ymax = nd_stats->extent.max[1];
2391  pfree(nd_stats);
2392  }
2393 
2394  PG_RETURN_POINTER(gbox);
2395 }
2396 
2404 Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
2405 {
2406  if ( PG_NARGS() == 3 )
2407  {
2408  PG_RETURN_DATUM(
2409  DirectFunctionCall3(gserialized_estimated_extent,
2410  PG_GETARG_DATUM(0),
2411  PG_GETARG_DATUM(1),
2412  PG_GETARG_DATUM(2)));
2413  }
2414  else if ( PG_NARGS() == 2 )
2415  {
2416  PG_RETURN_DATUM(
2417  DirectFunctionCall2(gserialized_estimated_extent,
2418  PG_GETARG_DATUM(0),
2419  PG_GETARG_DATUM(1)));
2420  }
2421 
2422  elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
2423  PG_RETURN_NULL();
2424 }
2425 
2426 /************************************************************************/
2427 
2428 static Oid
2429 typname_to_oid(const char *typname)
2430 {
2431  Oid typoid = TypenameGetTypid(typname);
2432  if (OidIsValid(typoid) && get_typisdefined(typoid))
2433  return typoid;
2434  else
2435  return InvalidOid;
2436 }
2437 
2438 static Oid
2439 table_get_spatial_index(Oid tbl_oid, text *col, int *key_type)
2440 {
2441  Relation tbl_rel;
2442  ListCell *lc;
2443  List *idx_list;
2444  Oid result = InvalidOid;
2445  char *colname = text_to_cstring(col);
2446 
2447  /* Lookup our spatial index key types */
2448  Oid b2d_oid = typname_to_oid(INDEX_KEY_2D);
2449  Oid gdx_oid = typname_to_oid(INDEX_KEY_ND);
2450 
2451  if (!(b2d_oid && gdx_oid))
2452  return InvalidOid;
2453 
2454  tbl_rel = RelationIdGetRelation(tbl_oid);
2455  idx_list = RelationGetIndexList(tbl_rel);
2456  RelationClose(tbl_rel);
2457 
2458  /* For each index associated with this table... */
2459  foreach(lc, idx_list)
2460  {
2461  Form_pg_class idx_form;
2462  HeapTuple idx_tup;
2463  int idx_relam;
2464  Oid idx_oid = lfirst_oid(lc);
2465 
2466  idx_tup = SearchSysCache1(RELOID, ObjectIdGetDatum(idx_oid));
2467  if (!HeapTupleIsValid(idx_tup))
2468  elog(ERROR, "%s: unable to lookup index %u in syscache", __func__, idx_oid);
2469  idx_form = (Form_pg_class) GETSTRUCT(idx_tup);
2470  idx_relam = idx_form->relam;
2471  ReleaseSysCache(idx_tup);
2472 
2473  /* Does the index use a GIST access method? */
2474  if (idx_relam == GIST_AM_OID)
2475  {
2476  Form_pg_attribute att;
2477  Oid atttypid;
2478  /* Is the index on the column name we are looking for? */
2479  HeapTuple att_tup = SearchSysCache2(ATTNAME,
2480  ObjectIdGetDatum(idx_oid),
2481  PointerGetDatum(colname));
2482  if (!HeapTupleIsValid(att_tup))
2483  continue;
2484 
2485  att = (Form_pg_attribute) GETSTRUCT(att_tup);
2486  atttypid = att->atttypid;
2487  ReleaseSysCache(att_tup);
2488 
2489  /* Is the column actually spatial? */
2490  if (b2d_oid == atttypid || gdx_oid == atttypid)
2491  {
2492  /* Save result, clean up, and break out */
2493  result = idx_oid;
2494  if (key_type)
2495  *key_type = (atttypid == b2d_oid ? STATISTIC_SLOT_2D : STATISTIC_SLOT_ND);
2496  break;
2497  }
2498  }
2499  }
2500  return result;
2501 }
2502 
2503 static GBOX *
2504 spatial_index_read_extent(Oid idx_oid, int key_type)
2505 {
2506  BOX2DF *bounds_2df = NULL;
2507  GIDX *bounds_gidx = NULL;
2508  GBOX *gbox = NULL;
2509  Relation idx_rel;
2510  Buffer buffer;
2511  Page page;
2512  OffsetNumber offset;
2513  unsigned long offset_max;
2514 
2515  if (!idx_oid)
2516  return NULL;
2517 
2518  idx_rel = index_open(idx_oid, AccessExclusiveLock);
2519  buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2520  page = (Page) BufferGetPage(buffer);
2521  offset = FirstOffsetNumber;
2522  offset_max = PageGetMaxOffsetNumber(page);
2523  while (offset <= offset_max)
2524  {
2525  ItemId iid = PageGetItemId(page, offset);
2526  IndexTuple ituple;
2527  if (!iid)
2528  {
2529  ReleaseBuffer(buffer);
2530  index_close(idx_rel, AccessExclusiveLock);
2531  return NULL;
2532  }
2533  ituple = (IndexTuple) PageGetItem(page, iid);
2534  if (!GistTupleIsInvalid(ituple))
2535  {
2536  bool isnull;
2537  Datum idx_attr = index_getattr(ituple, 1, idx_rel->rd_att, &isnull);
2538  if (!isnull)
2539  {
2540  if (key_type == STATISTIC_SLOT_2D)
2541  {
2542  BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2543  if (bounds_2df)
2544  box2df_merge(bounds_2df, b);
2545  else
2546  bounds_2df = box2df_copy(b);
2547  }
2548  else
2549  {
2550  GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2551  if (bounds_gidx)
2552  gidx_merge(&bounds_gidx, b);
2553  else
2554  bounds_gidx = gidx_copy(b);
2555  }
2556  }
2557  }
2558  offset++;
2559  }
2560 
2561  ReleaseBuffer(buffer);
2562  index_close(idx_rel, AccessExclusiveLock);
2563 
2564  if (key_type == STATISTIC_SLOT_2D && bounds_2df)
2565  {
2566  if (box2df_is_empty(bounds_2df))
2567  return NULL;
2568  gbox = gbox_new(0);
2569  box2df_to_gbox_p(bounds_2df, gbox);
2570  }
2571  else if (key_type == STATISTIC_SLOT_ND && bounds_gidx)
2572  {
2573  if (gidx_is_unknown(bounds_gidx))
2574  return NULL;
2575  gbox = gbox_new(0);
2576  gbox_from_gidx(bounds_gidx, gbox, 0);
2577  }
2578  else
2579  return NULL;
2580 
2581  return gbox;
2582 }
2583 
2584 /*
2585 CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
2586  RETURNS box2d
2587  AS '$libdir/postgis-2.5','_postgis_gserialized_index_extent'
2588  LANGUAGE 'c' STABLE STRICT;
2589 */
2590 
2592 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
2593 {
2594  GBOX *gbox = NULL;
2595  int key_type;
2596  Oid tbl_oid = PG_GETARG_DATUM(0);
2597  text *col = PG_GETARG_TEXT_P(1);
2598 
2599  Oid idx_oid = table_get_spatial_index(tbl_oid, col, &key_type);
2600  if (!idx_oid)
2601  PG_RETURN_NULL();
2602 
2603  gbox = spatial_index_read_extent(idx_oid, key_type);
2604  if (!gbox)
2605  PG_RETURN_NULL();
2606  else
2607  PG_RETURN_POINTER(gbox);
2608 }
2609 
args
Definition: ovdump.py:44
int gserialized_get_gbox_p(const GSERIALIZED *g, GBOX *box)
Read the bounding box off a serialization and calculate one if it is not already there.
Definition: g_serialized.c:640
char * text_to_cstring(const text *textptr)
#define INDEX_KEY_ND
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one...
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
GBOX * gbox_new(uint8_t flags)
Create a new gbox with the dimensionality indicated by the flags.
Definition: g_box.c:39
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
#define DEFAULT_ND_JOINSEL
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
Definition: stringbuffer.c:35
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
#define NUM_BINS
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition: g_box.c:204
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
Definition: g_box.c:399
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define FLAGS_GET_GEODETIC(flags)
Definition: liblwgeom.h:142
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string...
Definition: stringbuffer.c:160
double xmax
Definition: liblwgeom.h:295
static Oid typname_to_oid(const char *typname)
#define ND_DIMS
The maximum number of dimensions our code can handle.
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
#define FALLBACK_ND_JOINSEL
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
BOX2DF * box2df_copy(BOX2DF *b)
#define FLAGS_SET_GEODETIC(flags, value)
Definition: liblwgeom.h:148
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
#define LW_FAILURE
Definition: liblwgeom.h:78
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided...
Definition: stringbuffer.c:253
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
#define FLAGS_SET_Z(flags, value)
Definition: liblwgeom.h:145
Datum buffer(PG_FUNCTION_ARGS)
double zmax
Definition: liblwgeom.h:299
double ymin
Definition: liblwgeom.h:296
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension...
double xmin
Definition: liblwgeom.h:294
#define INDEX_KEY_2D
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array...
float4 size[ND_DIMS]
static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type)
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
int min[ND_DIMS]
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
double ymax
Definition: liblwgeom.h:297
N-dimensional box index type.
#define FLAGS_GET_Z(flags)
Macros for manipulating the 'flags' byte.
Definition: liblwgeom.h:139
static int range_quintile(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator...
uint8_t flags
Definition: liblwgeom.h:293
static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type)
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
float4 max[ND_DIMS]
void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
Definition: stringbuffer.c:134
#define STATISTIC_KIND_2D
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
Definition: stringbuffer.c:78
int max[ND_DIMS]
bool gidx_is_unknown(const GIDX *a)
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
float4 min[ND_DIMS]
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Create a printable view of the ND_STATS histogram.
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
double mmin
Definition: liblwgeom.h:300
#define SDFACTOR
double zmin
Definition: liblwgeom.h:298
#define FLAGS_GET_M(flags)
Definition: liblwgeom.h:140
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
double mmax
Definition: liblwgeom.h:301
#define STATISTIC_SLOT_ND
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
int value
Definition: genraster.py:61
N-dimensional statistics structure.
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
GIDX * gidx_copy(GIDX *b)
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
bool box2df_is_empty(const BOX2DF *a)
This library is the generic geometry handling section of PostGIS.
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
#define STATISTIC_KIND_ND
#define FLAGS_SET_M(flags, value)
Definition: liblwgeom.h:146