PostGIS  3.2.2dev-r@@SVN_REVISION@@
gserialized_estimate.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * PostGIS is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * PostGIS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18  *
19  **********************************************************************
20  *
21  * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
22  *
23  **********************************************************************/
24 
25 
26 
27 /**********************************************************************
28  THEORY OF OPERATION
29 
30 The ANALYZE command hooks to a callback (gserialized_analyze_nd) that
31 calculates (compute_gserialized_stats_mode) two histograms of occurrences of
32 features, one for the 2D domain (and the && operator) and one for the
33 ND domain (and the &&& operator).
34 
35 Queries in PostgreSQL call into the selectivity sub-system to find out
36 the relative effectiveness of different clauses in sub-setting
37 relations. Queries with constant arguments call gserialized_gist_sel,
38 queries with relations on both sides call gserialized_gist_joinsel.
39 
40 gserialized_gist_sel sums up the values in the histogram that overlap
41 the constant search box.
42 
43 gserialized_gist_joinsel sums up the product of the overlapping
44 cells in each relation's histogram.
45 
46 Depending on the operator and type, the mode of selectivity calculation
47 will be 2D or ND.
48 
49 - geometry && geometry ==> 2D
50 - geometry &&& geometry ==> ND
51 - geography && geography ==> ND
52 
53 The 2D mode is put in effect by retrieving the 2D histogram from the
54 statistics cache and then allowing the generic ND calculations to
55 go to work.
56 
57 TO DO: More testing and examination of the &&& operator and mixed
58 dimensionality cases. (2D geometry) &&& (3D column), etc.
59 
60 **********************************************************************/
61 
62 #include "postgres.h"
63 
64 #include "access/genam.h"
65 #include "access/gin.h"
66 #include "access/gist.h"
67 #include "access/gist_private.h"
68 #include "access/gistscan.h"
69 #if PG_VERSION_NUM < 130000
70 #include "access/tuptoaster.h" /* For toast_raw_datum_size */
71 #else
72 #include "access/detoast.h" /* For toast_raw_datum_size */
73 #endif
74 #include "utils/datum.h"
75 #include "access/heapam.h"
76 #include "catalog/index.h"
77 #include "catalog/pg_am.h"
78 #include "miscadmin.h"
79 #include "storage/lmgr.h"
80 #include "catalog/namespace.h"
81 #include "catalog/indexing.h"
82 #if PG_VERSION_NUM >= 100000
83 #include "utils/regproc.h"
84 #include "utils/varlena.h"
85 #endif
86 #include "utils/builtins.h"
87 #include "utils/datum.h"
88 #include "utils/snapmgr.h"
89 #include "utils/fmgroids.h"
90 #include "funcapi.h"
91 #include "access/heapam.h"
92 #include "catalog/pg_type.h"
93 #include "access/relscan.h"
94 
95 #include "executor/spi.h"
96 #include "fmgr.h"
97 #include "commands/vacuum.h"
98 #if PG_VERSION_NUM < 120000
99 #include "nodes/relation.h"
100 #else
101 #include "nodes/pathnodes.h"
102 #endif
103 #include "parser/parsetree.h"
104 #include "utils/array.h"
105 #include "utils/lsyscache.h"
106 #include "utils/builtins.h"
107 #include "utils/syscache.h"
108 #include "utils/rel.h"
109 #include "utils/selfuncs.h"
110 
111 #include "../postgis_config.h"
112 
113 #include "access/htup_details.h"
114 
115 #include "stringbuffer.h"
116 #include "liblwgeom.h"
117 #include "lwgeom_pg.h" /* For debugging macros. */
118 #include "gserialized_gist.h" /* For index common functions */
119 
120 #include <math.h>
121 #if HAVE_IEEEFP_H
122 #include <ieeefp.h>
123 #endif
124 #include <float.h>
125 #include <string.h>
126 #include <stdio.h>
127 #include <ctype.h>
128 
129 
130 /************************************************************************/
131 
132 
133 /* Prototypes */
134 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS);
135 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS);
136 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS);
137 Datum gserialized_gist_sel(PG_FUNCTION_ARGS);
138 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS);
139 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
140 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
141 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
142 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS);
143 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
144 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
145 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
146 
147 /* Local prototypes */
148 static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num);
149 static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type, int att_num);
150 
151 
152 /* Other prototypes */
153 float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
154 float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode);
155 
156 
157 /* Old Prototype */
158 Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
159 
160 /*
161  * Assign a number to the n-dimensional statistics kind
162  *
163  * tgl suggested:
164  *
165  * 1-100: reserved for assignment by the core Postgres project
166  * 100-199: reserved for assignment by PostGIS
167  * 200-9999: reserved for other globally-known stats kinds
168  * 10000-32767: reserved for private site-local use
169  */
170 #define STATISTIC_KIND_ND 102
171 #define STATISTIC_KIND_2D 103
172 
173 /*
174  * Postgres does not pin its slots and uses them as they come.
175  * We need to preserve its Correlation for brin to work
176  * 0 may be MCV
177  * 1 may be Histogram
178  * 2 may be Correlation
179  * We take 3 and 4.
180  */
181 #define STATISTIC_SLOT_ND 3
182 #define STATISTIC_SLOT_2D 4
183 
184 /*
185 * The SD factor restricts the side of the statistics histogram
186 * based on the standard deviation of the extent of the data.
187 * SDFACTOR is the number of standard deviations from the mean
188 * the histogram will extend.
189 */
190 #define SDFACTOR 3.25
191 
197 #define ND_DIMS 4
198 
205 #define MIN_DIMENSION_WIDTH 0.000000001
206 
211 #define MAX_DIMENSION_WIDTH 1.0E+20
212 
216 #define DEFAULT_ND_SEL 0.0001
217 #define DEFAULT_ND_JOINSEL 0.001
218 
222 #define FALLBACK_ND_SEL 0.2
223 #define FALLBACK_ND_JOINSEL 0.3
224 
230 typedef struct ND_BOX_T
231 {
232  float4 min[ND_DIMS];
233  float4 max[ND_DIMS];
235 
239 typedef struct ND_IBOX_T
240 {
241  int min[ND_DIMS];
242  int max[ND_DIMS];
244 
245 
252 typedef struct ND_STATS_T
253 {
254  /* Dimensionality of the histogram. */
255  float4 ndims;
256 
257  /* Size of n-d histogram in each dimension. */
258  float4 size[ND_DIMS];
259 
260  /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
262 
263  /* How many rows in the table itself? */
265 
266  /* How many rows were in the sample that built this histogram? */
268 
269  /* How many not-Null/Empty features were in the sample? */
271 
272  /* How many features actually got sampled in the histogram? */
274 
275  /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
277 
278  /* How many cells did those histogram features cover? */
279  /* Since we are pro-rating coverage, this number should */
280  /* now always equal histogram_features */
282 
283  /* Variable length # of floats for histogram */
284  float4 value[1];
286 
287 typedef struct {
288  /* Saved state from std_typanalyze() */
289  AnalyzeAttrComputeStatsFunc std_compute_stats;
292 
299 static int
300 gbox_ndims(const GBOX* gbox)
301 {
302  int dims = 2;
303  if ( FLAGS_GET_GEODETIC(gbox->flags) )
304  return 3;
305  if ( FLAGS_GET_Z(gbox->flags) )
306  dims++;
307  if ( FLAGS_GET_M(gbox->flags) )
308  dims++;
309  return dims;
310 }
311 
317 static int
318 text_p_get_mode(const text *txt)
319 {
320  int mode = 2;
321  char *modestr;
322  if (VARSIZE_ANY_EXHDR(txt) <= 0)
323  return mode;
324  modestr = (char*)VARDATA(txt);
325  if ( modestr[0] == 'N' )
326  mode = 0;
327  return mode;
328 }
329 
330 
/**
 * qsort() comparator for ints, ascending order.
 *
 * @return -1, 0 or 1 when *a is less than, equal to or greater than *b
 */
static int
cmp_int (const void *a, const void *b)
{
	const int lhs = *(const int *)a;
	const int rhs = *(const int *)b;

	/* Branch-free three-way compare; avoids the overflow of (lhs - rhs). */
	return (lhs > rhs) - (lhs < rhs);
}
347 
352 static int
353 range_quintile(int *vals, int nvals)
354 {
355  qsort(vals, nvals, sizeof(int), cmp_int);
356  return vals[4*nvals/5] - vals[nvals/5];
357 }
358 
/**
 * Sum an array of doubles.
 *
 * The running total is kept in a double so that no precision is lost
 * relative to the input and return types.
 *
 * @param vals  values to add up
 * @param nvals number of entries in vals
 * @return the sum, or 0 when nvals is not positive
 */
static double
total_double(const double *vals, int nvals)
{
	int i;
	double total = 0;

	for ( i = 0; i < nvals; i++ )
		total += vals[i];

	return total;
}
373 
374 #if POSTGIS_DEBUG_LEVEL >= 3
375 
/**
 * Sum an array of ints.
 *
 * @param vals  values to add up
 * @param nvals number of entries in vals
 * @return the sum, or 0 when nvals is not positive
 */
static int
total_int(const int *vals, int nvals)
{
	int sum = 0;
	int pos = 0;

	while ( pos < nvals )
	{
		sum += vals[pos];
		pos++;
	}

	return sum;
}
390 
/**
 * Arithmetic mean of an int array, computed as the integer total
 * divided by the count (both promoted to double).
 */
static double
avg(const int *vals, int nvals)
{
	const double sum = (double)total_int(vals, nvals);
	const double count = (double)nvals;
	return sum / count;
}
400 
/**
 * Population standard deviation of an int array: the square root of the
 * mean squared distance from the average.
 */
static double
stddev(const int *vals, int nvals)
{
	const double mean = avg(vals, nvals);
	double sum_sq = 0;
	int k;

	/* Accumulate squared deviations from the mean */
	for ( k = 0; k < nvals; k++ )
	{
		const double delta = mean - (double)(vals[k]);
		sum_sq += delta * delta;
	}

	return sqrt(sum_sq / nvals);
}
419 #endif /* POSTGIS_DEBUG_LEVEL >= 3 */
420 
425 static int
426 nd_stats_value_index(const ND_STATS *stats, int *indexes)
427 {
428  int d;
429  int accum = 1, vdx = 0;
430 
431  /* Calculate the index into the 1-d values array that the (i,j,k,l) */
432  /* n-d histogram coordinate implies. */
433  /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
434  for ( d = 0; d < (int)(stats->ndims); d++ )
435  {
436  int size = (int)(stats->size[d]);
437  if ( indexes[d] < 0 || indexes[d] >= size )
438  {
439  POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
440  return -1;
441  }
442  vdx += indexes[d] * accum;
443  accum *= size;
444  }
445  return vdx;
446 }
447 
451 static char*
452 nd_box_to_json(const ND_BOX *nd_box, int ndims)
453 {
454  char *rv;
455  int i;
457 
458  stringbuffer_append(sb, "{\"min\":[");
459  for ( i = 0; i < ndims; i++ )
460  {
461  if ( i ) stringbuffer_append(sb, ",");
462  stringbuffer_aprintf(sb, "%.6g", nd_box->min[i]);
463  }
464  stringbuffer_append(sb, "],\"max\":[");
465  for ( i = 0; i < ndims; i++ )
466  {
467  if ( i ) stringbuffer_append(sb, ",");
468  stringbuffer_aprintf(sb, "%.6g", nd_box->max[i]);
469  }
470  stringbuffer_append(sb, "]}");
471 
474  return rv;
475 }
476 
477 
482 static char*
483 nd_stats_to_json(const ND_STATS *nd_stats)
484 {
485  char *json_extent, *str;
486  int d;
488  int ndims = (int)roundf(nd_stats->ndims);
489 
490  stringbuffer_append(sb, "{");
491  stringbuffer_aprintf(sb, "\"ndims\":%d,", ndims);
492 
493  /* Size */
494  stringbuffer_append(sb, "\"size\":[");
495  for ( d = 0; d < ndims; d++ )
496  {
497  if ( d ) stringbuffer_append(sb, ",");
498  stringbuffer_aprintf(sb, "%d", (int)roundf(nd_stats->size[d]));
499  }
500  stringbuffer_append(sb, "],");
501 
502  /* Extent */
503  json_extent = nd_box_to_json(&(nd_stats->extent), ndims);
504  stringbuffer_aprintf(sb, "\"extent\":%s,", json_extent);
505  pfree(json_extent);
506 
507  stringbuffer_aprintf(sb, "\"table_features\":%d,", (int)roundf(nd_stats->table_features));
508  stringbuffer_aprintf(sb, "\"sample_features\":%d,", (int)roundf(nd_stats->sample_features));
509  stringbuffer_aprintf(sb, "\"not_null_features\":%d,", (int)roundf(nd_stats->not_null_features));
510  stringbuffer_aprintf(sb, "\"histogram_features\":%d,", (int)roundf(nd_stats->histogram_features));
511  stringbuffer_aprintf(sb, "\"histogram_cells\":%d,", (int)roundf(nd_stats->histogram_cells));
512  stringbuffer_aprintf(sb, "\"cells_covered\":%d", (int)roundf(nd_stats->cells_covered));
513  stringbuffer_append(sb, "}");
514 
517  return str;
518 }
519 
520 
526 // static char*
527 // nd_stats_to_grid(const ND_STATS *stats)
528 // {
529 // char *rv;
530 // int j, k;
531 // int sizex = (int)roundf(stats->size[0]);
532 // int sizey = (int)roundf(stats->size[1]);
533 // stringbuffer_t *sb = stringbuffer_create();
534 //
535 // for ( k = 0; k < sizey; k++ )
536 // {
537 // for ( j = 0; j < sizex; j++ )
538 // {
539 // stringbuffer_aprintf(sb, "%3d ", (int)roundf(stats->value[j + k*sizex]));
540 // }
541 // stringbuffer_append(sb, "\n");
542 // }
543 //
544 // rv = stringbuffer_getstringcopy(sb);
545 // stringbuffer_destroy(sb);
546 // return rv;
547 // }
548 
549 
551 static int
552 nd_box_merge(const ND_BOX *source, ND_BOX *target)
553 {
554  int d;
555  for ( d = 0; d < ND_DIMS; d++ )
556  {
557  target->min[d] = Min(target->min[d], source->min[d]);
558  target->max[d] = Max(target->max[d], source->max[d]);
559  }
560  return true;
561 }
562 
564 static int
566 {
567  memset(a, 0, sizeof(ND_BOX));
568  return true;
569 }
570 
576 static int
578 {
579  int d;
580  for ( d = 0; d < ND_DIMS; d++ )
581  {
582  a->min[d] = FLT_MAX;
583  a->max[d] = -1 * FLT_MAX;
584  }
585  return true;
586 }
587 
589 static void
590 nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
591 {
592  int d = 0;
593  POSTGIS_DEBUGF(3, " %s", gbox_to_string(gbox));
594 
595  nd_box_init(nd_box);
596  nd_box->min[d] = gbox->xmin;
597  nd_box->max[d] = gbox->xmax;
598  d++;
599  nd_box->min[d] = gbox->ymin;
600  nd_box->max[d] = gbox->ymax;
601  d++;
602  if ( FLAGS_GET_GEODETIC(gbox->flags) )
603  {
604  nd_box->min[d] = gbox->zmin;
605  nd_box->max[d] = gbox->zmax;
606  return;
607  }
608  if ( FLAGS_GET_Z(gbox->flags) )
609  {
610  nd_box->min[d] = gbox->zmin;
611  nd_box->max[d] = gbox->zmax;
612  d++;
613  }
614  if ( FLAGS_GET_M(gbox->flags) )
615  {
616  nd_box->min[d] = gbox->mmin;
617  nd_box->max[d] = gbox->mmax;
618  d++;
619  }
620  return;
621 }
622 
626 static int
627 nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
628 {
629  int d;
630  for ( d = 0; d < ndims; d++ )
631  {
632  if ( (a->min[d] > b->max[d]) || (a->max[d] < b->min[d]) )
633  return false;
634  }
635  return true;
636 }
637 
641 static int
642 nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
643 {
644  int d;
645  for ( d = 0; d < ndims; d++ )
646  {
647  if ( ! ((a->min[d] < b->min[d]) && (a->max[d] > b->max[d])) )
648  return false;
649  }
650  return true;
651 }
652 
657 static int
658 nd_box_expand(ND_BOX *nd_box, double expansion_factor)
659 {
660  int d;
661  double size;
662  for ( d = 0; d < ND_DIMS; d++ )
663  {
664  size = nd_box->max[d] - nd_box->min[d];
665  /* Avoid expanding boxes that are either too wide or too narrow*/
666  if (size < MIN_DIMENSION_WIDTH || size > MAX_DIMENSION_WIDTH)
667  continue;
668  nd_box->min[d] -= size * expansion_factor / 2;
669  nd_box->max[d] += size * expansion_factor / 2;
670  }
671  return true;
672 }
673 
678 static inline int
679 nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
680 {
681  int d;
682 
683  POSTGIS_DEBUGF(4, " nd_box: %s", nd_box_to_json(nd_box, nd_stats->ndims));
684 
685  /* Initialize ibox */
686  memset(nd_ibox, 0, sizeof(ND_IBOX));
687 
688  /* In each dimension... */
689  for ( d = 0; d < nd_stats->ndims; d++ )
690  {
691  double smin = nd_stats->extent.min[d];
692  double smax = nd_stats->extent.max[d];
693  double width = smax - smin;
694 
695  if (width < MIN_DIMENSION_WIDTH)
696  {
697  nd_ibox->min[d] = nd_ibox->max[d] = nd_stats->extent.min[d];
698  }
699  else
700  {
701  int size = (int)roundf(nd_stats->size[d]);
702 
703  /* ... find cells the box overlaps with in this dimension */
704  nd_ibox->min[d] = floor(size * (nd_box->min[d] - smin) / width);
705  nd_ibox->max[d] = floor(size * (nd_box->max[d] - smin) / width);
706 
707  POSTGIS_DEBUGF(5, " stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
708  POSTGIS_DEBUGF(5, " overlap: dim %d: (%d, %d)", d, nd_ibox->min[d], nd_ibox->max[d]);
709 
710  /* Push any out-of range values into range */
711  nd_ibox->min[d] = Max(nd_ibox->min[d], 0);
712  nd_ibox->max[d] = Min(nd_ibox->max[d], size - 1);
713  }
714  }
715  return true;
716 }
717 
721 static inline double
722 nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
723 {
724  int d;
725  bool covered = true;
726  double ivol = 1.0;
727  double vol2 = 1.0;
728  double vol1 = 1.0;
729 
730  for ( d = 0 ; d < ndims; d++ )
731  {
732  if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
733  return 0.0; /* Disjoint */
734 
735  if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
736  covered = false;
737  }
738 
739  if ( covered )
740  return 1.0;
741 
742  for ( d = 0; d < ndims; d++ )
743  {
744  double width1 = b1->max[d] - b1->min[d];
745  double width2 = b2->max[d] - b2->min[d];
746  double imin, imax, iwidth;
747 
748  vol1 = vol1 * width1;
749  vol2 = vol2 * width2;
750 
751  imin = Max(b1->min[d], b2->min[d]);
752  imax = Min(b1->max[d], b2->max[d]);
753  iwidth = imax - imin;
754  iwidth = Max(0.0, iwidth);
755 
756  ivol = ivol * iwidth;
757  }
758 
759  if ( vol2 == 0.0 )
760  return vol2;
761 
762  return ivol / vol2;
763 }
764 
765 /* How many bins shall we use in figuring out the distribution? */
766 #define NUM_BINS 50
767 
783 static int
784 nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
785 {
786  int d, i, k, range;
787  int counts[NUM_BINS];
788  double smin, smax; /* Spatial min, spatial max */
789  double swidth; /* Spatial width of dimension */
790 #if POSTGIS_DEBUG_LEVEL >= 3
791  double average, sdev, sdev_ratio;
792 #endif
793  int bmin, bmax; /* Bin min, bin max */
794  const ND_BOX *ndb;
795 
796  /* For each dimension... */
797  for ( d = 0; d < ndims; d++ )
798  {
799  /* Initialize counts for this dimension */
800  memset(counts, 0, sizeof(counts));
801 
802  smin = extent->min[d];
803  smax = extent->max[d];
804  swidth = smax - smin;
805 
806  /* Don't try and calculate distribution of overly narrow */
807  /* or overly wide dimensions. Here we're being pretty geographical, */
808  /* expecting "normal" planar or geographic coordinates. */
809  /* Otherwise we have to "handle" +/- Inf bounded features and */
810  /* the assumptions needed for that are as bad as this hack. */
811  if ( swidth < MIN_DIMENSION_WIDTH || swidth > MAX_DIMENSION_WIDTH )
812  {
813  distribution[d] = 0;
814  continue;
815  }
816 
817  /* Sum up the overlaps of each feature with the dimensional bins */
818  for ( i = 0; i < num_boxes; i++ )
819  {
820  double minoffset, maxoffset;
821 
822  /* Skip null entries */
823  ndb = nd_boxes[i];
824  if ( ! ndb ) continue;
825 
826  /* Where does box fall relative to the working range */
827  minoffset = ndb->min[d] - smin;
828  maxoffset = ndb->max[d] - smin;
829 
830  /* Skip boxes that our outside our working range */
831  if ( minoffset < 0 || minoffset > swidth ||
832  maxoffset < 0 || maxoffset > swidth )
833  {
834  continue;
835  }
836 
837  /* What bins does this range correspond to? */
838  bmin = floor(NUM_BINS * minoffset / swidth);
839  bmax = floor(NUM_BINS * maxoffset / swidth);
840 
841  /* Should only happen when maxoffset==swidth */
842  if (bmax >= NUM_BINS)
843  bmax = NUM_BINS-1;
844 
845  POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
846 
847  /* Increment the counts in all the bins this feature overlaps */
848  for ( k = bmin; k <= bmax; k++ )
849  {
850  counts[k] += 1;
851  }
852 
853  }
854 
855  /* How dispersed is the distribution of features across bins? */
856  range = range_quintile(counts, NUM_BINS);
857 
858 #if POSTGIS_DEBUG_LEVEL >= 3
859  average = avg(counts, NUM_BINS);
860  sdev = stddev(counts, NUM_BINS);
861  sdev_ratio = sdev/average;
862 
863  POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
864  POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
865  POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
866  POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
867 #endif
868 
869  distribution[d] = range;
870  }
871 
872  return true;
873 }
874 
880 static inline int
881 nd_increment(ND_IBOX *ibox, int ndims, int *counter)
882 {
883  int d = 0;
884 
885  while ( d < ndims )
886  {
887  if ( counter[d] < ibox->max[d] )
888  {
889  counter[d] += 1;
890  break;
891  }
892  counter[d] = ibox->min[d];
893  d++;
894  }
895  /* That's it, cannot increment any more! */
896  if ( d == ndims )
897  return false;
898 
899  /* Increment complete! */
900  return true;
901 }
902 
903 static ND_STATS*
904 pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
905 {
906  int stats_kind = STATISTIC_KIND_ND;
907  int rv;
908  ND_STATS *nd_stats;
909 
910  /* If we're in 2D mode, set the kind appropriately */
911  if ( mode == 2 ) stats_kind = STATISTIC_KIND_2D;
912 
913  /* Then read the geom status histogram from that */
914 
915 #if POSTGIS_PGSQL_VERSION < 100
916  {
917  float4 *floatptr;
918  int nvalues;
919 
920  rv = get_attstatsslot(stats_tuple, 0, 0, stats_kind, InvalidOid,
921  NULL, NULL, NULL, &floatptr, &nvalues);
922 
923  if ( ! rv ) {
924  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
925  return NULL;
926  }
927 
928  /* Clone the stats here so we can release the attstatsslot immediately */
929  nd_stats = palloc(sizeof(float) * nvalues);
930  memcpy(nd_stats, floatptr, sizeof(float) * nvalues);
931 
932  /* Clean up */
933  free_attstatsslot(0, NULL, 0, floatptr, nvalues);
934  }
935 #else /* PostgreSQL 10 or higher */
936  {
937  AttStatsSlot sslot;
938  rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
939  ATTSTATSSLOT_NUMBERS);
940  if ( ! rv ) {
941  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
942  return NULL;
943  }
944 
945  /* Clone the stats here so we can release the attstatsslot immediately */
946  nd_stats = palloc(sizeof(float4) * sslot.nnumbers);
947  memcpy(nd_stats, sslot.numbers, sizeof(float4) * sslot.nnumbers);
948 
949  free_attstatsslot(&sslot);
950  }
951 #endif
952 
953  return nd_stats;
954 }
955 
960 static ND_STATS*
961 pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
962 {
963  HeapTuple stats_tuple = NULL;
964  ND_STATS *nd_stats;
965 
966  /* First pull the stats tuple for the whole tree */
967  if ( ! only_parent )
968  {
969  POSTGIS_DEBUGF(2, "searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
970  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(true));
971  if ( stats_tuple )
972  POSTGIS_DEBUGF(2, "found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
973  }
974  /* Fall-back to main table stats only, if not found for whole tree or explicitly ignored */
975  if ( only_parent || ! stats_tuple )
976  {
977  POSTGIS_DEBUGF(2, "searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
978  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(false));
979  if ( stats_tuple )
980  POSTGIS_DEBUGF(2, "found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
981  }
982  if ( ! stats_tuple )
983  {
984  POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
985  return NULL;
986  }
987 
988  nd_stats = pg_nd_stats_from_tuple(stats_tuple, mode);
989  ReleaseSysCache(stats_tuple);
990  if ( ! nd_stats )
991  {
992  POSTGIS_DEBUGF(2,
993  "histogram for attribute %d of table \"%s\" does not exist?",
994  att_num, get_rel_name(table_oid));
995  }
996 
997  return nd_stats;
998 }
999 
1008 static ND_STATS*
1009 pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
1010 {
1011  const char *att_name = text_to_cstring(att_text);
1012  AttrNumber att_num;
1013 
1014  /* We know the name? Look up the num */
1015  if ( att_text )
1016  {
1017  /* Get the attribute number */
1018  att_num = get_attnum(table_oid, att_name);
1019  if ( ! att_num ) {
1020  elog(ERROR, "attribute \"%s\" does not exist", att_name);
1021  return NULL;
1022  }
1023  }
1024  else
1025  {
1026  elog(ERROR, "attribute name is null");
1027  return NULL;
1028  }
1029 
1030  return pg_get_nd_stats(table_oid, att_num, mode, only_parent);
1031 }
1032 
1046 static float8
1048 {
1049  int ncells1, ncells2;
1050  int ndims1, ndims2, ndims;
1051  double ntuples_max;
1052  double ntuples_not_null1, ntuples_not_null2;
1053 
1054  ND_BOX extent1, extent2;
1055  ND_IBOX ibox1, ibox2;
1056  int at1[ND_DIMS];
1057  int at2[ND_DIMS];
1058  double min1[ND_DIMS];
1059  double width1[ND_DIMS];
1060  double cellsize1[ND_DIMS];
1061  int size2[ND_DIMS];
1062  double min2[ND_DIMS];
1063  double width2[ND_DIMS];
1064  double cellsize2[ND_DIMS];
1065  int size1[ND_DIMS];
1066  int d;
1067  double val = 0;
1068  float8 selectivity;
1069 
1070  /* Drop out on null inputs */
1071  if ( ! ( s1 && s2 ) )
1072  {
1073  elog(NOTICE, " estimate_join_selectivity called with null inputs");
1074  return FALLBACK_ND_SEL;
1075  }
1076 
1077  /* We need to know how many cells each side has... */
1078  ncells1 = (int)roundf(s1->histogram_cells);
1079  ncells2 = (int)roundf(s2->histogram_cells);
1080 
1081  /* ...so that we can drive the summation loop with the smaller histogram. */
1082  if ( ncells1 > ncells2 )
1083  {
1084  const ND_STATS *stats_tmp = s1;
1085  s1 = s2;
1086  s2 = stats_tmp;
1087  }
1088 
1089  POSTGIS_DEBUGF(3, "s1: %s", nd_stats_to_json(s1));
1090  POSTGIS_DEBUGF(3, "s2: %s", nd_stats_to_json(s2));
1091 
1092  /* Re-read that info after the swap */
1093  ncells1 = (int)roundf(s1->histogram_cells);
1094  ncells2 = (int)roundf(s2->histogram_cells);
1095 
1096  /* Q: What's the largest possible join size these relations can create? */
1097  /* A: The product of the # of non-null rows in each relation. */
1098  ntuples_not_null1 = s1->table_features * (s1->not_null_features / s1->sample_features);
1099  ntuples_not_null2 = s2->table_features * (s2->not_null_features / s2->sample_features);
1100  ntuples_max = ntuples_not_null1 * ntuples_not_null2;
1101 
1102  /* Get the ndims as ints */
1103  ndims1 = (int)roundf(s1->ndims);
1104  ndims2 = (int)roundf(s2->ndims);
1105  ndims = Max(ndims1, ndims2);
1106 
1107  /* Get the extents */
1108  extent1 = s1->extent;
1109  extent2 = s2->extent;
1110 
1111  /* If relation stats do not intersect, join is very very selective. */
1112  if ( ! nd_box_intersects(&extent1, &extent2, ndims) )
1113  {
1114  POSTGIS_DEBUG(3, "relation stats do not intersect, returning 0");
1115  PG_RETURN_FLOAT8(0.0);
1116  }
1117 
1118  /*
1119  * First find the index range of the part of the smaller
1120  * histogram that overlaps the larger one.
1121  */
1122  if ( ! nd_box_overlap(s1, &extent2, &ibox1) )
1123  {
1124  POSTGIS_DEBUG(3, "could not calculate overlap of relations");
1125  PG_RETURN_FLOAT8(FALLBACK_ND_JOINSEL);
1126  }
1127 
1128  /* Initialize counters / constants on s1 */
1129  for ( d = 0; d < ndims1; d++ )
1130  {
1131  at1[d] = ibox1.min[d];
1132  min1[d] = s1->extent.min[d];
1133  width1[d] = s1->extent.max[d] - s1->extent.min[d];
1134  size1[d] = (int)roundf(s1->size[d]);
1135  cellsize1[d] = width1[d] / size1[d];
1136  }
1137 
1138  /* Initialize counters / constants on s2 */
1139  for ( d = 0; d < ndims2; d++ )
1140  {
1141  min2[d] = s2->extent.min[d];
1142  width2[d] = s2->extent.max[d] - s2->extent.min[d];
1143  size2[d] = (int)roundf(s2->size[d]);
1144  cellsize2[d] = width2[d] / size2[d];
1145  }
1146 
1147  /* For each affected cell of s1... */
1148  do
1149  {
1150  double val1;
1151  /* Construct the bounds of this cell */
1152  ND_BOX nd_cell1;
1153  nd_box_init(&nd_cell1);
1154  for ( d = 0; d < ndims1; d++ )
1155  {
1156  nd_cell1.min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1157  nd_cell1.max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1158  }
1159 
1160  /* Find the cells of s2 that cell1 overlaps.. */
1161  nd_box_overlap(s2, &nd_cell1, &ibox2);
1162 
1163  /* Initialize counter */
1164  for ( d = 0; d < ndims2; d++ )
1165  {
1166  at2[d] = ibox2.min[d];
1167  }
1168 
1169  POSTGIS_DEBUGF(3, "at1 %d,%d %s", at1[0], at1[1], nd_box_to_json(&nd_cell1, ndims1));
1170 
1171  /* Get the value at this cell */
1172  val1 = s1->value[nd_stats_value_index(s1, at1)];
1173 
1174  /* For each overlapped cell of s2... */
1175  do
1176  {
1177  double ratio2;
1178  double val2;
1179 
1180  /* Construct the bounds of this cell */
1181  ND_BOX nd_cell2;
1182  nd_box_init(&nd_cell2);
1183  for ( d = 0; d < ndims2; d++ )
1184  {
1185  nd_cell2.min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1186  nd_cell2.max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1187  }
1188 
1189  POSTGIS_DEBUGF(3, " at2 %d,%d %s", at2[0], at2[1], nd_box_to_json(&nd_cell2, ndims2));
1190 
1191  /* Calculate overlap ratio of the cells */
1192  ratio2 = nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1193 
1194  /* Multiply the cell counts, scaled by overlap ratio */
1195  val2 = s2->value[nd_stats_value_index(s2, at2)];
1196  POSTGIS_DEBUGF(3, " val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1197  val += val1 * (val2 * ratio2);
1198  }
1199  while ( nd_increment(&ibox2, ndims2, at2) );
1200 
1201  }
1202  while( nd_increment(&ibox1, ndims1, at1) );
1203 
1204  POSTGIS_DEBUGF(3, "val of histogram = %g", val);
1205 
1206  /*
1207  * In order to compare our total cell count "val" to the
1208  * ntuples_max, we need to scale val up to reflect a full
1209  * table estimate. So, multiply by ratio of table size to
1210  * sample size.
1211  */
1212  val *= (s1->table_features / s1->sample_features);
1213  val *= (s2->table_features / s2->sample_features);
1214 
1215  POSTGIS_DEBUGF(3, "val scaled to full table size = %g", val);
1216 
1217  /*
1218  * Because the cell counts are over-determined due to
1219  * double counting of features that overlap multiple cells
1220  * (see the compute_gserialized_stats routine)
1221  * we also have to scale our cell count "val" *down*
1222  * to adjust for the double counting.
1223  */
1224 // val /= (s1->cells_covered / s1->histogram_features);
1225 // val /= (s2->cells_covered / s2->histogram_features);
1226 
1227  /*
1228  * Finally, the selectivity is the estimated number of
1229  * rows to be returned divided by the maximum possible
1230  * number of rows that can be returned.
1231  */
1232  selectivity = val / ntuples_max;
1233 
1234  /* Guard against over-estimates and crazy numbers :) */
1235  if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1236  {
1237  selectivity = DEFAULT_ND_JOINSEL;
1238  }
1239  else if ( selectivity > 1.0 )
1240  {
1241  selectivity = 1.0;
1242  }
1243 
1244  return selectivity;
1245 }
1246 
1252 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
1253 {
1254  PG_RETURN_DATUM(DirectFunctionCall5(
1256  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1257  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1258  Int32GetDatum(0) /* ND mode */
1259  ));
1260 }
1261 
1267 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
1268 {
1269  PG_RETURN_DATUM(DirectFunctionCall5(
1271  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1272  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1273  Int32GetDatum(2) /* 2D mode */
1274  ));
1275 }
1276 
1277 double
1278 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
1279 {
1280  float8 selectivity;
1281  Oid relid1, relid2;
1282  ND_STATS *stats1, *stats2;
1283  Node *arg1 = (Node*) linitial(args);
1284  Node *arg2 = (Node*) lsecond(args);
1285  Var *var1 = (Var*) arg1;
1286  Var *var2 = (Var*) arg2;
1287 
1288  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1289 
1290  /* We only do column joins right now, no functional joins */
1291  /* TODO: handle g1 && ST_Expand(g2) */
1292  if (!IsA(arg1, Var) || !IsA(arg2, Var))
1293  {
1294  POSTGIS_DEBUGF(1, "%s called with arguments that are not column references", __func__);
1295  return DEFAULT_ND_JOINSEL;
1296  }
1297 
1298  /* What are the Oids of our tables/relations? */
1299  relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
1300  relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
1301 
1302  /* Pull the stats from the stats system. */
1303  stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
1304  stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
1305 
1306  /* If we can't get stats, we have to stop here! */
1307  if (!stats1)
1308  {
1309  POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1310  return DEFAULT_ND_JOINSEL;
1311  }
1312  else if (!stats2)
1313  {
1314  POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1315  return DEFAULT_ND_JOINSEL;
1316  }
1317 
1318  selectivity = estimate_join_selectivity(stats1, stats2);
1319  POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
1320  pfree(stats1);
1321  pfree(stats2);
1322  return selectivity;
1323 }
1324 
1334 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
1335 {
1336  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1337  /* Oid operator = PG_GETARG_OID(1); */
1338  List *args = (List *) PG_GETARG_POINTER(2);
1339  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1340  int mode = PG_GETARG_INT32(4);
1341 
1342  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1343 
1344  /* Check length of args and punt on > 2 */
1345  if (list_length(args) != 2)
1346  {
1347  POSTGIS_DEBUGF(2, "%s: got nargs == %d", __func__, list_length(args));
1348  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1349  }
1350 
1351  /* Only respond to an inner join/unknown context join */
1352  if (jointype != JOIN_INNER)
1353  {
1354  POSTGIS_DEBUGF(1, "%s: jointype %d not supported", __func__, jointype);
1355  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1356  }
1357 
1358  PG_RETURN_FLOAT8(gserialized_joinsel_internal(root, args, jointype, mode));
1359 }
1360 
1379 static void
1380 compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1381  int sample_rows, double total_rows, int mode)
1382 {
1383  MemoryContext old_context;
1384  int d, i; /* Counters */
1385  int notnull_cnt = 0; /* # not null rows in the sample */
1386  int null_cnt = 0; /* # null rows in the sample */
1387  int histogram_features = 0; /* # rows that actually got counted in the histogram */
1388 
1389  ND_STATS *nd_stats; /* Our histogram */
1390  size_t nd_stats_size; /* Size to allocate */
1391 
1392  double total_width = 0; /* # of bytes used by sample */
1393  double total_sample_volume = 0; /* Area/volume coverage of the sample */
1394  double total_cell_count = 0; /* # of cells in histogram affected by sample */
1395 
1396  ND_BOX sum; /* Sum of extents of sample boxes */
1397  ND_BOX avg; /* Avg of extents of sample boxes */
1398  ND_BOX stddev; /* StdDev of extents of sample boxes */
1399 
1400  const ND_BOX **sample_boxes; /* ND_BOXes for each of the sample features */
1401  ND_BOX sample_extent; /* Extent of the raw sample */
1402  int histo_size[ND_DIMS]; /* histogram nrows, ncols, etc */
1403  ND_BOX histo_extent; /* Spatial extent of the histogram */
1404  ND_BOX histo_extent_new; /* Temporary variable */
1405  int histo_cells_target; /* Number of cells we will shoot for, given the stats target */
1406  int histo_cells; /* Number of cells in the histogram */
1407  int histo_cells_new = 1; /* Temporary variable */
1408 
1409  int ndims = 2; /* Dimensionality of the sample */
1410  int histo_ndims = 0; /* Dimensionality of the histogram */
1411  double sample_distribution[ND_DIMS]; /* How homogeneous is distribution of sample in each axis? */
1412  double total_distribution; /* Total of sample_distribution */
1413 
1414  int stats_slot; /* What slot is this data going into? (2D vs ND) */
1415  int stats_kind; /* And this is what? (2D vs ND) */
1416 
1417  /* Initialize sum and stddev */
1418  nd_box_init(&sum);
1419  nd_box_init(&stddev);
1420  nd_box_init(&avg);
1421  nd_box_init(&histo_extent);
1422  nd_box_init(&histo_extent_new);
1423 
1424  /*
1425  * This is where gserialized_analyze_nd
1426  * should put its' custom parameters.
1427  */
1428  /* void *mystats = stats->extra_data; */
1429 
1430  POSTGIS_DEBUG(2, "compute_gserialized_stats called");
1431  POSTGIS_DEBUGF(3, " # sample_rows: %d", sample_rows);
1432  POSTGIS_DEBUGF(3, " estimate of total_rows: %.6g", total_rows);
1433 
1434  /*
1435  * We might need less space, but don't think
1436  * its worth saving...
1437  */
1438  sample_boxes = palloc(sizeof(ND_BOX*) * sample_rows);
1439 
1440  /*
1441  * First scan:
1442  * o read boxes
1443  * o find dimensionality of the sample
1444  * o find extent of the sample
1445  * o count null-infinite/not-null values
1446  * o compute total_width
1447  * o compute total features's box area (for avgFeatureArea)
1448  * o sum features box coordinates (for standard deviation)
1449  */
1450  for ( i = 0; i < sample_rows; i++ )
1451  {
1452  Datum datum;
1453  GBOX gbox = {0};
1454  ND_BOX *nd_box;
1455  bool is_null;
1456 
1457  datum = fetchfunc(stats, i, &is_null);
1458 
1459  /* Skip all NULLs. */
1460  if ( is_null )
1461  {
1462  POSTGIS_DEBUGF(4, " skipped null geometry %d", i);
1463  null_cnt++;
1464  continue;
1465  }
1466 
1467  /* Read the bounds from the gserialized. */
1468  if (LW_FAILURE == gserialized_datum_get_gbox_p(datum, &gbox))
1469  {
1470  /* Skip empties too. */
1471  POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
1472  continue;
1473  }
1474 
1475  /* If we're in 2D mode, zero out the higher dimensions for "safety" */
1476  if ( mode == 2 )
1477  gbox.zmin = gbox.zmax = gbox.mmin = gbox.mmax = 0.0;
1478 
1479  /* Check bounds for validity (finite and not NaN) */
1480  if ( ! gbox_is_valid(&gbox) )
1481  {
1482  POSTGIS_DEBUGF(3, " skipped infinite/nan geometry %d", i);
1483  continue;
1484  }
1485 
1486  /*
1487  * In N-D mode, set the ndims to the maximum dimensionality found
1488  * in the sample. Otherwise, leave at ndims == 2.
1489  */
1490  if ( mode != 2 )
1491  ndims = Max(gbox_ndims(&gbox), ndims);
1492 
1493  /* Convert gbox to n-d box */
1494  nd_box = palloc(sizeof(ND_BOX));
1495  nd_box_from_gbox(&gbox, nd_box);
1496 
1497  /* Cache n-d bounding box */
1498  sample_boxes[notnull_cnt] = nd_box;
1499 
1500  /* Initialize sample extent before merging first entry */
1501  if ( ! notnull_cnt )
1502  nd_box_init_bounds(&sample_extent);
1503 
1504  /* Add current sample to overall sample extent */
1505  nd_box_merge(nd_box, &sample_extent);
1506 
1507  /* How many bytes does this sample use? */
1508  total_width += toast_raw_datum_size(datum);
1509 
1510  /* Add bounds coordinates to sums for stddev calculation */
1511  for ( d = 0; d < ndims; d++ )
1512  {
1513  sum.min[d] += nd_box->min[d];
1514  sum.max[d] += nd_box->max[d];
1515  }
1516 
1517  /* Increment our "good feature" count */
1518  notnull_cnt++;
1519 
1520  /* Give backend a chance of interrupting us */
1521  vacuum_delay_point();
1522  }
1523 
1524  /*
1525  * We'll build a histogram having stats->attr->attstattarget cells
1526  * on each side, within reason... we'll use ndims*10000 as the
1527  * maximum number of cells.
1528  * Also, if we're sampling a relatively small table, we'll try to ensure that
1529  * we have an average of 5 features for each cell so the histogram isn't
1530  * so sparse.
1531  */
1532  histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
1533  histo_cells_target = Min(histo_cells_target, ndims * 10000);
1534  histo_cells_target = Min(histo_cells_target, (int)(total_rows/5));
1535  POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
1536  POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
1537 
1538  /* If there's no useful features, we can't work out stats */
1539  if ( ! notnull_cnt )
1540  {
1541 #if POSTGIS_DEBUG_LEVEL > 0
1542  Oid relation_oid = stats->attr->attrelid;
1543  char *relation_name = get_rel_name(relation_oid);
1544  char *namespace = get_namespace_name(get_rel_namespace(relation_oid));
1545  elog(DEBUG1,
1546  "PostGIS: Unable to compute statistics for \"%s.%s.%s\": No non-null/empty features",
1547  namespace ? namespace : "(NULL)",
1548  relation_name ? relation_name : "(NULL)",
1549  stats->attr->attname.data);
1550 #endif /* POSTGIS_DEBUG_LEVEL > 0 */
1551  stats->stats_valid = false;
1552  return;
1553  }
1554 
1555  POSTGIS_DEBUGF(3, " sample_extent: %s", nd_box_to_json(&sample_extent, ndims));
1556 
1557  /*
1558  * Second scan:
1559  * o compute standard deviation
1560  */
1561  for ( d = 0; d < ndims; d++ )
1562  {
1563  /* Calculate average bounds values */
1564  avg.min[d] = sum.min[d] / notnull_cnt;
1565  avg.max[d] = sum.max[d] / notnull_cnt;
1566 
1567  /* Calculate standard deviation for this dimension bounds */
1568  for ( i = 0; i < notnull_cnt; i++ )
1569  {
1570  const ND_BOX *ndb = sample_boxes[i];
1571  stddev.min[d] += (ndb->min[d] - avg.min[d]) * (ndb->min[d] - avg.min[d]);
1572  stddev.max[d] += (ndb->max[d] - avg.max[d]) * (ndb->max[d] - avg.max[d]);
1573  }
1574  stddev.min[d] = sqrt(stddev.min[d] / notnull_cnt);
1575  stddev.max[d] = sqrt(stddev.max[d] / notnull_cnt);
1576 
1577  /* Histogram bounds for this dimension bounds is avg +/- SDFACTOR * stdev */
1578  histo_extent.min[d] = Max(avg.min[d] - SDFACTOR * stddev.min[d], sample_extent.min[d]);
1579  histo_extent.max[d] = Min(avg.max[d] + SDFACTOR * stddev.max[d], sample_extent.max[d]);
1580  }
1581 
1582  /*
1583  * Third scan:
1584  * o skip hard deviants
1585  * o compute new histogram box
1586  */
1587  nd_box_init_bounds(&histo_extent_new);
1588  for ( i = 0; i < notnull_cnt; i++ )
1589  {
1590  const ND_BOX *ndb = sample_boxes[i];
1591  /* Skip any hard deviants (boxes entirely outside our histo_extent */
1592  if ( ! nd_box_intersects(&histo_extent, ndb, ndims) )
1593  {
1594  POSTGIS_DEBUGF(4, " feature %d is a hard deviant, skipped", i);
1595  sample_boxes[i] = NULL;
1596  continue;
1597  }
1598  /* Expand our new box to fit all the other features. */
1599  nd_box_merge(ndb, &histo_extent_new);
1600  }
1601  /*
1602  * Expand the box slightly (1%) to avoid edge effects
1603  * with objects that are on the boundary
1604  */
1605  nd_box_expand(&histo_extent_new, 0.01);
1606  histo_extent = histo_extent_new;
1607 
1608  /*
1609  * How should we allocate our histogram cells to the
1610  * different dimensions? We can't do it by raw dimensional width,
1611  * because in x/y/z space, the z can have different units
1612  * from the x/y. Similarly for x/y/t space.
1613  * So, we instead calculate how much features overlap
1614  * each other in their dimension to figure out which
1615  * dimensions have useful selectivity characteristics (more
1616  * variability in density) and therefor would find
1617  * more cells useful (to distinguish between dense places and
1618  * homogeneous places).
1619  */
1620  nd_box_array_distribution(sample_boxes, notnull_cnt, &histo_extent, ndims,
1621  sample_distribution);
1622 
1623  /*
1624  * The sample_distribution array now tells us how spread out the
1625  * data is in each dimension, so we use that data to allocate
1626  * the histogram cells we have available.
1627  * At this point, histo_cells_target is the approximate target number
1628  * of cells.
1629  */
1630 
1631  /*
1632  * Some dimensions have basically a uniform distribution, we want
1633  * to allocate no cells to those dimensions, only to dimensions
1634  * that have some interesting differences in data distribution.
1635  * Here we count up the number of interesting dimensions
1636  */
1637  for ( d = 0; d < ndims; d++ )
1638  {
1639  if ( sample_distribution[d] > 0 )
1640  histo_ndims++;
1641  }
1642 
1643  if ( histo_ndims == 0 )
1644  {
1645  /* Special case: all our dimensions had low variability! */
1646  /* We just divide the cells up evenly */
1647  POSTGIS_DEBUG(3, " special case: no axes have variability");
1648  histo_cells_new = 1;
1649  for ( d = 0; d < ndims; d++ )
1650  {
1651  histo_size[d] = (int)pow((double)histo_cells_target, 1/(double)ndims);
1652  if ( ! histo_size[d] )
1653  histo_size[d] = 1;
1654  POSTGIS_DEBUGF(3, " histo_size[d]: %d", histo_size[d]);
1655  histo_cells_new *= histo_size[d];
1656  }
1657  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1658  }
1659  else
1660  {
1661  /*
1662  * We're going to express the amount of variability in each dimension
1663  * as a proportion of the total variability and allocate cells in that
1664  * dimension relative to that proportion.
1665  */
1666  POSTGIS_DEBUG(3, " allocating histogram axes based on axis variability");
1667  total_distribution = total_double(sample_distribution, ndims); /* First get the total */
1668  POSTGIS_DEBUGF(3, " total_distribution: %.8g", total_distribution);
1669  histo_cells_new = 1; /* For the number of cells in the final histogram */
1670  for ( d = 0; d < ndims; d++ )
1671  {
1672  if ( sample_distribution[d] == 0 ) /* Uninteresting dimensions don't get any room */
1673  {
1674  histo_size[d] = 1;
1675  }
1676  else /* Interesting dimension */
1677  {
1678  /* How does this dims variability compare to the total? */
1679  float edge_ratio = (float)sample_distribution[d] / (float)total_distribution;
1680  /*
1681  * Scale the target cells number by the # of dims and ratio,
1682  * then take the appropriate root to get the estimated number of cells
1683  * on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
1684  */
1685  histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
1686  /* If something goes awry, just give this dim one slot */
1687  if ( ! histo_size[d] )
1688  histo_size[d] = 1;
1689  }
1690  histo_cells_new *= histo_size[d];
1691  }
1692  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1693  }
1694 
1695  /* Update histo_cells to the actual number of cells we need to allocate */
1696  histo_cells = histo_cells_new;
1697  POSTGIS_DEBUGF(3, " histo_cells: %d", histo_cells);
1698 
1699  /*
1700  * Create the histogram (ND_STATS) in the stats memory context
1701  */
1702  old_context = MemoryContextSwitchTo(stats->anl_context);
1703  nd_stats_size = sizeof(ND_STATS) + ((histo_cells - 1) * sizeof(float4));
1704  nd_stats = palloc(nd_stats_size);
1705  memset(nd_stats, 0, nd_stats_size); /* Initialize all values to 0 */
1706  MemoryContextSwitchTo(old_context);
1707 
1708  /* Initialize the #ND_STATS objects */
1709  nd_stats->ndims = ndims;
1710  nd_stats->extent = histo_extent;
1711  nd_stats->sample_features = sample_rows;
1712  nd_stats->table_features = total_rows;
1713  nd_stats->not_null_features = notnull_cnt;
1714  /* Copy in the histogram dimensions */
1715  for ( d = 0; d < ndims; d++ )
1716  nd_stats->size[d] = histo_size[d];
1717 
1718  /*
1719  * Fourth scan:
1720  * o fill histogram values with the proportion of
1721  * features' bbox overlaps: a feature's bvol
1722  * can fully overlap (1) or partially overlap
1723  * (fraction of 1) an histogram cell.
1724  *
1725  * Note that we are filling each cell with the "portion of
1726  * the feature's box that overlaps the cell". So, if we sum
1727  * up the values in the histogram, we could get the
1728  * histogram feature count.
1729  *
1730  */
1731  for ( i = 0; i < notnull_cnt; i++ )
1732  {
1733  const ND_BOX *nd_box;
1734  ND_IBOX nd_ibox;
1735  int at[ND_DIMS];
1736  int d;
1737  double num_cells = 0;
1738  double tmp_volume = 1.0;
1739  double min[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1740  double max[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1741  double cellsize[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1742 
1743  nd_box = sample_boxes[i];
1744  if ( ! nd_box ) continue; /* Skip Null'ed out hard deviants */
1745 
1746  /* Give backend a chance of interrupting us */
1747  vacuum_delay_point();
1748 
1749  /* Find the cells that overlap with this box and put them into the ND_IBOX */
1750  nd_box_overlap(nd_stats, nd_box, &nd_ibox);
1751  memset(at, 0, sizeof(int)*ND_DIMS);
1752 
1753  POSTGIS_DEBUGF(3, " feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1754  nd_ibox.min[0], nd_ibox.min[1], nd_ibox.min[2], nd_ibox.min[3],
1755  nd_ibox.max[0], nd_ibox.max[1], nd_ibox.max[2], nd_ibox.max[3]);
1756 
1757  for ( d = 0; d < nd_stats->ndims; d++ )
1758  {
1759  /* Initialize the starting values */
1760  at[d] = nd_ibox.min[d];
1761  min[d] = nd_stats->extent.min[d];
1762  max[d] = nd_stats->extent.max[d];
1763  cellsize[d] = (max[d] - min[d])/(nd_stats->size[d]);
1764 
1765  /* What's the volume (area) of this feature's box? */
1766  tmp_volume *= (nd_box->max[d] - nd_box->min[d]);
1767  }
1768 
1769  /* Add feature volume (area) to our total */
1770  total_sample_volume = total_sample_volume + tmp_volume;
1771 
1772  /*
1773  * Move through all the overlaped histogram cells values and
1774  * add the box overlap proportion to them.
1775  */
1776  do
1777  {
1778  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1779  double ratio;
1780  /* Create a box for this histogram cell */
1781  for ( d = 0; d < nd_stats->ndims; d++ )
1782  {
1783  nd_cell.min[d] = min[d] + (at[d]+0) * cellsize[d];
1784  nd_cell.max[d] = min[d] + (at[d]+1) * cellsize[d];
1785  }
1786 
1787  /*
1788  * If a feature box is completely inside one cell the ratio will be
1789  * 1.0. If a feature box is 50% in two cells, each cell will get
1790  * 0.5 added on.
1791  */
1792  ratio = nd_box_ratio(&nd_cell, nd_box, nd_stats->ndims);
1793  nd_stats->value[nd_stats_value_index(nd_stats, at)] += ratio;
1794  num_cells += ratio;
1795  POSTGIS_DEBUGF(3, " ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1796  POSTGIS_DEBUGF(3, " at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1797  }
1798  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1799 
1800  /* Keep track of overall number of overlaps counted */
1801  total_cell_count += num_cells;
1802  /* How many features have we added to this histogram? */
1803  histogram_features++;
1804  }
1805 
1806  POSTGIS_DEBUGF(3, " histogram_features: %d", histogram_features);
1807  POSTGIS_DEBUGF(3, " sample_rows: %d", sample_rows);
1808  POSTGIS_DEBUGF(3, " table_rows: %.6g", total_rows);
1809 
1810  /* Error out if we got no sample information */
1811  if ( ! histogram_features )
1812  {
1813  POSTGIS_DEBUG(3, " no stats have been gathered");
1814  elog(NOTICE, " no features lie in the stats histogram, invalid stats");
1815  stats->stats_valid = false;
1816  return;
1817  }
1818 
1819  nd_stats->histogram_features = histogram_features;
1820  nd_stats->histogram_cells = histo_cells;
1821  nd_stats->cells_covered = total_cell_count;
1822 
1823  /* Put this histogram data into the right slot/kind */
1824  if ( mode == 2 )
1825  {
1826  stats_slot = STATISTIC_SLOT_2D;
1827  stats_kind = STATISTIC_KIND_2D;
1828  }
1829  else
1830  {
1831  stats_slot = STATISTIC_SLOT_ND;
1832  stats_kind = STATISTIC_KIND_ND;
1833  }
1834 
1835  /* Write the statistics data */
1836  stats->stakind[stats_slot] = stats_kind;
1837  stats->staop[stats_slot] = InvalidOid;
1838  stats->stanumbers[stats_slot] = (float4*)nd_stats;
1839  stats->numnumbers[stats_slot] = nd_stats_size/sizeof(float4);
1840  stats->stanullfrac = (float4)null_cnt/sample_rows;
1841  stats->stawidth = total_width/notnull_cnt;
1842  stats->stadistinct = -1.0;
1843  stats->stats_valid = true;
1844 
1845  POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1846  POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1847  POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
1848  POSTGIS_DEBUGF(3, " out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1849  POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
1850  POSTGIS_DEBUG (3, " out: distinct values: all (no check done)");
1851  POSTGIS_DEBUGF(3, " out: %s", nd_stats_to_json(nd_stats));
1852  /*
1853  POSTGIS_DEBUGF(3, " out histogram:\n%s", nd_stats_to_grid(nd_stats));
1854  */
1855 
1856  return;
1857 }
1858 
1859 
1877 static void
1878 compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1879  int sample_rows, double total_rows)
1880 {
1881  GserializedAnalyzeExtraData *extra_data = (GserializedAnalyzeExtraData *)stats->extra_data;
1882  /* Call standard statistics calculation routine to fill in correlation for BRIN to work */
1883  stats->extra_data = extra_data->std_extra_data;
1884  extra_data->std_compute_stats(stats, fetchfunc, sample_rows, total_rows);
1885  stats->extra_data = extra_data;
1886 
1887  /* 2D Mode */
1888  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
1889 
1890  if (stats->stats_valid)
1891  {
1892  /* ND Mode: Only computed if 2D was computed too (not NULL and valid) */
1893  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 0);
1894  }
1895 }
1896 
1897 
1925 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
1926 {
1927  VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1928  GserializedAnalyzeExtraData *extra_data =
1930 
1931  /* Ask for standard analyze to fill in as much as possible */
1932  if (!std_typanalyze(stats))
1933  PG_RETURN_BOOL(false);
1934 
1935  /* Save old compute_stats and extra_data for scalar statistics ... */
1936  extra_data->std_compute_stats = stats->compute_stats;
1937  extra_data->std_extra_data = stats->extra_data;
1938  /* ... and replace with our info */
1939  stats->compute_stats = compute_gserialized_stats;
1940  stats->extra_data = extra_data;
1941 
1942  /* Indicate we are done successfully */
1943  PG_RETURN_BOOL(true);
1944 }
1945 
1958 static float8
1959 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
1960 {
1961  int d; /* counter */
1962  float8 selectivity;
1963  ND_BOX nd_box;
1964  ND_IBOX nd_ibox;
1965  int at[ND_DIMS];
1966  double cell_size[ND_DIMS];
1967  double min[ND_DIMS];
1968  double max[ND_DIMS];
1969  double total_count = 0.0;
1970  int ndims_max;
1971 
1972  /* Calculate the overlap of the box on the histogram */
1973  if ( ! nd_stats )
1974  {
1975  elog(NOTICE, " estimate_selectivity called with null input");
1976  return FALLBACK_ND_SEL;
1977  }
1978 
1979  ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1980 
1981  /* Initialize nd_box. */
1982  nd_box_from_gbox(box, &nd_box);
1983 
1984  /*
1985  * To return 2D stats on an ND sample, we need to make the
1986  * 2D box cover the full range of the other dimensions in the
1987  * histogram.
1988  */
1989  POSTGIS_DEBUGF(3, " mode: %d", mode);
1990  if ( mode == 2 )
1991  {
1992  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1993  ndims_max = 2;
1994  }
1995 
1996  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1997  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1998 
1999  /*
2000  * Search box completely misses histogram extent?
2001  * We have to intersect in all N dimensions or else we have
2002  * zero interaction under the &&& operator. It's important
2003  * to short circuit in this case, as some of the tests below
2004  * will return junk results when run on non-intersecting inputs.
2005  */
2006  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
2007  {
2008  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
2009  return 0.0;
2010  }
2011 
2012  /* Search box completely contains histogram extent! */
2013  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
2014  {
2015  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
2016  return 1.0;
2017  }
2018 
2019  /* Calculate the overlap of the box on the histogram */
2020  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
2021  {
2022  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
2023  return FALLBACK_ND_SEL;
2024  }
2025 
2026  /* Work out some measurements of the histogram */
2027  for ( d = 0; d < nd_stats->ndims; d++ )
2028  {
2029  /* Cell size in each dim */
2030  min[d] = nd_stats->extent.min[d];
2031  max[d] = nd_stats->extent.max[d];
2032  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
2033  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2034 
2035  /* Initialize the counter */
2036  at[d] = nd_ibox.min[d];
2037  }
2038 
2039  /* Move through all the overlap values and sum them */
2040  do
2041  {
2042  float cell_count, ratio;
2043  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2044 
2045  /* We have to pro-rate partially overlapped cells. */
2046  for ( d = 0; d < nd_stats->ndims; d++ )
2047  {
2048  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2049  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2050  }
2051 
2052  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2053  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2054 
2055  /* Add the pro-rated count for this cell to the overall total */
2056  total_count += cell_count * ratio;
2057  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2058  }
2059  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2060 
2061  /* Scale by the number of features in our histogram to get the proportion */
2062  selectivity = total_count / nd_stats->histogram_features;
2063 
2064  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2065  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2066  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2067  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2068 
2069  /* Prevent rounding overflows */
2070  if (selectivity > 1.0) selectivity = 1.0;
2071  else if (selectivity < 0.0) selectivity = 0.0;
2072 
2073  return selectivity;
2074 }
2075 
2076 
2077 
2083 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
2084 {
2085  Oid table_oid = PG_GETARG_OID(0);
2086  text *att_text = PG_GETARG_TEXT_P(1);
2087  ND_STATS *nd_stats;
2088  char *str;
2089  text *json;
2090  int mode = 2; /* default to 2D mode */
2091  bool only_parent = false; /* default to whole tree stats */
2092 
2093  /* Check if we've been asked to not use 2d mode */
2094  if ( ! PG_ARGISNULL(2) )
2095  mode = text_p_get_mode(PG_GETARG_TEXT_P(2));
2096 
2097  /* Retrieve the stats object */
2098  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, only_parent);
2099  if ( ! nd_stats )
2100  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2101 
2102  /* Convert to JSON */
2103  str = nd_stats_to_json(nd_stats);
2104  json = cstring_to_text(str);
2105  pfree(str);
2106  pfree(nd_stats);
2107  PG_RETURN_TEXT_P(json);
2108 }
2109 
2110 
2116 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
2117 {
2118  Oid table_oid = PG_GETARG_OID(0);
2119  text *att_text = PG_GETARG_TEXT_P(1);
2120  Datum geom_datum = PG_GETARG_DATUM(2);
2121  GBOX gbox; /* search box read from gserialized datum */
2122  float8 selectivity = 0;
2123  ND_STATS *nd_stats;
2124  int mode = 2; /* 2D mode by default */
2125 
2126  /* Check if we've been asked to not use 2d mode */
2127  if ( ! PG_ARGISNULL(3) )
2128  mode = text_p_get_mode(PG_GETARG_TEXT_P(3));
2129 
2130  /* Retrieve the stats object */
2131  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, false);
2132 
2133  if ( ! nd_stats )
2134  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2135 
2136  /* Calculate the gbox */
2137  if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
2138  elog(ERROR, "unable to calculate bounding box from geometry");
2139 
2140  POSTGIS_DEBUGF(3, " %s", gbox_to_string(&gbox));
2141 
2142  /* Do the estimation */
2143  selectivity = estimate_selectivity(&gbox, nd_stats, mode);
2144 
2145  pfree(nd_stats);
2146  PG_RETURN_FLOAT8(selectivity);
2147 }
2148 
2149 
2155 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
2156 {
2157  Oid table_oid1 = PG_GETARG_OID(0);
2158  text *att_text1 = PG_GETARG_TEXT_P(1);
2159  Oid table_oid2 = PG_GETARG_OID(2);
2160  text *att_text2 = PG_GETARG_TEXT_P(3);
2161  ND_STATS *nd_stats1, *nd_stats2;
2162  float8 selectivity = 0;
2163  int mode = 2; /* 2D mode by default */
2164 
2165 
2166  /* Retrieve the stats object */
2167  nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode, false);
2168  nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode, false);
2169 
2170  if ( ! nd_stats1 )
2171  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text_to_cstring(att_text1));
2172 
2173  if ( ! nd_stats2 )
2174  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid2), text_to_cstring(att_text2));
2175 
2176  /* Check if we've been asked to not use 2d mode */
2177  if ( ! PG_ARGISNULL(4) )
2178  {
2179  text *modetxt = PG_GETARG_TEXT_P(4);
2180  char *modestr = text_to_cstring(modetxt);
2181  if ( modestr[0] == 'N' )
2182  mode = 0;
2183  }
2184 
2185  /* Do the estimation */
2186  selectivity = estimate_join_selectivity(nd_stats1, nd_stats2);
2187 
2188  pfree(nd_stats1);
2189  pfree(nd_stats2);
2190  PG_RETURN_FLOAT8(selectivity);
2191 }
2192 
2198 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
2199 {
2200  PG_RETURN_DATUM(DirectFunctionCall5(
2202  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2203  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2204  Int32GetDatum(2) /* 2-D mode */
2205  ));
2206 }
2207 
2213 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
2214 {
2215  PG_RETURN_DATUM(DirectFunctionCall5(
2217  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2218  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2219  Int32GetDatum(0) /* N-D mode */
2220  ));
2221 }
2222 
2223 
2238 float8
2239 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
2240 {
2241  VariableStatData vardata;
2242  Node *other = NULL;
2243  bool varonleft;
2244  ND_STATS *nd_stats = NULL;
2245 
2246  GBOX search_box;
2247  float8 selectivity = 0;
2248  Const *otherConst;
2249 
2250  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
2251 
2252  if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft))
2253  {
2254  POSTGIS_DEBUGF(2, "%s: could not find vardata", __func__);
2255  return DEFAULT_ND_SEL;
2256  }
2257 
2258  if (!IsA(other, Const))
2259  {
2260  ReleaseVariableStats(vardata);
2261  POSTGIS_DEBUGF(2, "%s: no constant argument, returning default selectivity %g", __func__, DEFAULT_ND_SEL);
2262  return DEFAULT_ND_SEL;
2263  }
2264 
2265  otherConst = (Const*)other;
2266  if ((!otherConst) || otherConst->constisnull)
2267  {
2268  ReleaseVariableStats(vardata);
2269  POSTGIS_DEBUGF(2, "%s: constant argument is NULL", __func__);
2270  return DEFAULT_ND_SEL;
2271  }
2272 
2273  if (!gserialized_datum_get_gbox_p(otherConst->constvalue, &search_box))
2274  {
2275  ReleaseVariableStats(vardata);
2276  POSTGIS_DEBUGF(2, "%s: search box is EMPTY", __func__);
2277  return 0.0;
2278  }
2279 
2280  if (!vardata.statsTuple)
2281  {
2282  POSTGIS_DEBUGF(1, "%s: no statistics available on table. Empty? Need to ANALYZE?", __func__);
2283  return DEFAULT_ND_SEL;
2284  }
2285 
2286  nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
2287  ReleaseVariableStats(vardata);
2288  selectivity = estimate_selectivity(&search_box, nd_stats, mode);
2289  if (nd_stats) pfree(nd_stats);
2290  return selectivity;
2291 }
2292 
2294 Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
2295 {
2296  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2297  // Oid operator_oid = PG_GETARG_OID(1);
2298  List *args = (List *) PG_GETARG_POINTER(2);
2299  int varRelid = PG_GETARG_INT32(3);
2300  int mode = PG_GETARG_INT32(4);
2301  float8 selectivity = gserialized_sel_internal(root, args, varRelid, mode);
2302  POSTGIS_DEBUGF(2, "%s: selectivity is %g", __func__, selectivity);
2303  PG_RETURN_FLOAT8(selectivity);
2304 }
2305 
2306 
2307 
2314 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
2315 {
2316  char *nsp = NULL;
2317  char *tbl = NULL;
2318  text *col = NULL;
2319  char *nsp_tbl = NULL;
2320  Oid tbl_oid, idx_oid = 0;
2321  ND_STATS *nd_stats;
2322  GBOX *gbox = NULL;
2323  bool only_parent = false;
2324  int key_type, att_num;
2325  size_t sz;
2326 
2327  /* We need to initialize the internal cache to access it later via postgis_oid() */
2328  postgis_initialize_cache();
2329 
2330  if ( PG_NARGS() == 4 )
2331  {
2332  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2333  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2334  col = PG_GETARG_TEXT_P(2);
2335  only_parent = PG_GETARG_BOOL(3);
2336  sz = strlen(nsp) + strlen(tbl) + 6;
2337  nsp_tbl = palloc(sz);
2338  snprintf(nsp_tbl, sz, "\"%s\".\"%s\"", nsp, tbl);
2339  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2340  pfree(nsp_tbl);
2341  }
2342  else if ( PG_NARGS() == 3 )
2343  {
2344  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2345  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2346  col = PG_GETARG_TEXT_P(2);
2347  sz = strlen(nsp) + strlen(tbl) + 6;
2348  nsp_tbl = palloc(sz);
2349  snprintf(nsp_tbl, sz, "\"%s\".\"%s\"", nsp, tbl);
2350  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2351  pfree(nsp_tbl);
2352  }
2353  else if ( PG_NARGS() == 2 )
2354  {
2355  tbl = text_to_cstring(PG_GETARG_TEXT_P(0));
2356  col = PG_GETARG_TEXT_P(1);
2357  sz = strlen(tbl) + 3;
2358  nsp_tbl = palloc(sz);
2359  snprintf(nsp_tbl, sz, "\"%s\"", tbl);
2360  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2361  pfree(nsp_tbl);
2362  }
2363  else
2364  {
2365  elog(ERROR, "estimated_extent() called with wrong number of arguments");
2366  PG_RETURN_NULL();
2367  }
2368 
2369  /* Read the extent from the head of the spatial index, if there is one */
2370 
2371  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type, &att_num);
2372  if (idx_oid)
2373  {
2374  /* TODO: how about only_parent ? */
2375  gbox = spatial_index_read_extent(idx_oid, key_type, att_num);
2376  POSTGIS_DEBUGF(2, "index for \"%s.%s\" exists, reading gbox from there", tbl, text_to_cstring(col));
2377  if ( ! gbox ) PG_RETURN_NULL();
2378  }
2379  else
2380  {
2381  POSTGIS_DEBUGF(2, "index for \"%s.%s\" does not exist", tbl, text_to_cstring(col));
2382 
2383  /* Fall back to reading the stats, if no index is found */
2384 
2385  /* Estimated extent only returns 2D bounds, so use mode 2 */
2386  nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2, only_parent);
2387 
2388  /* Error out on no stats */
2389  if ( ! nd_stats ) {
2390  elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, text_to_cstring(col));
2391  PG_RETURN_NULL();
2392  }
2393 
2394  /* Construct the box */
2395  gbox = palloc(sizeof(GBOX));
2396  FLAGS_SET_GEODETIC(gbox->flags, 0);
2397  FLAGS_SET_Z(gbox->flags, 0);
2398  FLAGS_SET_M(gbox->flags, 0);
2399  gbox->xmin = nd_stats->extent.min[0];
2400  gbox->xmax = nd_stats->extent.max[0];
2401  gbox->ymin = nd_stats->extent.min[1];
2402  gbox->ymax = nd_stats->extent.max[1];
2403  pfree(nd_stats);
2404  }
2405 
2406  PG_RETURN_POINTER(gbox);
2407 }
2408 
2416 Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
2417 {
2418  if ( PG_NARGS() == 3 )
2419  {
2420  PG_RETURN_DATUM(
2421  DirectFunctionCall3(gserialized_estimated_extent,
2422  PG_GETARG_DATUM(0),
2423  PG_GETARG_DATUM(1),
2424  PG_GETARG_DATUM(2)));
2425  }
2426  else if ( PG_NARGS() == 2 )
2427  {
2428  PG_RETURN_DATUM(
2429  DirectFunctionCall2(gserialized_estimated_extent,
2430  PG_GETARG_DATUM(0),
2431  PG_GETARG_DATUM(1)));
2432  }
2433 
2434  elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
2435  PG_RETURN_NULL();
2436 }
2437 
2438 /************************************************************************/
2439 
2440 static Oid
2441 table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num)
2442 {
2443  Relation tbl_rel;
2444  ListCell *lc;
2445  List *idx_list;
2446  Oid result = InvalidOid;
2447  char *colname = text_to_cstring(col);
2448 
2449  /* Lookup our spatial index key types */
2450  Oid b2d_oid = postgis_oid(BOX2DFOID);
2451  Oid gdx_oid = postgis_oid(BOX3DOID);
2452 
2453  if (!(b2d_oid && gdx_oid))
2454  return InvalidOid;
2455 
2456  tbl_rel = RelationIdGetRelation(tbl_oid);
2457  idx_list = RelationGetIndexList(tbl_rel);
2458  RelationClose(tbl_rel);
2459 
2460  /* For each index associated with this table... */
2461  foreach(lc, idx_list)
2462  {
2463  Form_pg_class idx_form;
2464  HeapTuple idx_tup;
2465  int idx_relam;
2466  Oid idx_oid = lfirst_oid(lc);
2467 
2468  idx_tup = SearchSysCache1(RELOID, ObjectIdGetDatum(idx_oid));
2469  if (!HeapTupleIsValid(idx_tup))
2470  elog(ERROR, "%s: unable to lookup index %u in syscache", __func__, idx_oid);
2471  idx_form = (Form_pg_class) GETSTRUCT(idx_tup);
2472  idx_relam = idx_form->relam;
2473  ReleaseSysCache(idx_tup);
2474 
2475  /* Does the index use a GIST access method? */
2476  if (idx_relam == GIST_AM_OID)
2477  {
2478  Form_pg_attribute att;
2479  Oid atttypid;
2480  int attnum;
2481  /* Is the index on the column name we are looking for? */
2482  HeapTuple att_tup = SearchSysCache2(ATTNAME,
2483  ObjectIdGetDatum(idx_oid),
2484  PointerGetDatum(colname));
2485  if (!HeapTupleIsValid(att_tup))
2486  continue;
2487 
2488  att = (Form_pg_attribute) GETSTRUCT(att_tup);
2489  atttypid = att->atttypid;
2490  attnum = att->attnum;
2491  ReleaseSysCache(att_tup);
2492 
2493  /* Is the column actually spatial? */
2494  if (b2d_oid == atttypid || gdx_oid == atttypid)
2495  {
2496  /* Save result, clean up, and break out */
2497  result = idx_oid;
2498  if (att_num)
2499  *att_num = attnum;
2500  if (key_type)
2501  *key_type = (atttypid == b2d_oid ? STATISTIC_KIND_2D : STATISTIC_KIND_ND);
2502  break;
2503  }
2504  }
2505  }
2506  return result;
2507 }
2508 
2509 static GBOX *
2510 spatial_index_read_extent(Oid idx_oid, int key_type, int att_num)
2511 {
2512  BOX2DF *bounds_2df = NULL;
2513  GIDX *bounds_gidx = NULL;
2514  GBOX *gbox = NULL;
2515  Relation idx_rel;
2516  Buffer buffer;
2517  Page page;
2518  OffsetNumber offset;
2519  unsigned long offset_max;
2520 
2521  if (!idx_oid)
2522  return NULL;
2523 
2524  idx_rel = index_open(idx_oid, AccessShareLock);
2525  buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2526  page = (Page) BufferGetPage(buffer);
2527  offset = FirstOffsetNumber;
2528  offset_max = PageGetMaxOffsetNumber(page);
2529  while (offset <= offset_max)
2530  {
2531  ItemId iid = PageGetItemId(page, offset);
2532  IndexTuple ituple;
2533  if (!iid)
2534  {
2535  ReleaseBuffer(buffer);
2536  index_close(idx_rel, AccessShareLock);
2537  return NULL;
2538  }
2539  ituple = (IndexTuple) PageGetItem(page, iid);
2540  if (!GistTupleIsInvalid(ituple))
2541  {
2542  bool isnull;
2543  Datum idx_attr = index_getattr(ituple, att_num, idx_rel->rd_att, &isnull);
2544  if (!isnull)
2545  {
2546  if (key_type == STATISTIC_KIND_2D)
2547  {
2548  BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2549  if (bounds_2df)
2550  box2df_merge(bounds_2df, b);
2551  else
2552  bounds_2df = box2df_copy(b);
2553  }
2554  else
2555  {
2556  GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2557  if (bounds_gidx)
2558  gidx_merge(&bounds_gidx, b);
2559  else
2560  bounds_gidx = gidx_copy(b);
2561  }
2562  }
2563  }
2564  offset++;
2565  }
2566 
2567  ReleaseBuffer(buffer);
2568  index_close(idx_rel, AccessShareLock);
2569 
2570  if (key_type == STATISTIC_KIND_2D && bounds_2df)
2571  {
2572  if (box2df_is_empty(bounds_2df))
2573  return NULL;
2574  gbox = gbox_new(0);
2575  box2df_to_gbox_p(bounds_2df, gbox);
2576  }
2577  else if (key_type == STATISTIC_KIND_ND && bounds_gidx)
2578  {
2579  if (gidx_is_unknown(bounds_gidx))
2580  return NULL;
2581  gbox = gbox_new(0);
2582  gbox_from_gidx(bounds_gidx, gbox, 0);
2583  }
2584  else
2585  return NULL;
2586 
2587  return gbox;
2588 }
2589 
2590 /*
2591 CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
2592  RETURNS box2d
2593  AS '$libdir/postgis-2.5','_postgis_gserialized_index_extent'
2594  LANGUAGE 'c' STABLE STRICT;
2595 */
2596 
2598 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
2599 {
2600  GBOX *gbox = NULL;
2601  int key_type;
2602  int att_num;
2603  Oid tbl_oid = PG_GETARG_DATUM(0);
2604  text *col = PG_GETARG_TEXT_P(1);
2605  Oid idx_oid;
2606 
2607  if(!tbl_oid)
2608  PG_RETURN_NULL();
2609 
2610  /* We need to initialize the internal cache to access it later via postgis_oid() */
2611  postgis_initialize_cache();
2612 
2613  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type, &att_num);
2614  if (!idx_oid)
2615  PG_RETURN_NULL();
2616 
2617  gbox = spatial_index_read_extent(idx_oid, key_type, att_num);
2618  if (!gbox)
2619  PG_RETURN_NULL();
2620  else
2621  PG_RETURN_POINTER(gbox);
2622 }
2623 
char result[OUT_DOUBLE_BUFFER_SIZE]
Definition: cu_print.c:267
GBOX * gbox_new(lwflags_t flags)
Create a new gbox with the dimensionality indicated by the flags.
Definition: gbox.c:32
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition: gbox.c:197
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
Definition: gbox.c:392
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static int range_quintile(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
#define NUM_BINS
#define STATISTIC_SLOT_ND
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
#define ND_DIMS
The maximum number of dimensions our code can handle.
#define STATISTIC_KIND_2D
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Create a printable view of the ND_STATS histogram.
#define DEFAULT_ND_JOINSEL
#define STATISTIC_KIND_ND
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
#define SDFACTOR
#define FALLBACK_ND_JOINSEL
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type, int att_num)
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
This function should return an estimation of the number of rows returned by a query involving an over...
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num)
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array.
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator?
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension,...
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
bool box2df_is_empty(const BOX2DF *a)
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
int gserialized_datum_get_gbox_p(Datum gsdatum, GBOX *gbox)
Given a GSERIALIZED datum, as quickly as possible (peeking into the top of the memory) return the gbo...
BOX2DF * box2df_copy(BOX2DF *b)
bool gidx_is_unknown(const GIDX *a)
GIDX * gidx_copy(GIDX *b)
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define LW_FAILURE
Definition: liblwgeom.h:110
#define FLAGS_GET_Z(flags)
Definition: liblwgeom.h:179
#define FLAGS_GET_M(flags)
Definition: liblwgeom.h:180
#define FLAGS_SET_GEODETIC(flags, value)
Definition: liblwgeom.h:189
#define FLAGS_SET_M(flags, value)
Definition: liblwgeom.h:187
#define FLAGS_SET_Z(flags, value)
Definition: liblwgeom.h:186
#define FLAGS_GET_GEODETIC(flags)
Definition: liblwgeom.h:182
This library is the generic geometry handling section of PostGIS.
#define str(s)
args
Definition: ovdump.py:45
Datum buffer(PG_FUNCTION_ARGS)
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided.
Definition: stringbuffer.c:230
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
Definition: stringbuffer.c:33
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
Definition: stringbuffer.c:76
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string.
Definition: stringbuffer.c:124
static void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
Definition: stringbuffer.h:103
double ymax
Definition: liblwgeom.h:371
double zmax
Definition: liblwgeom.h:373
double xmax
Definition: liblwgeom.h:369
double zmin
Definition: liblwgeom.h:372
double mmax
Definition: liblwgeom.h:375
double ymin
Definition: liblwgeom.h:370
double xmin
Definition: liblwgeom.h:368
double mmin
Definition: liblwgeom.h:374
lwflags_t flags
Definition: liblwgeom.h:367
AnalyzeAttrComputeStatsFunc std_compute_stats
float4 max[ND_DIMS]
float4 min[ND_DIMS]
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
int max[ND_DIMS]
int min[ND_DIMS]
N-dimensional box index type.
float4 size[ND_DIMS]
N-dimensional statistics structure.