PostGIS  3.1.6dev-r@@SVN_REVISION@@
gserialized_estimate.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * PostGIS is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * PostGIS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18  *
19  **********************************************************************
20  *
21  * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
22  *
23  **********************************************************************/
24 
25 
26 
27 /**********************************************************************
28  THEORY OF OPERATION
29 
30 The ANALYZE command hooks to a callback (gserialized_analyze_nd) that
31 calculates (compute_gserialized_stats_mode) two histograms of occurrences of
32 features, one for the 2D domain (and the && operator) and one for the
33 ND domain (and the &&& operator).
34 
35 Queries in PostgreSQL call into the selectivity sub-system to find out
36 the relative effectiveness of different clauses in sub-setting
37 relations. Queries with constant arguments call gserialized_gist_sel,
38 queries with relations on both sides call gserialized_gist_joinsel.
39 
40 gserialized_gist_sel sums up the values in the histogram that overlap
41 the constant search box.
42 
43 gserialized_gist_joinsel sums up the product of the overlapping
44 cells in each relation's histogram.
45 
46 Depending on the operator and type, the mode of selectivity calculation
47 will be 2D or ND.
48 
49 - geometry && geometry ==> 2D
50 - geometry &&& geometry ==> ND
51 - geography && geography ==> ND
52 
53 The 2D mode is put in effect by retrieving the 2D histogram from the
54 statistics cache and then allowing the generic ND calculations to
55 go to work.
56 
57 TO DO: More testing and examination of the &&& operator and mixed
58 dimensionality cases. (2D geometry) &&& (3D column), etc.
59 
60 **********************************************************************/
61 
62 #include "postgres.h"
63 
64 #include "access/genam.h"
65 #include "access/gin.h"
66 #include "access/gist.h"
67 #include "access/gist_private.h"
68 #include "access/gistscan.h"
69 #if PG_VERSION_NUM < 130000
70 #include "access/tuptoaster.h" /* For toast_raw_datum_size */
71 #else
72 #include "access/detoast.h" /* For toast_raw_datum_size */
73 #endif
74 #include "utils/datum.h"
75 #include "access/heapam.h"
76 #include "catalog/index.h"
77 #include "catalog/pg_am.h"
78 #include "miscadmin.h"
79 #include "storage/lmgr.h"
80 #include "catalog/namespace.h"
81 #include "catalog/indexing.h"
82 #if PG_VERSION_NUM >= 100000
83 #include "utils/regproc.h"
84 #include "utils/varlena.h"
85 #endif
86 #include "utils/builtins.h"
87 #include "utils/datum.h"
88 #include "utils/snapmgr.h"
89 #include "utils/fmgroids.h"
90 #include "funcapi.h"
91 #include "access/heapam.h"
92 #include "catalog/pg_type.h"
93 #include "access/relscan.h"
94 
95 #include "executor/spi.h"
96 #include "fmgr.h"
97 #include "commands/vacuum.h"
98 #if PG_VERSION_NUM < 120000
99 #include "nodes/relation.h"
100 #else
101 #include "nodes/pathnodes.h"
102 #endif
103 #include "parser/parsetree.h"
104 #include "utils/array.h"
105 #include "utils/lsyscache.h"
106 #include "utils/builtins.h"
107 #include "utils/syscache.h"
108 #include "utils/rel.h"
109 #include "utils/selfuncs.h"
110 
111 #include "../postgis_config.h"
112 
113 #include "access/htup_details.h"
114 
115 #include "stringbuffer.h"
116 #include "liblwgeom.h"
117 #include "lwgeom_pg.h" /* For debugging macros. */
118 #include "gserialized_gist.h" /* For index common functions */
119 
120 #include <math.h>
121 #if HAVE_IEEEFP_H
122 #include <ieeefp.h>
123 #endif
124 #include <float.h>
125 #include <string.h>
126 #include <stdio.h>
127 #include <ctype.h>
128 
129 
130 /************************************************************************/
131 
132 
133 /* Prototypes */
134 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS);
135 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS);
136 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS);
137 Datum gserialized_gist_sel(PG_FUNCTION_ARGS);
138 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS);
139 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
140 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
141 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
142 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS);
143 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
144 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
145 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
146 
147 /* Local prototypes */
148 static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num);
149 static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type, int att_num);
150 
151 
152 /* Other prototypes */
153 float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
154 float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode);
155 
156 
157 /* Old Prototype */
158 Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
159 
160 /*
161  * Assign a number to the n-dimensional statistics kind
162  *
163  * tgl suggested:
164  *
165  * 1-100: reserved for assignment by the core Postgres project
166  * 100-199: reserved for assignment by PostGIS
167  * 200-9999: reserved for other globally-known stats kinds
168  * 10000-32767: reserved for private site-local use
169  */
170 #define STATISTIC_KIND_ND 102
171 #define STATISTIC_KIND_2D 103
172 
173 /*
174  * Postgres does not pin its slots and uses them as they come.
175  * We need to preserve its Correlation for brin to work
176  * 0 may be MCV
177  * 1 may be Histogram
178  * 2 may be Correlation
179  * We take 3 and 4.
180  */
181 #define STATISTIC_SLOT_ND 3
182 #define STATISTIC_SLOT_2D 4
183 
184 /*
185 * The SD factor restricts the side of the statistics histogram
186 * based on the standard deviation of the extent of the data.
187 * SDFACTOR is the number of standard deviations from the mean
188 * the histogram will extend.
189 */
190 #define SDFACTOR 3.25
191 
/* Length of the fixed per-dimension arrays (min/max/size) in the N-D box and stats structures below. */
197 #define ND_DIMS 4
198 
/* Dimensions narrower than this are treated as degenerate: nd_box_expand, */
/* nd_box_overlap and nd_box_array_distribution all special-case them. */
205 #define MIN_DIMENSION_WIDTH 0.000000001
206 
/* Dimensions wider than this are skipped by nd_box_expand and nd_box_array_distribution. */
211 #define MAX_DIMENSION_WIDTH 1.0E+20
212 
/* Default selectivities. DEFAULT_ND_JOINSEL is returned by the join estimator when */
/* the arguments are not plain columns, when stats are missing, or when the computed */
/* value is NaN/infinite/negative. DEFAULT_ND_SEL is not referenced in this part of */
/* the file; presumably it is the restriction-clause counterpart (confirm). */
216 #define DEFAULT_ND_SEL 0.0001
217 #define DEFAULT_ND_JOINSEL 0.001
218 
/* Fallback selectivities returned by estimate_join_selectivity: FALLBACK_ND_SEL on */
/* NULL inputs, FALLBACK_ND_JOINSEL when the histogram overlap cannot be computed. */
222 #define FALLBACK_ND_SEL 0.2
223 #define FALLBACK_ND_JOINSEL 0.3
224 
/*
 * N-dimensional box: lower (min) and upper (max) bound per dimension,
 * stored as float4 with ND_DIMS slots each.
 * NOTE(review): the closing brace and typedef name are not visible in this
 * listing; the rest of the file refers to the type as ND_BOX -- confirm.
 */
230 typedef struct ND_BOX_T
231 {
232  float4 min[ND_DIMS];
233  float4 max[ND_DIMS];
235 
/*
 * Integer counterpart of the N-D box: per-dimension min/max histogram cell
 * indexes, as filled in by nd_box_overlap and walked by nd_increment.
 * NOTE(review): the closing brace and typedef name are not visible in this
 * listing; the rest of the file refers to the type as ND_IBOX -- confirm.
 */
239 typedef struct ND_IBOX_T
240 {
241  int min[ND_DIMS];
242  int max[ND_DIMS];
244 
245 
/*
 * N-dimensional histogram statistics record.
 * pg_nd_stats_from_tuple fills this structure by copying a float4 array
 * straight over it, and every member visible here is a float4.
 * NOTE(review): the member declarations that belong to several of the
 * comments below (extent, table_features, sample_features,
 * not_null_features, histogram_features, histogram_cells, cells_covered --
 * names taken from their uses elsewhere in this file) and the closing
 * typedef line are not visible in this listing; confirm against the real
 * source before relying on the layout.
 */
252 typedef struct ND_STATS_T
253 {
254  /* Dimensionality of the histogram. */
255  float4 ndims;
256 
257  /* Size of n-d histogram in each dimension. */
258  float4 size[ND_DIMS];
259 
260  /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
262 
263  /* How many rows in the table itself? */
265 
266  /* How many rows were in the sample that built this histogram? */
268 
269  /* How many not-Null/Empty features were in the sample? */
271 
272  /* How many features actually got sampled in the histogram? */
274 
275  /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
277 
278  /* How many cells did those histogram features cover? */
279  /* Since we are pro-rating coverage, this number should */
280  /* now always equal histogram_features */
282 
283  /* Variable length # of floats for histogram */
  /* Declared with one element; cells are addressed through the flat */
  /* offset computed by nd_stats_value_index. */
284  float4 value[1];
286 
/*
 * Extra state kept for the ANALYZE callback.
 * NOTE(review): only the first member is visible in this listing; any
 * further members and the typedef name are missing -- confirm.
 */
287 typedef struct {
288  /* Saved state from std_typanalyze() */
289  AnalyzeAttrComputeStatsFunc std_compute_stats;
292 
299 static int
300 gbox_ndims(const GBOX* gbox)
301 {
302  int dims = 2;
303  if ( FLAGS_GET_GEODETIC(gbox->flags) )
304  return 3;
305  if ( FLAGS_GET_Z(gbox->flags) )
306  dims++;
307  if ( FLAGS_GET_M(gbox->flags) )
308  dims++;
309  return dims;
310 }
311 
317 static int
318 text_p_get_mode(const text *txt)
319 {
320  int mode = 2;
321  char *modestr;
322  if (VARSIZE_ANY_EXHDR(txt) <= 0)
323  return mode;
324  modestr = (char*)VARDATA(txt);
325  if ( modestr[0] == 'N' )
326  mode = 0;
327  return mode;
328 }
329 
330 
/**
 * qsort-style comparator for int values (ascending order).
 *
 * @param a pointer to the first int
 * @param b pointer to the second int
 * @return -1, 0 or 1 when *a is less than, equal to, or greater than *b
 */
static int
cmp_int (const void *a, const void *b)
{
	const int lhs = *(const int *)a;
	const int rhs = *(const int *)b;

	/* Comparison results are 0/1, so the difference is exactly -1/0/1 */
	return (lhs > rhs) - (lhs < rhs);
}
347 
352 static int
353 range_quintile(int *vals, int nvals)
354 {
355  qsort(vals, nvals, sizeof(int), cmp_int);
356  return vals[4*nvals/5] - vals[nvals/5];
357 }
358 
/**
 * Sum an array of doubles.
 *
 * @param vals  values to add up
 * @param nvals number of entries in vals
 * @return the sum, accumulated in double precision; 0 when nvals <= 0
 */
static double
total_double(const double *vals, int nvals)
{
	int i;
	/* Accumulate in double: a float accumulator silently drops low-order bits */
	double total = 0.0;

	for ( i = 0; i < nvals; i++ )
		total += vals[i];

	return total;
}
373 
374 #if POSTGIS_DEBUG_LEVEL >= 3
375 
/**
 * Sum an array of ints.
 *
 * @param vals  values to add up
 * @param nvals number of entries in vals
 * @return the sum of the first nvals entries; 0 when nvals <= 0
 */
static int
total_int(const int *vals, int nvals)
{
	int sum = 0;
	int k = 0;

	while ( k < nvals )
	{
		sum += vals[k];
		k++;
	}

	return sum;
}
390 
/**
 * Arithmetic mean of an int array.
 *
 * @param vals  values to average
 * @param nvals number of entries in vals
 * @return integer total of the values divided by nvals, as a double
 */
static double
avg(const int *vals, int nvals)
{
	const double sum = (double)total_int(vals, nvals);
	const double count = (double)nvals;

	return sum / count;
}
400 
/**
 * Population standard deviation of an int array.
 *
 * @param vals  values to measure
 * @param nvals number of entries in vals
 * @return square root of the mean squared distance from the average
 */
static double
stddev(const int *vals, int nvals)
{
	const double mean = avg(vals, nvals);
	double sum_sq = 0;
	int k = 0;

	/* Accumulate squared deviations from the mean */
	while ( k < nvals )
	{
		const double delta = mean - (double)(vals[k]);
		sum_sq += delta * delta;
		k++;
	}

	return sqrt(sum_sq / nvals);
}
419 #endif /* POSTGIS_DEBUG_LEVEL >= 3 */
420 
425 static int
426 nd_stats_value_index(const ND_STATS *stats, int *indexes)
427 {
428  int d;
429  int accum = 1, vdx = 0;
430 
431  /* Calculate the index into the 1-d values array that the (i,j,k,l) */
432  /* n-d histogram coordinate implies. */
433  /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
434  for ( d = 0; d < (int)(stats->ndims); d++ )
435  {
436  int size = (int)(stats->size[d]);
437  if ( indexes[d] < 0 || indexes[d] >= size )
438  {
439  POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
440  return -1;
441  }
442  vdx += indexes[d] * accum;
443  accum *= size;
444  }
445  return vdx;
446 }
447 
/*
 * Render an ND_BOX as JSON text of the form {"min":[...],"max":[...]},
 * writing the first ndims entries of each bound with "%.6g".
 * Returns rv; nd_stats_to_json pfree()s the string it gets back.
 * NOTE(review): the declaration/creation of `sb` and the statements that
 * assign `rv` (and release the buffer) are not visible in this listing --
 * confirm against the real source.
 */
451 static char*
452 nd_box_to_json(const ND_BOX *nd_box, int ndims)
453 {
454  char *rv;
455  int i;
457 
458  stringbuffer_append(sb, "{\"min\":[");
459  for ( i = 0; i < ndims; i++ )
460  {
  /* Comma before every entry except the first */
461  if ( i ) stringbuffer_append(sb, ",");
462  stringbuffer_aprintf(sb, "%.6g", nd_box->min[i]);
463  }
464  stringbuffer_append(sb, "],\"max\":[");
465  for ( i = 0; i < ndims; i++ )
466  {
467  if ( i ) stringbuffer_append(sb, ",");
468  stringbuffer_aprintf(sb, "%.6g", nd_box->max[i]);
469  }
470  stringbuffer_append(sb, "]}");
471 
474  return rv;
475 }
476 
477 
/*
 * Render the header of an ND_STATS record as JSON text: ndims, the
 * per-dimension size array, the extent (via nd_box_to_json) and the
 * feature/cell counters. The float4 members are rounded to int for output;
 * the histogram values themselves are not written.
 * NOTE(review): the declaration/creation of `sb` and the statements that
 * assign `str` (and release the buffer) are not visible in this listing --
 * confirm against the real source.
 */
482 static char*
483 nd_stats_to_json(const ND_STATS *nd_stats)
484 {
485  char *json_extent, *str;
486  int d;
488  int ndims = (int)roundf(nd_stats->ndims);
489 
490  stringbuffer_append(sb, "{");
491  stringbuffer_aprintf(sb, "\"ndims\":%d,", ndims);
492 
493  /* Size */
494  stringbuffer_append(sb, "\"size\":[");
495  for ( d = 0; d < ndims; d++ )
496  {
  /* Comma before every entry except the first */
497  if ( d ) stringbuffer_append(sb, ",");
498  stringbuffer_aprintf(sb, "%d", (int)roundf(nd_stats->size[d]));
499  }
500  stringbuffer_append(sb, "],");
501 
502  /* Extent */
503  json_extent = nd_box_to_json(&(nd_stats->extent), ndims);
504  stringbuffer_aprintf(sb, "\"extent\":%s,", json_extent);
  /* The extent text has been copied into sb; release the temporary */
505  pfree(json_extent);
506 
507  stringbuffer_aprintf(sb, "\"table_features\":%d,", (int)roundf(nd_stats->table_features));
508  stringbuffer_aprintf(sb, "\"sample_features\":%d,", (int)roundf(nd_stats->sample_features));
509  stringbuffer_aprintf(sb, "\"not_null_features\":%d,", (int)roundf(nd_stats->not_null_features));
510  stringbuffer_aprintf(sb, "\"histogram_features\":%d,", (int)roundf(nd_stats->histogram_features));
511  stringbuffer_aprintf(sb, "\"histogram_cells\":%d,", (int)roundf(nd_stats->histogram_cells));
512  stringbuffer_aprintf(sb, "\"cells_covered\":%d", (int)roundf(nd_stats->cells_covered));
513  stringbuffer_append(sb, "}");
514 
517  return str;
518 }
519 
520 
526 // static char*
527 // nd_stats_to_grid(const ND_STATS *stats)
528 // {
529 // char *rv;
530 // int j, k;
531 // int sizex = (int)roundf(stats->size[0]);
532 // int sizey = (int)roundf(stats->size[1]);
533 // stringbuffer_t *sb = stringbuffer_create();
534 //
535 // for ( k = 0; k < sizey; k++ )
536 // {
537 // for ( j = 0; j < sizex; j++ )
538 // {
539 // stringbuffer_aprintf(sb, "%3d ", (int)roundf(stats->value[j + k*sizex]));
540 // }
541 // stringbuffer_append(sb, "\n");
542 // }
543 //
544 // rv = stringbuffer_getstringcopy(sb);
545 // stringbuffer_destroy(sb);
546 // return rv;
547 // }
548 
549 
551 static int
552 nd_box_merge(const ND_BOX *source, ND_BOX *target)
553 {
554  int d;
555  for ( d = 0; d < ND_DIMS; d++ )
556  {
557  target->min[d] = Min(target->min[d], source->min[d]);
558  target->max[d] = Max(target->max[d], source->max[d]);
559  }
560  return true;
561 }
562 
/*
 * Zero every byte of the ND_BOX pointed to by `a`; always returns true.
 * NOTE(review): the line carrying this function's name and parameter list
 * is not visible in this listing (presumably one of the box initializers
 * called elsewhere in the file) -- confirm against the real source.
 */
564 static int
566 {
567  memset(a, 0, sizeof(ND_BOX));
568  return true;
569 }
570 
/*
 * Set the box pointed to by `a` to an "inverted" state in all ND_DIMS
 * dimensions: min = FLT_MAX and max = -FLT_MAX, so that any later
 * Min()/Max() merge (see nd_box_merge) replaces both bounds.
 * Always returns true.
 * NOTE(review): the line carrying this function's name and parameter list
 * is not visible in this listing -- confirm against the real source.
 */
576 static int
578 {
579  int d;
580  for ( d = 0; d < ND_DIMS; d++ )
581  {
582  a->min[d] = FLT_MAX;
583  a->max[d] = -1 * FLT_MAX;
584  }
585  return true;
586 }
587 
589 static void
590 nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
591 {
592  int d = 0;
593  POSTGIS_DEBUGF(3, " %s", gbox_to_string(gbox));
594 
595  nd_box_init(nd_box);
596  nd_box->min[d] = gbox->xmin;
597  nd_box->max[d] = gbox->xmax;
598  d++;
599  nd_box->min[d] = gbox->ymin;
600  nd_box->max[d] = gbox->ymax;
601  d++;
602  if ( FLAGS_GET_GEODETIC(gbox->flags) )
603  {
604  nd_box->min[d] = gbox->zmin;
605  nd_box->max[d] = gbox->zmax;
606  return;
607  }
608  if ( FLAGS_GET_Z(gbox->flags) )
609  {
610  nd_box->min[d] = gbox->zmin;
611  nd_box->max[d] = gbox->zmax;
612  d++;
613  }
614  if ( FLAGS_GET_M(gbox->flags) )
615  {
616  nd_box->min[d] = gbox->mmin;
617  nd_box->max[d] = gbox->mmax;
618  d++;
619  }
620  return;
621 }
622 
626 static int
627 nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
628 {
629  int d;
630  for ( d = 0; d < ndims; d++ )
631  {
632  if ( (a->min[d] > b->max[d]) || (a->max[d] < b->min[d]) )
633  return false;
634  }
635  return true;
636 }
637 
641 static int
642 nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
643 {
644  int d;
645  for ( d = 0; d < ndims; d++ )
646  {
647  if ( ! ((a->min[d] < b->min[d]) && (a->max[d] > b->max[d])) )
648  return false;
649  }
650  return true;
651 }
652 
657 static int
658 nd_box_expand(ND_BOX *nd_box, double expansion_factor)
659 {
660  int d;
661  double size;
662  for ( d = 0; d < ND_DIMS; d++ )
663  {
664  size = nd_box->max[d] - nd_box->min[d];
665  /* Avoid expanding boxes that are either too wide or too narrow*/
666  if (size < MIN_DIMENSION_WIDTH || size > MAX_DIMENSION_WIDTH)
667  continue;
668  nd_box->min[d] -= size * expansion_factor / 2;
669  nd_box->max[d] += size * expansion_factor / 2;
670  }
671  return true;
672 }
673 
678 static inline int
679 nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
680 {
681  int d;
682 
683  POSTGIS_DEBUGF(4, " nd_box: %s", nd_box_to_json(nd_box, nd_stats->ndims));
684 
685  /* Initialize ibox */
686  memset(nd_ibox, 0, sizeof(ND_IBOX));
687 
688  /* In each dimension... */
689  for ( d = 0; d < nd_stats->ndims; d++ )
690  {
691  double smin = nd_stats->extent.min[d];
692  double smax = nd_stats->extent.max[d];
693  double width = smax - smin;
694 
695  if (width < MIN_DIMENSION_WIDTH)
696  {
697  nd_ibox->min[d] = nd_ibox->max[d] = nd_stats->extent.min[d];
698  }
699  else
700  {
701  int size = (int)roundf(nd_stats->size[d]);
702 
703  /* ... find cells the box overlaps with in this dimension */
704  nd_ibox->min[d] = floor(size * (nd_box->min[d] - smin) / width);
705  nd_ibox->max[d] = floor(size * (nd_box->max[d] - smin) / width);
706 
707  POSTGIS_DEBUGF(5, " stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
708  POSTGIS_DEBUGF(5, " overlap: dim %d: (%d, %d)", d, nd_ibox->min[d], nd_ibox->max[d]);
709 
710  /* Push any out-of range values into range */
711  nd_ibox->min[d] = Max(nd_ibox->min[d], 0);
712  nd_ibox->max[d] = Min(nd_ibox->max[d], size - 1);
713  }
714  }
715  return true;
716 }
717 
721 static inline double
722 nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
723 {
724  int d;
725  bool covered = true;
726  double ivol = 1.0;
727  double vol2 = 1.0;
728  double vol1 = 1.0;
729 
730  (void)vol1;
731 
732  for ( d = 0 ; d < ndims; d++ )
733  {
734  if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
735  return 0.0; /* Disjoint */
736 
737  if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
738  covered = false;
739  }
740 
741  if ( covered )
742  return 1.0;
743 
744  for ( d = 0; d < ndims; d++ )
745  {
746  double width1 = b1->max[d] - b1->min[d];
747  double width2 = b2->max[d] - b2->min[d];
748  double imin, imax, iwidth;
749 
750  vol1 *= width1;
751  vol2 *= width2;
752 
753  imin = Max(b1->min[d], b2->min[d]);
754  imax = Min(b1->max[d], b2->max[d]);
755  iwidth = imax - imin;
756  iwidth = Max(0.0, iwidth);
757 
758  ivol *= iwidth;
759  }
760 
761  if ( vol2 == 0.0 )
762  return vol2;
763 
764  return ivol / vol2;
765 }
766 
767 /* How many bins shall we use in figuring out the distribution? */
768 #define NUM_BINS 50
769 
785 static int
786 nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
787 {
788  int d, i, k, range;
789  int counts[NUM_BINS];
790  double smin, smax; /* Spatial min, spatial max */
791  double swidth; /* Spatial width of dimension */
792 #if POSTGIS_DEBUG_LEVEL >= 3
793  double average, sdev, sdev_ratio;
794 #endif
795  int bmin, bmax; /* Bin min, bin max */
796  const ND_BOX *ndb;
797 
798  /* For each dimension... */
799  for ( d = 0; d < ndims; d++ )
800  {
801  /* Initialize counts for this dimension */
802  memset(counts, 0, sizeof(counts));
803 
804  smin = extent->min[d];
805  smax = extent->max[d];
806  swidth = smax - smin;
807 
808  /* Don't try and calculate distribution of overly narrow */
809  /* or overly wide dimensions. Here we're being pretty geographical, */
810  /* expecting "normal" planar or geographic coordinates. */
811  /* Otherwise we have to "handle" +/- Inf bounded features and */
812  /* the assumptions needed for that are as bad as this hack. */
813  if ( swidth < MIN_DIMENSION_WIDTH || swidth > MAX_DIMENSION_WIDTH )
814  {
815  distribution[d] = 0;
816  continue;
817  }
818 
819  /* Sum up the overlaps of each feature with the dimensional bins */
820  for ( i = 0; i < num_boxes; i++ )
821  {
822  double minoffset, maxoffset;
823 
824  /* Skip null entries */
825  ndb = nd_boxes[i];
826  if ( ! ndb ) continue;
827 
828  /* Where does box fall relative to the working range */
829  minoffset = ndb->min[d] - smin;
830  maxoffset = ndb->max[d] - smin;
831 
832  /* Skip boxes that our outside our working range */
833  if ( minoffset < 0 || minoffset > swidth ||
834  maxoffset < 0 || maxoffset > swidth )
835  {
836  continue;
837  }
838 
839  /* What bins does this range correspond to? */
840  bmin = floor(NUM_BINS * minoffset / swidth);
841  bmax = floor(NUM_BINS * maxoffset / swidth);
842 
843  /* Should only happen when maxoffset==swidth */
844  if (bmax >= NUM_BINS)
845  bmax = NUM_BINS-1;
846 
847  POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
848 
849  /* Increment the counts in all the bins this feature overlaps */
850  for ( k = bmin; k <= bmax; k++ )
851  {
852  counts[k] += 1;
853  }
854 
855  }
856 
857  /* How dispersed is the distribution of features across bins? */
858  range = range_quintile(counts, NUM_BINS);
859 
860 #if POSTGIS_DEBUG_LEVEL >= 3
861  average = avg(counts, NUM_BINS);
862  sdev = stddev(counts, NUM_BINS);
863  sdev_ratio = sdev/average;
864 
865  POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
866  POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
867  POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
868  POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
869 #endif
870 
871  distribution[d] = range;
872  }
873 
874  return true;
875 }
876 
882 static inline int
883 nd_increment(ND_IBOX *ibox, int ndims, int *counter)
884 {
885  int d = 0;
886 
887  while ( d < ndims )
888  {
889  if ( counter[d] < ibox->max[d] )
890  {
891  counter[d] += 1;
892  break;
893  }
894  counter[d] = ibox->min[d];
895  d++;
896  }
897  /* That's it, cannot increment any more! */
898  if ( d == ndims )
899  return false;
900 
901  /* Increment complete! */
902  return true;
903 }
904 
905 static ND_STATS*
906 pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
907 {
908  int stats_kind = STATISTIC_KIND_ND;
909  int rv;
910  ND_STATS *nd_stats;
911 
912  /* If we're in 2D mode, set the kind appropriately */
913  if ( mode == 2 ) stats_kind = STATISTIC_KIND_2D;
914 
915  /* Then read the geom status histogram from that */
916 
917 #if POSTGIS_PGSQL_VERSION < 100
918  {
919  float4 *floatptr;
920  int nvalues;
921 
922  rv = get_attstatsslot(stats_tuple, 0, 0, stats_kind, InvalidOid,
923  NULL, NULL, NULL, &floatptr, &nvalues);
924 
925  if ( ! rv ) {
926  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
927  return NULL;
928  }
929 
930  /* Clone the stats here so we can release the attstatsslot immediately */
931  nd_stats = palloc(sizeof(float) * nvalues);
932  memcpy(nd_stats, floatptr, sizeof(float) * nvalues);
933 
934  /* Clean up */
935  free_attstatsslot(0, NULL, 0, floatptr, nvalues);
936  }
937 #else /* PostgreSQL 10 or higher */
938  {
939  AttStatsSlot sslot;
940  rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
941  ATTSTATSSLOT_NUMBERS);
942  if ( ! rv ) {
943  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
944  return NULL;
945  }
946 
947  /* Clone the stats here so we can release the attstatsslot immediately */
948  nd_stats = palloc(sizeof(float4) * sslot.nnumbers);
949  memcpy(nd_stats, sslot.numbers, sizeof(float4) * sslot.nnumbers);
950 
951  free_attstatsslot(&sslot);
952  }
953 #endif
954 
955  return nd_stats;
956 }
957 
962 static ND_STATS*
963 pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
964 {
965  HeapTuple stats_tuple = NULL;
966  ND_STATS *nd_stats;
967 
968  /* First pull the stats tuple for the whole tree */
969  if ( ! only_parent )
970  {
971  POSTGIS_DEBUGF(2, "searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
972  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(true));
973  if ( stats_tuple )
974  POSTGIS_DEBUGF(2, "found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
975  }
976  /* Fall-back to main table stats only, if not found for whole tree or explicitly ignored */
977  if ( only_parent || ! stats_tuple )
978  {
979  POSTGIS_DEBUGF(2, "searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
980  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(false));
981  if ( stats_tuple )
982  POSTGIS_DEBUGF(2, "found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
983  }
984  if ( ! stats_tuple )
985  {
986  POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
987  return NULL;
988  }
989 
990  nd_stats = pg_nd_stats_from_tuple(stats_tuple, mode);
991  ReleaseSysCache(stats_tuple);
992  if ( ! nd_stats )
993  {
994  POSTGIS_DEBUGF(2,
995  "histogram for attribute %d of table \"%s\" does not exist?",
996  att_num, get_rel_name(table_oid));
997  }
998 
999  return nd_stats;
1000 }
1001 
1010 static ND_STATS*
1011 pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
1012 {
1013  const char *att_name = text_to_cstring(att_text);
1014  AttrNumber att_num;
1015 
1016  /* We know the name? Look up the num */
1017  if ( att_text )
1018  {
1019  /* Get the attribute number */
1020  att_num = get_attnum(table_oid, att_name);
1021  if ( ! att_num ) {
1022  elog(ERROR, "attribute \"%s\" does not exist", att_name);
1023  return NULL;
1024  }
1025  }
1026  else
1027  {
1028  elog(ERROR, "attribute name is null");
1029  return NULL;
1030  }
1031 
1032  return pg_get_nd_stats(table_oid, att_num, mode, only_parent);
1033 }
1034 
1048 static float8
1050 {
1051  int ncells1, ncells2;
1052  int ndims1, ndims2, ndims;
1053  double ntuples_max;
1054  double ntuples_not_null1, ntuples_not_null2;
1055 
1056  ND_BOX extent1, extent2;
1057  ND_IBOX ibox1, ibox2;
1058  int at1[ND_DIMS];
1059  int at2[ND_DIMS];
1060  double min1[ND_DIMS];
1061  double width1[ND_DIMS];
1062  double cellsize1[ND_DIMS];
1063  int size2[ND_DIMS];
1064  double min2[ND_DIMS];
1065  double width2[ND_DIMS];
1066  double cellsize2[ND_DIMS];
1067  int size1[ND_DIMS];
1068  int d;
1069  double val = 0;
1070  float8 selectivity;
1071 
1072  /* Drop out on null inputs */
1073  if ( ! ( s1 && s2 ) )
1074  {
1075  elog(NOTICE, " estimate_join_selectivity called with null inputs");
1076  return FALLBACK_ND_SEL;
1077  }
1078 
1079  /* We need to know how many cells each side has... */
1080  ncells1 = (int)roundf(s1->histogram_cells);
1081  ncells2 = (int)roundf(s2->histogram_cells);
1082 
1083  /* ...so that we can drive the summation loop with the smaller histogram. */
1084  if ( ncells1 > ncells2 )
1085  {
1086  const ND_STATS *stats_tmp = s1;
1087  s1 = s2;
1088  s2 = stats_tmp;
1089  }
1090 
1091  POSTGIS_DEBUGF(3, "s1: %s", nd_stats_to_json(s1));
1092  POSTGIS_DEBUGF(3, "s2: %s", nd_stats_to_json(s2));
1093 
1094  /* Re-read that info after the swap */
1095  ncells1 = (int)roundf(s1->histogram_cells);
1096  ncells2 = (int)roundf(s2->histogram_cells);
1097 
1098  /* Q: What's the largest possible join size these relations can create? */
1099  /* A: The product of the # of non-null rows in each relation. */
1100  ntuples_not_null1 = s1->table_features * (s1->not_null_features / s1->sample_features);
1101  ntuples_not_null2 = s2->table_features * (s2->not_null_features / s2->sample_features);
1102  ntuples_max = ntuples_not_null1 * ntuples_not_null2;
1103 
1104  /* Get the ndims as ints */
1105  ndims1 = (int)roundf(s1->ndims);
1106  ndims2 = (int)roundf(s2->ndims);
1107  ndims = Max(ndims1, ndims2);
1108 
1109  /* Get the extents */
1110  extent1 = s1->extent;
1111  extent2 = s2->extent;
1112 
1113  /* If relation stats do not intersect, join is very very selective. */
1114  if ( ! nd_box_intersects(&extent1, &extent2, ndims) )
1115  {
1116  POSTGIS_DEBUG(3, "relation stats do not intersect, returning 0");
1117  PG_RETURN_FLOAT8(0.0);
1118  }
1119 
1120  /*
1121  * First find the index range of the part of the smaller
1122  * histogram that overlaps the larger one.
1123  */
1124  if ( ! nd_box_overlap(s1, &extent2, &ibox1) )
1125  {
1126  POSTGIS_DEBUG(3, "could not calculate overlap of relations");
1127  PG_RETURN_FLOAT8(FALLBACK_ND_JOINSEL);
1128  }
1129 
1130  /* Initialize counters / constants on s1 */
1131  for ( d = 0; d < ndims1; d++ )
1132  {
1133  at1[d] = ibox1.min[d];
1134  min1[d] = s1->extent.min[d];
1135  width1[d] = s1->extent.max[d] - s1->extent.min[d];
1136  size1[d] = (int)roundf(s1->size[d]);
1137  cellsize1[d] = width1[d] / size1[d];
1138  }
1139 
1140  /* Initialize counters / constants on s2 */
1141  for ( d = 0; d < ndims2; d++ )
1142  {
1143  min2[d] = s2->extent.min[d];
1144  width2[d] = s2->extent.max[d] - s2->extent.min[d];
1145  size2[d] = (int)roundf(s2->size[d]);
1146  cellsize2[d] = width2[d] / size2[d];
1147  }
1148 
1149  /* For each affected cell of s1... */
1150  do
1151  {
1152  double val1;
1153  /* Construct the bounds of this cell */
1154  ND_BOX nd_cell1;
1155  nd_box_init(&nd_cell1);
1156  for ( d = 0; d < ndims1; d++ )
1157  {
1158  nd_cell1.min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1159  nd_cell1.max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1160  }
1161 
1162  /* Find the cells of s2 that cell1 overlaps.. */
1163  nd_box_overlap(s2, &nd_cell1, &ibox2);
1164 
1165  /* Initialize counter */
1166  for ( d = 0; d < ndims2; d++ )
1167  {
1168  at2[d] = ibox2.min[d];
1169  }
1170 
1171  POSTGIS_DEBUGF(3, "at1 %d,%d %s", at1[0], at1[1], nd_box_to_json(&nd_cell1, ndims1));
1172 
1173  /* Get the value at this cell */
1174  val1 = s1->value[nd_stats_value_index(s1, at1)];
1175 
1176  /* For each overlapped cell of s2... */
1177  do
1178  {
1179  double ratio2;
1180  double val2;
1181 
1182  /* Construct the bounds of this cell */
1183  ND_BOX nd_cell2;
1184  nd_box_init(&nd_cell2);
1185  for ( d = 0; d < ndims2; d++ )
1186  {
1187  nd_cell2.min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1188  nd_cell2.max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1189  }
1190 
1191  POSTGIS_DEBUGF(3, " at2 %d,%d %s", at2[0], at2[1], nd_box_to_json(&nd_cell2, ndims2));
1192 
1193  /* Calculate overlap ratio of the cells */
1194  ratio2 = nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1195 
1196  /* Multiply the cell counts, scaled by overlap ratio */
1197  val2 = s2->value[nd_stats_value_index(s2, at2)];
1198  POSTGIS_DEBUGF(3, " val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1199  val += val1 * (val2 * ratio2);
1200  }
1201  while ( nd_increment(&ibox2, ndims2, at2) );
1202 
1203  }
1204  while( nd_increment(&ibox1, ndims1, at1) );
1205 
1206  POSTGIS_DEBUGF(3, "val of histogram = %g", val);
1207 
1208  /*
1209  * In order to compare our total cell count "val" to the
1210  * ntuples_max, we need to scale val up to reflect a full
1211  * table estimate. So, multiply by ratio of table size to
1212  * sample size.
1213  */
1214  val *= (s1->table_features / s1->sample_features);
1215  val *= (s2->table_features / s2->sample_features);
1216 
1217  POSTGIS_DEBUGF(3, "val scaled to full table size = %g", val);
1218 
1219  /*
1220  * Because the cell counts are over-determined due to
1221  * double counting of features that overlap multiple cells
1222  * (see the compute_gserialized_stats routine)
1223  * we also have to scale our cell count "val" *down*
1224  * to adjust for the double counting.
1225  */
1226 // val /= (s1->cells_covered / s1->histogram_features);
1227 // val /= (s2->cells_covered / s2->histogram_features);
1228 
1229  /*
1230  * Finally, the selectivity is the estimated number of
1231  * rows to be returned divided by the maximum possible
1232  * number of rows that can be returned.
1233  */
1234  selectivity = val / ntuples_max;
1235 
1236  /* Guard against over-estimates and crazy numbers :) */
1237  if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1238  {
1239  selectivity = DEFAULT_ND_JOINSEL;
1240  }
1241  else if ( selectivity > 1.0 )
1242  {
1243  selectivity = 1.0;
1244  }
1245 
1246  return selectivity;
1247 }
1248 
/*
 * N-D join selectivity entry point: forwards its four arguments plus a
 * fifth "mode" argument of 0 (ND mode) through DirectFunctionCall5.
 * NOTE(review): the first argument of DirectFunctionCall5 (the function
 * being called) is not visible in this listing -- presumably the generic
 * join selectivity function; confirm against the real source.
 */
1254 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
1255 {
1256  PG_RETURN_DATUM(DirectFunctionCall5(
1258  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1259  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1260  Int32GetDatum(0) /* ND mode */
1261  ));
1262 }
1263 
/*
 * 2-D join selectivity entry point: forwards its four arguments plus a
 * fifth "mode" argument of 2 (2D mode) through DirectFunctionCall5.
 * NOTE(review): the first argument of DirectFunctionCall5 (the function
 * being called) is not visible in this listing -- presumably the generic
 * join selectivity function; confirm against the real source.
 */
1269 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
1270 {
1271  PG_RETURN_DATUM(DirectFunctionCall5(
1273  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1274  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1275  Int32GetDatum(2) /* 2D mode */
1276  ));
1277 }
1278 
1279 double
1280 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
1281 {
1282  float8 selectivity;
1283  Oid relid1, relid2;
1284  ND_STATS *stats1, *stats2;
1285  Node *arg1 = (Node*) linitial(args);
1286  Node *arg2 = (Node*) lsecond(args);
1287  Var *var1 = (Var*) arg1;
1288  Var *var2 = (Var*) arg2;
1289 
1290  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1291 
1292  /* We only do column joins right now, no functional joins */
1293  /* TODO: handle g1 && ST_Expand(g2) */
1294  if (!IsA(arg1, Var) || !IsA(arg2, Var))
1295  {
1296  POSTGIS_DEBUGF(1, "%s called with arguments that are not column references", __func__);
1297  return DEFAULT_ND_JOINSEL;
1298  }
1299 
1300  /* What are the Oids of our tables/relations? */
1301  relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
1302  relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
1303 
1304  /* Pull the stats from the stats system. */
1305  stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
1306  stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
1307 
1308  /* If we can't get stats, we have to stop here! */
1309  if (!stats1)
1310  {
1311  POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1312  return DEFAULT_ND_JOINSEL;
1313  }
1314  else if (!stats2)
1315  {
1316  POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1317  return DEFAULT_ND_JOINSEL;
1318  }
1319 
1320  selectivity = estimate_join_selectivity(stats1, stats2);
1321  POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
1322  pfree(stats1);
1323  pfree(stats2);
1324  return selectivity;
1325 }
1326 
1336 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
1337 {
1338  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1339  /* Oid operator = PG_GETARG_OID(1); */
1340  List *args = (List *) PG_GETARG_POINTER(2);
1341  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1342  int mode = PG_GETARG_INT32(4);
1343 
1344  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1345 
1346  /* Check length of args and punt on > 2 */
1347  if (list_length(args) != 2)
1348  {
1349  POSTGIS_DEBUGF(2, "%s: got nargs == %d", __func__, list_length(args));
1350  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1351  }
1352 
1353  /* Only respond to an inner join/unknown context join */
1354  if (jointype != JOIN_INNER)
1355  {
1356  POSTGIS_DEBUGF(1, "%s: jointype %d not supported", __func__, jointype);
1357  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1358  }
1359 
1360  PG_RETURN_FLOAT8(gserialized_joinsel_internal(root, args, jointype, mode));
1361 }
1362 
1381 static void
1382 compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1383  int sample_rows, double total_rows, int mode)
1384 {
1385  MemoryContext old_context;
1386  int d, i; /* Counters */
1387  int notnull_cnt = 0; /* # not null rows in the sample */
1388  int null_cnt = 0; /* # null rows in the sample */
1389  int histogram_features = 0; /* # rows that actually got counted in the histogram */
1390 
1391  ND_STATS *nd_stats; /* Our histogram */
1392  size_t nd_stats_size; /* Size to allocate */
1393 
1394  double total_width = 0; /* # of bytes used by sample */
1395  double total_sample_volume = 0; /* Area/volume coverage of the sample */
1396  double total_cell_count = 0; /* # of cells in histogram affected by sample */
1397 
1398  ND_BOX sum; /* Sum of extents of sample boxes */
1399  ND_BOX avg; /* Avg of extents of sample boxes */
1400  ND_BOX stddev; /* StdDev of extents of sample boxes */
1401 
1402  const ND_BOX **sample_boxes; /* ND_BOXes for each of the sample features */
1403  ND_BOX sample_extent; /* Extent of the raw sample */
1404  int histo_size[ND_DIMS]; /* histogram nrows, ncols, etc */
1405  ND_BOX histo_extent; /* Spatial extent of the histogram */
1406  ND_BOX histo_extent_new; /* Temporary variable */
1407  int histo_cells_target; /* Number of cells we will shoot for, given the stats target */
1408  int histo_cells; /* Number of cells in the histogram */
1409  int histo_cells_new = 1; /* Temporary variable */
1410 
1411  int ndims = 2; /* Dimensionality of the sample */
1412  int histo_ndims = 0; /* Dimensionality of the histogram */
1413  double sample_distribution[ND_DIMS]; /* How homogeneous is distribution of sample in each axis? */
1414  double total_distribution; /* Total of sample_distribution */
1415 
1416  int stats_slot; /* What slot is this data going into? (2D vs ND) */
1417  int stats_kind; /* And this is what? (2D vs ND) */
1418 
1419  (void)total_sample_volume;
1420 
1421  /* Initialize sum and stddev */
1422  nd_box_init(&sum);
1423  nd_box_init(&stddev);
1424  nd_box_init(&avg);
1425  nd_box_init(&histo_extent);
1426  nd_box_init(&histo_extent_new);
1427 
1428  /*
1429  * This is where gserialized_analyze_nd
1430  * should put its' custom parameters.
1431  */
1432  /* void *mystats = stats->extra_data; */
1433 
1434  POSTGIS_DEBUG(2, "compute_gserialized_stats called");
1435  POSTGIS_DEBUGF(3, " # sample_rows: %d", sample_rows);
1436  POSTGIS_DEBUGF(3, " estimate of total_rows: %.6g", total_rows);
1437 
1438  /*
1439  * We might need less space, but don't think
1440  * its worth saving...
1441  */
1442  sample_boxes = palloc(sizeof(ND_BOX*) * sample_rows);
1443 
1444  /*
1445  * First scan:
1446  * o read boxes
1447  * o find dimensionality of the sample
1448  * o find extent of the sample
1449  * o count null-infinite/not-null values
1450  * o compute total_width
1451  * o compute total features's box area (for avgFeatureArea)
1452  * o sum features box coordinates (for standard deviation)
1453  */
1454  for ( i = 0; i < sample_rows; i++ )
1455  {
1456  Datum datum;
1457  GBOX gbox = {0};
1458  ND_BOX *nd_box;
1459  bool is_null;
1460 
1461  datum = fetchfunc(stats, i, &is_null);
1462 
1463  /* Skip all NULLs. */
1464  if ( is_null )
1465  {
1466  POSTGIS_DEBUGF(4, " skipped null geometry %d", i);
1467  null_cnt++;
1468  continue;
1469  }
1470 
1471  /* Read the bounds from the gserialized. */
1472  if (LW_FAILURE == gserialized_datum_get_gbox_p(datum, &gbox))
1473  {
1474  /* Skip empties too. */
1475  POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
1476  continue;
1477  }
1478 
1479  /* If we're in 2D mode, zero out the higher dimensions for "safety" */
1480  if ( mode == 2 )
1481  gbox.zmin = gbox.zmax = gbox.mmin = gbox.mmax = 0.0;
1482 
1483  /* Check bounds for validity (finite and not NaN) */
1484  if ( ! gbox_is_valid(&gbox) )
1485  {
1486  POSTGIS_DEBUGF(3, " skipped infinite/nan geometry %d", i);
1487  continue;
1488  }
1489 
1490  /*
1491  * In N-D mode, set the ndims to the maximum dimensionality found
1492  * in the sample. Otherwise, leave at ndims == 2.
1493  */
1494  if ( mode != 2 )
1495  ndims = Max(gbox_ndims(&gbox), ndims);
1496 
1497  /* Convert gbox to n-d box */
1498  nd_box = palloc(sizeof(ND_BOX));
1499  nd_box_from_gbox(&gbox, nd_box);
1500 
1501  /* Cache n-d bounding box */
1502  sample_boxes[notnull_cnt] = nd_box;
1503 
1504  /* Initialize sample extent before merging first entry */
1505  if ( ! notnull_cnt )
1506  nd_box_init_bounds(&sample_extent);
1507 
1508  /* Add current sample to overall sample extent */
1509  nd_box_merge(nd_box, &sample_extent);
1510 
1511  /* How many bytes does this sample use? */
1512  total_width += toast_raw_datum_size(datum);
1513 
1514  /* Add bounds coordinates to sums for stddev calculation */
1515  for ( d = 0; d < ndims; d++ )
1516  {
1517  sum.min[d] += nd_box->min[d];
1518  sum.max[d] += nd_box->max[d];
1519  }
1520 
1521  /* Increment our "good feature" count */
1522  notnull_cnt++;
1523 
1524  /* Give backend a chance of interrupting us */
1525  vacuum_delay_point();
1526  }
1527 
1528  /*
1529  * We'll build a histogram having stats->attr->attstattarget cells
1530  * on each side, within reason... we'll use ndims*10000 as the
1531  * maximum number of cells.
1532  * Also, if we're sampling a relatively small table, we'll try to ensure that
1533  * we have an average of 5 features for each cell so the histogram isn't
1534  * so sparse.
1535  */
1536  histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
1537  histo_cells_target = Min(histo_cells_target, ndims * 10000);
1538  histo_cells_target = Min(histo_cells_target, (int)(total_rows/5));
1539  POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
1540  POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
1541 
1542  /* If there's no useful features, we can't work out stats */
1543  if ( ! notnull_cnt )
1544  {
1545  Oid relation_oid = stats->attr->attrelid;
1546  char *relation_name = get_rel_name(relation_oid);
1547  elog(NOTICE,
1548  "PostGIS: Unable to compute statistics for \"%s.%s\": No non-null/empty features",
1549  relation_name ? relation_name : "(NULL)",
1550  stats->attr->attname.data);
1551  stats->stats_valid = false;
1552  return;
1553  }
1554 
1555  POSTGIS_DEBUGF(3, " sample_extent: %s", nd_box_to_json(&sample_extent, ndims));
1556 
1557  /*
1558  * Second scan:
1559  * o compute standard deviation
1560  */
1561  for ( d = 0; d < ndims; d++ )
1562  {
1563  /* Calculate average bounds values */
1564  avg.min[d] = sum.min[d] / notnull_cnt;
1565  avg.max[d] = sum.max[d] / notnull_cnt;
1566 
1567  /* Calculate standard deviation for this dimension bounds */
1568  for ( i = 0; i < notnull_cnt; i++ )
1569  {
1570  const ND_BOX *ndb = sample_boxes[i];
1571  stddev.min[d] += (ndb->min[d] - avg.min[d]) * (ndb->min[d] - avg.min[d]);
1572  stddev.max[d] += (ndb->max[d] - avg.max[d]) * (ndb->max[d] - avg.max[d]);
1573  }
1574  stddev.min[d] = sqrt(stddev.min[d] / notnull_cnt);
1575  stddev.max[d] = sqrt(stddev.max[d] / notnull_cnt);
1576 
1577  /* Histogram bounds for this dimension bounds is avg +/- SDFACTOR * stdev */
1578  histo_extent.min[d] = Max(avg.min[d] - SDFACTOR * stddev.min[d], sample_extent.min[d]);
1579  histo_extent.max[d] = Min(avg.max[d] + SDFACTOR * stddev.max[d], sample_extent.max[d]);
1580  }
1581 
1582  /*
1583  * Third scan:
1584  * o skip hard deviants
1585  * o compute new histogram box
1586  */
1587  nd_box_init_bounds(&histo_extent_new);
1588  for ( i = 0; i < notnull_cnt; i++ )
1589  {
1590  const ND_BOX *ndb = sample_boxes[i];
1591  /* Skip any hard deviants (boxes entirely outside our histo_extent */
1592  if ( ! nd_box_intersects(&histo_extent, ndb, ndims) )
1593  {
1594  POSTGIS_DEBUGF(4, " feature %d is a hard deviant, skipped", i);
1595  sample_boxes[i] = NULL;
1596  continue;
1597  }
1598  /* Expand our new box to fit all the other features. */
1599  nd_box_merge(ndb, &histo_extent_new);
1600  }
1601  /*
1602  * Expand the box slightly (1%) to avoid edge effects
1603  * with objects that are on the boundary
1604  */
1605  nd_box_expand(&histo_extent_new, 0.01);
1606  histo_extent = histo_extent_new;
1607 
1608  /*
1609  * How should we allocate our histogram cells to the
1610  * different dimensions? We can't do it by raw dimensional width,
1611  * because in x/y/z space, the z can have different units
1612  * from the x/y. Similarly for x/y/t space.
1613  * So, we instead calculate how much features overlap
1614  * each other in their dimension to figure out which
1615  * dimensions have useful selectivity characteristics (more
1616  * variability in density) and therefor would find
1617  * more cells useful (to distinguish between dense places and
1618  * homogeneous places).
1619  */
1620  nd_box_array_distribution(sample_boxes, notnull_cnt, &histo_extent, ndims,
1621  sample_distribution);
1622 
1623  /*
1624  * The sample_distribution array now tells us how spread out the
1625  * data is in each dimension, so we use that data to allocate
1626  * the histogram cells we have available.
1627  * At this point, histo_cells_target is the approximate target number
1628  * of cells.
1629  */
1630 
1631  /*
1632  * Some dimensions have basically a uniform distribution, we want
1633  * to allocate no cells to those dimensions, only to dimensions
1634  * that have some interesting differences in data distribution.
1635  * Here we count up the number of interesting dimensions
1636  */
1637  for ( d = 0; d < ndims; d++ )
1638  {
1639  if ( sample_distribution[d] > 0 )
1640  histo_ndims++;
1641  }
1642 
1643  if ( histo_ndims == 0 )
1644  {
1645  /* Special case: all our dimensions had low variability! */
1646  /* We just divide the cells up evenly */
1647  POSTGIS_DEBUG(3, " special case: no axes have variability");
1648  histo_cells_new = 1;
1649  for ( d = 0; d < ndims; d++ )
1650  {
1651  histo_size[d] = (int)pow((double)histo_cells_target, 1/(double)ndims);
1652  if ( ! histo_size[d] )
1653  histo_size[d] = 1;
1654  POSTGIS_DEBUGF(3, " histo_size[d]: %d", histo_size[d]);
1655  histo_cells_new *= histo_size[d];
1656  }
1657  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1658  }
1659  else
1660  {
1661  /*
1662  * We're going to express the amount of variability in each dimension
1663  * as a proportion of the total variability and allocate cells in that
1664  * dimension relative to that proportion.
1665  */
1666  POSTGIS_DEBUG(3, " allocating histogram axes based on axis variability");
1667  total_distribution = total_double(sample_distribution, ndims); /* First get the total */
1668  POSTGIS_DEBUGF(3, " total_distribution: %.8g", total_distribution);
1669  histo_cells_new = 1; /* For the number of cells in the final histogram */
1670  for ( d = 0; d < ndims; d++ )
1671  {
1672  if ( sample_distribution[d] == 0 ) /* Uninteresting dimensions don't get any room */
1673  {
1674  histo_size[d] = 1;
1675  }
1676  else /* Interesting dimension */
1677  {
1678  /* How does this dims variability compare to the total? */
1679  float edge_ratio = (float)sample_distribution[d] / (float)total_distribution;
1680  /*
1681  * Scale the target cells number by the # of dims and ratio,
1682  * then take the appropriate root to get the estimated number of cells
1683  * on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
1684  */
1685  histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
1686  /* If something goes awry, just give this dim one slot */
1687  if ( ! histo_size[d] )
1688  histo_size[d] = 1;
1689  }
1690  histo_cells_new *= histo_size[d];
1691  }
1692  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1693  }
1694 
1695  /* Update histo_cells to the actual number of cells we need to allocate */
1696  histo_cells = histo_cells_new;
1697  POSTGIS_DEBUGF(3, " histo_cells: %d", histo_cells);
1698 
1699  /*
1700  * Create the histogram (ND_STATS) in the stats memory context
1701  */
1702  old_context = MemoryContextSwitchTo(stats->anl_context);
1703  nd_stats_size = sizeof(ND_STATS) + ((histo_cells - 1) * sizeof(float4));
1704  nd_stats = palloc(nd_stats_size);
1705  memset(nd_stats, 0, nd_stats_size); /* Initialize all values to 0 */
1706  MemoryContextSwitchTo(old_context);
1707 
1708  /* Initialize the #ND_STATS objects */
1709  nd_stats->ndims = ndims;
1710  nd_stats->extent = histo_extent;
1711  nd_stats->sample_features = sample_rows;
1712  nd_stats->table_features = total_rows;
1713  nd_stats->not_null_features = notnull_cnt;
1714  /* Copy in the histogram dimensions */
1715  for ( d = 0; d < ndims; d++ )
1716  nd_stats->size[d] = histo_size[d];
1717 
1718  /*
1719  * Fourth scan:
1720  * o fill histogram values with the proportion of
1721  * features' bbox overlaps: a feature's bvol
1722  * can fully overlap (1) or partially overlap
1723  * (fraction of 1) an histogram cell.
1724  *
1725  * Note that we are filling each cell with the "portion of
1726  * the feature's box that overlaps the cell". So, if we sum
1727  * up the values in the histogram, we could get the
1728  * histogram feature count.
1729  *
1730  */
1731  for ( i = 0; i < notnull_cnt; i++ )
1732  {
1733  const ND_BOX *nd_box;
1734  ND_IBOX nd_ibox;
1735  int at[ND_DIMS];
1736  int d;
1737  double num_cells = 0;
1738  double tmp_volume = 1.0;
1739  double min[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1740  double max[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1741  double cellsize[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1742 
1743  nd_box = sample_boxes[i];
1744  if ( ! nd_box ) continue; /* Skip Null'ed out hard deviants */
1745 
1746  /* Give backend a chance of interrupting us */
1747  vacuum_delay_point();
1748 
1749  /* Find the cells that overlap with this box and put them into the ND_IBOX */
1750  nd_box_overlap(nd_stats, nd_box, &nd_ibox);
1751  memset(at, 0, sizeof(int)*ND_DIMS);
1752 
1753  POSTGIS_DEBUGF(3, " feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1754  nd_ibox.min[0], nd_ibox.min[1], nd_ibox.min[2], nd_ibox.min[3],
1755  nd_ibox.max[0], nd_ibox.max[1], nd_ibox.max[2], nd_ibox.max[3]);
1756 
1757  for ( d = 0; d < nd_stats->ndims; d++ )
1758  {
1759  /* Initialize the starting values */
1760  at[d] = nd_ibox.min[d];
1761  min[d] = nd_stats->extent.min[d];
1762  max[d] = nd_stats->extent.max[d];
1763  cellsize[d] = (max[d] - min[d])/(nd_stats->size[d]);
1764 
1765  /* What's the volume (area) of this feature's box? */
1766  tmp_volume *= (nd_box->max[d] - nd_box->min[d]);
1767  }
1768 
1769  /* Add feature volume (area) to our total */
1770  total_sample_volume += tmp_volume;
1771 
1772  /*
1773  * Move through all the overlaped histogram cells values and
1774  * add the box overlap proportion to them.
1775  */
1776  do
1777  {
1778  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1779  double ratio;
1780  /* Create a box for this histogram cell */
1781  for ( d = 0; d < nd_stats->ndims; d++ )
1782  {
1783  nd_cell.min[d] = min[d] + (at[d]+0) * cellsize[d];
1784  nd_cell.max[d] = min[d] + (at[d]+1) * cellsize[d];
1785  }
1786 
1787  /*
1788  * If a feature box is completely inside one cell the ratio will be
1789  * 1.0. If a feature box is 50% in two cells, each cell will get
1790  * 0.5 added on.
1791  */
1792  ratio = nd_box_ratio(&nd_cell, nd_box, nd_stats->ndims);
1793  nd_stats->value[nd_stats_value_index(nd_stats, at)] += ratio;
1794  num_cells += ratio;
1795  POSTGIS_DEBUGF(3, " ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1796  POSTGIS_DEBUGF(3, " at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1797  }
1798  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1799 
1800  /* Keep track of overall number of overlaps counted */
1801  total_cell_count += num_cells;
1802  /* How many features have we added to this histogram? */
1803  histogram_features++;
1804  }
1805 
1806  POSTGIS_DEBUGF(3, " histogram_features: %d", histogram_features);
1807  POSTGIS_DEBUGF(3, " sample_rows: %d", sample_rows);
1808  POSTGIS_DEBUGF(3, " table_rows: %.6g", total_rows);
1809 
1810  /* Error out if we got no sample information */
1811  if ( ! histogram_features )
1812  {
1813  POSTGIS_DEBUG(3, " no stats have been gathered");
1814  elog(NOTICE, " no features lie in the stats histogram, invalid stats");
1815  stats->stats_valid = false;
1816  return;
1817  }
1818 
1819  nd_stats->histogram_features = histogram_features;
1820  nd_stats->histogram_cells = histo_cells;
1821  nd_stats->cells_covered = total_cell_count;
1822 
1823  /* Put this histogram data into the right slot/kind */
1824  if ( mode == 2 )
1825  {
1826  stats_slot = STATISTIC_SLOT_2D;
1827  stats_kind = STATISTIC_KIND_2D;
1828  }
1829  else
1830  {
1831  stats_slot = STATISTIC_SLOT_ND;
1832  stats_kind = STATISTIC_KIND_ND;
1833  }
1834 
1835  /* Write the statistics data */
1836  stats->stakind[stats_slot] = stats_kind;
1837  stats->staop[stats_slot] = InvalidOid;
1838  stats->stanumbers[stats_slot] = (float4*)nd_stats;
1839  stats->numnumbers[stats_slot] = nd_stats_size/sizeof(float4);
1840  stats->stanullfrac = (float4)null_cnt/sample_rows;
1841  stats->stawidth = total_width/notnull_cnt;
1842  stats->stadistinct = -1.0;
1843  stats->stats_valid = true;
1844 
1845  POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1846  POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1847  POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
1848  POSTGIS_DEBUGF(3, " out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1849  POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
1850  POSTGIS_DEBUG (3, " out: distinct values: all (no check done)");
1851  POSTGIS_DEBUGF(3, " out: %s", nd_stats_to_json(nd_stats));
1852  /*
1853  POSTGIS_DEBUGF(3, " out histogram:\n%s", nd_stats_to_grid(nd_stats));
1854  */
1855 
1856  return;
1857 }
1858 
1859 
1877 static void
1878 compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1879  int sample_rows, double total_rows)
1880 {
1881  GserializedAnalyzeExtraData *extra_data = (GserializedAnalyzeExtraData *)stats->extra_data;
1882  /* Call standard statistics calculation routine to fill in correlation for BRIN to work */
1883  stats->extra_data = extra_data->std_extra_data;
1884  extra_data->std_compute_stats(stats, fetchfunc, sample_rows, total_rows);
1885  stats->extra_data = extra_data;
1886 
1887  /* 2D Mode */
1888  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
1889 
1890  if (stats->stats_valid)
1891  {
1892  /* ND Mode: Only computed if 2D was computed too (not NULL and valid) */
1893  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 0);
1894  }
1895 }
1896 
1897 
1925 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
1926 {
1927  VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1928  GserializedAnalyzeExtraData *extra_data =
1930 
1931  /* Ask for standard analyze to fill in as much as possible */
1932  if (!std_typanalyze(stats))
1933  PG_RETURN_BOOL(false);
1934 
1935  /* Save old compute_stats and extra_data for scalar statistics ... */
1936  extra_data->std_compute_stats = stats->compute_stats;
1937  extra_data->std_extra_data = stats->extra_data;
1938  /* ... and replace with our info */
1939  stats->compute_stats = compute_gserialized_stats;
1940  stats->extra_data = extra_data;
1941 
1942  /* Indicate we are done successfully */
1943  PG_RETURN_BOOL(true);
1944 }
1945 
1958 static float8
1959 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
1960 {
1961  int d; /* counter */
1962  float8 selectivity;
1963  ND_BOX nd_box;
1964  ND_IBOX nd_ibox;
1965  int at[ND_DIMS];
1966  double cell_size[ND_DIMS];
1967  double min[ND_DIMS];
1968  double max[ND_DIMS];
1969  double total_count = 0.0;
1970  int ndims_max;
1971 
1972  /* Calculate the overlap of the box on the histogram */
1973  if ( ! nd_stats )
1974  {
1975  elog(NOTICE, " estimate_selectivity called with null input");
1976  return FALLBACK_ND_SEL;
1977  }
1978 
1979  ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1980 
1981  /* Initialize nd_box. */
1982  nd_box_from_gbox(box, &nd_box);
1983 
1984  /*
1985  * To return 2D stats on an ND sample, we need to make the
1986  * 2D box cover the full range of the other dimensions in the
1987  * histogram.
1988  */
1989  POSTGIS_DEBUGF(3, " mode: %d", mode);
1990  if ( mode == 2 )
1991  {
1992  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1993  ndims_max = 2;
1994  }
1995 
1996  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1997  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1998 
1999  /*
2000  * Search box completely misses histogram extent?
2001  * We have to intersect in all N dimensions or else we have
2002  * zero interaction under the &&& operator. It's important
2003  * to short circuit in this case, as some of the tests below
2004  * will return junk results when run on non-intersecting inputs.
2005  */
2006  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
2007  {
2008  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
2009  return 0.0;
2010  }
2011 
2012  /* Search box completely contains histogram extent! */
2013  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
2014  {
2015  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
2016  return 1.0;
2017  }
2018 
2019  /* Calculate the overlap of the box on the histogram */
2020  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
2021  {
2022  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
2023  return FALLBACK_ND_SEL;
2024  }
2025 
2026  /* Work out some measurements of the histogram */
2027  for ( d = 0; d < nd_stats->ndims; d++ )
2028  {
2029  /* Cell size in each dim */
2030  min[d] = nd_stats->extent.min[d];
2031  max[d] = nd_stats->extent.max[d];
2032  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
2033  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2034 
2035  /* Initialize the counter */
2036  at[d] = nd_ibox.min[d];
2037  }
2038 
2039  /* Move through all the overlap values and sum them */
2040  do
2041  {
2042  float cell_count, ratio;
2043  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2044 
2045  /* We have to pro-rate partially overlapped cells. */
2046  for ( d = 0; d < nd_stats->ndims; d++ )
2047  {
2048  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2049  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2050  }
2051 
2052  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2053  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2054 
2055  /* Add the pro-rated count for this cell to the overall total */
2056  total_count += cell_count * ratio;
2057  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2058  }
2059  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2060 
2061  /* Scale by the number of features in our histogram to get the proportion */
2062  selectivity = total_count / nd_stats->histogram_features;
2063 
2064  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2065  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2066  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2067  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2068 
2069  /* Prevent rounding overflows */
2070  if (selectivity > 1.0) selectivity = 1.0;
2071  else if (selectivity < 0.0) selectivity = 0.0;
2072 
2073  return selectivity;
2074 }
2075 
2076 
2077 
2083 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
2084 {
2085  Oid table_oid = PG_GETARG_OID(0);
2086  text *att_text = PG_GETARG_TEXT_P(1);
2087  ND_STATS *nd_stats;
2088  char *str;
2089  text *json;
2090  int mode = 2; /* default to 2D mode */
2091  bool only_parent = false; /* default to whole tree stats */
2092 
2093  /* Check if we've been asked to not use 2d mode */
2094  if ( ! PG_ARGISNULL(2) )
2095  mode = text_p_get_mode(PG_GETARG_TEXT_P(2));
2096 
2097  /* Retrieve the stats object */
2098  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, only_parent);
2099  if ( ! nd_stats )
2100  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2101 
2102  /* Convert to JSON */
2103  str = nd_stats_to_json(nd_stats);
2104  json = cstring_to_text(str);
2105  pfree(str);
2106  pfree(nd_stats);
2107  PG_RETURN_TEXT_P(json);
2108 }
2109 
2110 
2116 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
2117 {
2118  Oid table_oid = PG_GETARG_OID(0);
2119  text *att_text = PG_GETARG_TEXT_P(1);
2120  Datum geom_datum = PG_GETARG_DATUM(2);
2121  GBOX gbox; /* search box read from gserialized datum */
2122  float8 selectivity = 0;
2123  ND_STATS *nd_stats;
2124  int mode = 2; /* 2D mode by default */
2125 
2126  /* Check if we've been asked to not use 2d mode */
2127  if ( ! PG_ARGISNULL(3) )
2128  mode = text_p_get_mode(PG_GETARG_TEXT_P(3));
2129 
2130  /* Retrieve the stats object */
2131  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, false);
2132 
2133  if ( ! nd_stats )
2134  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2135 
2136  /* Calculate the gbox */
2137  if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
2138  elog(ERROR, "unable to calculate bounding box from geometry");
2139 
2140  POSTGIS_DEBUGF(3, " %s", gbox_to_string(&gbox));
2141 
2142  /* Do the estimation */
2143  selectivity = estimate_selectivity(&gbox, nd_stats, mode);
2144 
2145  pfree(nd_stats);
2146  PG_RETURN_FLOAT8(selectivity);
2147 }
2148 
2149 
2155 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
2156 {
2157  Oid table_oid1 = PG_GETARG_OID(0);
2158  text *att_text1 = PG_GETARG_TEXT_P(1);
2159  Oid table_oid2 = PG_GETARG_OID(2);
2160  text *att_text2 = PG_GETARG_TEXT_P(3);
2161  ND_STATS *nd_stats1, *nd_stats2;
2162  float8 selectivity = 0;
2163  int mode = 2; /* 2D mode by default */
2164 
2165 
2166  /* Retrieve the stats object */
2167  nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode, false);
2168  nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode, false);
2169 
2170  if ( ! nd_stats1 )
2171  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text_to_cstring(att_text1));
2172 
2173  if ( ! nd_stats2 )
2174  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid2), text_to_cstring(att_text2));
2175 
2176  /* Check if we've been asked to not use 2d mode */
2177  if ( ! PG_ARGISNULL(4) )
2178  {
2179  text *modetxt = PG_GETARG_TEXT_P(4);
2180  char *modestr = text_to_cstring(modetxt);
2181  if ( modestr[0] == 'N' )
2182  mode = 0;
2183  }
2184 
2185  /* Do the estimation */
2186  selectivity = estimate_join_selectivity(nd_stats1, nd_stats2);
2187 
2188  pfree(nd_stats1);
2189  pfree(nd_stats2);
2190  PG_RETURN_FLOAT8(selectivity);
2191 }
2192 
2198 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
2199 {
2200  PG_RETURN_DATUM(DirectFunctionCall5(
2202  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2203  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2204  Int32GetDatum(2) /* 2-D mode */
2205  ));
2206 }
2207 
2213 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
2214 {
2215  PG_RETURN_DATUM(DirectFunctionCall5(
2217  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2218  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2219  Int32GetDatum(0) /* N-D mode */
2220  ));
2221 }
2222 
2223 
2238 float8
2239 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
2240 {
2241  VariableStatData vardata;
2242  Node *other = NULL;
2243  bool varonleft;
2244  ND_STATS *nd_stats = NULL;
2245 
2246  GBOX search_box;
2247  float8 selectivity = 0;
2248  Const *otherConst;
2249 
2250  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
2251 
2252  if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft))
2253  {
2254  POSTGIS_DEBUGF(2, "%s: could not find vardata", __func__);
2255  return DEFAULT_ND_SEL;
2256  }
2257 
2258  if (!IsA(other, Const))
2259  {
2260  ReleaseVariableStats(vardata);
2261  POSTGIS_DEBUGF(2, "%s: no constant argument, returning default selectivity %g", __func__, DEFAULT_ND_SEL);
2262  return DEFAULT_ND_SEL;
2263  }
2264 
2265  otherConst = (Const*)other;
2266  if ((!otherConst) || otherConst->constisnull)
2267  {
2268  ReleaseVariableStats(vardata);
2269  POSTGIS_DEBUGF(2, "%s: constant argument is NULL", __func__);
2270  return DEFAULT_ND_SEL;
2271  }
2272 
2273  if (!gserialized_datum_get_gbox_p(otherConst->constvalue, &search_box))
2274  {
2275  ReleaseVariableStats(vardata);
2276  POSTGIS_DEBUGF(2, "%s: search box is EMPTY", __func__);
2277  return 0.0;
2278  }
2279 
2280  if (!vardata.statsTuple)
2281  {
2282  POSTGIS_DEBUGF(1, "%s: no statistics available on table. Empty? Need to ANALYZE?", __func__);
2283  return DEFAULT_ND_SEL;
2284  }
2285 
2286  nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
2287  ReleaseVariableStats(vardata);
2288  selectivity = estimate_selectivity(&search_box, nd_stats, mode);
2289  if (nd_stats) pfree(nd_stats);
2290  return selectivity;
2291 }
2292 
2294 Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
2295 {
2296  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2297  // Oid operator_oid = PG_GETARG_OID(1);
2298  List *args = (List *) PG_GETARG_POINTER(2);
2299  int varRelid = PG_GETARG_INT32(3);
2300  int mode = PG_GETARG_INT32(4);
2301  float8 selectivity = gserialized_sel_internal(root, args, varRelid, mode);
2302  POSTGIS_DEBUGF(2, "%s: selectivity is %g", __func__, selectivity);
2303  PG_RETURN_FLOAT8(selectivity);
2304 }
2305 
2306 
2307 
2314 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
2315 {
2316  char *nsp = NULL;
2317  char *tbl = NULL;
2318  text *col = NULL;
2319  char *nsp_tbl = NULL;
2320  Oid tbl_oid, idx_oid = 0;
2321  ND_STATS *nd_stats;
2322  GBOX *gbox = NULL;
2323  bool only_parent = false;
2324  int key_type, att_num;
2325  size_t sz;
2326 
2327  /* We need to initialize the internal cache to access it later via postgis_oid() */
2328  postgis_initialize_cache();
2329 
2330  if ( PG_NARGS() == 4 )
2331  {
2332  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2333  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2334  col = PG_GETARG_TEXT_P(2);
2335  only_parent = PG_GETARG_BOOL(3);
2336  sz = strlen(nsp) + strlen(tbl) + 6;
2337  nsp_tbl = palloc(sz);
2338  snprintf(nsp_tbl, sz, "\"%s\".\"%s\"", nsp, tbl);
2339  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2340  pfree(nsp_tbl);
2341  }
2342  else if ( PG_NARGS() == 3 )
2343  {
2344  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2345  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2346  col = PG_GETARG_TEXT_P(2);
2347  sz = strlen(nsp) + strlen(tbl) + 6;
2348  nsp_tbl = palloc(sz);
2349  snprintf(nsp_tbl, sz, "\"%s\".\"%s\"", nsp, tbl);
2350  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2351  pfree(nsp_tbl);
2352  }
2353  else if ( PG_NARGS() == 2 )
2354  {
2355  tbl = text_to_cstring(PG_GETARG_TEXT_P(0));
2356  col = PG_GETARG_TEXT_P(1);
2357  sz = strlen(tbl) + 3;
2358  nsp_tbl = palloc(sz);
2359  snprintf(nsp_tbl, sz, "\"%s\"", tbl);
2360  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2361  pfree(nsp_tbl);
2362  }
2363  else
2364  {
2365  elog(ERROR, "estimated_extent() called with wrong number of arguments");
2366  PG_RETURN_NULL();
2367  }
2368 
2369  /* Read the extent from the head of the spatial index, if there is one */
2370 
2371  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type, &att_num);
2372  if (idx_oid)
2373  {
2374  /* TODO: how about only_parent ? */
2375  gbox = spatial_index_read_extent(idx_oid, key_type, att_num);
2376  POSTGIS_DEBUGF(2, "index for \"%s.%s\" exists, reading gbox from there", tbl, text_to_cstring(col));
2377  if ( ! gbox ) PG_RETURN_NULL();
2378  }
2379  else
2380  {
2381  POSTGIS_DEBUGF(2, "index for \"%s.%s\" does not exist", tbl, text_to_cstring(col));
2382 
2383  /* Fall back to reading the stats, if no index is found */
2384 
2385  /* Estimated extent only returns 2D bounds, so use mode 2 */
2386  nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2, only_parent);
2387 
2388  /* Error out on no stats */
2389  if ( ! nd_stats ) {
2390  elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, text_to_cstring(col));
2391  PG_RETURN_NULL();
2392  }
2393 
2394  /* Construct the box */
2395  gbox = palloc(sizeof(GBOX));
2396  FLAGS_SET_GEODETIC(gbox->flags, 0);
2397  FLAGS_SET_Z(gbox->flags, 0);
2398  FLAGS_SET_M(gbox->flags, 0);
2399  gbox->xmin = nd_stats->extent.min[0];
2400  gbox->xmax = nd_stats->extent.max[0];
2401  gbox->ymin = nd_stats->extent.min[1];
2402  gbox->ymax = nd_stats->extent.max[1];
2403  pfree(nd_stats);
2404  }
2405 
2406  PG_RETURN_POINTER(gbox);
2407 }
2408 
2416 Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
2417 {
2418  if ( PG_NARGS() == 3 )
2419  {
2420  PG_RETURN_DATUM(
2421  DirectFunctionCall3(gserialized_estimated_extent,
2422  PG_GETARG_DATUM(0),
2423  PG_GETARG_DATUM(1),
2424  PG_GETARG_DATUM(2)));
2425  }
2426  else if ( PG_NARGS() == 2 )
2427  {
2428  PG_RETURN_DATUM(
2429  DirectFunctionCall2(gserialized_estimated_extent,
2430  PG_GETARG_DATUM(0),
2431  PG_GETARG_DATUM(1)));
2432  }
2433 
2434  elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
2435  PG_RETURN_NULL();
2436 }
2437 
2438 /************************************************************************/
2439 
2440 static Oid
2441 table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num)
2442 {
2443  Relation tbl_rel;
2444  ListCell *lc;
2445  List *idx_list;
2446  Oid result = InvalidOid;
2447  char *colname = text_to_cstring(col);
2448 
2449  /* Lookup our spatial index key types */
2450  Oid b2d_oid = postgis_oid(BOX2DFOID);
2451  Oid gdx_oid = postgis_oid(BOX3DOID);
2452 
2453  if (!(b2d_oid && gdx_oid))
2454  return InvalidOid;
2455 
2456  tbl_rel = RelationIdGetRelation(tbl_oid);
2457  idx_list = RelationGetIndexList(tbl_rel);
2458  RelationClose(tbl_rel);
2459 
2460  /* For each index associated with this table... */
2461  foreach(lc, idx_list)
2462  {
2463  Form_pg_class idx_form;
2464  HeapTuple idx_tup;
2465  int idx_relam;
2466  Oid idx_oid = lfirst_oid(lc);
2467 
2468  idx_tup = SearchSysCache1(RELOID, ObjectIdGetDatum(idx_oid));
2469  if (!HeapTupleIsValid(idx_tup))
2470  elog(ERROR, "%s: unable to lookup index %u in syscache", __func__, idx_oid);
2471  idx_form = (Form_pg_class) GETSTRUCT(idx_tup);
2472  idx_relam = idx_form->relam;
2473  ReleaseSysCache(idx_tup);
2474 
2475  /* Does the index use a GIST access method? */
2476  if (idx_relam == GIST_AM_OID)
2477  {
2478  Form_pg_attribute att;
2479  Oid atttypid;
2480  int attnum;
2481  /* Is the index on the column name we are looking for? */
2482  HeapTuple att_tup = SearchSysCache2(ATTNAME,
2483  ObjectIdGetDatum(idx_oid),
2484  PointerGetDatum(colname));
2485  if (!HeapTupleIsValid(att_tup))
2486  continue;
2487 
2488  att = (Form_pg_attribute) GETSTRUCT(att_tup);
2489  atttypid = att->atttypid;
2490  attnum = att->attnum;
2491  ReleaseSysCache(att_tup);
2492 
2493  /* Is the column actually spatial? */
2494  if (b2d_oid == atttypid || gdx_oid == atttypid)
2495  {
2496  /* Save result, clean up, and break out */
2497  result = idx_oid;
2498  if (att_num)
2499  *att_num = attnum;
2500  if (key_type)
2501  *key_type = (atttypid == b2d_oid ? STATISTIC_KIND_2D : STATISTIC_KIND_ND);
2502  break;
2503  }
2504  }
2505  }
2506  return result;
2507 }
2508 
2509 static GBOX *
2510 spatial_index_read_extent(Oid idx_oid, int key_type, int att_num)
2511 {
2512  BOX2DF *bounds_2df = NULL;
2513  GIDX *bounds_gidx = NULL;
2514  GBOX *gbox = NULL;
2515  Relation idx_rel;
2516  Buffer buffer;
2517  Page page;
2518  OffsetNumber offset;
2519  unsigned long offset_max;
2520 
2521  if (!idx_oid)
2522  return NULL;
2523 
2524  idx_rel = index_open(idx_oid, AccessShareLock);
2525  buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2526  page = (Page) BufferGetPage(buffer);
2527  offset = FirstOffsetNumber;
2528  offset_max = PageGetMaxOffsetNumber(page);
2529  while (offset <= offset_max)
2530  {
2531  ItemId iid = PageGetItemId(page, offset);
2532  IndexTuple ituple;
2533  if (!iid)
2534  {
2535  ReleaseBuffer(buffer);
2536  index_close(idx_rel, AccessShareLock);
2537  return NULL;
2538  }
2539  ituple = (IndexTuple) PageGetItem(page, iid);
2540  if (!GistTupleIsInvalid(ituple))
2541  {
2542  bool isnull;
2543  Datum idx_attr = index_getattr(ituple, att_num, idx_rel->rd_att, &isnull);
2544  if (!isnull)
2545  {
2546  if (key_type == STATISTIC_KIND_2D)
2547  {
2548  BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2549  if (bounds_2df)
2550  box2df_merge(bounds_2df, b);
2551  else
2552  bounds_2df = box2df_copy(b);
2553  }
2554  else
2555  {
2556  GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2557  if (bounds_gidx)
2558  gidx_merge(&bounds_gidx, b);
2559  else
2560  bounds_gidx = gidx_copy(b);
2561  }
2562  }
2563  }
2564  offset++;
2565  }
2566 
2567  ReleaseBuffer(buffer);
2568  index_close(idx_rel, AccessShareLock);
2569 
2570  if (key_type == STATISTIC_KIND_2D && bounds_2df)
2571  {
2572  if (box2df_is_empty(bounds_2df))
2573  return NULL;
2574  gbox = gbox_new(0);
2575  box2df_to_gbox_p(bounds_2df, gbox);
2576  }
2577  else if (key_type == STATISTIC_KIND_ND && bounds_gidx)
2578  {
2579  if (gidx_is_unknown(bounds_gidx))
2580  return NULL;
2581  gbox = gbox_new(0);
2582  gbox_from_gidx(bounds_gidx, gbox, 0);
2583  }
2584  else
2585  return NULL;
2586 
2587  return gbox;
2588 }
2589 
2590 /*
2591 CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
2592  RETURNS box2d
2593  AS '$libdir/postgis-2.5','_postgis_gserialized_index_extent'
2594  LANGUAGE 'c' STABLE STRICT;
2595 */
2596 
2598 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
2599 {
2600  GBOX *gbox = NULL;
2601  int key_type;
2602  int att_num;
2603  Oid tbl_oid = PG_GETARG_DATUM(0);
2604  text *col = PG_GETARG_TEXT_P(1);
2605  Oid idx_oid;
2606 
2607  if(!tbl_oid)
2608  PG_RETURN_NULL();
2609 
2610  /* We need to initialize the internal cache to access it later via postgis_oid() */
2611  postgis_initialize_cache();
2612 
2613  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type, &att_num);
2614  if (!idx_oid)
2615  PG_RETURN_NULL();
2616 
2617  gbox = spatial_index_read_extent(idx_oid, key_type, att_num);
2618  if (!gbox)
2619  PG_RETURN_NULL();
2620  else
2621  PG_RETURN_POINTER(gbox);
2622 }
2623 
char result[OUT_DOUBLE_BUFFER_SIZE]
Definition: cu_print.c:267
GBOX * gbox_new(lwflags_t flags)
Create a new gbox with the dimensionality indicated by the flags.
Definition: gbox.c:32
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition: gbox.c:197
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
Definition: gbox.c:392
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static int range_quintile(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
#define NUM_BINS
#define STATISTIC_SLOT_ND
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
#define ND_DIMS
The maximum number of dimensions our code can handle.
#define STATISTIC_KIND_2D
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Create a printable view of the ND_STATS histogram.
#define DEFAULT_ND_JOINSEL
#define STATISTIC_KIND_ND
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
#define SDFACTOR
#define FALLBACK_ND_JOINSEL
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type, int att_num)
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
This function should return an estimation of the number of rows returned by a query involving an over...
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num)
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array.
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator?
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension,...
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
bool box2df_is_empty(const BOX2DF *a)
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
int gserialized_datum_get_gbox_p(Datum gsdatum, GBOX *gbox)
Given a GSERIALIZED datum, as quickly as possible (peeking into the top of the memory) return the gbo...
BOX2DF * box2df_copy(BOX2DF *b)
bool gidx_is_unknown(const GIDX *a)
GIDX * gidx_copy(GIDX *b)
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define LW_FAILURE
Definition: liblwgeom.h:110
#define FLAGS_GET_Z(flags)
Definition: liblwgeom.h:179
#define FLAGS_GET_M(flags)
Definition: liblwgeom.h:180
#define FLAGS_SET_GEODETIC(flags, value)
Definition: liblwgeom.h:189
#define FLAGS_SET_M(flags, value)
Definition: liblwgeom.h:187
#define FLAGS_SET_Z(flags, value)
Definition: liblwgeom.h:186
#define FLAGS_GET_GEODETIC(flags)
Definition: liblwgeom.h:182
This library is the generic geometry handling section of PostGIS.
#define str(s)
args
Definition: ovdump.py:45
Datum buffer(PG_FUNCTION_ARGS)
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided.
Definition: stringbuffer.c:228
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
Definition: stringbuffer.c:33
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
Definition: stringbuffer.c:76
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string.
Definition: stringbuffer.c:124
static void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
Definition: stringbuffer.h:103
double ymax
Definition: liblwgeom.h:371
double zmax
Definition: liblwgeom.h:373
double xmax
Definition: liblwgeom.h:369
double zmin
Definition: liblwgeom.h:372
double mmax
Definition: liblwgeom.h:375
double ymin
Definition: liblwgeom.h:370
double xmin
Definition: liblwgeom.h:368
double mmin
Definition: liblwgeom.h:374
lwflags_t flags
Definition: liblwgeom.h:367
AnalyzeAttrComputeStatsFunc std_compute_stats
float4 max[ND_DIMS]
float4 min[ND_DIMS]
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
int max[ND_DIMS]
int min[ND_DIMS]
N-dimensional box index type.
float4 size[ND_DIMS]
N-dimensional statistics structure.