PostGIS  3.3.9dev-r@@SVN_REVISION@@
gserialized_estimate.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * PostGIS is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * PostGIS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18  *
19  **********************************************************************
20  *
21  * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
22  *
23  **********************************************************************/
24 
25 
26 
27 /**********************************************************************
28  THEORY OF OPERATION
29 
30 The ANALYZE command hooks to a callback (gserialized_analyze_nd) that
31 calculates (compute_gserialized_stats_mode) two histograms of occurrences of
32 features, one for the 2D domain (and the && operator) and one for the
33 ND domain (and the &&& operator).
34 
35 Queries in PostgreSQL call into the selectivity sub-system to find out
36 the relative effectiveness of different clauses in sub-setting
37 relations. Queries with constant arguments call gserialized_gist_sel,
38 queries with relations on both sides call gserialized_gist_joinsel.
39 
40 gserialized_gist_sel sums up the values in the histogram that overlap
41 the constant search box.
42 
43 gserialized_gist_joinsel sums up the product of the overlapping
44 cells in each relation's histogram.
45 
46 Depending on the operator and type, the mode of selectivity calculation
47 will be 2D or ND.
48 
49 - geometry && geometry ==> 2D
50 - geometry &&& geometry ==> ND
51 - geography && geography ==> ND
52 
53 The 2D mode is put in effect by retrieving the 2D histogram from the
54 statistics cache and then allowing the generic ND calculations to
55 go to work.
56 
57 TO DO: More testing and examination of the &&& operator and mixed
58 dimensionality cases. (2D geometry) &&& (3D column), etc.
59 
60 **********************************************************************/
61 
62 #include "postgres.h"
63 
64 #include "access/genam.h"
65 #include "access/gin.h"
66 #include "access/gist.h"
67 #include "access/gist_private.h"
68 #include "access/gistscan.h"
69 #if PG_VERSION_NUM < 130000
70 #include "access/tuptoaster.h" /* For toast_raw_datum_size */
71 #else
72 #include "access/detoast.h" /* For toast_raw_datum_size */
73 #endif
74 #include "utils/datum.h"
75 #include "access/heapam.h"
76 #include "catalog/index.h"
77 #include "catalog/pg_am.h"
78 #include "miscadmin.h"
79 #include "storage/lmgr.h"
80 #include "catalog/namespace.h"
81 #include "catalog/indexing.h"
82 #if PG_VERSION_NUM >= 100000
83 #include "utils/regproc.h"
84 #include "utils/varlena.h"
85 #endif
86 #include "utils/builtins.h"
87 #include "utils/datum.h"
88 #include "utils/snapmgr.h"
89 #include "utils/fmgroids.h"
90 #include "funcapi.h"
91 #include "access/heapam.h"
92 #include "catalog/pg_type.h"
93 #include "access/relscan.h"
94 
95 #include "executor/spi.h"
96 #include "fmgr.h"
97 #include "commands/vacuum.h"
98 #if PG_VERSION_NUM < 120000
99 #include "nodes/relation.h"
100 #else
101 #include "nodes/pathnodes.h"
102 #endif
103 #include "parser/parsetree.h"
104 #include "utils/array.h"
105 #include "utils/lsyscache.h"
106 #include "utils/builtins.h"
107 #include "utils/syscache.h"
108 #include "utils/rel.h"
109 #include "utils/selfuncs.h"
110 
111 #include "../postgis_config.h"
112 
113 #include "access/htup_details.h"
114 
115 #include "stringbuffer.h"
116 #include "liblwgeom.h"
117 #include "lwgeom_pg.h" /* For debugging macros. */
118 #include "gserialized_gist.h" /* For index common functions */
119 
120 #include <math.h>
121 #if HAVE_IEEEFP_H
122 #include <ieeefp.h>
123 #endif
124 #include <float.h>
125 #include <string.h>
126 #include <stdio.h>
127 #include <ctype.h>
128 
129 
130 /************************************************************************/
131 
132 
133 /* Prototypes */
134 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS);
135 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS);
136 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS);
137 Datum gserialized_gist_sel(PG_FUNCTION_ARGS);
138 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS);
139 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
140 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
141 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
142 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS);
143 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
144 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
145 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
146 
147 /* Local prototypes */
148 static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num);
149 static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type, int att_num);
150 
151 
152 /* Other prototypes */
153 float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
154 float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode);
155 
156 
157 /* Old Prototype */
158 Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
159 
160 /*
161  * Assign a number to the n-dimensional statistics kind
162  *
163  * tgl suggested:
164  *
165  * 1-100: reserved for assignment by the core Postgres project
166  * 100-199: reserved for assignment by PostGIS
167  * 200-9999: reserved for other globally-known stats kinds
168  * 10000-32767: reserved for private site-local use
169  */
170 #define STATISTIC_KIND_ND 102
171 #define STATISTIC_KIND_2D 103
172 
173 /*
174  * Postgres does not pin its slots and uses them as they come.
175  * We need to preserve its Correlation for brin to work
176  * 0 may be MCV
177  * 1 may be Histogram
178  * 2 may be Correlation
179  * We take 3 and 4.
180  */
181 #define STATISTIC_SLOT_ND 3
182 #define STATISTIC_SLOT_2D 4
183 
184 /*
185 * The SD factor restricts the size of the statistics histogram
186 * based on the standard deviation of the extent of the data.
187 * SDFACTOR is the number of standard deviations from the mean
188 * the histogram will extend.
189 */
190 #define SDFACTOR 3.25
191 
197 #define ND_DIMS 4
198 
205 #define MIN_DIMENSION_WIDTH 0.000000001
206 
211 #define MAX_DIMENSION_WIDTH 1.0E+20
212 
216 #define DEFAULT_ND_SEL 0.0001
217 #define DEFAULT_ND_JOINSEL 0.001
218 
222 #define FALLBACK_ND_SEL 0.2
223 #define FALLBACK_ND_JOINSEL 0.3
224 
230 typedef struct ND_BOX_T
231 {
232  float4 min[ND_DIMS];
233  float4 max[ND_DIMS];
235 
239 typedef struct ND_IBOX_T
240 {
241  int min[ND_DIMS];
242  int max[ND_DIMS];
244 
245 
252 typedef struct ND_STATS_T
253 {
254  /* Dimensionality of the histogram. */
255  float4 ndims;
256 
257  /* Size of n-d histogram in each dimension. */
258  float4 size[ND_DIMS];
259 
260  /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
262 
263  /* How many rows in the table itself? */
265 
266  /* How many rows were in the sample that built this histogram? */
268 
269  /* How many not-Null/Empty features were in the sample? */
271 
272  /* How many features actually got sampled in the histogram? */
274 
275  /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
277 
278  /* How many cells did those histogram features cover? */
279  /* Since we are pro-rating coverage, this number should */
280  /* now always equal histogram_features */
282 
283  /* Variable length # of floats for histogram */
284  float4 value[1];
286 
287 typedef struct {
288  /* Saved state from std_typanalyze() */
289  AnalyzeAttrComputeStatsFunc std_compute_stats;
292 
299 static int
300 gbox_ndims(const GBOX* gbox)
301 {
302  int dims = 2;
303  if ( FLAGS_GET_GEODETIC(gbox->flags) )
304  return 3;
305  if ( FLAGS_GET_Z(gbox->flags) )
306  dims++;
307  if ( FLAGS_GET_M(gbox->flags) )
308  dims++;
309  return dims;
310 }
311 
317 static int
318 text_p_get_mode(const text *txt)
319 {
320  int mode = 2;
321  char *modestr;
322  if (VARSIZE_ANY_EXHDR(txt) <= 0)
323  return mode;
324  modestr = (char*)VARDATA(txt);
325  if ( modestr[0] == 'N' )
326  mode = 0;
327  return mode;
328 }
329 
330 
/**
 * qsort-style three-way comparison of two ints.
 *
 * @return -1, 0 or 1 as *a is less than, equal to or greater than *b
 */
static int
cmp_int (const void *a, const void *b)
{
	const int lhs = *(const int *)a;
	const int rhs = *(const int *)b;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}
347 
352 static int
353 range_quintile(int *vals, int nvals)
354 {
355  qsort(vals, nvals, sizeof(int), cmp_int);
356  return vals[4*nvals/5] - vals[nvals/5];
357 }
358 
/**
 * Sum an array of doubles.
 *
 * The accumulator is a double so that the sum keeps the precision of
 * its inputs (a float accumulator drops low-order bits as soon as the
 * running total exceeds 2^24).
 *
 * @param vals  values to add up
 * @param nvals number of entries in vals
 * @return the sum, 0 for an empty array
 */
static double
total_double(const double *vals, int nvals)
{
	int i;
	double total = 0;

	for ( i = 0; i < nvals; i++ )
		total += vals[i];

	return total;
}
373 
374 #if POSTGIS_DEBUG_LEVEL >= 3
375 
/**
 * Sum an array of ints.
 *
 * @param vals  values to add up
 * @param nvals number of entries in vals
 * @return the sum, 0 when nvals is not positive
 */
static int
total_int(const int *vals, int nvals)
{
	const int *cursor = vals;
	int remaining;
	int sum = 0;

	/* Walk the array front to back, counting down the entries left */
	for ( remaining = nvals; remaining > 0; remaining-- )
		sum += *cursor++;

	return sum;
}
390 
/**
 * Arithmetic mean of an array of ints.
 *
 * The division is done in double precision; no guard is applied for
 * nvals == 0.
 */
static double
avg(const int *vals, int nvals)
{
	const double sum = (double)total_int(vals, nvals);
	const double count = (double)nvals;

	return sum / count;
}
400 
/**
 * Population standard deviation of an array of ints: the square root
 * of the mean squared distance from the average.
 */
static double
stddev(const int *vals, int nvals)
{
	const double mean = avg(vals, nvals);
	double squares = 0;
	int k = 0;

	/* Accumulate the squared deviation of every entry from the mean */
	while ( k < nvals )
	{
		const double delta = mean - (double)(vals[k]);
		squares += delta * delta;
		k++;
	}

	return sqrt(squares / nvals);
}
419 #endif /* POSTGIS_DEBUG_LEVEL >= 3 */
420 
425 static int
426 nd_stats_value_index(const ND_STATS *stats, int *indexes)
427 {
428  int d;
429  int accum = 1, vdx = 0;
430 
431  /* Calculate the index into the 1-d values array that the (i,j,k,l) */
432  /* n-d histogram coordinate implies. */
433  /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
434  for ( d = 0; d < (int)(stats->ndims); d++ )
435  {
436  int size = (int)(stats->size[d]);
437  if ( indexes[d] < 0 || indexes[d] >= size )
438  {
439  POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
440  return -1;
441  }
442  vdx += indexes[d] * accum;
443  accum *= size;
444  }
445  return vdx;
446 }
447 
451 static char*
452 nd_box_to_json(const ND_BOX *nd_box, int ndims)
453 {
454  char *rv;
455  int i;
457 
458  stringbuffer_append(sb, "{\"min\":[");
459  for ( i = 0; i < ndims; i++ )
460  {
461  if ( i ) stringbuffer_append(sb, ",");
462  stringbuffer_aprintf(sb, "%.6g", nd_box->min[i]);
463  }
464  stringbuffer_append(sb, "],\"max\":[");
465  for ( i = 0; i < ndims; i++ )
466  {
467  if ( i ) stringbuffer_append(sb, ",");
468  stringbuffer_aprintf(sb, "%.6g", nd_box->max[i]);
469  }
470  stringbuffer_append(sb, "]}");
471 
474  return rv;
475 }
476 
477 
482 static char*
483 nd_stats_to_json(const ND_STATS *nd_stats)
484 {
485  char *json_extent, *str;
486  int d;
488  int ndims = (int)roundf(nd_stats->ndims);
489 
490  stringbuffer_append(sb, "{");
491  stringbuffer_aprintf(sb, "\"ndims\":%d,", ndims);
492 
493  /* Size */
494  stringbuffer_append(sb, "\"size\":[");
495  for ( d = 0; d < ndims; d++ )
496  {
497  if ( d ) stringbuffer_append(sb, ",");
498  stringbuffer_aprintf(sb, "%d", (int)roundf(nd_stats->size[d]));
499  }
500  stringbuffer_append(sb, "],");
501 
502  /* Extent */
503  json_extent = nd_box_to_json(&(nd_stats->extent), ndims);
504  stringbuffer_aprintf(sb, "\"extent\":%s,", json_extent);
505  pfree(json_extent);
506 
507  stringbuffer_aprintf(sb, "\"table_features\":%d,", (int)roundf(nd_stats->table_features));
508  stringbuffer_aprintf(sb, "\"sample_features\":%d,", (int)roundf(nd_stats->sample_features));
509  stringbuffer_aprintf(sb, "\"not_null_features\":%d,", (int)roundf(nd_stats->not_null_features));
510  stringbuffer_aprintf(sb, "\"histogram_features\":%d,", (int)roundf(nd_stats->histogram_features));
511  stringbuffer_aprintf(sb, "\"histogram_cells\":%d,", (int)roundf(nd_stats->histogram_cells));
512  stringbuffer_aprintf(sb, "\"cells_covered\":%d", (int)roundf(nd_stats->cells_covered));
513  stringbuffer_append(sb, "}");
514 
517  return str;
518 }
519 
520 
526 // static char*
527 // nd_stats_to_grid(const ND_STATS *stats)
528 // {
529 // char *rv;
530 // int j, k;
531 // int sizex = (int)roundf(stats->size[0]);
532 // int sizey = (int)roundf(stats->size[1]);
533 // stringbuffer_t *sb = stringbuffer_create();
534 //
535 // for ( k = 0; k < sizey; k++ )
536 // {
537 // for ( j = 0; j < sizex; j++ )
538 // {
539 // stringbuffer_aprintf(sb, "%3d ", (int)roundf(stats->value[j + k*sizex]));
540 // }
541 // stringbuffer_append(sb, "\n");
542 // }
543 //
544 // rv = stringbuffer_getstringcopy(sb);
545 // stringbuffer_destroy(sb);
546 // return rv;
547 // }
548 
549 
551 static int
552 nd_box_merge(const ND_BOX *source, ND_BOX *target)
553 {
554  int d;
555  for ( d = 0; d < ND_DIMS; d++ )
556  {
557  target->min[d] = Min(target->min[d], source->min[d]);
558  target->max[d] = Max(target->max[d], source->max[d]);
559  }
560  return true;
561 }
562 
564 static int
566 {
567  memset(a, 0, sizeof(ND_BOX));
568  return true;
569 }
570 
576 static int
578 {
579  int d;
580  for ( d = 0; d < ND_DIMS; d++ )
581  {
582  a->min[d] = FLT_MAX;
583  a->max[d] = -1 * FLT_MAX;
584  }
585  return true;
586 }
587 
589 static void
590 nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
591 {
592  volatile int d = 0;
593  POSTGIS_DEBUGF(3, " %s", gbox_to_string(gbox));
594 
595  nd_box_init(nd_box);
596  nd_box->min[d] = gbox->xmin;
597  nd_box->max[d] = gbox->xmax;
598  d++;
599  nd_box->min[d] = gbox->ymin;
600  nd_box->max[d] = gbox->ymax;
601  d++;
602  if ( FLAGS_GET_GEODETIC(gbox->flags) )
603  {
604  nd_box->min[d] = gbox->zmin;
605  nd_box->max[d] = gbox->zmax;
606  return;
607  }
608  if ( FLAGS_GET_Z(gbox->flags) )
609  {
610  nd_box->min[d] = gbox->zmin;
611  nd_box->max[d] = gbox->zmax;
612  d++;
613  }
614  if ( FLAGS_GET_M(gbox->flags) )
615  {
616  nd_box->min[d] = gbox->mmin;
617  nd_box->max[d] = gbox->mmax;
618  d++;
619  }
620  return;
621 }
622 
626 static int
627 nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
628 {
629  int d;
630  for ( d = 0; d < ndims; d++ )
631  {
632  if ( (a->min[d] > b->max[d]) || (a->max[d] < b->min[d]) )
633  return false;
634  }
635  return true;
636 }
637 
641 static int
642 nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
643 {
644  int d;
645  for ( d = 0; d < ndims; d++ )
646  {
647  if ( ! ((a->min[d] < b->min[d]) && (a->max[d] > b->max[d])) )
648  return false;
649  }
650  return true;
651 }
652 
657 static int
658 nd_box_expand(ND_BOX *nd_box, double expansion_factor)
659 {
660  int d;
661  double size;
662  for ( d = 0; d < ND_DIMS; d++ )
663  {
664  size = nd_box->max[d] - nd_box->min[d];
665  /* Avoid expanding boxes that are either too wide or too narrow*/
666  if (size < MIN_DIMENSION_WIDTH || size > MAX_DIMENSION_WIDTH)
667  continue;
668  nd_box->min[d] -= size * expansion_factor / 2;
669  nd_box->max[d] += size * expansion_factor / 2;
670  }
671  return true;
672 }
673 
678 static inline int
679 nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
680 {
681  int d;
682 
683  POSTGIS_DEBUGF(4, " nd_box: %s", nd_box_to_json(nd_box, nd_stats->ndims));
684 
685  /* Initialize ibox */
686  memset(nd_ibox, 0, sizeof(ND_IBOX));
687 
688  /* In each dimension... */
689  for ( d = 0; d < nd_stats->ndims; d++ )
690  {
691  double smin = nd_stats->extent.min[d];
692  double smax = nd_stats->extent.max[d];
693  double width = smax - smin;
694 
695  if (width < MIN_DIMENSION_WIDTH)
696  {
697  nd_ibox->min[d] = nd_ibox->max[d] = nd_stats->extent.min[d];
698  }
699  else
700  {
701  int size = (int)roundf(nd_stats->size[d]);
702 
703  /* ... find cells the box overlaps with in this dimension */
704  nd_ibox->min[d] = floor(size * (nd_box->min[d] - smin) / width);
705  nd_ibox->max[d] = floor(size * (nd_box->max[d] - smin) / width);
706 
707  POSTGIS_DEBUGF(5, " stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
708  POSTGIS_DEBUGF(5, " overlap: dim %d: (%d, %d)", d, nd_ibox->min[d], nd_ibox->max[d]);
709 
710  /* Push any out-of range values into range */
711  nd_ibox->min[d] = Max(nd_ibox->min[d], 0);
712  nd_ibox->max[d] = Min(nd_ibox->max[d], size - 1);
713  }
714  }
715  return true;
716 }
717 
721 static inline double
722 nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
723 {
724  int d;
725  bool covered = true;
726  double ivol = 1.0;
727  double vol2 = 1.0;
728 
729  for ( d = 0 ; d < ndims; d++ )
730  {
731  if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
732  return 0.0; /* Disjoint */
733 
734  if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
735  covered = false;
736  }
737 
738  if ( covered )
739  return 1.0;
740 
741  for ( d = 0; d < ndims; d++ )
742  {
743  double width2 = b2->max[d] - b2->min[d];
744  double imin, imax, iwidth;
745 
746  vol2 *= width2;
747 
748  imin = Max(b1->min[d], b2->min[d]);
749  imax = Min(b1->max[d], b2->max[d]);
750  iwidth = imax - imin;
751  iwidth = Max(0.0, iwidth);
752 
753  ivol *= iwidth;
754  }
755 
756  if ( vol2 == 0.0 )
757  return vol2;
758 
759  return ivol / vol2;
760 }
761 
762 /* How many bins shall we use in figuring out the distribution? */
763 #define NUM_BINS 50
764 
780 static int
781 nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
782 {
783  int d, i, k, range;
784  int counts[NUM_BINS];
785  double smin, smax; /* Spatial min, spatial max */
786  double swidth; /* Spatial width of dimension */
787 #if POSTGIS_DEBUG_LEVEL >= 3
788  double average, sdev, sdev_ratio;
789 #endif
790  int bmin, bmax; /* Bin min, bin max */
791  const ND_BOX *ndb;
792 
793  /* For each dimension... */
794  for ( d = 0; d < ndims; d++ )
795  {
796  /* Initialize counts for this dimension */
797  memset(counts, 0, sizeof(counts));
798 
799  smin = extent->min[d];
800  smax = extent->max[d];
801  swidth = smax - smin;
802 
803  /* Don't try and calculate distribution of overly narrow */
804  /* or overly wide dimensions. Here we're being pretty geographical, */
805  /* expecting "normal" planar or geographic coordinates. */
806  /* Otherwise we have to "handle" +/- Inf bounded features and */
807  /* the assumptions needed for that are as bad as this hack. */
808  if ( swidth < MIN_DIMENSION_WIDTH || swidth > MAX_DIMENSION_WIDTH )
809  {
810  distribution[d] = 0;
811  continue;
812  }
813 
814  /* Sum up the overlaps of each feature with the dimensional bins */
815  for ( i = 0; i < num_boxes; i++ )
816  {
817  double minoffset, maxoffset;
818 
819  /* Skip null entries */
820  ndb = nd_boxes[i];
821  if ( ! ndb ) continue;
822 
823  /* Where does box fall relative to the working range */
824  minoffset = ndb->min[d] - smin;
825  maxoffset = ndb->max[d] - smin;
826 
827  /* Skip boxes that our outside our working range */
828  if ( minoffset < 0 || minoffset > swidth ||
829  maxoffset < 0 || maxoffset > swidth )
830  {
831  continue;
832  }
833 
834  /* What bins does this range correspond to? */
835  bmin = floor(NUM_BINS * minoffset / swidth);
836  bmax = floor(NUM_BINS * maxoffset / swidth);
837 
838  /* Should only happen when maxoffset==swidth */
839  if (bmax >= NUM_BINS)
840  bmax = NUM_BINS-1;
841 
842  POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
843 
844  /* Increment the counts in all the bins this feature overlaps */
845  for ( k = bmin; k <= bmax; k++ )
846  {
847  counts[k] += 1;
848  }
849 
850  }
851 
852  /* How dispersed is the distribution of features across bins? */
853  range = range_quintile(counts, NUM_BINS);
854 
855 #if POSTGIS_DEBUG_LEVEL >= 3
856  average = avg(counts, NUM_BINS);
857  sdev = stddev(counts, NUM_BINS);
858  sdev_ratio = sdev/average;
859 
860  POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
861  POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
862  POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
863  POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
864 #endif
865 
866  distribution[d] = range;
867  }
868 
869  return true;
870 }
871 
877 static inline int
878 nd_increment(ND_IBOX *ibox, int ndims, int *counter)
879 {
880  int d = 0;
881 
882  while ( d < ndims )
883  {
884  if ( counter[d] < ibox->max[d] )
885  {
886  counter[d] += 1;
887  break;
888  }
889  counter[d] = ibox->min[d];
890  d++;
891  }
892  /* That's it, cannot increment any more! */
893  if ( d == ndims )
894  return false;
895 
896  /* Increment complete! */
897  return true;
898 }
899 
900 static ND_STATS*
901 pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
902 {
903  int stats_kind = STATISTIC_KIND_ND;
904  int rv;
905  ND_STATS *nd_stats;
906 
907  /* If we're in 2D mode, set the kind appropriately */
908  if ( mode == 2 ) stats_kind = STATISTIC_KIND_2D;
909 
910  /* Then read the geom status histogram from that */
911 
912 #if POSTGIS_PGSQL_VERSION < 100
913  {
914  float4 *floatptr;
915  int nvalues;
916 
917  rv = get_attstatsslot(stats_tuple, 0, 0, stats_kind, InvalidOid,
918  NULL, NULL, NULL, &floatptr, &nvalues);
919 
920  if ( ! rv ) {
921  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
922  return NULL;
923  }
924 
925  /* Clone the stats here so we can release the attstatsslot immediately */
926  nd_stats = palloc(sizeof(float) * nvalues);
927  memcpy(nd_stats, floatptr, sizeof(float) * nvalues);
928 
929  /* Clean up */
930  free_attstatsslot(0, NULL, 0, floatptr, nvalues);
931  }
932 #else /* PostgreSQL 10 or higher */
933  {
934  AttStatsSlot sslot;
935  rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
936  ATTSTATSSLOT_NUMBERS);
937  if ( ! rv ) {
938  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
939  return NULL;
940  }
941 
942  /* Clone the stats here so we can release the attstatsslot immediately */
943  nd_stats = palloc(sizeof(float4) * sslot.nnumbers);
944  memcpy(nd_stats, sslot.numbers, sizeof(float4) * sslot.nnumbers);
945 
946  free_attstatsslot(&sslot);
947  }
948 #endif
949 
950  return nd_stats;
951 }
952 
957 static ND_STATS*
958 pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
959 {
960  HeapTuple stats_tuple = NULL;
961  ND_STATS *nd_stats;
962 
963  /* First pull the stats tuple for the whole tree */
964  if ( ! only_parent )
965  {
966  POSTGIS_DEBUGF(2, "searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
967  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(true));
968  if ( stats_tuple )
969  POSTGIS_DEBUGF(2, "found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
970  }
971  /* Fall-back to main table stats only, if not found for whole tree or explicitly ignored */
972  if ( only_parent || ! stats_tuple )
973  {
974  POSTGIS_DEBUGF(2, "searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
975  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(false));
976  if ( stats_tuple )
977  POSTGIS_DEBUGF(2, "found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
978  }
979  if ( ! stats_tuple )
980  {
981  POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
982  return NULL;
983  }
984 
985  nd_stats = pg_nd_stats_from_tuple(stats_tuple, mode);
986  ReleaseSysCache(stats_tuple);
987  if ( ! nd_stats )
988  {
989  POSTGIS_DEBUGF(2,
990  "histogram for attribute %d of table \"%s\" does not exist?",
991  att_num, get_rel_name(table_oid));
992  }
993 
994  return nd_stats;
995 }
996 
1005 static ND_STATS*
1006 pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
1007 {
1008  const char *att_name = text_to_cstring(att_text);
1009  AttrNumber att_num;
1010 
1011  /* We know the name? Look up the num */
1012  if ( att_text )
1013  {
1014  /* Get the attribute number */
1015  att_num = get_attnum(table_oid, att_name);
1016  if ( ! att_num ) {
1017  elog(ERROR, "attribute \"%s\" does not exist", att_name);
1018  return NULL;
1019  }
1020  }
1021  else
1022  {
1023  elog(ERROR, "attribute name is null");
1024  return NULL;
1025  }
1026 
1027  return pg_get_nd_stats(table_oid, att_num, mode, only_parent);
1028 }
1029 
1043 static float8
1045 {
1046  int ncells1, ncells2;
1047  int ndims1, ndims2, ndims;
1048  double ntuples_max;
1049  double ntuples_not_null1, ntuples_not_null2;
1050 
1051  ND_BOX extent1, extent2;
1052  ND_IBOX ibox1, ibox2;
1053  int at1[ND_DIMS];
1054  int at2[ND_DIMS];
1055  double min1[ND_DIMS];
1056  double width1[ND_DIMS];
1057  double cellsize1[ND_DIMS];
1058  int size2[ND_DIMS];
1059  double min2[ND_DIMS];
1060  double width2[ND_DIMS];
1061  double cellsize2[ND_DIMS];
1062  int size1[ND_DIMS];
1063  int d;
1064  double val = 0;
1065  float8 selectivity;
1066 
1067  /* Drop out on null inputs */
1068  if ( ! ( s1 && s2 ) )
1069  {
1070  elog(NOTICE, " estimate_join_selectivity called with null inputs");
1071  return FALLBACK_ND_SEL;
1072  }
1073 
1074  /* We need to know how many cells each side has... */
1075  ncells1 = (int)roundf(s1->histogram_cells);
1076  ncells2 = (int)roundf(s2->histogram_cells);
1077 
1078  /* ...so that we can drive the summation loop with the smaller histogram. */
1079  if ( ncells1 > ncells2 )
1080  {
1081  const ND_STATS *stats_tmp = s1;
1082  s1 = s2;
1083  s2 = stats_tmp;
1084  }
1085 
1086  POSTGIS_DEBUGF(3, "s1: %s", nd_stats_to_json(s1));
1087  POSTGIS_DEBUGF(3, "s2: %s", nd_stats_to_json(s2));
1088 
1089  /* Re-read that info after the swap */
1090  ncells1 = (int)roundf(s1->histogram_cells);
1091  ncells2 = (int)roundf(s2->histogram_cells);
1092 
1093  /* Q: What's the largest possible join size these relations can create? */
1094  /* A: The product of the # of non-null rows in each relation. */
1095  ntuples_not_null1 = s1->table_features * (s1->not_null_features / s1->sample_features);
1096  ntuples_not_null2 = s2->table_features * (s2->not_null_features / s2->sample_features);
1097  ntuples_max = ntuples_not_null1 * ntuples_not_null2;
1098 
1099  /* Get the ndims as ints */
1100  ndims1 = (int)roundf(s1->ndims);
1101  ndims2 = (int)roundf(s2->ndims);
1102  ndims = Max(ndims1, ndims2);
1103 
1104  /* Get the extents */
1105  extent1 = s1->extent;
1106  extent2 = s2->extent;
1107 
1108  /* If relation stats do not intersect, join is very very selective. */
1109  if ( ! nd_box_intersects(&extent1, &extent2, ndims) )
1110  {
1111  POSTGIS_DEBUG(3, "relation stats do not intersect, returning 0");
1112  PG_RETURN_FLOAT8(0.0);
1113  }
1114 
1115  /*
1116  * First find the index range of the part of the smaller
1117  * histogram that overlaps the larger one.
1118  */
1119  if ( ! nd_box_overlap(s1, &extent2, &ibox1) )
1120  {
1121  POSTGIS_DEBUG(3, "could not calculate overlap of relations");
1122  PG_RETURN_FLOAT8(FALLBACK_ND_JOINSEL);
1123  }
1124 
1125  /* Initialize counters / constants on s1 */
1126  for ( d = 0; d < ndims1; d++ )
1127  {
1128  at1[d] = ibox1.min[d];
1129  min1[d] = s1->extent.min[d];
1130  width1[d] = s1->extent.max[d] - s1->extent.min[d];
1131  size1[d] = (int)roundf(s1->size[d]);
1132  cellsize1[d] = width1[d] / size1[d];
1133  }
1134 
1135  /* Initialize counters / constants on s2 */
1136  for ( d = 0; d < ndims2; d++ )
1137  {
1138  min2[d] = s2->extent.min[d];
1139  width2[d] = s2->extent.max[d] - s2->extent.min[d];
1140  size2[d] = (int)roundf(s2->size[d]);
1141  cellsize2[d] = width2[d] / size2[d];
1142  }
1143 
1144  /* For each affected cell of s1... */
1145  do
1146  {
1147  double val1;
1148  /* Construct the bounds of this cell */
1149  ND_BOX nd_cell1;
1150  nd_box_init(&nd_cell1);
1151  for ( d = 0; d < ndims1; d++ )
1152  {
1153  nd_cell1.min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1154  nd_cell1.max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1155  }
1156 
1157  /* Find the cells of s2 that cell1 overlaps.. */
1158  nd_box_overlap(s2, &nd_cell1, &ibox2);
1159 
1160  /* Initialize counter */
1161  for ( d = 0; d < ndims2; d++ )
1162  {
1163  at2[d] = ibox2.min[d];
1164  }
1165 
1166  POSTGIS_DEBUGF(3, "at1 %d,%d %s", at1[0], at1[1], nd_box_to_json(&nd_cell1, ndims1));
1167 
1168  /* Get the value at this cell */
1169  val1 = s1->value[nd_stats_value_index(s1, at1)];
1170 
1171  /* For each overlapped cell of s2... */
1172  do
1173  {
1174  double ratio2;
1175  double val2;
1176 
1177  /* Construct the bounds of this cell */
1178  ND_BOX nd_cell2;
1179  nd_box_init(&nd_cell2);
1180  for ( d = 0; d < ndims2; d++ )
1181  {
1182  nd_cell2.min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1183  nd_cell2.max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1184  }
1185 
1186  POSTGIS_DEBUGF(3, " at2 %d,%d %s", at2[0], at2[1], nd_box_to_json(&nd_cell2, ndims2));
1187 
1188  /* Calculate overlap ratio of the cells */
1189  ratio2 = nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1190 
1191  /* Multiply the cell counts, scaled by overlap ratio */
1192  val2 = s2->value[nd_stats_value_index(s2, at2)];
1193  POSTGIS_DEBUGF(3, " val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1194  val += val1 * (val2 * ratio2);
1195  }
1196  while ( nd_increment(&ibox2, ndims2, at2) );
1197 
1198  }
1199  while( nd_increment(&ibox1, ndims1, at1) );
1200 
1201  POSTGIS_DEBUGF(3, "val of histogram = %g", val);
1202 
1203  /*
1204  * In order to compare our total cell count "val" to the
1205  * ntuples_max, we need to scale val up to reflect a full
1206  * table estimate. So, multiply by ratio of table size to
1207  * sample size.
1208  */
1209  val *= (s1->table_features / s1->sample_features);
1210  val *= (s2->table_features / s2->sample_features);
1211 
1212  POSTGIS_DEBUGF(3, "val scaled to full table size = %g", val);
1213 
1214  /*
1215  * Because the cell counts are over-determined due to
1216  * double counting of features that overlap multiple cells
1217  * (see the compute_gserialized_stats routine)
1218  * we also have to scale our cell count "val" *down*
1219  * to adjust for the double counting.
1220  */
1221 // val /= (s1->cells_covered / s1->histogram_features);
1222 // val /= (s2->cells_covered / s2->histogram_features);
1223 
1224  /*
1225  * Finally, the selectivity is the estimated number of
1226  * rows to be returned divided by the maximum possible
1227  * number of rows that can be returned.
1228  */
1229  selectivity = val / ntuples_max;
1230 
1231  /* Guard against over-estimates and crazy numbers :) */
1232  if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1233  {
1234  selectivity = DEFAULT_ND_JOINSEL;
1235  }
1236  else if ( selectivity > 1.0 )
1237  {
1238  selectivity = 1.0;
1239  }
1240 
1241  return selectivity;
1242 }
1243 
1249 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
1250 {
1251  PG_RETURN_DATUM(DirectFunctionCall5(
1253  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1254  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1255  Int32GetDatum(0) /* ND mode */
1256  ));
1257 }
1258 
1264 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
1265 {
1266  PG_RETURN_DATUM(DirectFunctionCall5(
1268  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1269  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1270  Int32GetDatum(2) /* 2D mode */
1271  ));
1272 }
1273 
1274 double
1275 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
1276 {
1277  float8 selectivity;
1278  Oid relid1, relid2;
1279  ND_STATS *stats1, *stats2;
1280  Node *arg1 = (Node*) linitial(args);
1281  Node *arg2 = (Node*) lsecond(args);
1282  Var *var1 = (Var*) arg1;
1283  Var *var2 = (Var*) arg2;
1284 
1285  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1286 
1287  /* We only do column joins right now, no functional joins */
1288  /* TODO: handle g1 && ST_Expand(g2) */
1289  if (!IsA(arg1, Var) || !IsA(arg2, Var))
1290  {
1291  POSTGIS_DEBUGF(1, "%s called with arguments that are not column references", __func__);
1292  return DEFAULT_ND_JOINSEL;
1293  }
1294 
1295  /* What are the Oids of our tables/relations? */
1296  relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
1297  relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
1298 
1299  /* Pull the stats from the stats system. */
1300  stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
1301  stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
1302 
1303  /* If we can't get stats, we have to stop here! */
1304  if (!stats1)
1305  {
1306  POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1307  return DEFAULT_ND_JOINSEL;
1308  }
1309  else if (!stats2)
1310  {
1311  POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1312  return DEFAULT_ND_JOINSEL;
1313  }
1314 
1315  selectivity = estimate_join_selectivity(stats1, stats2);
1316  POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
1317  pfree(stats1);
1318  pfree(stats2);
1319  return selectivity;
1320 }
1321 
1331 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
1332 {
1333  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1334  /* Oid operator = PG_GETARG_OID(1); */
1335  List *args = (List *) PG_GETARG_POINTER(2);
1336  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1337  int mode = PG_GETARG_INT32(4);
1338 
1339  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1340 
1341  /* Check length of args and punt on > 2 */
1342  if (list_length(args) != 2)
1343  {
1344  POSTGIS_DEBUGF(2, "%s: got nargs == %d", __func__, list_length(args));
1345  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1346  }
1347 
1348  /* Only respond to an inner join/unknown context join */
1349  if (jointype != JOIN_INNER)
1350  {
1351  POSTGIS_DEBUGF(1, "%s: jointype %d not supported", __func__, jointype);
1352  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1353  }
1354 
1355  PG_RETURN_FLOAT8(gserialized_joinsel_internal(root, args, jointype, mode));
1356 }
1357 
1376 static void
1377 compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1378  int sample_rows, double total_rows, int mode)
1379 {
1380  MemoryContext old_context;
1381  int d, i; /* Counters */
1382  int notnull_cnt = 0; /* # not null rows in the sample */
1383  int null_cnt = 0; /* # null rows in the sample */
1384  int histogram_features = 0; /* # rows that actually got counted in the histogram */
1385 
1386  ND_STATS *nd_stats; /* Our histogram */
1387  size_t nd_stats_size; /* Size to allocate */
1388 
1389  double total_width = 0; /* # of bytes used by sample */
1390  double total_cell_count = 0; /* # of cells in histogram affected by sample */
1391 
1392  ND_BOX sum; /* Sum of extents of sample boxes */
1393  ND_BOX avg; /* Avg of extents of sample boxes */
1394  ND_BOX stddev; /* StdDev of extents of sample boxes */
1395 
1396  const ND_BOX **sample_boxes; /* ND_BOXes for each of the sample features */
1397  ND_BOX sample_extent; /* Extent of the raw sample */
1398  int histo_size[ND_DIMS]; /* histogram nrows, ncols, etc */
1399  ND_BOX histo_extent; /* Spatial extent of the histogram */
1400  ND_BOX histo_extent_new; /* Temporary variable */
1401  int histo_cells_target; /* Number of cells we will shoot for, given the stats target */
1402  int histo_cells; /* Number of cells in the histogram */
1403  int histo_cells_new = 1; /* Temporary variable */
1404 
1405  int ndims = 2; /* Dimensionality of the sample */
1406  int histo_ndims = 0; /* Dimensionality of the histogram */
1407  double sample_distribution[ND_DIMS]; /* How homogeneous is distribution of sample in each axis? */
1408  double total_distribution; /* Total of sample_distribution */
1409 
1410  int stats_slot; /* What slot is this data going into? (2D vs ND) */
1411  int stats_kind; /* And this is what? (2D vs ND) */
1412 
1413  /* Initialize sum and stddev */
1414  nd_box_init(&sum);
1415  nd_box_init(&stddev);
1416  nd_box_init(&avg);
1417  nd_box_init(&histo_extent);
1418  nd_box_init(&histo_extent_new);
1419 
1420  /*
1421  * This is where gserialized_analyze_nd
1422  * should put its' custom parameters.
1423  */
1424  /* void *mystats = stats->extra_data; */
1425 
1426  POSTGIS_DEBUG(2, "compute_gserialized_stats called");
1427  POSTGIS_DEBUGF(3, " # sample_rows: %d", sample_rows);
1428  POSTGIS_DEBUGF(3, " estimate of total_rows: %.6g", total_rows);
1429 
1430  /*
1431  * We might need less space, but don't think
1432  * its worth saving...
1433  */
1434  sample_boxes = palloc(sizeof(ND_BOX*) * sample_rows);
1435 
1436  /*
1437  * First scan:
1438  * o read boxes
1439  * o find dimensionality of the sample
1440  * o find extent of the sample
1441  * o count null-infinite/not-null values
1442  * o compute total_width
1443  * o compute total features's box area (for avgFeatureArea)
1444  * o sum features box coordinates (for standard deviation)
1445  */
1446  for ( i = 0; i < sample_rows; i++ )
1447  {
1448  Datum datum;
1449  GBOX gbox = {0};
1450  ND_BOX *nd_box;
1451  bool is_null;
1452 
1453  datum = fetchfunc(stats, i, &is_null);
1454 
1455  /* Skip all NULLs. */
1456  if ( is_null )
1457  {
1458  POSTGIS_DEBUGF(4, " skipped null geometry %d", i);
1459  null_cnt++;
1460  continue;
1461  }
1462 
1463  /* Read the bounds from the gserialized. */
1464  if (LW_FAILURE == gserialized_datum_get_gbox_p(datum, &gbox))
1465  {
1466  /* Skip empties too. */
1467  POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
1468  continue;
1469  }
1470 
1471  /* If we're in 2D mode, zero out the higher dimensions for "safety" */
1472  if ( mode == 2 )
1473  gbox.zmin = gbox.zmax = gbox.mmin = gbox.mmax = 0.0;
1474 
1475  /* Check bounds for validity (finite and not NaN) */
1476  if ( ! gbox_is_valid(&gbox) )
1477  {
1478  POSTGIS_DEBUGF(3, " skipped infinite/nan geometry %d", i);
1479  continue;
1480  }
1481 
1482  /*
1483  * In N-D mode, set the ndims to the maximum dimensionality found
1484  * in the sample. Otherwise, leave at ndims == 2.
1485  */
1486  if ( mode != 2 )
1487  ndims = Max(gbox_ndims(&gbox), ndims);
1488 
1489  /* Convert gbox to n-d box */
1490  nd_box = palloc(sizeof(ND_BOX));
1491  nd_box_from_gbox(&gbox, nd_box);
1492 
1493  /* Cache n-d bounding box */
1494  sample_boxes[notnull_cnt] = nd_box;
1495 
1496  /* Initialize sample extent before merging first entry */
1497  if ( ! notnull_cnt )
1498  nd_box_init_bounds(&sample_extent);
1499 
1500  /* Add current sample to overall sample extent */
1501  nd_box_merge(nd_box, &sample_extent);
1502 
1503  /* How many bytes does this sample use? */
1504  total_width += toast_raw_datum_size(datum);
1505 
1506  /* Add bounds coordinates to sums for stddev calculation */
1507  for ( d = 0; d < ndims; d++ )
1508  {
1509  sum.min[d] += nd_box->min[d];
1510  sum.max[d] += nd_box->max[d];
1511  }
1512 
1513  /* Increment our "good feature" count */
1514  notnull_cnt++;
1515 
1516  /* Give backend a chance of interrupting us */
1517  vacuum_delay_point();
1518  }
1519 
1520  /*
1521  * We'll build a histogram having stats->attr->attstattarget cells
1522  * on each side, within reason... we'll use ndims*10000 as the
1523  * maximum number of cells.
1524  * Also, if we're sampling a relatively small table, we'll try to ensure that
1525  * we have an average of 5 features for each cell so the histogram isn't
1526  * so sparse.
1527  */
1528 #if POSTGIS_PGSQL_VERSION >= 170
1529  histo_cells_target = (int)pow((double)(stats->attstattarget), (double)ndims);
1530 #else
1531  histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
1532 #endif
1533  histo_cells_target = Min(histo_cells_target, ndims * 10000);
1534  histo_cells_target = Min(histo_cells_target, (int)(total_rows/5));
1535  POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
1536  POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
1537 
1538  /* If there's no useful features, we can't work out stats */
1539  if ( ! notnull_cnt )
1540  {
1541  stats->stats_valid = false;
1542  return;
1543  }
1544 
1545  POSTGIS_DEBUGF(3, " sample_extent: %s", nd_box_to_json(&sample_extent, ndims));
1546 
1547  /*
1548  * Second scan:
1549  * o compute standard deviation
1550  */
1551  for ( d = 0; d < ndims; d++ )
1552  {
1553  /* Calculate average bounds values */
1554  avg.min[d] = sum.min[d] / notnull_cnt;
1555  avg.max[d] = sum.max[d] / notnull_cnt;
1556 
1557  /* Calculate standard deviation for this dimension bounds */
1558  for ( i = 0; i < notnull_cnt; i++ )
1559  {
1560  const ND_BOX *ndb = sample_boxes[i];
1561  stddev.min[d] += (ndb->min[d] - avg.min[d]) * (ndb->min[d] - avg.min[d]);
1562  stddev.max[d] += (ndb->max[d] - avg.max[d]) * (ndb->max[d] - avg.max[d]);
1563  }
1564  stddev.min[d] = sqrt(stddev.min[d] / notnull_cnt);
1565  stddev.max[d] = sqrt(stddev.max[d] / notnull_cnt);
1566 
1567  /* Histogram bounds for this dimension bounds is avg +/- SDFACTOR * stdev */
1568  histo_extent.min[d] = Max(avg.min[d] - SDFACTOR * stddev.min[d], sample_extent.min[d]);
1569  histo_extent.max[d] = Min(avg.max[d] + SDFACTOR * stddev.max[d], sample_extent.max[d]);
1570  }
1571 
1572  /*
1573  * Third scan:
1574  * o skip hard deviants
1575  * o compute new histogram box
1576  */
1577  nd_box_init_bounds(&histo_extent_new);
1578  for ( i = 0; i < notnull_cnt; i++ )
1579  {
1580  const ND_BOX *ndb = sample_boxes[i];
1581  /* Skip any hard deviants (boxes entirely outside our histo_extent */
1582  if ( ! nd_box_intersects(&histo_extent, ndb, ndims) )
1583  {
1584  POSTGIS_DEBUGF(4, " feature %d is a hard deviant, skipped", i);
1585  sample_boxes[i] = NULL;
1586  continue;
1587  }
1588  /* Expand our new box to fit all the other features. */
1589  nd_box_merge(ndb, &histo_extent_new);
1590  }
1591  /*
1592  * Expand the box slightly (1%) to avoid edge effects
1593  * with objects that are on the boundary
1594  */
1595  nd_box_expand(&histo_extent_new, 0.01);
1596  histo_extent = histo_extent_new;
1597 
1598  /*
1599  * How should we allocate our histogram cells to the
1600  * different dimensions? We can't do it by raw dimensional width,
1601  * because in x/y/z space, the z can have different units
1602  * from the x/y. Similarly for x/y/t space.
1603  * So, we instead calculate how much features overlap
1604  * each other in their dimension to figure out which
1605  * dimensions have useful selectivity characteristics (more
1606  * variability in density) and therefor would find
1607  * more cells useful (to distinguish between dense places and
1608  * homogeneous places).
1609  */
1610  nd_box_array_distribution(sample_boxes, notnull_cnt, &histo_extent, ndims,
1611  sample_distribution);
1612 
1613  /*
1614  * The sample_distribution array now tells us how spread out the
1615  * data is in each dimension, so we use that data to allocate
1616  * the histogram cells we have available.
1617  * At this point, histo_cells_target is the approximate target number
1618  * of cells.
1619  */
1620 
1621  /*
1622  * Some dimensions have basically a uniform distribution, we want
1623  * to allocate no cells to those dimensions, only to dimensions
1624  * that have some interesting differences in data distribution.
1625  * Here we count up the number of interesting dimensions
1626  */
1627  for ( d = 0; d < ndims; d++ )
1628  {
1629  if ( sample_distribution[d] > 0 )
1630  histo_ndims++;
1631  }
1632 
1633  if ( histo_ndims == 0 )
1634  {
1635  /* Special case: all our dimensions had low variability! */
1636  /* We just divide the cells up evenly */
1637  POSTGIS_DEBUG(3, " special case: no axes have variability");
1638  histo_cells_new = 1;
1639  for ( d = 0; d < ndims; d++ )
1640  {
1641  histo_size[d] = (int)pow((double)histo_cells_target, 1/(double)ndims);
1642  if ( ! histo_size[d] )
1643  histo_size[d] = 1;
1644  POSTGIS_DEBUGF(3, " histo_size[d]: %d", histo_size[d]);
1645  histo_cells_new *= histo_size[d];
1646  }
1647  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1648  }
1649  else
1650  {
1651  /*
1652  * We're going to express the amount of variability in each dimension
1653  * as a proportion of the total variability and allocate cells in that
1654  * dimension relative to that proportion.
1655  */
1656  POSTGIS_DEBUG(3, " allocating histogram axes based on axis variability");
1657  total_distribution = total_double(sample_distribution, ndims); /* First get the total */
1658  POSTGIS_DEBUGF(3, " total_distribution: %.8g", total_distribution);
1659  histo_cells_new = 1; /* For the number of cells in the final histogram */
1660  for ( d = 0; d < ndims; d++ )
1661  {
1662  if ( sample_distribution[d] == 0 ) /* Uninteresting dimensions don't get any room */
1663  {
1664  histo_size[d] = 1;
1665  }
1666  else /* Interesting dimension */
1667  {
1668  /* How does this dims variability compare to the total? */
1669  float edge_ratio = (float)sample_distribution[d] / (float)total_distribution;
1670  /*
1671  * Scale the target cells number by the # of dims and ratio,
1672  * then take the appropriate root to get the estimated number of cells
1673  * on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
1674  */
1675  histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
1676  /* If something goes awry, just give this dim one slot */
1677  if ( ! histo_size[d] )
1678  histo_size[d] = 1;
1679  }
1680  histo_cells_new *= histo_size[d];
1681  }
1682  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1683  }
1684 
1685  /* Update histo_cells to the actual number of cells we need to allocate */
1686  histo_cells = histo_cells_new;
1687  POSTGIS_DEBUGF(3, " histo_cells: %d", histo_cells);
1688 
1689  /*
1690  * Create the histogram (ND_STATS) in the stats memory context
1691  */
1692  old_context = MemoryContextSwitchTo(stats->anl_context);
1693  nd_stats_size = sizeof(ND_STATS) + ((histo_cells - 1) * sizeof(float4));
1694  nd_stats = palloc(nd_stats_size);
1695  memset(nd_stats, 0, nd_stats_size); /* Initialize all values to 0 */
1696  MemoryContextSwitchTo(old_context);
1697 
1698  /* Initialize the #ND_STATS objects */
1699  nd_stats->ndims = ndims;
1700  nd_stats->extent = histo_extent;
1701  nd_stats->sample_features = sample_rows;
1702  nd_stats->table_features = total_rows;
1703  nd_stats->not_null_features = notnull_cnt;
1704  /* Copy in the histogram dimensions */
1705  for ( d = 0; d < ndims; d++ )
1706  nd_stats->size[d] = histo_size[d];
1707 
1708  /*
1709  * Fourth scan:
1710  * o fill histogram values with the proportion of
1711  * features' bbox overlaps: a feature's bvol
1712  * can fully overlap (1) or partially overlap
1713  * (fraction of 1) an histogram cell.
1714  *
1715  * Note that we are filling each cell with the "portion of
1716  * the feature's box that overlaps the cell". So, if we sum
1717  * up the values in the histogram, we could get the
1718  * histogram feature count.
1719  *
1720  */
1721  for ( i = 0; i < notnull_cnt; i++ )
1722  {
1723  const ND_BOX *nd_box;
1724  ND_IBOX nd_ibox;
1725  int at[ND_DIMS];
1726  double num_cells = 0;
1727  double min[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1728  double max[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1729  double cellsize[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1730 
1731  nd_box = sample_boxes[i];
1732  if ( ! nd_box ) continue; /* Skip Null'ed out hard deviants */
1733 
1734  /* Give backend a chance of interrupting us */
1735  vacuum_delay_point();
1736 
1737  /* Find the cells that overlap with this box and put them into the ND_IBOX */
1738  nd_box_overlap(nd_stats, nd_box, &nd_ibox);
1739  memset(at, 0, sizeof(int)*ND_DIMS);
1740 
1741  POSTGIS_DEBUGF(3, " feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1742  nd_ibox.min[0], nd_ibox.min[1], nd_ibox.min[2], nd_ibox.min[3],
1743  nd_ibox.max[0], nd_ibox.max[1], nd_ibox.max[2], nd_ibox.max[3]);
1744 
1745  for ( d = 0; d < nd_stats->ndims; d++ )
1746  {
1747  /* Initialize the starting values */
1748  at[d] = nd_ibox.min[d];
1749  min[d] = nd_stats->extent.min[d];
1750  max[d] = nd_stats->extent.max[d];
1751  cellsize[d] = (max[d] - min[d])/(nd_stats->size[d]);
1752  }
1753 
1754  /*
1755  * Move through all the overlaped histogram cells values and
1756  * add the box overlap proportion to them.
1757  */
1758  do
1759  {
1760  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1761  double ratio;
1762  /* Create a box for this histogram cell */
1763  for ( d = 0; d < nd_stats->ndims; d++ )
1764  {
1765  nd_cell.min[d] = min[d] + (at[d]+0) * cellsize[d];
1766  nd_cell.max[d] = min[d] + (at[d]+1) * cellsize[d];
1767  }
1768 
1769  /*
1770  * If a feature box is completely inside one cell the ratio will be
1771  * 1.0. If a feature box is 50% in two cells, each cell will get
1772  * 0.5 added on.
1773  */
1774  ratio = nd_box_ratio(&nd_cell, nd_box, nd_stats->ndims);
1775  nd_stats->value[nd_stats_value_index(nd_stats, at)] += ratio;
1776  num_cells += ratio;
1777  POSTGIS_DEBUGF(3, " ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1778  POSTGIS_DEBUGF(3, " at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1779  }
1780  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1781 
1782  /* Keep track of overall number of overlaps counted */
1783  total_cell_count += num_cells;
1784  /* How many features have we added to this histogram? */
1785  histogram_features++;
1786  }
1787 
1788  POSTGIS_DEBUGF(3, " histogram_features: %d", histogram_features);
1789  POSTGIS_DEBUGF(3, " sample_rows: %d", sample_rows);
1790  POSTGIS_DEBUGF(3, " table_rows: %.6g", total_rows);
1791 
1792  /* Error out if we got no sample information */
1793  if ( ! histogram_features )
1794  {
1795  POSTGIS_DEBUG(3, " no stats have been gathered");
1796  elog(NOTICE, " no features lie in the stats histogram, invalid stats");
1797  stats->stats_valid = false;
1798  return;
1799  }
1800 
1801  nd_stats->histogram_features = histogram_features;
1802  nd_stats->histogram_cells = histo_cells;
1803  nd_stats->cells_covered = total_cell_count;
1804 
1805  /* Put this histogram data into the right slot/kind */
1806  if ( mode == 2 )
1807  {
1808  stats_slot = STATISTIC_SLOT_2D;
1809  stats_kind = STATISTIC_KIND_2D;
1810  }
1811  else
1812  {
1813  stats_slot = STATISTIC_SLOT_ND;
1814  stats_kind = STATISTIC_KIND_ND;
1815  }
1816 
1817  /* Write the statistics data */
1818  stats->stakind[stats_slot] = stats_kind;
1819  stats->staop[stats_slot] = InvalidOid;
1820  stats->stanumbers[stats_slot] = (float4*)nd_stats;
1821  stats->numnumbers[stats_slot] = nd_stats_size/sizeof(float4);
1822  stats->stanullfrac = (float4)null_cnt/sample_rows;
1823  stats->stawidth = total_width/notnull_cnt;
1824  stats->stadistinct = -1.0;
1825  stats->stats_valid = true;
1826 
1827  POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1828  POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1829  POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
1830  POSTGIS_DEBUGF(3, " out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1831  POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
1832  POSTGIS_DEBUG (3, " out: distinct values: all (no check done)");
1833  POSTGIS_DEBUGF(3, " out: %s", nd_stats_to_json(nd_stats));
1834  /*
1835  POSTGIS_DEBUGF(3, " out histogram:\n%s", nd_stats_to_grid(nd_stats));
1836  */
1837 
1838  return;
1839 }
1840 
1841 
1859 static void
1860 compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1861  int sample_rows, double total_rows)
1862 {
1863  GserializedAnalyzeExtraData *extra_data = (GserializedAnalyzeExtraData *)stats->extra_data;
1864  /* Call standard statistics calculation routine to fill in correlation for BRIN to work */
1865  stats->extra_data = extra_data->std_extra_data;
1866  extra_data->std_compute_stats(stats, fetchfunc, sample_rows, total_rows);
1867  stats->extra_data = extra_data;
1868 
1869  /* 2D Mode */
1870  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
1871 
1872  if (stats->stats_valid)
1873  {
1874  /* ND Mode: Only computed if 2D was computed too (not NULL and valid) */
1875  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 0);
1876  }
1877 }
1878 
1879 
1907 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
1908 {
1909  VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1910  GserializedAnalyzeExtraData *extra_data =
1912 
1913  /* Ask for standard analyze to fill in as much as possible */
1914  if (!std_typanalyze(stats))
1915  PG_RETURN_BOOL(false);
1916 
1917  /* Save old compute_stats and extra_data for scalar statistics ... */
1918  extra_data->std_compute_stats = stats->compute_stats;
1919  extra_data->std_extra_data = stats->extra_data;
1920  /* ... and replace with our info */
1921  stats->compute_stats = compute_gserialized_stats;
1922  stats->extra_data = extra_data;
1923 
1924  /* Indicate we are done successfully */
1925  PG_RETURN_BOOL(true);
1926 }
1927 
1940 static float8
1941 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
1942 {
1943  int d; /* counter */
1944  float8 selectivity;
1945  ND_BOX nd_box;
1946  ND_IBOX nd_ibox;
1947  int at[ND_DIMS];
1948  double cell_size[ND_DIMS];
1949  double min[ND_DIMS];
1950  double max[ND_DIMS];
1951  double total_count = 0.0;
1952  int ndims_max;
1953 
1954  /* Calculate the overlap of the box on the histogram */
1955  if ( ! nd_stats )
1956  {
1957  elog(NOTICE, " estimate_selectivity called with null input");
1958  return FALLBACK_ND_SEL;
1959  }
1960 
1961  ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1962 
1963  /* Initialize nd_box. */
1964  nd_box_from_gbox(box, &nd_box);
1965 
1966  /*
1967  * To return 2D stats on an ND sample, we need to make the
1968  * 2D box cover the full range of the other dimensions in the
1969  * histogram.
1970  */
1971  POSTGIS_DEBUGF(3, " mode: %d", mode);
1972  if ( mode == 2 )
1973  {
1974  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1975  ndims_max = 2;
1976  }
1977 
1978  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1979  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1980 
1981  /*
1982  * Search box completely misses histogram extent?
1983  * We have to intersect in all N dimensions or else we have
1984  * zero interaction under the &&& operator. It's important
1985  * to short circuit in this case, as some of the tests below
1986  * will return junk results when run on non-intersecting inputs.
1987  */
1988  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1989  {
1990  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1991  return 0.0;
1992  }
1993 
1994  /* Search box completely contains histogram extent! */
1995  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1996  {
1997  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1998  return 1.0;
1999  }
2000 
2001  /* Calculate the overlap of the box on the histogram */
2002  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
2003  {
2004  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
2005  return FALLBACK_ND_SEL;
2006  }
2007 
2008  /* Work out some measurements of the histogram */
2009  for ( d = 0; d < nd_stats->ndims; d++ )
2010  {
2011  /* Cell size in each dim */
2012  min[d] = nd_stats->extent.min[d];
2013  max[d] = nd_stats->extent.max[d];
2014  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
2015  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2016 
2017  /* Initialize the counter */
2018  at[d] = nd_ibox.min[d];
2019  }
2020 
2021  /* Move through all the overlap values and sum them */
2022  do
2023  {
2024  float cell_count, ratio;
2025  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2026 
2027  /* We have to pro-rate partially overlapped cells. */
2028  for ( d = 0; d < nd_stats->ndims; d++ )
2029  {
2030  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2031  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2032  }
2033 
2034  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2035  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2036 
2037  /* Add the pro-rated count for this cell to the overall total */
2038  total_count += cell_count * ratio;
2039  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2040  }
2041  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2042 
2043  /* Scale by the number of features in our histogram to get the proportion */
2044  selectivity = total_count / nd_stats->histogram_features;
2045 
2046  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2047  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2048  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2049  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2050 
2051  /* Prevent rounding overflows */
2052  if (selectivity > 1.0) selectivity = 1.0;
2053  else if (selectivity < 0.0) selectivity = 0.0;
2054 
2055  return selectivity;
2056 }
2057 
2058 
2059 
2065 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
2066 {
2067  Oid table_oid = PG_GETARG_OID(0);
2068  text *att_text = PG_GETARG_TEXT_P(1);
2069  ND_STATS *nd_stats;
2070  char *str;
2071  text *json;
2072  int mode = 2; /* default to 2D mode */
2073  bool only_parent = false; /* default to whole tree stats */
2074 
2075  /* Check if we've been asked to not use 2d mode */
2076  if ( ! PG_ARGISNULL(2) )
2077  mode = text_p_get_mode(PG_GETARG_TEXT_P(2));
2078 
2079  /* Retrieve the stats object */
2080  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, only_parent);
2081  if ( ! nd_stats )
2082  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2083 
2084  /* Convert to JSON */
2085  str = nd_stats_to_json(nd_stats);
2086  json = cstring_to_text(str);
2087  pfree(str);
2088  pfree(nd_stats);
2089  PG_RETURN_TEXT_P(json);
2090 }
2091 
2092 
2098 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
2099 {
2100  Oid table_oid = PG_GETARG_OID(0);
2101  text *att_text = PG_GETARG_TEXT_P(1);
2102  Datum geom_datum = PG_GETARG_DATUM(2);
2103  GBOX gbox; /* search box read from gserialized datum */
2104  float8 selectivity = 0;
2105  ND_STATS *nd_stats;
2106  int mode = 2; /* 2D mode by default */
2107 
2108  /* Check if we've been asked to not use 2d mode */
2109  if ( ! PG_ARGISNULL(3) )
2110  mode = text_p_get_mode(PG_GETARG_TEXT_P(3));
2111 
2112  /* Retrieve the stats object */
2113  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, false);
2114 
2115  if ( ! nd_stats )
2116  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2117 
2118  /* Calculate the gbox */
2119  if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
2120  elog(ERROR, "unable to calculate bounding box from geometry");
2121 
2122  POSTGIS_DEBUGF(3, " %s", gbox_to_string(&gbox));
2123 
2124  /* Do the estimation */
2125  selectivity = estimate_selectivity(&gbox, nd_stats, mode);
2126 
2127  pfree(nd_stats);
2128  PG_RETURN_FLOAT8(selectivity);
2129 }
2130 
2131 
2137 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
2138 {
2139  Oid table_oid1 = PG_GETARG_OID(0);
2140  text *att_text1 = PG_GETARG_TEXT_P(1);
2141  Oid table_oid2 = PG_GETARG_OID(2);
2142  text *att_text2 = PG_GETARG_TEXT_P(3);
2143  ND_STATS *nd_stats1, *nd_stats2;
2144  float8 selectivity = 0;
2145  int mode = 2; /* 2D mode by default */
2146 
2147 
2148  /* Retrieve the stats object */
2149  nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode, false);
2150  nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode, false);
2151 
2152  if ( ! nd_stats1 )
2153  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text_to_cstring(att_text1));
2154 
2155  if ( ! nd_stats2 )
2156  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid2), text_to_cstring(att_text2));
2157 
2158  /* Check if we've been asked to not use 2d mode */
2159  if ( ! PG_ARGISNULL(4) )
2160  {
2161  text *modetxt = PG_GETARG_TEXT_P(4);
2162  char *modestr = text_to_cstring(modetxt);
2163  if ( modestr[0] == 'N' )
2164  mode = 0;
2165  }
2166 
2167  /* Do the estimation */
2168  selectivity = estimate_join_selectivity(nd_stats1, nd_stats2);
2169 
2170  pfree(nd_stats1);
2171  pfree(nd_stats2);
2172  PG_RETURN_FLOAT8(selectivity);
2173 }
2174 
2180 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
2181 {
2182  PG_RETURN_DATUM(DirectFunctionCall5(
2184  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2185  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2186  Int32GetDatum(2) /* 2-D mode */
2187  ));
2188 }
2189 
2195 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
2196 {
2197  PG_RETURN_DATUM(DirectFunctionCall5(
2199  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2200  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2201  Int32GetDatum(0) /* N-D mode */
2202  ));
2203 }
2204 
2205 
2220 float8
2221 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
2222 {
2223  VariableStatData vardata;
2224  Node *other = NULL;
2225  bool varonleft;
2226  ND_STATS *nd_stats = NULL;
2227 
2228  GBOX search_box;
2229  float8 selectivity = 0;
2230  Const *otherConst;
2231 
2232  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
2233 
2234  if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft))
2235  {
2236  POSTGIS_DEBUGF(2, "%s: could not find vardata", __func__);
2237  return DEFAULT_ND_SEL;
2238  }
2239 
2240  if (!IsA(other, Const))
2241  {
2242  ReleaseVariableStats(vardata);
2243  POSTGIS_DEBUGF(2, "%s: no constant argument, returning default selectivity %g", __func__, DEFAULT_ND_SEL);
2244  return DEFAULT_ND_SEL;
2245  }
2246 
2247  otherConst = (Const*)other;
2248  if ((!otherConst) || otherConst->constisnull)
2249  {
2250  ReleaseVariableStats(vardata);
2251  POSTGIS_DEBUGF(2, "%s: constant argument is NULL", __func__);
2252  return DEFAULT_ND_SEL;
2253  }
2254 
2255  if (!gserialized_datum_get_gbox_p(otherConst->constvalue, &search_box))
2256  {
2257  ReleaseVariableStats(vardata);
2258  POSTGIS_DEBUGF(2, "%s: search box is EMPTY", __func__);
2259  return 0.0;
2260  }
2261 
2262  if (!vardata.statsTuple)
2263  {
2264  POSTGIS_DEBUGF(1, "%s: no statistics available on table. Empty? Need to ANALYZE?", __func__);
2265  return DEFAULT_ND_SEL;
2266  }
2267 
2268  nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
2269  ReleaseVariableStats(vardata);
2270  selectivity = estimate_selectivity(&search_box, nd_stats, mode);
2271  if (nd_stats) pfree(nd_stats);
2272  return selectivity;
2273 }
2274 
2276 Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
2277 {
2278  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2279  // Oid operator_oid = PG_GETARG_OID(1);
2280  List *args = (List *) PG_GETARG_POINTER(2);
2281  int varRelid = PG_GETARG_INT32(3);
2282  int mode = PG_GETARG_INT32(4);
2283  float8 selectivity = gserialized_sel_internal(root, args, varRelid, mode);
2284  POSTGIS_DEBUGF(2, "%s: selectivity is %g", __func__, selectivity);
2285  PG_RETURN_FLOAT8(selectivity);
2286 }
2287 
2288 
2289 
2296 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
2297 {
2298  char *nsp = NULL;
2299  char *tbl = NULL;
2300  text *col = NULL;
2301  char *nsp_tbl = NULL;
2302  Oid tbl_oid, idx_oid = 0;
2303  ND_STATS *nd_stats;
2304  GBOX *gbox = NULL;
2305  bool only_parent = false;
2306  int key_type, att_num;
2307  size_t sz;
2308 
2309  /* We need to initialize the internal cache to access it later via postgis_oid() */
2310  postgis_initialize_cache();
2311 
2312  if ( PG_NARGS() == 4 )
2313  {
2314  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2315  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2316  col = PG_GETARG_TEXT_P(2);
2317  only_parent = PG_GETARG_BOOL(3);
2318  sz = strlen(nsp) + strlen(tbl) + 6;
2319  nsp_tbl = palloc(sz);
2320  snprintf(nsp_tbl, sz, "\"%s\".\"%s\"", nsp, tbl);
2321  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2322  pfree(nsp_tbl);
2323  }
2324  else if ( PG_NARGS() == 3 )
2325  {
2326  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2327  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2328  col = PG_GETARG_TEXT_P(2);
2329  sz = strlen(nsp) + strlen(tbl) + 6;
2330  nsp_tbl = palloc(sz);
2331  snprintf(nsp_tbl, sz, "\"%s\".\"%s\"", nsp, tbl);
2332  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2333  pfree(nsp_tbl);
2334  }
2335  else if ( PG_NARGS() == 2 )
2336  {
2337  tbl = text_to_cstring(PG_GETARG_TEXT_P(0));
2338  col = PG_GETARG_TEXT_P(1);
2339  sz = strlen(tbl) + 3;
2340  nsp_tbl = palloc(sz);
2341  snprintf(nsp_tbl, sz, "\"%s\"", tbl);
2342  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2343  pfree(nsp_tbl);
2344  }
2345  else
2346  {
2347  elog(ERROR, "estimated_extent() called with wrong number of arguments");
2348  PG_RETURN_NULL();
2349  }
2350 
2351  /* Read the extent from the head of the spatial index, if there is one */
2352 
2353  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type, &att_num);
2354  if (idx_oid)
2355  {
2356  /* TODO: how about only_parent ? */
2357  gbox = spatial_index_read_extent(idx_oid, key_type, att_num);
2358  POSTGIS_DEBUGF(2, "index for \"%s.%s\" exists, reading gbox from there", tbl, text_to_cstring(col));
2359  if ( ! gbox ) PG_RETURN_NULL();
2360  }
2361  else
2362  {
2363  POSTGIS_DEBUGF(2, "index for \"%s.%s\" does not exist", tbl, text_to_cstring(col));
2364 
2365  /* Fall back to reading the stats, if no index is found */
2366 
2367  /* Estimated extent only returns 2D bounds, so use mode 2 */
2368  nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2, only_parent);
2369 
2370  /* Error out on no stats */
2371  if ( ! nd_stats ) {
2372  elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, text_to_cstring(col));
2373  PG_RETURN_NULL();
2374  }
2375 
2376  /* Construct the box */
2377  gbox = palloc(sizeof(GBOX));
2378  FLAGS_SET_GEODETIC(gbox->flags, 0);
2379  FLAGS_SET_Z(gbox->flags, 0);
2380  FLAGS_SET_M(gbox->flags, 0);
2381  gbox->xmin = nd_stats->extent.min[0];
2382  gbox->xmax = nd_stats->extent.max[0];
2383  gbox->ymin = nd_stats->extent.min[1];
2384  gbox->ymax = nd_stats->extent.max[1];
2385  pfree(nd_stats);
2386  }
2387 
2388  PG_RETURN_POINTER(gbox);
2389 }
2390 
2398 Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
2399 {
2400  if ( PG_NARGS() == 3 )
2401  {
2402  PG_RETURN_DATUM(
2403  DirectFunctionCall3(gserialized_estimated_extent,
2404  PG_GETARG_DATUM(0),
2405  PG_GETARG_DATUM(1),
2406  PG_GETARG_DATUM(2)));
2407  }
2408  else if ( PG_NARGS() == 2 )
2409  {
2410  PG_RETURN_DATUM(
2411  DirectFunctionCall2(gserialized_estimated_extent,
2412  PG_GETARG_DATUM(0),
2413  PG_GETARG_DATUM(1)));
2414  }
2415 
2416  elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
2417  PG_RETURN_NULL();
2418 }
2419 
2420 /************************************************************************/
2421 
2422 static Oid
2423 table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num)
2424 {
2425  Relation tbl_rel;
2426  ListCell *lc;
2427  List *idx_list;
2428  Oid result = InvalidOid;
2429  char *colname = text_to_cstring(col);
2430 
2431  /* Lookup our spatial index key types */
2432  Oid b2d_oid = postgis_oid(BOX2DFOID);
2433  Oid gdx_oid = postgis_oid(BOX3DOID);
2434 
2435  if (!(b2d_oid && gdx_oid))
2436  return InvalidOid;
2437 
2438  tbl_rel = RelationIdGetRelation(tbl_oid);
2439  idx_list = RelationGetIndexList(tbl_rel);
2440  RelationClose(tbl_rel);
2441 
2442  /* For each index associated with this table... */
2443  foreach(lc, idx_list)
2444  {
2445  Form_pg_class idx_form;
2446  HeapTuple idx_tup;
2447  int idx_relam;
2448  Oid idx_oid = lfirst_oid(lc);
2449 
2450  idx_tup = SearchSysCache1(RELOID, ObjectIdGetDatum(idx_oid));
2451  if (!HeapTupleIsValid(idx_tup))
2452  elog(ERROR, "%s: unable to lookup index %u in syscache", __func__, idx_oid);
2453  idx_form = (Form_pg_class) GETSTRUCT(idx_tup);
2454  idx_relam = idx_form->relam;
2455  ReleaseSysCache(idx_tup);
2456 
2457  /* Does the index use a GIST access method? */
2458  if (idx_relam == GIST_AM_OID)
2459  {
2460  Form_pg_attribute att;
2461  Oid atttypid;
2462  int attnum;
2463  /* Is the index on the column name we are looking for? */
2464  HeapTuple att_tup = SearchSysCache2(ATTNAME,
2465  ObjectIdGetDatum(idx_oid),
2466  PointerGetDatum(colname));
2467  if (!HeapTupleIsValid(att_tup))
2468  continue;
2469 
2470  att = (Form_pg_attribute) GETSTRUCT(att_tup);
2471  atttypid = att->atttypid;
2472  attnum = att->attnum;
2473  ReleaseSysCache(att_tup);
2474 
2475  /* Is the column actually spatial? */
2476  if (b2d_oid == atttypid || gdx_oid == atttypid)
2477  {
2478  /* Save result, clean up, and break out */
2479  result = idx_oid;
2480  if (att_num)
2481  *att_num = attnum;
2482  if (key_type)
2483  *key_type = (atttypid == b2d_oid ? STATISTIC_KIND_2D : STATISTIC_KIND_ND);
2484  break;
2485  }
2486  }
2487  }
2488  return result;
2489 }
2490 
2491 static GBOX *
2492 spatial_index_read_extent(Oid idx_oid, int key_type, int att_num)
2493 {
2494  BOX2DF *bounds_2df = NULL;
2495  GIDX *bounds_gidx = NULL;
2496  GBOX *gbox = NULL;
2497  Relation idx_rel;
2498  Buffer buffer;
2499  Page page;
2500  OffsetNumber offset;
2501  unsigned long offset_max;
2502 
2503  if (!idx_oid)
2504  return NULL;
2505 
2506  idx_rel = index_open(idx_oid, AccessShareLock);
2507  buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2508  page = (Page) BufferGetPage(buffer);
2509  offset = FirstOffsetNumber;
2510  offset_max = PageGetMaxOffsetNumber(page);
2511  while (offset <= offset_max)
2512  {
2513  ItemId iid = PageGetItemId(page, offset);
2514  IndexTuple ituple;
2515  if (!iid)
2516  {
2517  ReleaseBuffer(buffer);
2518  index_close(idx_rel, AccessShareLock);
2519  return NULL;
2520  }
2521  ituple = (IndexTuple) PageGetItem(page, iid);
2522  if (!GistTupleIsInvalid(ituple))
2523  {
2524  bool isnull;
2525  Datum idx_attr = index_getattr(ituple, att_num, idx_rel->rd_att, &isnull);
2526  if (!isnull)
2527  {
2528  if (key_type == STATISTIC_KIND_2D)
2529  {
2530  BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2531  if (bounds_2df)
2532  box2df_merge(bounds_2df, b);
2533  else
2534  bounds_2df = box2df_copy(b);
2535  }
2536  else
2537  {
2538  GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2539  if (bounds_gidx)
2540  gidx_merge(&bounds_gidx, b);
2541  else
2542  bounds_gidx = gidx_copy(b);
2543  }
2544  }
2545  }
2546  offset++;
2547  }
2548 
2549  ReleaseBuffer(buffer);
2550  index_close(idx_rel, AccessShareLock);
2551 
2552  if (key_type == STATISTIC_KIND_2D && bounds_2df)
2553  {
2554  if (box2df_is_empty(bounds_2df))
2555  return NULL;
2556  gbox = gbox_new(0);
2557  box2df_to_gbox_p(bounds_2df, gbox);
2558  }
2559  else if (key_type == STATISTIC_KIND_ND && bounds_gidx)
2560  {
2561  if (gidx_is_unknown(bounds_gidx))
2562  return NULL;
2563  gbox = gbox_new(0);
2564  gbox_from_gidx(bounds_gidx, gbox, 0);
2565  }
2566  else
2567  return NULL;
2568 
2569  return gbox;
2570 }
2571 
2572 /*
2573 CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
2574  RETURNS box2d
2575  AS '$libdir/postgis-2.5','_postgis_gserialized_index_extent'
2576  LANGUAGE 'c' STABLE STRICT;
2577 */
2578 
2580 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
2581 {
2582  GBOX *gbox = NULL;
2583  int key_type;
2584  int att_num;
2585  Oid tbl_oid = PG_GETARG_DATUM(0);
2586  text *col = PG_GETARG_TEXT_P(1);
2587  Oid idx_oid;
2588 
2589  if(!tbl_oid)
2590  PG_RETURN_NULL();
2591 
2592  /* We need to initialize the internal cache to access it later via postgis_oid() */
2593  postgis_initialize_cache();
2594 
2595  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type, &att_num);
2596  if (!idx_oid)
2597  PG_RETURN_NULL();
2598 
2599  gbox = spatial_index_read_extent(idx_oid, key_type, att_num);
2600  if (!gbox)
2601  PG_RETURN_NULL();
2602  else
2603  PG_RETURN_POINTER(gbox);
2604 }
2605 
char result[OUT_DOUBLE_BUFFER_SIZE]
Definition: cu_print.c:267
GBOX * gbox_new(lwflags_t flags)
Create a new gbox with the dimensionality indicated by the flags.
Definition: gbox.c:32
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition: gbox.c:197
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
Definition: gbox.c:392
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static int range_quintile(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
#define NUM_BINS
#define STATISTIC_SLOT_ND
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
#define ND_DIMS
The maximum number of dimensions our code can handle.
#define STATISTIC_KIND_2D
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Create a printable view of the ND_STATS histogram.
#define DEFAULT_ND_JOINSEL
#define STATISTIC_KIND_ND
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
#define SDFACTOR
#define FALLBACK_ND_JOINSEL
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type, int att_num)
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
This function should return an estimation of the number of rows returned by a query involving an over...
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num)
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array.
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator?
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension,...
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
bool box2df_is_empty(const BOX2DF *a)
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
int gserialized_datum_get_gbox_p(Datum gsdatum, GBOX *gbox)
Given a GSERIALIZED datum, as quickly as possible (peeking into the top of the memory) return the gbo...
BOX2DF * box2df_copy(BOX2DF *b)
bool gidx_is_unknown(const GIDX *a)
GIDX * gidx_copy(GIDX *b)
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define LW_FAILURE
Definition: liblwgeom.h:111
#define FLAGS_GET_Z(flags)
Definition: liblwgeom.h:180
#define FLAGS_GET_M(flags)
Definition: liblwgeom.h:181
#define FLAGS_SET_GEODETIC(flags, value)
Definition: liblwgeom.h:190
#define FLAGS_SET_M(flags, value)
Definition: liblwgeom.h:188
#define FLAGS_SET_Z(flags, value)
Definition: liblwgeom.h:187
#define FLAGS_GET_GEODETIC(flags)
Definition: liblwgeom.h:183
This library is the generic geometry handling section of PostGIS.
#define str(s)
args
Definition: ovdump.py:45
Datum buffer(PG_FUNCTION_ARGS)
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided.
Definition: stringbuffer.c:230
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
Definition: stringbuffer.c:33
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
Definition: stringbuffer.c:76
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string.
Definition: stringbuffer.c:124
static void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
Definition: stringbuffer.h:103
double ymax
Definition: liblwgeom.h:372
double zmax
Definition: liblwgeom.h:374
double xmax
Definition: liblwgeom.h:370
double zmin
Definition: liblwgeom.h:373
double mmax
Definition: liblwgeom.h:376
double ymin
Definition: liblwgeom.h:371
double xmin
Definition: liblwgeom.h:369
double mmin
Definition: liblwgeom.h:375
lwflags_t flags
Definition: liblwgeom.h:368
AnalyzeAttrComputeStatsFunc std_compute_stats
float4 max[ND_DIMS]
float4 min[ND_DIMS]
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
int max[ND_DIMS]
int min[ND_DIMS]
N-dimensional box index type.
float4 size[ND_DIMS]
N-dimensional statistics structure.