PostGIS  3.4.0dev-r@@SVN_REVISION@@
gserialized_estimate.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * PostGIS is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * PostGIS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18  *
19  **********************************************************************
20  *
21  * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
22  *
23  **********************************************************************/
24 
25 
26 
27 /**********************************************************************
28  THEORY OF OPERATION
29 
30 The ANALYZE command hooks to a callback (gserialized_analyze_nd) that
31 calculates (compute_gserialized_stats_mode) two histograms of occurrences of
32 features, one for the 2D domain (and the && operator) and one for the
33 ND domain (and the &&& operator).
34 
35 Queries in PostgreSQL call into the selectivity sub-system to find out
36 the relative effectiveness of different clauses in sub-setting
37 relations. Queries with constant arguments call gserialized_gist_sel,
38 queries with relations on both sides call gserialized_gist_joinsel.
39 
40 gserialized_gist_sel sums up the values in the histogram that overlap
41 the constant search box.
42 
43 gserialized_gist_joinsel sums up the product of the overlapping
44 cells in each relation's histogram.
45 
46 Depending on the operator and type, the mode of selectivity calculation
47 will be 2D or ND.
48 
49 - geometry && geometry ==> 2D
50 - geometry &&& geometry ==> ND
51 - geography && geography ==> ND
52 
53 The 2D mode is put in effect by retrieving the 2D histogram from the
54 statistics cache and then allowing the generic ND calculations to
55 go to work.
56 
57 TO DO: More testing and examination of the &&& operator and mixed
58 dimensionality cases. (2D geometry) &&& (3D column), etc.
59 
60 **********************************************************************/
61 
62 #include "postgres.h"
63 
64 #include "access/genam.h"
65 #include "access/gin.h"
66 #include "access/gist.h"
67 #include "access/gist_private.h"
68 #include "access/gistscan.h"
69 #if PG_VERSION_NUM < 130000
70 #include "access/tuptoaster.h" /* For toast_raw_datum_size */
71 #else
72 #include "access/detoast.h" /* For toast_raw_datum_size */
73 #endif
74 #include "utils/datum.h"
75 #include "access/heapam.h"
76 #include "catalog/index.h"
77 #include "catalog/pg_am.h"
78 #include "miscadmin.h"
79 #include "storage/lmgr.h"
80 #include "catalog/namespace.h"
81 #include "catalog/indexing.h"
82 #if PG_VERSION_NUM >= 100000
83 #include "utils/regproc.h"
84 #include "utils/varlena.h"
85 #endif
86 #include "utils/builtins.h"
87 #include "utils/datum.h"
88 #include "utils/snapmgr.h"
89 #include "utils/fmgroids.h"
90 #include "funcapi.h"
91 #include "access/heapam.h"
92 #include "catalog/pg_type.h"
93 #include "access/relscan.h"
94 
95 #include "executor/spi.h"
96 #include "fmgr.h"
97 #include "commands/vacuum.h"
98 #if PG_VERSION_NUM < 120000
99 #include "nodes/relation.h"
100 #else
101 #include "nodes/pathnodes.h"
102 #endif
103 #include "parser/parsetree.h"
104 #include "utils/array.h"
105 #include "utils/lsyscache.h"
106 #include "utils/builtins.h"
107 #include "utils/syscache.h"
108 #include "utils/rel.h"
109 #include "utils/selfuncs.h"
110 
111 #include "../postgis_config.h"
112 
113 #include "access/htup_details.h"
114 
115 #include "stringbuffer.h"
116 #include "liblwgeom.h"
117 #include "lwgeom_pg.h" /* For debugging macros. */
118 #include "gserialized_gist.h" /* For index common functions */
119 
120 #include <math.h>
121 #if HAVE_IEEEFP_H
122 #include <ieeefp.h>
123 #endif
124 #include <float.h>
125 #include <string.h>
126 #include <stdio.h>
127 #include <ctype.h>
128 
129 
130 /************************************************************************/
131 
132 
133 /* Prototypes */
134 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS);
135 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS);
136 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS);
137 Datum gserialized_gist_sel(PG_FUNCTION_ARGS);
138 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS);
139 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
140 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
141 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
142 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS);
143 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
144 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
145 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
146 
147 /* Local prototypes */
148 static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num);
149 static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type, int att_num);
150 
151 
152 /* Other prototypes */
153 float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
154 float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode);
155 
156 
157 /* Old Prototype */
158 Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
159 
160 /*
161  * Assign a number to the n-dimensional statistics kind
162  *
163  * tgl suggested:
164  *
165  * 1-100: reserved for assignment by the core Postgres project
166  * 100-199: reserved for assignment by PostGIS
167  * 200-9999: reserved for other globally-known stats kinds
168  * 10000-32767: reserved for private site-local use
169  */
170 #define STATISTIC_KIND_ND 102
171 #define STATISTIC_KIND_2D 103
172 
173 /*
174  * Postgres does not pin its slots and uses them as they come.
175  * We need to preserve its Correlation for brin to work
176  * 0 may be MCV
177  * 1 may be Histogram
178  * 2 may be Correlation
179  * We take 3 and 4.
180  */
181 #define STATISTIC_SLOT_ND 3
182 #define STATISTIC_SLOT_2D 4
183 
184 /*
185 * The SD factor restricts the side of the statistics histogram
186 * based on the standard deviation of the extent of the data.
187 * SDFACTOR is the number of standard deviations from the mean
188 * the histogram will extend.
189 */
190 #define SDFACTOR 3.25
191 
197 #define ND_DIMS 4
198 
205 #define MIN_DIMENSION_WIDTH 0.000000001
206 
211 #define MAX_DIMENSION_WIDTH 1.0E+20
212 
216 #define DEFAULT_ND_SEL 0.0001
217 #define DEFAULT_ND_JOINSEL 0.001
218 
222 #define FALLBACK_ND_SEL 0.2
223 #define FALLBACK_ND_JOINSEL 0.3
224 
230 typedef struct ND_BOX_T
231 {
232  float4 min[ND_DIMS];
233  float4 max[ND_DIMS];
235 
239 typedef struct ND_IBOX_T
240 {
241  int min[ND_DIMS];
242  int max[ND_DIMS];
244 
245 
252 typedef struct ND_STATS_T
253 {
254  /* Dimensionality of the histogram. */
255  float4 ndims;
256 
257  /* Size of n-d histogram in each dimension. */
258  float4 size[ND_DIMS];
259 
260  /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
262 
263  /* How many rows in the table itself? */
265 
266  /* How many rows were in the sample that built this histogram? */
268 
269  /* How many not-Null/Empty features were in the sample? */
271 
272  /* How many features actually got sampled in the histogram? */
274 
275  /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
277 
278  /* How many cells did those histogram features cover? */
279  /* Since we are pro-rating coverage, this number should */
280  /* now always equal histogram_features */
282 
283  /* Variable length # of floats for histogram */
284  float4 value[1];
286 
287 typedef struct {
288  /* Saved state from std_typanalyze() */
289  AnalyzeAttrComputeStatsFunc std_compute_stats;
292 
299 static int
300 gbox_ndims(const GBOX* gbox)
301 {
302  int dims = 2;
303  if ( FLAGS_GET_GEODETIC(gbox->flags) )
304  return 3;
305  if ( FLAGS_GET_Z(gbox->flags) )
306  dims++;
307  if ( FLAGS_GET_M(gbox->flags) )
308  dims++;
309  return dims;
310 }
311 
317 static int
318 text_p_get_mode(const text *txt)
319 {
320  int mode = 2;
321  char *modestr;
322  if (VARSIZE_ANY_EXHDR(txt) <= 0)
323  return mode;
324  modestr = (char*)VARDATA(txt);
325  if ( modestr[0] == 'N' )
326  mode = 0;
327  return mode;
328 }
329 
330 
/**
 * Three-way integer comparison in the form qsort() expects.
 *
 * @return -1, 0 or 1 when *a is less than, equal to or greater than *b.
 */
static int
cmp_int (const void *a, const void *b)
{
	const int lhs = *(const int *)a;
	const int rhs = *(const int *)b;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
	return (lhs > rhs) - (lhs < rhs);
}
347 
352 // static int
353 // range_quintile(int *vals, int nvals)
354 // {
355 // qsort(vals, nvals, sizeof(int), cmp_int);
356 // return vals[4*nvals/5] - vals[nvals/5];
357 // }
358 
362 static int
363 range_full(int *vals, int nvals)
364 {
365  qsort(vals, nvals, sizeof(int), cmp_int);
366  return vals[nvals-1] - vals[0];
367 }
368 
/**
 * Sum an array of doubles.
 *
 * @param vals  values to add up
 * @param nvals number of entries in vals
 * @return the total, or 0 when nvals is not positive
 */
static double
total_double(const double *vals, int nvals)
{
	int i;
	/* Accumulate in double: a float accumulator silently drops precision. */
	double total = 0;

	for ( i = 0; i < nvals; i++ )
		total += vals[i];

	return total;
}
383 
384 #if POSTGIS_DEBUG_LEVEL >= 3
385 
/**
 * Sum an array of ints.
 *
 * @param vals  values to add up
 * @param nvals number of entries in vals
 * @return the total, or 0 when nvals is not positive
 */
static int
total_int(const int *vals, int nvals)
{
	int sum = 0;
	int pos = 0;

	while ( pos < nvals )
	{
		sum += vals[pos];
		pos++;
	}

	return sum;
}
400 
/**
 * Arithmetic mean of an array of ints.
 *
 * The sum is taken as an int (via total_int) before the division is
 * carried out in double precision.
 */
static double
avg(const int *vals, int nvals)
{
	const double sum = (double)total_int(vals, nvals);
	const double count = (double)nvals;

	return sum / count;
}
410 
/**
 * Population standard deviation of an array of ints: the square root of
 * the mean squared distance from the average.
 */
static double
stddev(const int *vals, int nvals)
{
	const double mean = avg(vals, nvals);
	double sum_sq = 0;
	int pos;

	/* Accumulate the squared deviations from the mean */
	for ( pos = 0; pos < nvals; pos++ )
	{
		const double delta = mean - (double)(vals[pos]);
		sum_sq += delta * delta;
	}

	return sqrt(sum_sq / nvals);
}
429 #endif /* POSTGIS_DEBUG_LEVEL >= 3 */
430 
435 static int
436 nd_stats_value_index(const ND_STATS *stats, int *indexes)
437 {
438  int d;
439  int accum = 1, vdx = 0;
440 
441  /* Calculate the index into the 1-d values array that the (i,j,k,l) */
442  /* n-d histogram coordinate implies. */
443  /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
444  for ( d = 0; d < (int)(stats->ndims); d++ )
445  {
446  int size = (int)(stats->size[d]);
447  if ( indexes[d] < 0 || indexes[d] >= size )
448  {
449  POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
450  return -1;
451  }
452  vdx += indexes[d] * accum;
453  accum *= size;
454  }
455  return vdx;
456 }
457 
461 static char*
462 nd_box_to_json(const ND_BOX *nd_box, int ndims)
463 {
464  char *rv;
465  int i;
467 
468  stringbuffer_append(sb, "{\"min\":[");
469  for ( i = 0; i < ndims; i++ )
470  {
471  if ( i ) stringbuffer_append(sb, ",");
472  stringbuffer_aprintf(sb, "%.6g", nd_box->min[i]);
473  }
474  stringbuffer_append(sb, "],\"max\":[");
475  for ( i = 0; i < ndims; i++ )
476  {
477  if ( i ) stringbuffer_append(sb, ",");
478  stringbuffer_aprintf(sb, "%.6g", nd_box->max[i]);
479  }
480  stringbuffer_append(sb, "]}");
481 
484  return rv;
485 }
486 
487 
492 static char*
493 nd_stats_to_json(const ND_STATS *nd_stats)
494 {
495  char *json_extent, *str;
496  int d;
498  int ndims = (int)roundf(nd_stats->ndims);
499 
500  stringbuffer_append(sb, "{");
501  stringbuffer_aprintf(sb, "\"ndims\":%d,", ndims);
502 
503  /* Size */
504  stringbuffer_append(sb, "\"size\":[");
505  for ( d = 0; d < ndims; d++ )
506  {
507  if ( d ) stringbuffer_append(sb, ",");
508  stringbuffer_aprintf(sb, "%d", (int)roundf(nd_stats->size[d]));
509  }
510  stringbuffer_append(sb, "],");
511 
512  /* Extent */
513  json_extent = nd_box_to_json(&(nd_stats->extent), ndims);
514  stringbuffer_aprintf(sb, "\"extent\":%s,", json_extent);
515  pfree(json_extent);
516 
517  stringbuffer_aprintf(sb, "\"table_features\":%d,", (int)roundf(nd_stats->table_features));
518  stringbuffer_aprintf(sb, "\"sample_features\":%d,", (int)roundf(nd_stats->sample_features));
519  stringbuffer_aprintf(sb, "\"not_null_features\":%d,", (int)roundf(nd_stats->not_null_features));
520  stringbuffer_aprintf(sb, "\"histogram_features\":%d,", (int)roundf(nd_stats->histogram_features));
521  stringbuffer_aprintf(sb, "\"histogram_cells\":%d,", (int)roundf(nd_stats->histogram_cells));
522  stringbuffer_aprintf(sb, "\"cells_covered\":%d", (int)roundf(nd_stats->cells_covered));
523  stringbuffer_append(sb, "}");
524 
527  return str;
528 }
529 
530 
536 static char*
538 {
539  char *rv;
540  int j, k;
541  int sizex = (int)roundf(stats->size[0]);
542  int sizey = (int)roundf(stats->size[1]);
544 
545  for ( k = 0; k < sizey; k++ )
546  {
547  for ( j = 0; j < sizex; j++ )
548  {
549  stringbuffer_aprintf(sb, "%3d ", (int)roundf(stats->value[j + k*sizex]));
550  }
551  stringbuffer_append(sb, "\n");
552  }
553 
556  return rv;
557 }
558 
559 
561 static int
562 nd_box_merge(const ND_BOX *source, ND_BOX *target)
563 {
564  int d;
565  for ( d = 0; d < ND_DIMS; d++ )
566  {
567  target->min[d] = Min(target->min[d], source->min[d]);
568  target->max[d] = Max(target->max[d], source->max[d]);
569  }
570  return true;
571 }
572 
574 static int
576 {
577  memset(a, 0, sizeof(ND_BOX));
578  return true;
579 }
580 
586 static int
588 {
589  int d;
590  for ( d = 0; d < ND_DIMS; d++ )
591  {
592  a->min[d] = FLT_MAX;
593  a->max[d] = -1 * FLT_MAX;
594  }
595  return true;
596 }
597 
599 static void
600 nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
601 {
602  volatile int d = 0;
603  POSTGIS_DEBUGF(3, " %s", gbox_to_string(gbox));
604 
605  nd_box_init(nd_box);
606  nd_box->min[d] = gbox->xmin;
607  nd_box->max[d] = gbox->xmax;
608  d++;
609  nd_box->min[d] = gbox->ymin;
610  nd_box->max[d] = gbox->ymax;
611  d++;
612  if ( FLAGS_GET_GEODETIC(gbox->flags) )
613  {
614  nd_box->min[d] = gbox->zmin;
615  nd_box->max[d] = gbox->zmax;
616  return;
617  }
618  if ( FLAGS_GET_Z(gbox->flags) )
619  {
620  nd_box->min[d] = gbox->zmin;
621  nd_box->max[d] = gbox->zmax;
622  d++;
623  }
624  if ( FLAGS_GET_M(gbox->flags) )
625  {
626  nd_box->min[d] = gbox->mmin;
627  nd_box->max[d] = gbox->mmax;
628  d++;
629  }
630  return;
631 }
632 
636 static int
637 nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
638 {
639  int d;
640  for ( d = 0; d < ndims; d++ )
641  {
642  if ( (a->min[d] > b->max[d]) || (a->max[d] < b->min[d]) )
643  return false;
644  }
645  return true;
646 }
647 
651 static int
652 nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
653 {
654  int d;
655  for ( d = 0; d < ndims; d++ )
656  {
657  if ( ! ((a->min[d] < b->min[d]) && (a->max[d] > b->max[d])) )
658  return false;
659  }
660  return true;
661 }
662 
667 static int
668 nd_box_expand(ND_BOX *nd_box, double expansion_factor)
669 {
670  int d;
671  double size;
672  for ( d = 0; d < ND_DIMS; d++ )
673  {
674  size = nd_box->max[d] - nd_box->min[d];
675  /* Avoid expanding boxes that are either too wide or too narrow*/
676  if (size < MIN_DIMENSION_WIDTH || size > MAX_DIMENSION_WIDTH)
677  continue;
678  nd_box->min[d] -= size * expansion_factor / 2;
679  nd_box->max[d] += size * expansion_factor / 2;
680  }
681  return true;
682 }
683 
688 static inline int
689 nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
690 {
691  int d;
692 
693  POSTGIS_DEBUGF(4, " nd_box: %s", nd_box_to_json(nd_box, nd_stats->ndims));
694 
695  /* Initialize ibox */
696  memset(nd_ibox, 0, sizeof(ND_IBOX));
697 
698  /* In each dimension... */
699  for ( d = 0; d < nd_stats->ndims; d++ )
700  {
701  double smin = nd_stats->extent.min[d];
702  double smax = nd_stats->extent.max[d];
703  double width = smax - smin;
704 
705  if (width < MIN_DIMENSION_WIDTH)
706  {
707  nd_ibox->min[d] = nd_ibox->max[d] = nd_stats->extent.min[d];
708  }
709  else
710  {
711  int size = (int)roundf(nd_stats->size[d]);
712 
713  /* ... find cells the box overlaps with in this dimension */
714  nd_ibox->min[d] = floor(size * (nd_box->min[d] - smin) / width);
715  nd_ibox->max[d] = floor(size * (nd_box->max[d] - smin) / width);
716 
717  POSTGIS_DEBUGF(5, " stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
718  POSTGIS_DEBUGF(5, " overlap: dim %d: (%d, %d)", d, nd_ibox->min[d], nd_ibox->max[d]);
719 
720  /* Push any out-of range values into range */
721  nd_ibox->min[d] = Max(nd_ibox->min[d], 0);
722  nd_ibox->max[d] = Min(nd_ibox->max[d], size - 1);
723  }
724  }
725  return true;
726 }
727 
731 static inline double
732 nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
733 {
734  int d;
735  bool covered = true;
736  double ivol = 1.0;
737  double vol2 = 1.0;
738 
739  for ( d = 0 ; d < ndims; d++ )
740  {
741  if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
742  return 0.0; /* Disjoint */
743 
744  if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
745  covered = false;
746  }
747 
748  if ( covered )
749  return 1.0;
750 
751  for ( d = 0; d < ndims; d++ )
752  {
753  double width2 = b2->max[d] - b2->min[d];
754  double imin, imax, iwidth;
755 
756  vol2 *= width2;
757 
758  imin = Max(b1->min[d], b2->min[d]);
759  imax = Min(b1->max[d], b2->max[d]);
760  iwidth = imax - imin;
761  iwidth = Max(0.0, iwidth);
762 
763  ivol *= iwidth;
764  }
765 
766  if ( vol2 == 0.0 )
767  return vol2;
768 
769  return ivol / vol2;
770 }
771 
772 /* How many bins shall we use in figuring out the distribution? */
773 #define MAX_NUM_BINS 50
774 #define BIN_MIN_SIZE 10
775 
791 static int
792 nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
793 {
794  int d, i, k, range;
795  int *counts;
796  double smin, smax; /* Spatial min, spatial max */
797  double swidth; /* Spatial width of dimension */
798 #if POSTGIS_DEBUG_LEVEL >= 3
799  double average, sdev, sdev_ratio;
800 #endif
801  int bmin, bmax; /* Bin min, bin max */
802  const ND_BOX *ndb;
803 
804  int num_bins = Min(Max(2, num_boxes/BIN_MIN_SIZE), MAX_NUM_BINS);
805  counts = palloc0(num_bins * sizeof(int));
806 
807  /* For each dimension... */
808  for ( d = 0; d < ndims; d++ )
809  {
810  /* Initialize counts for this dimension */
811  memset(counts, 0, num_bins * sizeof(int));
812 
813 
814  smin = extent->min[d];
815  smax = extent->max[d];
816  swidth = smax - smin;
817 
818  /* Don't try and calculate distribution of overly narrow */
819  /* or overly wide dimensions. Here we're being pretty geographical, */
820  /* expecting "normal" planar or geographic coordinates. */
821  /* Otherwise we have to "handle" +/- Inf bounded features and */
822  /* the assumptions needed for that are as bad as this hack. */
823  if ( swidth < MIN_DIMENSION_WIDTH || swidth > MAX_DIMENSION_WIDTH )
824  {
825  distribution[d] = 0;
826  continue;
827  }
828 
829  /* Sum up the overlaps of each feature with the dimensional bins */
830  for ( i = 0; i < num_boxes; i++ )
831  {
832  double minoffset, maxoffset;
833 
834  /* Skip null entries */
835  ndb = nd_boxes[i];
836  if ( ! ndb ) continue;
837 
838  /* Where does box fall relative to the working range */
839  minoffset = ndb->min[d] - smin;
840  maxoffset = ndb->max[d] - smin;
841 
842  /* Skip boxes that our outside our working range */
843  if ( minoffset < 0 || minoffset > swidth ||
844  maxoffset < 0 || maxoffset > swidth )
845  {
846  continue;
847  }
848 
849  /* What bins does this range correspond to? */
850  bmin = floor(num_bins * minoffset / swidth);
851  bmax = floor(num_bins * maxoffset / swidth);
852 
853  /* Should only happen when maxoffset==swidth */
854  if (bmax >= num_bins)
855  bmax = num_bins-1;
856 
857  POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
858 
859  /* Increment the counts in all the bins this feature overlaps */
860  for ( k = bmin; k <= bmax; k++ )
861  {
862  counts[k] += 1;
863  }
864 
865  }
866 
867  /* How dispersed is the distribution of features across bins? */
868  // range = range_quintile(counts, num_bins);
869  range = range_full(counts, num_bins);
870 
871 #if POSTGIS_DEBUG_LEVEL >= 3
872  average = avg(counts, num_bins);
873  sdev = stddev(counts, num_bins);
874  sdev_ratio = sdev/average;
875 
876  POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
877  POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
878  POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
879  POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
880 #endif
881 
882  distribution[d] = range;
883  }
884 
885  pfree(counts);
886 
887  return true;
888 }
889 
895 static inline int
896 nd_increment(ND_IBOX *ibox, int ndims, int *counter)
897 {
898  int d = 0;
899 
900  while ( d < ndims )
901  {
902  if ( counter[d] < ibox->max[d] )
903  {
904  counter[d] += 1;
905  break;
906  }
907  counter[d] = ibox->min[d];
908  d++;
909  }
910  /* That's it, cannot increment any more! */
911  if ( d == ndims )
912  return false;
913 
914  /* Increment complete! */
915  return true;
916 }
917 
918 static ND_STATS*
919 pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
920 {
921  int stats_kind = STATISTIC_KIND_ND;
922  int rv;
923  ND_STATS *nd_stats;
924 
925  /* If we're in 2D mode, set the kind appropriately */
926  if ( mode == 2 ) stats_kind = STATISTIC_KIND_2D;
927 
928  /* Then read the geom status histogram from that */
929  {
930  AttStatsSlot sslot;
931  rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
932  ATTSTATSSLOT_NUMBERS);
933  if ( ! rv ) {
934  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
935  return NULL;
936  }
937 
938  /* Clone the stats here so we can release the attstatsslot immediately */
939  nd_stats = palloc(sizeof(float4) * sslot.nnumbers);
940  memcpy(nd_stats, sslot.numbers, sizeof(float4) * sslot.nnumbers);
941 
942  free_attstatsslot(&sslot);
943  }
944  return nd_stats;
945 }
946 
951 static ND_STATS*
952 pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
953 {
954  HeapTuple stats_tuple = NULL;
955  ND_STATS *nd_stats;
956 
957  /* First pull the stats tuple for the whole tree */
958  if ( ! only_parent )
959  {
960  POSTGIS_DEBUGF(2, "searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
961  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(true));
962  if ( stats_tuple )
963  POSTGIS_DEBUGF(2, "found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
964  }
965  /* Fall-back to main table stats only, if not found for whole tree or explicitly ignored */
966  if ( only_parent || ! stats_tuple )
967  {
968  POSTGIS_DEBUGF(2, "searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
969  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(false));
970  if ( stats_tuple )
971  POSTGIS_DEBUGF(2, "found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
972  }
973  if ( ! stats_tuple )
974  {
975  POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
976  return NULL;
977  }
978 
979  nd_stats = pg_nd_stats_from_tuple(stats_tuple, mode);
980  ReleaseSysCache(stats_tuple);
981  if ( ! nd_stats )
982  {
983  POSTGIS_DEBUGF(2,
984  "histogram for attribute %d of table \"%s\" does not exist?",
985  att_num, get_rel_name(table_oid));
986  }
987 
988  return nd_stats;
989 }
990 
999 static ND_STATS*
1000 pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
1001 {
1002  const char *att_name = text_to_cstring(att_text);
1003  AttrNumber att_num;
1004 
1005  /* We know the name? Look up the num */
1006  if ( att_text )
1007  {
1008  /* Get the attribute number */
1009  att_num = get_attnum(table_oid, att_name);
1010  if ( ! att_num ) {
1011  elog(ERROR, "attribute \"%s\" does not exist", att_name);
1012  return NULL;
1013  }
1014  }
1015  else
1016  {
1017  elog(ERROR, "attribute name is null");
1018  return NULL;
1019  }
1020 
1021  return pg_get_nd_stats(table_oid, att_num, mode, only_parent);
1022 }
1023 
1037 static float8
1039 {
1040  int ncells1, ncells2;
1041  int ndims1, ndims2, ndims;
1042  double ntuples_max;
1043  double ntuples_not_null1, ntuples_not_null2;
1044 
1045  ND_BOX extent1, extent2;
1046  ND_IBOX ibox1, ibox2;
1047  int at1[ND_DIMS];
1048  int at2[ND_DIMS];
1049  double min1[ND_DIMS];
1050  double width1[ND_DIMS];
1051  double cellsize1[ND_DIMS];
1052  int size2[ND_DIMS];
1053  double min2[ND_DIMS];
1054  double width2[ND_DIMS];
1055  double cellsize2[ND_DIMS];
1056  int size1[ND_DIMS];
1057  int d;
1058  double val = 0;
1059  float8 selectivity;
1060 
1061  /* Drop out on null inputs */
1062  if ( ! ( s1 && s2 ) )
1063  {
1064  elog(NOTICE, " estimate_join_selectivity called with null inputs");
1065  return FALLBACK_ND_SEL;
1066  }
1067 
1068  /* We need to know how many cells each side has... */
1069  ncells1 = (int)roundf(s1->histogram_cells);
1070  ncells2 = (int)roundf(s2->histogram_cells);
1071 
1072  /* ...so that we can drive the summation loop with the smaller histogram. */
1073  if ( ncells1 > ncells2 )
1074  {
1075  const ND_STATS *stats_tmp = s1;
1076  s1 = s2;
1077  s2 = stats_tmp;
1078  }
1079 
1080  POSTGIS_DEBUGF(3, "s1: %s", nd_stats_to_json(s1));
1081  POSTGIS_DEBUGF(3, "s2: %s", nd_stats_to_json(s2));
1082 
1083  /* Re-read that info after the swap */
1084  ncells1 = (int)roundf(s1->histogram_cells);
1085  ncells2 = (int)roundf(s2->histogram_cells);
1086 
1087  /* Q: What's the largest possible join size these relations can create? */
1088  /* A: The product of the # of non-null rows in each relation. */
1089  ntuples_not_null1 = s1->table_features * (s1->not_null_features / s1->sample_features);
1090  ntuples_not_null2 = s2->table_features * (s2->not_null_features / s2->sample_features);
1091  ntuples_max = ntuples_not_null1 * ntuples_not_null2;
1092 
1093  /* Get the ndims as ints */
1094  ndims1 = (int)roundf(s1->ndims);
1095  ndims2 = (int)roundf(s2->ndims);
1096  ndims = Max(ndims1, ndims2);
1097 
1098  /* Get the extents */
1099  extent1 = s1->extent;
1100  extent2 = s2->extent;
1101 
1102  /* If relation stats do not intersect, join is very very selective. */
1103  if ( ! nd_box_intersects(&extent1, &extent2, ndims) )
1104  {
1105  POSTGIS_DEBUG(3, "relation stats do not intersect, returning 0");
1106  PG_RETURN_FLOAT8(0.0);
1107  }
1108 
1109  /*
1110  * First find the index range of the part of the smaller
1111  * histogram that overlaps the larger one.
1112  */
1113  if ( ! nd_box_overlap(s1, &extent2, &ibox1) )
1114  {
1115  POSTGIS_DEBUG(3, "could not calculate overlap of relations");
1116  PG_RETURN_FLOAT8(FALLBACK_ND_JOINSEL);
1117  }
1118 
1119  /* Initialize counters / constants on s1 */
1120  for ( d = 0; d < ndims1; d++ )
1121  {
1122  at1[d] = ibox1.min[d];
1123  min1[d] = s1->extent.min[d];
1124  width1[d] = s1->extent.max[d] - s1->extent.min[d];
1125  size1[d] = (int)roundf(s1->size[d]);
1126  cellsize1[d] = width1[d] / size1[d];
1127  }
1128 
1129  /* Initialize counters / constants on s2 */
1130  for ( d = 0; d < ndims2; d++ )
1131  {
1132  min2[d] = s2->extent.min[d];
1133  width2[d] = s2->extent.max[d] - s2->extent.min[d];
1134  size2[d] = (int)roundf(s2->size[d]);
1135  cellsize2[d] = width2[d] / size2[d];
1136  }
1137 
1138  /* For each affected cell of s1... */
1139  do
1140  {
1141  double val1;
1142  /* Construct the bounds of this cell */
1143  ND_BOX nd_cell1;
1144  nd_box_init(&nd_cell1);
1145  for ( d = 0; d < ndims1; d++ )
1146  {
1147  nd_cell1.min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1148  nd_cell1.max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1149  }
1150 
1151  /* Find the cells of s2 that cell1 overlaps.. */
1152  nd_box_overlap(s2, &nd_cell1, &ibox2);
1153 
1154  /* Initialize counter */
1155  for ( d = 0; d < ndims2; d++ )
1156  {
1157  at2[d] = ibox2.min[d];
1158  }
1159 
1160  POSTGIS_DEBUGF(3, "at1 %d,%d %s", at1[0], at1[1], nd_box_to_json(&nd_cell1, ndims1));
1161 
1162  /* Get the value at this cell */
1163  val1 = s1->value[nd_stats_value_index(s1, at1)];
1164 
1165  /* For each overlapped cell of s2... */
1166  do
1167  {
1168  double ratio2;
1169  double val2;
1170 
1171  /* Construct the bounds of this cell */
1172  ND_BOX nd_cell2;
1173  nd_box_init(&nd_cell2);
1174  for ( d = 0; d < ndims2; d++ )
1175  {
1176  nd_cell2.min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1177  nd_cell2.max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1178  }
1179 
1180  POSTGIS_DEBUGF(3, " at2 %d,%d %s", at2[0], at2[1], nd_box_to_json(&nd_cell2, ndims2));
1181 
1182  /* Calculate overlap ratio of the cells */
1183  ratio2 = nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1184 
1185  /* Multiply the cell counts, scaled by overlap ratio */
1186  val2 = s2->value[nd_stats_value_index(s2, at2)];
1187  POSTGIS_DEBUGF(3, " val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1188  val += val1 * (val2 * ratio2);
1189  }
1190  while ( nd_increment(&ibox2, ndims2, at2) );
1191 
1192  }
1193  while( nd_increment(&ibox1, ndims1, at1) );
1194 
1195  POSTGIS_DEBUGF(3, "val of histogram = %g", val);
1196 
1197  /*
1198  * In order to compare our total cell count "val" to the
1199  * ntuples_max, we need to scale val up to reflect a full
1200  * table estimate. So, multiply by ratio of table size to
1201  * sample size.
1202  */
1203  val *= (s1->table_features / s1->sample_features);
1204  val *= (s2->table_features / s2->sample_features);
1205 
1206  POSTGIS_DEBUGF(3, "val scaled to full table size = %g", val);
1207 
1208  /*
1209  * Because the cell counts are over-determined due to
1210  * double counting of features that overlap multiple cells
1211  * (see the compute_gserialized_stats routine)
1212  * we also have to scale our cell count "val" *down*
1213  * to adjust for the double counting.
1214  */
1215 // val /= (s1->cells_covered / s1->histogram_features);
1216 // val /= (s2->cells_covered / s2->histogram_features);
1217 
1218  /*
1219  * Finally, the selectivity is the estimated number of
1220  * rows to be returned divided by the maximum possible
1221  * number of rows that can be returned.
1222  */
1223  selectivity = val / ntuples_max;
1224 
1225  /* Guard against over-estimates and crazy numbers :) */
1226  if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1227  {
1228  selectivity = DEFAULT_ND_JOINSEL;
1229  }
1230  else if ( selectivity > 1.0 )
1231  {
1232  selectivity = 1.0;
1233  }
1234 
1235  return selectivity;
1236 }
1237 
1243 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
1244 {
1245  PG_RETURN_DATUM(DirectFunctionCall5(
1247  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1248  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1249  Int32GetDatum(0) /* ND mode */
1250  ));
1251 }
1252 
1258 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
1259 {
1260  PG_RETURN_DATUM(DirectFunctionCall5(
1262  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1263  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1264  Int32GetDatum(2) /* 2D mode */
1265  ));
1266 }
1267 
1268 double
1269 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
1270 {
1271  float8 selectivity;
1272  Oid relid1, relid2;
1273  ND_STATS *stats1, *stats2;
1274  Node *arg1 = (Node*) linitial(args);
1275  Node *arg2 = (Node*) lsecond(args);
1276  Var *var1 = (Var*) arg1;
1277  Var *var2 = (Var*) arg2;
1278 
1279  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1280 
1281  /* We only do column joins right now, no functional joins */
1282  /* TODO: handle g1 && ST_Expand(g2) */
1283  if (!IsA(arg1, Var) || !IsA(arg2, Var))
1284  {
1285  POSTGIS_DEBUGF(1, "%s called with arguments that are not column references", __func__);
1286  return DEFAULT_ND_JOINSEL;
1287  }
1288 
1289  /* What are the Oids of our tables/relations? */
1290  relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
1291  relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
1292 
1293  /* Pull the stats from the stats system. */
1294  stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
1295  stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
1296 
1297  /* If we can't get stats, we have to stop here! */
1298  if (!stats1)
1299  {
1300  POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1301  return DEFAULT_ND_JOINSEL;
1302  }
1303  else if (!stats2)
1304  {
1305  POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1306  return DEFAULT_ND_JOINSEL;
1307  }
1308 
1309  selectivity = estimate_join_selectivity(stats1, stats2);
1310  POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
1311  pfree(stats1);
1312  pfree(stats2);
1313  return selectivity;
1314 }
1315 
1325 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
1326 {
1327  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1328  /* Oid operator = PG_GETARG_OID(1); */
1329  List *args = (List *) PG_GETARG_POINTER(2);
1330  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1331  int mode = PG_GETARG_INT32(4);
1332 
1333  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1334 
1335  /* Check length of args and punt on > 2 */
1336  if (list_length(args) != 2)
1337  {
1338  POSTGIS_DEBUGF(2, "%s: got nargs == %d", __func__, list_length(args));
1339  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1340  }
1341 
1342  /* Only respond to an inner join/unknown context join */
1343  if (jointype != JOIN_INNER)
1344  {
1345  POSTGIS_DEBUGF(1, "%s: jointype %d not supported", __func__, jointype);
1346  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1347  }
1348 
1349  PG_RETURN_FLOAT8(gserialized_joinsel_internal(root, args, jointype, mode));
1350 }
1351 
1370 static void
1371 compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1372  int sample_rows, double total_rows, int mode)
1373 {
1374  MemoryContext old_context;
1375  int d, i; /* Counters */
1376  int notnull_cnt = 0; /* # not null rows in the sample */
1377  int null_cnt = 0; /* # null rows in the sample */
1378  int histogram_features = 0; /* # rows that actually got counted in the histogram */
1379 
1380  ND_STATS *nd_stats; /* Our histogram */
1381  size_t nd_stats_size; /* Size to allocate */
1382 
1383  double total_width = 0; /* # of bytes used by sample */
1384  double total_cell_count = 0; /* # of cells in histogram affected by sample */
1385 
1386  ND_BOX sum; /* Sum of extents of sample boxes */
1387  ND_BOX avg; /* Avg of extents of sample boxes */
1388  ND_BOX stddev; /* StdDev of extents of sample boxes */
1389 
1390  const ND_BOX **sample_boxes; /* ND_BOXes for each of the sample features */
1391  ND_BOX sample_extent; /* Extent of the raw sample */
1392  int histo_size[ND_DIMS]; /* histogram nrows, ncols, etc */
1393  ND_BOX histo_extent; /* Spatial extent of the histogram */
1394  ND_BOX histo_extent_new; /* Temporary variable */
1395  int histo_cells_target; /* Number of cells we will shoot for, given the stats target */
1396  int histo_cells; /* Number of cells in the histogram */
1397  int histo_cells_new = 1; /* Temporary variable */
1398 
1399  int ndims = 2; /* Dimensionality of the sample */
1400  int histo_ndims = 0; /* Dimensionality of the histogram */
1401  double sample_distribution[ND_DIMS]; /* How homogeneous is distribution of sample in each axis? */
1402  double total_distribution; /* Total of sample_distribution */
1403 
1404  int stats_slot; /* What slot is this data going into? (2D vs ND) */
1405  int stats_kind; /* And this is what? (2D vs ND) */
1406 
1407  /* Initialize sum and stddev */
1408  nd_box_init(&sum);
1409  nd_box_init(&stddev);
1410  nd_box_init(&avg);
1411  nd_box_init(&histo_extent);
1412  nd_box_init(&histo_extent_new);
1413 
1414  /*
1415  * This is where gserialized_analyze_nd
1416  * should put its' custom parameters.
1417  */
1418  /* void *mystats = stats->extra_data; */
1419 
1420  POSTGIS_DEBUG(2, "compute_gserialized_stats called");
1421  POSTGIS_DEBUGF(3, " # sample_rows: %d", sample_rows);
1422  POSTGIS_DEBUGF(3, " estimate of total_rows: %.6g", total_rows);
1423 
1424  /*
1425  * We might need less space, but don't think
1426  * its worth saving...
1427  */
1428  sample_boxes = palloc(sizeof(ND_BOX*) * sample_rows);
1429 
1430  /*
1431  * First scan:
1432  * o read boxes
1433  * o find dimensionality of the sample
1434  * o find extent of the sample
1435  * o count null-infinite/not-null values
1436  * o compute total_width
1437  * o compute total features's box area (for avgFeatureArea)
1438  * o sum features box coordinates (for standard deviation)
1439  */
1440  for ( i = 0; i < sample_rows; i++ )
1441  {
1442  Datum datum;
1443  GBOX gbox = {0};
1444  ND_BOX *nd_box;
1445  bool is_null;
1446 
1447  datum = fetchfunc(stats, i, &is_null);
1448 
1449  /* Skip all NULLs. */
1450  if ( is_null )
1451  {
1452  POSTGIS_DEBUGF(4, " skipped null geometry %d", i);
1453  null_cnt++;
1454  continue;
1455  }
1456 
1457  /* Read the bounds from the gserialized. */
1458  if (LW_FAILURE == gserialized_datum_get_gbox_p(datum, &gbox))
1459  {
1460  /* Skip empties too. */
1461  POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
1462  continue;
1463  }
1464 
1465  /* If we're in 2D mode, zero out the higher dimensions for "safety" */
1466  if ( mode == 2 )
1467  gbox.zmin = gbox.zmax = gbox.mmin = gbox.mmax = 0.0;
1468 
1469  /* Check bounds for validity (finite and not NaN) */
1470  if ( ! gbox_is_valid(&gbox) )
1471  {
1472  POSTGIS_DEBUGF(3, " skipped infinite/nan geometry %d", i);
1473  continue;
1474  }
1475 
1476  /*
1477  * In N-D mode, set the ndims to the maximum dimensionality found
1478  * in the sample. Otherwise, leave at ndims == 2.
1479  */
1480  if ( mode != 2 )
1481  ndims = Max(gbox_ndims(&gbox), ndims);
1482 
1483  /* Convert gbox to n-d box */
1484  nd_box = palloc(sizeof(ND_BOX));
1485  nd_box_from_gbox(&gbox, nd_box);
1486 
1487  /* Cache n-d bounding box */
1488  sample_boxes[notnull_cnt] = nd_box;
1489 
1490  /* Initialize sample extent before merging first entry */
1491  if ( ! notnull_cnt )
1492  nd_box_init_bounds(&sample_extent);
1493 
1494  /* Add current sample to overall sample extent */
1495  nd_box_merge(nd_box, &sample_extent);
1496 
1497  /* How many bytes does this sample use? */
1498  total_width += toast_raw_datum_size(datum);
1499 
1500  /* Add bounds coordinates to sums for stddev calculation */
1501  for ( d = 0; d < ndims; d++ )
1502  {
1503  sum.min[d] += nd_box->min[d];
1504  sum.max[d] += nd_box->max[d];
1505  }
1506 
1507  /* Increment our "good feature" count */
1508  notnull_cnt++;
1509 
1510  /* Give backend a chance of interrupting us */
1511  vacuum_delay_point();
1512  }
1513 
1514  /*
1515  * We'll build a histogram having stats->attr->attstattarget
1516  * (default 100) cells on each side, within reason...
1517  * we'll use ndims*100000 as the maximum number of cells.
1518  * Also, if we're sampling a relatively small table, we'll try to ensure that
1519  * we have a smaller grid.
1520  */
1521 #if POSTGIS_PGSQL_VERSION >= 170
1522  histo_cells_target = (int)pow((double)(stats->attstattarget), (double)ndims);
1523  POSTGIS_DEBUGF(3, " stats->attstattarget: %d", stats->attstattarget);
1524 #else
1525  histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
1526  POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
1527 #endif
1528  histo_cells_target = Min(histo_cells_target, ndims * 100000);
1529  histo_cells_target = Min(histo_cells_target, (int)(10 * ndims * total_rows));
1530  POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
1531 
1532  /* If there's no useful features, we can't work out stats */
1533  if ( ! notnull_cnt )
1534  {
1535  stats->stats_valid = false;
1536  return;
1537  }
1538 
1539  POSTGIS_DEBUGF(3, " sample_extent: %s", nd_box_to_json(&sample_extent, ndims));
1540 
1541  /*
1542  * Second scan:
1543  * o compute standard deviation
1544  */
1545  for ( d = 0; d < ndims; d++ )
1546  {
1547  /* Calculate average bounds values */
1548  avg.min[d] = sum.min[d] / notnull_cnt;
1549  avg.max[d] = sum.max[d] / notnull_cnt;
1550 
1551  /* Calculate standard deviation for this dimension bounds */
1552  for ( i = 0; i < notnull_cnt; i++ )
1553  {
1554  const ND_BOX *ndb = sample_boxes[i];
1555  stddev.min[d] += (ndb->min[d] - avg.min[d]) * (ndb->min[d] - avg.min[d]);
1556  stddev.max[d] += (ndb->max[d] - avg.max[d]) * (ndb->max[d] - avg.max[d]);
1557  }
1558  stddev.min[d] = sqrt(stddev.min[d] / notnull_cnt);
1559  stddev.max[d] = sqrt(stddev.max[d] / notnull_cnt);
1560 
1561  /* Histogram bounds for this dimension bounds is avg +/- SDFACTOR * stdev */
1562  histo_extent.min[d] = Max(avg.min[d] - SDFACTOR * stddev.min[d], sample_extent.min[d]);
1563  histo_extent.max[d] = Min(avg.max[d] + SDFACTOR * stddev.max[d], sample_extent.max[d]);
1564  }
1565 
1566  /*
1567  * Third scan:
1568  * o skip hard deviants
1569  * o compute new histogram box
1570  */
1571  nd_box_init_bounds(&histo_extent_new);
1572  for ( i = 0; i < notnull_cnt; i++ )
1573  {
1574  const ND_BOX *ndb = sample_boxes[i];
1575  /* Skip any hard deviants (boxes entirely outside our histo_extent */
1576  if ( ! nd_box_intersects(&histo_extent, ndb, ndims) )
1577  {
1578  POSTGIS_DEBUGF(4, " feature %d is a hard deviant, skipped", i);
1579  sample_boxes[i] = NULL;
1580  continue;
1581  }
1582  /* Expand our new box to fit all the other features. */
1583  nd_box_merge(ndb, &histo_extent_new);
1584  }
1585  /*
1586  * Expand the box slightly (1%) to avoid edge effects
1587  * with objects that are on the boundary
1588  */
1589  nd_box_expand(&histo_extent_new, 0.01);
1590  histo_extent = histo_extent_new;
1591 
1592  /*
1593  * How should we allocate our histogram cells to the
1594  * different dimensions? We can't do it by raw dimensional width,
1595  * because in x/y/z space, the z can have different units
1596  * from the x/y. Similarly for x/y/t space.
1597  * So, we instead calculate how much features overlap
1598  * each other in their dimension to figure out which
1599  * dimensions have useful selectivity characteristics (more
1600  * variability in density) and therefor would find
1601  * more cells useful (to distinguish between dense places and
1602  * homogeneous places).
1603  */
1604  nd_box_array_distribution(sample_boxes, notnull_cnt, &histo_extent, ndims,
1605  sample_distribution);
1606 
1607  /*
1608  * The sample_distribution array now tells us how spread out the
1609  * data is in each dimension, so we use that data to allocate
1610  * the histogram cells we have available.
1611  * At this point, histo_cells_target is the approximate target number
1612  * of cells.
1613  */
1614 
1615  /*
1616  * Some dimensions have basically a uniform distribution, we want
1617  * to allocate no cells to those dimensions, only to dimensions
1618  * that have some interesting differences in data distribution.
1619  * Here we count up the number of interesting dimensions
1620  */
1621  for ( d = 0; d < ndims; d++ )
1622  {
1623  if ( sample_distribution[d] > 0 )
1624  histo_ndims++;
1625  }
1626 
1627  if ( histo_ndims == 0 )
1628  {
1629  /* Special case: all our dimensions had low variability! */
1630  /* We just divide the cells up evenly */
1631  POSTGIS_DEBUG(3, " special case: no axes have variability");
1632  histo_cells_new = 1;
1633  for ( d = 0; d < ndims; d++ )
1634  {
1635  histo_size[d] = (int)pow((double)histo_cells_target, 1/(double)ndims);
1636  if ( ! histo_size[d] )
1637  histo_size[d] = 1;
1638  POSTGIS_DEBUGF(3, " histo_size[d]: %d", histo_size[d]);
1639  histo_cells_new *= histo_size[d];
1640  }
1641  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1642  }
1643  else
1644  {
1645  /*
1646  * We're going to express the amount of variability in each dimension
1647  * as a proportion of the total variability and allocate cells in that
1648  * dimension relative to that proportion.
1649  */
1650  POSTGIS_DEBUG(3, " allocating histogram axes based on axis variability");
1651  total_distribution = total_double(sample_distribution, ndims); /* First get the total */
1652  POSTGIS_DEBUGF(3, " total_distribution: %.8g", total_distribution);
1653  histo_cells_new = 1; /* For the number of cells in the final histogram */
1654  for ( d = 0; d < ndims; d++ )
1655  {
1656  if ( sample_distribution[d] == 0 ) /* Uninteresting dimensions don't get any room */
1657  {
1658  histo_size[d] = 1;
1659  }
1660  else /* Interesting dimension */
1661  {
1662  /* How does this dims variability compare to the total? */
1663  float edge_ratio = (float)sample_distribution[d] / (float)total_distribution;
1664  /*
1665  * Scale the target cells number by the # of dims and ratio,
1666  * then take the appropriate root to get the estimated number of cells
1667  * on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
1668  */
1669  histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
1670  /* If something goes awry, just give this dim one slot */
1671  if ( ! histo_size[d] )
1672  histo_size[d] = 1;
1673  }
1674  histo_cells_new *= histo_size[d];
1675  }
1676  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1677  }
1678 
1679  /* Update histo_cells to the actual number of cells we need to allocate */
1680  histo_cells = histo_cells_new;
1681  POSTGIS_DEBUGF(3, " histo_cells: %d", histo_cells);
1682 
1683  /*
1684  * Create the histogram (ND_STATS) in the stats memory context
1685  */
1686  old_context = MemoryContextSwitchTo(stats->anl_context);
1687  nd_stats_size = sizeof(ND_STATS) + ((histo_cells - 1) * sizeof(float4));
1688  nd_stats = palloc(nd_stats_size);
1689  memset(nd_stats, 0, nd_stats_size); /* Initialize all values to 0 */
1690  MemoryContextSwitchTo(old_context);
1691 
1692  /* Initialize the #ND_STATS objects */
1693  nd_stats->ndims = ndims;
1694  nd_stats->extent = histo_extent;
1695  nd_stats->sample_features = sample_rows;
1696  nd_stats->table_features = total_rows;
1697  nd_stats->not_null_features = notnull_cnt;
1698  /* Copy in the histogram dimensions */
1699  for ( d = 0; d < ndims; d++ )
1700  nd_stats->size[d] = histo_size[d];
1701 
1702  /*
1703  * Fourth scan:
1704  * o fill histogram values with the proportion of
1705  * features' bbox overlaps: a feature's bvol
1706  * can fully overlap (1) or partially overlap
1707  * (fraction of 1) an histogram cell.
1708  *
1709  * Note that we are filling each cell with the "portion of
1710  * the feature's box that overlaps the cell". So, if we sum
1711  * up the values in the histogram, we could get the
1712  * histogram feature count.
1713  *
1714  */
1715  for ( i = 0; i < notnull_cnt; i++ )
1716  {
1717  const ND_BOX *nd_box;
1718  ND_IBOX nd_ibox;
1719  int at[ND_DIMS];
1720  double num_cells = 0;
1721  double min[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1722  double max[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1723  double cellsize[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1724 
1725  nd_box = sample_boxes[i];
1726  if ( ! nd_box ) continue; /* Skip Null'ed out hard deviants */
1727 
1728  /* Give backend a chance of interrupting us */
1729  vacuum_delay_point();
1730 
1731  /* Find the cells that overlap with this box and put them into the ND_IBOX */
1732  nd_box_overlap(nd_stats, nd_box, &nd_ibox);
1733  memset(at, 0, sizeof(int)*ND_DIMS);
1734 
1735  POSTGIS_DEBUGF(3, " feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1736  nd_ibox.min[0], nd_ibox.min[1], nd_ibox.min[2], nd_ibox.min[3],
1737  nd_ibox.max[0], nd_ibox.max[1], nd_ibox.max[2], nd_ibox.max[3]);
1738 
1739  for ( d = 0; d < nd_stats->ndims; d++ )
1740  {
1741  /* Initialize the starting values */
1742  at[d] = nd_ibox.min[d];
1743  min[d] = nd_stats->extent.min[d];
1744  max[d] = nd_stats->extent.max[d];
1745  cellsize[d] = (max[d] - min[d])/(nd_stats->size[d]);
1746  }
1747 
1748  /*
1749  * Move through all the overlaped histogram cells values and
1750  * add the box overlap proportion to them.
1751  */
1752  do
1753  {
1754  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1755  double ratio;
1756  /* Create a box for this histogram cell */
1757  for ( d = 0; d < nd_stats->ndims; d++ )
1758  {
1759  nd_cell.min[d] = min[d] + (at[d]+0) * cellsize[d];
1760  nd_cell.max[d] = min[d] + (at[d]+1) * cellsize[d];
1761  }
1762 
1763  /*
1764  * If a feature box is completely inside one cell the ratio will be
1765  * 1.0. If a feature box is 50% in two cells, each cell will get
1766  * 0.5 added on.
1767  */
1768  ratio = nd_box_ratio(&nd_cell, nd_box, nd_stats->ndims);
1769  nd_stats->value[nd_stats_value_index(nd_stats, at)] += ratio;
1770  num_cells += ratio;
1771  POSTGIS_DEBUGF(3, " ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1772  POSTGIS_DEBUGF(3, " at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1773  }
1774  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1775 
1776  /* Keep track of overall number of overlaps counted */
1777  total_cell_count += num_cells;
1778  /* How many features have we added to this histogram? */
1779  histogram_features++;
1780  }
1781 
1782  POSTGIS_DEBUGF(3, " histogram_features: %d", histogram_features);
1783  POSTGIS_DEBUGF(3, " sample_rows: %d", sample_rows);
1784  POSTGIS_DEBUGF(3, " table_rows: %.6g", total_rows);
1785 
1786  /* Error out if we got no sample information */
1787  if ( ! histogram_features )
1788  {
1789  POSTGIS_DEBUG(3, " no stats have been gathered");
1790  elog(NOTICE, " no features lie in the stats histogram, invalid stats");
1791  stats->stats_valid = false;
1792  return;
1793  }
1794 
1795  nd_stats->histogram_features = histogram_features;
1796  nd_stats->histogram_cells = histo_cells;
1797  nd_stats->cells_covered = total_cell_count;
1798 
1799  /* Put this histogram data into the right slot/kind */
1800  if ( mode == 2 )
1801  {
1802  stats_slot = STATISTIC_SLOT_2D;
1803  stats_kind = STATISTIC_KIND_2D;
1804  }
1805  else
1806  {
1807  stats_slot = STATISTIC_SLOT_ND;
1808  stats_kind = STATISTIC_KIND_ND;
1809  }
1810 
1811  /* Write the statistics data */
1812  stats->stakind[stats_slot] = stats_kind;
1813  stats->staop[stats_slot] = InvalidOid;
1814  stats->stanumbers[stats_slot] = (float4*)nd_stats;
1815  stats->numnumbers[stats_slot] = nd_stats_size/sizeof(float4);
1816  stats->stanullfrac = (float4)null_cnt/sample_rows;
1817  stats->stawidth = total_width/notnull_cnt;
1818  stats->stadistinct = -1.0;
1819  stats->stats_valid = true;
1820 
1821  POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1822  POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1823  POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
1824  POSTGIS_DEBUGF(3, " out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1825  POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
1826  POSTGIS_DEBUG (3, " out: distinct values: all (no check done)");
1827  POSTGIS_DEBUGF(3, " out: %s", nd_stats_to_json(nd_stats));
1828  /*
1829  POSTGIS_DEBUGF(3, " out histogram:\n%s", nd_stats_to_grid(nd_stats));
1830  */
1831 
1832  return;
1833 }
1834 
1835 
1853 static void
1854 compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1855  int sample_rows, double total_rows)
1856 {
1857  GserializedAnalyzeExtraData *extra_data = (GserializedAnalyzeExtraData *)stats->extra_data;
1858  /* Call standard statistics calculation routine to fill in correlation for BRIN to work */
1859  stats->extra_data = extra_data->std_extra_data;
1860  extra_data->std_compute_stats(stats, fetchfunc, sample_rows, total_rows);
1861  stats->extra_data = extra_data;
1862 
1863  /* 2D Mode */
1864  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
1865 
1866  if (stats->stats_valid)
1867  {
1868  /* ND Mode: Only computed if 2D was computed too (not NULL and valid) */
1869  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 0);
1870  }
1871 }
1872 
1873 
1901 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
1902 {
1903  VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1904  GserializedAnalyzeExtraData *extra_data =
1906 
1907  /* Ask for standard analyze to fill in as much as possible */
1908  if (!std_typanalyze(stats))
1909  PG_RETURN_BOOL(false);
1910 
1911  /* Save old compute_stats and extra_data for scalar statistics ... */
1912  extra_data->std_compute_stats = stats->compute_stats;
1913  extra_data->std_extra_data = stats->extra_data;
1914  /* ... and replace with our info */
1915  stats->compute_stats = compute_gserialized_stats;
1916  stats->extra_data = extra_data;
1917 
1918  /* Indicate we are done successfully */
1919  PG_RETURN_BOOL(true);
1920 }
1921 
1934 static float8
1935 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
1936 {
1937  int d; /* counter */
1938  float8 selectivity;
1939  ND_BOX nd_box;
1940  ND_IBOX nd_ibox;
1941  int at[ND_DIMS];
1942  double cell_size[ND_DIMS];
1943  double min[ND_DIMS];
1944  double max[ND_DIMS];
1945  double total_count = 0.0;
1946  int ndims_max;
1947 
1948  /* Calculate the overlap of the box on the histogram */
1949  if ( ! nd_stats )
1950  {
1951  elog(NOTICE, " estimate_selectivity called with null input");
1952  return FALLBACK_ND_SEL;
1953  }
1954 
1955  ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1956 
1957  /* Initialize nd_box. */
1958  nd_box_from_gbox(box, &nd_box);
1959 
1960  /*
1961  * To return 2D stats on an ND sample, we need to make the
1962  * 2D box cover the full range of the other dimensions in the
1963  * histogram.
1964  */
1965  POSTGIS_DEBUGF(3, " mode: %d", mode);
1966  if ( mode == 2 )
1967  {
1968  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1969  ndims_max = 2;
1970  }
1971 
1972  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1973  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1974 
1975  // elog(DEBUG1, "out histogram:\n%s", nd_stats_to_grid(nd_stats));
1976 
1977  /*
1978  * Search box completely misses histogram extent?
1979  * We have to intersect in all N dimensions or else we have
1980  * zero interaction under the &&& operator. It's important
1981  * to short circuit in this case, as some of the tests below
1982  * will return junk results when run on non-intersecting inputs.
1983  */
1984  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1985  {
1986  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1987  return 0.0;
1988  }
1989 
1990  /* Search box completely contains histogram extent! */
1991  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1992  {
1993  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1994  return 1.0;
1995  }
1996 
1997  /* Calculate the overlap of the box on the histogram */
1998  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
1999  {
2000  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
2001  return FALLBACK_ND_SEL;
2002  }
2003 
2004  /* Work out some measurements of the histogram */
2005  for ( d = 0; d < nd_stats->ndims; d++ )
2006  {
2007  /* Cell size in each dim */
2008  min[d] = nd_stats->extent.min[d];
2009  max[d] = nd_stats->extent.max[d];
2010  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
2011  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2012 
2013  /* Initialize the counter */
2014  at[d] = nd_ibox.min[d];
2015  }
2016 
2017  /* Move through all the overlap values and sum them */
2018  do
2019  {
2020  float cell_count, ratio;
2021  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2022 
2023  /* We have to pro-rate partially overlapped cells. */
2024  for ( d = 0; d < nd_stats->ndims; d++ )
2025  {
2026  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2027  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2028  }
2029 
2030  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2031  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2032 
2033  /* Add the pro-rated count for this cell to the overall total */
2034  total_count += cell_count * ratio;
2035  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2036  }
2037  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2038 
2039  /* Scale by the number of features in our histogram to get the proportion */
2040  selectivity = total_count / nd_stats->histogram_features;
2041 
2042  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2043  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2044  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2045  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2046 
2047  /* Prevent rounding overflows */
2048  if (selectivity > 1.0) selectivity = 1.0;
2049  else if (selectivity < 0.0) selectivity = 0.0;
2050 
2051  return selectivity;
2052 }
2053 
2054 
2055 
2061 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
2062 {
2063  Oid table_oid = PG_GETARG_OID(0);
2064  text *att_text = PG_GETARG_TEXT_P(1);
2065  ND_STATS *nd_stats;
2066  char *str;
2067  text *json;
2068  int mode = 2; /* default to 2D mode */
2069  bool only_parent = false; /* default to whole tree stats */
2070 
2071  /* Check if we've been asked to not use 2d mode */
2072  if ( ! PG_ARGISNULL(2) )
2073  mode = text_p_get_mode(PG_GETARG_TEXT_P(2));
2074 
2075  /* Retrieve the stats object */
2076  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, only_parent);
2077  if ( ! nd_stats )
2078  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2079 
2080  /* Convert to JSON */
2081  elog(DEBUG1, "stats grid:\n%s", nd_stats_to_grid(nd_stats));
2082  str = nd_stats_to_json(nd_stats);
2083  json = cstring_to_text(str);
2084  pfree(str);
2085  pfree(nd_stats);
2086 
2087  PG_RETURN_TEXT_P(json);
2088 }
2089 
2090 
2096 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
2097 {
2098  Oid table_oid = PG_GETARG_OID(0);
2099  text *att_text = PG_GETARG_TEXT_P(1);
2100  Datum geom_datum = PG_GETARG_DATUM(2);
2101  GBOX gbox; /* search box read from gserialized datum */
2102  float8 selectivity = 0;
2103  ND_STATS *nd_stats;
2104  int mode = 2; /* 2D mode by default */
2105 
2106  /* Check if we've been asked to not use 2d mode */
2107  if ( ! PG_ARGISNULL(3) )
2108  mode = text_p_get_mode(PG_GETARG_TEXT_P(3));
2109 
2110  /* Retrieve the stats object */
2111  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, false);
2112 
2113  if ( ! nd_stats )
2114  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2115 
2116  /* Calculate the gbox */
2117  if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
2118  elog(ERROR, "unable to calculate bounding box from geometry");
2119 
2120  POSTGIS_DEBUGF(3, " %s", gbox_to_string(&gbox));
2121 
2122  /* Do the estimation */
2123  selectivity = estimate_selectivity(&gbox, nd_stats, mode);
2124 
2125  pfree(nd_stats);
2126  PG_RETURN_FLOAT8(selectivity);
2127 }
2128 
2129 
2135 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
2136 {
2137  Oid table_oid1 = PG_GETARG_OID(0);
2138  text *att_text1 = PG_GETARG_TEXT_P(1);
2139  Oid table_oid2 = PG_GETARG_OID(2);
2140  text *att_text2 = PG_GETARG_TEXT_P(3);
2141  ND_STATS *nd_stats1, *nd_stats2;
2142  float8 selectivity = 0;
2143  int mode = 2; /* 2D mode by default */
2144 
2145 
2146  /* Retrieve the stats object */
2147  nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode, false);
2148  nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode, false);
2149 
2150  if ( ! nd_stats1 )
2151  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text_to_cstring(att_text1));
2152 
2153  if ( ! nd_stats2 )
2154  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid2), text_to_cstring(att_text2));
2155 
2156  /* Check if we've been asked to not use 2d mode */
2157  if ( ! PG_ARGISNULL(4) )
2158  {
2159  text *modetxt = PG_GETARG_TEXT_P(4);
2160  char *modestr = text_to_cstring(modetxt);
2161  if ( modestr[0] == 'N' )
2162  mode = 0;
2163  }
2164 
2165  /* Do the estimation */
2166  selectivity = estimate_join_selectivity(nd_stats1, nd_stats2);
2167 
2168  pfree(nd_stats1);
2169  pfree(nd_stats2);
2170  PG_RETURN_FLOAT8(selectivity);
2171 }
2172 
2178 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
2179 {
2180  PG_RETURN_DATUM(DirectFunctionCall5(
2182  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2183  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2184  Int32GetDatum(2) /* 2-D mode */
2185  ));
2186 }
2187 
2193 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
2194 {
2195  PG_RETURN_DATUM(DirectFunctionCall5(
2197  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2198  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2199  Int32GetDatum(0) /* N-D mode */
2200  ));
2201 }
2202 
2203 
2218 float8
2219 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
2220 {
2221  VariableStatData vardata;
2222  Node *other = NULL;
2223  bool varonleft;
2224  ND_STATS *nd_stats = NULL;
2225 
2226  GBOX search_box;
2227  float8 selectivity = 0;
2228  Const *otherConst;
2229 
2230  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
2231 
2232  if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft))
2233  {
2234  POSTGIS_DEBUGF(2, "%s: could not find vardata", __func__);
2235  return DEFAULT_ND_SEL;
2236  }
2237 
2238  if (!IsA(other, Const))
2239  {
2240  ReleaseVariableStats(vardata);
2241  POSTGIS_DEBUGF(2, "%s: no constant argument, returning default selectivity %g", __func__, DEFAULT_ND_SEL);
2242  return DEFAULT_ND_SEL;
2243  }
2244 
2245  otherConst = (Const*)other;
2246  if ((!otherConst) || otherConst->constisnull)
2247  {
2248  ReleaseVariableStats(vardata);
2249  POSTGIS_DEBUGF(2, "%s: constant argument is NULL", __func__);
2250  return DEFAULT_ND_SEL;
2251  }
2252 
2253  if (!gserialized_datum_get_gbox_p(otherConst->constvalue, &search_box))
2254  {
2255  ReleaseVariableStats(vardata);
2256  POSTGIS_DEBUGF(2, "%s: search box is EMPTY", __func__);
2257  return 0.0;
2258  }
2259 
2260  if (!vardata.statsTuple)
2261  {
2262  POSTGIS_DEBUGF(1, "%s: no statistics available on table. Empty? Need to ANALYZE?", __func__);
2263  return DEFAULT_ND_SEL;
2264  }
2265 
2266  nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
2267  ReleaseVariableStats(vardata);
2268  selectivity = estimate_selectivity(&search_box, nd_stats, mode);
2269  pfree(nd_stats);
2270  return selectivity;
2271 }
2272 
2274 Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
2275 {
2276  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2277  // Oid operator_oid = PG_GETARG_OID(1);
2278  List *args = (List *) PG_GETARG_POINTER(2);
2279  int varRelid = PG_GETARG_INT32(3);
2280  int mode = PG_GETARG_INT32(4);
2281  float8 selectivity = gserialized_sel_internal(root, args, varRelid, mode);
2282  POSTGIS_DEBUGF(2, "%s: selectivity is %g", __func__, selectivity);
2283  PG_RETURN_FLOAT8(selectivity);
2284 }
2285 
2286 
2287 
2294 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
2295 {
2296  char *nsp = NULL;
2297  char *tbl = NULL;
2298  text *col = NULL;
2299  char *nsp_tbl = NULL;
2300  Oid tbl_oid, idx_oid = 0;
2301  ND_STATS *nd_stats;
2302  GBOX *gbox = NULL;
2303  bool only_parent = false;
2304  int key_type, att_num;
2305  size_t sz;
2306 
2307  /* We need to initialize the internal cache to access it later via postgis_oid() */
2308  postgis_initialize_cache();
2309 
2310  if ( PG_NARGS() == 4 )
2311  {
2312  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2313  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2314  col = PG_GETARG_TEXT_P(2);
2315  only_parent = PG_GETARG_BOOL(3);
2316  sz = strlen(nsp) + strlen(tbl) + 6;
2317  nsp_tbl = palloc(sz);
2318  snprintf(nsp_tbl, sz, "\"%s\".\"%s\"", nsp, tbl);
2319  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2320  pfree(nsp_tbl);
2321  }
2322  else if ( PG_NARGS() == 3 )
2323  {
2324  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2325  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2326  col = PG_GETARG_TEXT_P(2);
2327  sz = strlen(nsp) + strlen(tbl) + 6;
2328  nsp_tbl = palloc(sz);
2329  snprintf(nsp_tbl, sz, "\"%s\".\"%s\"", nsp, tbl);
2330  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2331  pfree(nsp_tbl);
2332  }
2333  else if ( PG_NARGS() == 2 )
2334  {
2335  tbl = text_to_cstring(PG_GETARG_TEXT_P(0));
2336  col = PG_GETARG_TEXT_P(1);
2337  sz = strlen(tbl) + 3;
2338  nsp_tbl = palloc(sz);
2339  snprintf(nsp_tbl, sz, "\"%s\"", tbl);
2340  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2341  pfree(nsp_tbl);
2342  }
2343  else
2344  {
2345  elog(ERROR, "estimated_extent() called with wrong number of arguments");
2346  PG_RETURN_NULL();
2347  }
2348 
2349  /* Read the extent from the head of the spatial index, if there is one */
2350 
2351  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type, &att_num);
2352  if (idx_oid)
2353  {
2354  /* TODO: how about only_parent ? */
2355  gbox = spatial_index_read_extent(idx_oid, key_type, att_num);
2356  POSTGIS_DEBUGF(2, "index for \"%s.%s\" exists, reading gbox from there", tbl, text_to_cstring(col));
2357  if ( ! gbox ) PG_RETURN_NULL();
2358  }
2359  else
2360  {
2361  POSTGIS_DEBUGF(2, "index for \"%s.%s\" does not exist", tbl, text_to_cstring(col));
2362 
2363  /* Fall back to reading the stats, if no index is found */
2364 
2365  /* Estimated extent only returns 2D bounds, so use mode 2 */
2366  nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2, only_parent);
2367 
2368  /* Error out on no stats */
2369  if ( ! nd_stats ) {
2370  elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, text_to_cstring(col));
2371  PG_RETURN_NULL();
2372  }
2373 
2374  /* Construct the box */
2375  gbox = palloc(sizeof(GBOX));
2376  FLAGS_SET_GEODETIC(gbox->flags, 0);
2377  FLAGS_SET_Z(gbox->flags, 0);
2378  FLAGS_SET_M(gbox->flags, 0);
2379  gbox->xmin = nd_stats->extent.min[0];
2380  gbox->xmax = nd_stats->extent.max[0];
2381  gbox->ymin = nd_stats->extent.min[1];
2382  gbox->ymax = nd_stats->extent.max[1];
2383  pfree(nd_stats);
2384  }
2385 
2386  PG_RETURN_POINTER(gbox);
2387 }
2388 
2396 Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
2397 {
2398  if ( PG_NARGS() == 3 )
2399  {
2400  PG_RETURN_DATUM(
2401  DirectFunctionCall3(gserialized_estimated_extent,
2402  PG_GETARG_DATUM(0),
2403  PG_GETARG_DATUM(1),
2404  PG_GETARG_DATUM(2)));
2405  }
2406  else if ( PG_NARGS() == 2 )
2407  {
2408  PG_RETURN_DATUM(
2409  DirectFunctionCall2(gserialized_estimated_extent,
2410  PG_GETARG_DATUM(0),
2411  PG_GETARG_DATUM(1)));
2412  }
2413 
2414  elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
2415  PG_RETURN_NULL();
2416 }
2417 
2418 /************************************************************************/
2419 
2420 static Oid
2421 table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num)
2422 {
2423  Relation tbl_rel;
2424  ListCell *lc;
2425  List *idx_list;
2426  Oid result = InvalidOid;
2427  char *colname = text_to_cstring(col);
2428 
2429  /* Lookup our spatial index key types */
2430  Oid b2d_oid = postgis_oid(BOX2DFOID);
2431  Oid gdx_oid = postgis_oid(BOX3DOID);
2432 
2433  if (!(b2d_oid && gdx_oid))
2434  return InvalidOid;
2435 
2436  tbl_rel = RelationIdGetRelation(tbl_oid);
2437  idx_list = RelationGetIndexList(tbl_rel);
2438  RelationClose(tbl_rel);
2439 
2440  /* For each index associated with this table... */
2441  foreach(lc, idx_list)
2442  {
2443  Form_pg_class idx_form;
2444  HeapTuple idx_tup;
2445  int idx_relam;
2446  Oid idx_oid = lfirst_oid(lc);
2447 
2448  idx_tup = SearchSysCache1(RELOID, ObjectIdGetDatum(idx_oid));
2449  if (!HeapTupleIsValid(idx_tup))
2450  elog(ERROR, "%s: unable to lookup index %u in syscache", __func__, idx_oid);
2451  idx_form = (Form_pg_class) GETSTRUCT(idx_tup);
2452  idx_relam = idx_form->relam;
2453  ReleaseSysCache(idx_tup);
2454 
2455  /* Does the index use a GIST access method? */
2456  if (idx_relam == GIST_AM_OID)
2457  {
2458  Form_pg_attribute att;
2459  Oid atttypid;
2460  int attnum;
2461  /* Is the index on the column name we are looking for? */
2462  HeapTuple att_tup = SearchSysCache2(ATTNAME,
2463  ObjectIdGetDatum(idx_oid),
2464  PointerGetDatum(colname));
2465  if (!HeapTupleIsValid(att_tup))
2466  continue;
2467 
2468  att = (Form_pg_attribute) GETSTRUCT(att_tup);
2469  atttypid = att->atttypid;
2470  attnum = att->attnum;
2471  ReleaseSysCache(att_tup);
2472 
2473  /* Is the column actually spatial? */
2474  if (b2d_oid == atttypid || gdx_oid == atttypid)
2475  {
2476  /* Save result, clean up, and break out */
2477  result = idx_oid;
2478  if (att_num)
2479  *att_num = attnum;
2480  if (key_type)
2481  *key_type = (atttypid == b2d_oid ? STATISTIC_KIND_2D : STATISTIC_KIND_ND);
2482  break;
2483  }
2484  }
2485  }
2486  return result;
2487 }
2488 
2489 static GBOX *
2490 spatial_index_read_extent(Oid idx_oid, int key_type, int att_num)
2491 {
2492  BOX2DF *bounds_2df = NULL;
2493  GIDX *bounds_gidx = NULL;
2494  GBOX *gbox = NULL;
2495  Relation idx_rel;
2496  Buffer buffer;
2497  Page page;
2498  OffsetNumber offset;
2499  unsigned long offset_max;
2500 
2501  if (!idx_oid)
2502  return NULL;
2503 
2504  idx_rel = index_open(idx_oid, AccessShareLock);
2505  buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2506  page = (Page) BufferGetPage(buffer);
2507  offset = FirstOffsetNumber;
2508  offset_max = PageGetMaxOffsetNumber(page);
2509  while (offset <= offset_max)
2510  {
2511  ItemId iid = PageGetItemId(page, offset);
2512  IndexTuple ituple;
2513  if (!iid)
2514  {
2515  ReleaseBuffer(buffer);
2516  index_close(idx_rel, AccessShareLock);
2517  return NULL;
2518  }
2519  ituple = (IndexTuple) PageGetItem(page, iid);
2520  if (!GistTupleIsInvalid(ituple))
2521  {
2522  bool isnull;
2523  Datum idx_attr = index_getattr(ituple, att_num, idx_rel->rd_att, &isnull);
2524  if (!isnull)
2525  {
2526  if (key_type == STATISTIC_KIND_2D)
2527  {
2528  BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2529  if (bounds_2df)
2530  box2df_merge(bounds_2df, b);
2531  else
2532  bounds_2df = box2df_copy(b);
2533  }
2534  else
2535  {
2536  GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2537  if (bounds_gidx)
2538  gidx_merge(&bounds_gidx, b);
2539  else
2540  bounds_gidx = gidx_copy(b);
2541  }
2542  }
2543  }
2544  offset++;
2545  }
2546 
2547  ReleaseBuffer(buffer);
2548  index_close(idx_rel, AccessShareLock);
2549 
2550  if (key_type == STATISTIC_KIND_2D && bounds_2df)
2551  {
2552  if (box2df_is_empty(bounds_2df))
2553  return NULL;
2554  gbox = gbox_new(0);
2555  box2df_to_gbox_p(bounds_2df, gbox);
2556  }
2557  else if (key_type == STATISTIC_KIND_ND && bounds_gidx)
2558  {
2559  if (gidx_is_unknown(bounds_gidx))
2560  return NULL;
2561  gbox = gbox_new(0);
2562  gbox_from_gidx(bounds_gidx, gbox, 0);
2563  }
2564  else
2565  return NULL;
2566 
2567  return gbox;
2568 }
2569 
2570 /*
2571 CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
2572  RETURNS box2d
2573  AS '$libdir/postgis-2.5','_postgis_gserialized_index_extent'
2574  LANGUAGE 'c' STABLE STRICT;
2575 */
2576 
2578 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
2579 {
2580  GBOX *gbox = NULL;
2581  int key_type;
2582  int att_num;
2583  Oid tbl_oid = PG_GETARG_DATUM(0);
2584  text *col = PG_GETARG_TEXT_P(1);
2585  Oid idx_oid;
2586 
2587  if(!tbl_oid)
2588  PG_RETURN_NULL();
2589 
2590  /* We need to initialize the internal cache to access it later via postgis_oid() */
2591  postgis_initialize_cache();
2592 
2593  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type, &att_num);
2594  if (!idx_oid)
2595  PG_RETURN_NULL();
2596 
2597  gbox = spatial_index_read_extent(idx_oid, key_type, att_num);
2598  if (!gbox)
2599  PG_RETURN_NULL();
2600  else
2601  PG_RETURN_POINTER(gbox);
2602 }
2603 
char result[OUT_DOUBLE_BUFFER_SIZE]
Definition: cu_print.c:262
GBOX * gbox_new(lwflags_t flags)
Create a new gbox with the dimensionality indicated by the flags.
Definition: gbox.c:32
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition: gbox.c:197
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
Definition: gbox.c:392
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
#define STATISTIC_SLOT_ND
static char * nd_stats_to_grid(const ND_STATS *stats)
Create a printable view of the ND_STATS histogram.
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
static int range_full(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
#define ND_DIMS
The maximum number of dimensions our code can handle.
#define STATISTIC_KIND_2D
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Expand the bounds of target to include source.
#define DEFAULT_ND_JOINSEL
#define STATISTIC_KIND_ND
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
#define SDFACTOR
#define FALLBACK_ND_JOINSEL
#define MAX_NUM_BINS
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type, int att_num)
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
This function should return an estimation of the number of rows returned by a query involving an over...
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num)
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
#define BIN_MIN_SIZE
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array.
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator?
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension,...
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
bool box2df_is_empty(const BOX2DF *a)
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
int gserialized_datum_get_gbox_p(Datum gsdatum, GBOX *gbox)
Given a GSERIALIZED datum, as quickly as possible (peeking into the top of the memory) return the gbo...
BOX2DF * box2df_copy(BOX2DF *b)
bool gidx_is_unknown(const GIDX *a)
GIDX * gidx_copy(GIDX *b)
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define LW_FAILURE
Definition: liblwgeom.h:96
#define FLAGS_GET_Z(flags)
Definition: liblwgeom.h:165
#define FLAGS_GET_M(flags)
Definition: liblwgeom.h:166
#define FLAGS_SET_GEODETIC(flags, value)
Definition: liblwgeom.h:175
#define FLAGS_SET_M(flags, value)
Definition: liblwgeom.h:173
#define FLAGS_SET_Z(flags, value)
Definition: liblwgeom.h:172
#define FLAGS_GET_GEODETIC(flags)
Definition: liblwgeom.h:168
This library is the generic geometry handling section of PostGIS.
#define str(s)
args
Definition: ovdump.py:45
Datum buffer(PG_FUNCTION_ARGS)
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided.
Definition: stringbuffer.c:247
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
Definition: stringbuffer.c:33
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
Definition: stringbuffer.c:85
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string.
Definition: stringbuffer.c:133
static void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
Definition: stringbuffer.h:105
double ymax
Definition: liblwgeom.h:357
double zmax
Definition: liblwgeom.h:359
double xmax
Definition: liblwgeom.h:355
double zmin
Definition: liblwgeom.h:358
double mmax
Definition: liblwgeom.h:361
double ymin
Definition: liblwgeom.h:356
double xmin
Definition: liblwgeom.h:354
double mmin
Definition: liblwgeom.h:360
lwflags_t flags
Definition: liblwgeom.h:353
AnalyzeAttrComputeStatsFunc std_compute_stats
float4 max[ND_DIMS]
float4 min[ND_DIMS]
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
int max[ND_DIMS]
int min[ND_DIMS]
N-dimensional box index type.
float4 size[ND_DIMS]
N-dimensional statistics structure.