PostGIS  2.5.7dev-r@@SVN_REVISION@@
gserialized_estimate.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * PostGIS is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * PostGIS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18  *
19  **********************************************************************
20  *
21  * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
22  *
23  **********************************************************************/
24 
25 
26 
27 /**********************************************************************
28  THEORY OF OPERATION
29 
30 The ANALYZE command hooks to a callback (gserialized_analyze_nd) that
31 calculates (compute_gserialized_stats_mode) two histograms of occurrences of
32 features, one for the 2D domain (and the && operator) and one for the
33 ND domain (and the &&& operator).
34 
35 Queries in PostgreSQL call into the selectivity sub-system to find out
36 the relative effectiveness of different clauses in sub-setting
37 relations. Queries with constant arguments call gserialized_gist_sel,
38 queries with relations on both sides call gserialized_gist_joinsel.
39 
40 gserialized_gist_sel sums up the values in the histogram that overlap
41 the constant search box.
42 
43 gserialized_gist_joinsel sums up the product of the overlapping
44 cells in each relation's histogram.
45 
46 Depending on the operator and type, the mode of selectivity calculation
47 will be 2D or ND.
48 
49 - geometry && geometry ==> 2D
50 - geometry &&& geometry ==> ND
51 - geography && geography ==> ND
52 
53 The 2D mode is put in effect by retrieving the 2D histogram from the
54 statistics cache and then allowing the generic ND calculations to
55 go to work.
56 
57 TO DO: More testing and examination of the &&& operator and mixed
58 dimensionality cases. (2D geometry) &&& (3D column), etc.
59 
60 **********************************************************************/
61 
62 #include "postgres.h"
63 
64 #include "access/genam.h"
65 #include "access/gin.h"
66 #include "access/gist.h"
67 #include "access/gist_private.h"
68 #include "access/gistscan.h"
69 #include "utils/datum.h"
70 #include "access/heapam.h"
71 #include "catalog/index.h"
72 #include "catalog/pg_am.h"
73 #include "miscadmin.h"
74 #include "storage/lmgr.h"
75 #include "catalog/namespace.h"
76 #include "catalog/indexing.h"
77 #if PG_VERSION_NUM >= 100000
78 #include "utils/regproc.h"
79 #include "utils/varlena.h"
80 #endif
81 #include "utils/builtins.h"
82 #include "utils/datum.h"
83 #include "utils/snapmgr.h"
84 #include "utils/fmgroids.h"
85 #include "funcapi.h"
86 #include "access/heapam.h"
87 #include "catalog/pg_type.h"
88 #include "access/relscan.h"
89 
90 #include "executor/spi.h"
91 #include "fmgr.h"
92 #include "commands/vacuum.h"
93 #if PG_VERSION_NUM < 120000
94 #include "nodes/relation.h"
95 #else
96 #include "nodes/pathnodes.h"
97 #endif
98 #include "parser/parsetree.h"
99 #include "utils/array.h"
100 #include "utils/lsyscache.h"
101 #include "utils/builtins.h"
102 #include "utils/syscache.h"
103 #include "utils/rel.h"
104 #include "utils/selfuncs.h"
105 
106 #include "../postgis_config.h"
107 
108 #if POSTGIS_PGSQL_VERSION >= 93
109  #include "access/htup_details.h"
110 #endif
111 
112 #include "stringbuffer.h"
113 #include "liblwgeom.h"
114 #include "lwgeom_pg.h" /* For debugging macros. */
115 #include "gserialized_gist.h" /* For index common functions */
116 
117 #include <math.h>
118 #if HAVE_IEEEFP_H
119 #include <ieeefp.h>
120 #endif
121 #include <float.h>
122 #include <string.h>
123 #include <stdio.h>
124 #include <errno.h>
125 #include <ctype.h>
126 
127 
128 /************************************************************************/
129 
130 
131 /* Fall back to older finite() if necessary */
132 #ifndef HAVE_ISFINITE
133 # ifdef HAVE_GNU_ISFINITE
134 # define _GNU_SOURCE
135 # else
136 # define isfinite finite
137 # endif
138 #endif
139 
140 
141 /* Prototypes */
142 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS);
143 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS);
144 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS);
145 Datum gserialized_gist_sel(PG_FUNCTION_ARGS);
146 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS);
147 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
148 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
149 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
150 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS);
151 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
152 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
153 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
154 
155 /* Local prototypes */
156 static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type);
157 static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type);
158 
159 
160 /* Old Prototype */
161 Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
162 
163 /*
164 * Assign a number to the n-dimensional statistics kind
165 *
166 * tgl suggested:
167 *
168 * 1-100: reserved for assignment by the core Postgres project
169 * 100-199: reserved for assignment by PostGIS
170 * 200-9999: reserved for other globally-known stats kinds
171 * 10000-32767: reserved for private site-local use
172 */
173 #define STATISTIC_KIND_ND 102
174 #define STATISTIC_KIND_2D 103
175 #define STATISTIC_SLOT_ND 0
176 #define STATISTIC_SLOT_2D 1
177 
178 /*
179 * The SD factor restricts the side of the statistics histogram
180 * based on the standard deviation of the extent of the data.
181 * SDFACTOR is the number of standard deviations from the mean
182 * the histogram will extend.
183 */
184 #define SDFACTOR 3.25
185 
/*
* Length of the per-dimension arrays (min/max/size) carried by the
* ND_BOX, ND_IBOX and ND_STATS structures in this file.
*/
191 #define ND_DIMS 4
192 
/*
* Dimension widths smaller than this are treated as degenerate:
* nd_box_expand() leaves them alone, nd_box_array_distribution()
* reports a distribution of 0 for them, and nd_box_overlap() does
* not divide by them.
*/
199 #define MIN_DIMENSION_WIDTH 0.000000001
200 
/*
* Dimension widths larger than this are treated as too wide to
* expand (nd_box_expand) or to bin (nd_box_array_distribution).
*/
205 #define MAX_DIMENSION_WIDTH 1.0E+20
206 
/*
* Default selectivities. DEFAULT_ND_JOINSEL is returned by the join
* estimator when the join type or arguments are unsupported, when
* statistics are unavailable, or when the computed value is not a
* usable number.
* NOTE(review): DEFAULT_ND_SEL is presumably the equivalent for the
* restriction estimator -- its use is not visible in this part of the file.
*/
210 #define DEFAULT_ND_SEL 0.0001
211 #define DEFAULT_ND_JOINSEL 0.001
212 
/*
* Fallback selectivities returned by estimate_join_selectivity when
* it is handed null statistics (FALLBACK_ND_SEL) or cannot calculate
* the overlap of the two histograms (FALLBACK_ND_JOINSEL).
*/
216 #define FALLBACK_ND_SEL 0.2
217 #define FALLBACK_ND_JOINSEL 0.3
218 
224 typedef struct ND_BOX_T
225 {
226  float4 min[ND_DIMS];
227  float4 max[ND_DIMS];
229 
233 typedef struct ND_IBOX_T
234 {
235  int min[ND_DIMS];
236  int max[ND_DIMS];
238 
239 
246 typedef struct ND_STATS_T
247 {
248  /* Dimensionality of the histogram. */
249  float4 ndims;
250 
251  /* Size of n-d histogram in each dimension. */
252  float4 size[ND_DIMS];
253 
254  /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
256 
257  /* How many rows in the table itself? */
259 
260  /* How many rows were in the sample that built this histogram? */
262 
263  /* How many not-Null/Empty features were in the sample? */
265 
266  /* How many features actually got sampled in the histogram? */
268 
269  /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
271 
272  /* How many cells did those histogram features cover? */
273  /* Since we are pro-rating coverage, this number should */
274  /* now always equal histogram_features */
276 
277  /* Variable length # of floats for histogram */
278  float4 value[1];
280 
281 
282 
283 
290 static int
291 gbox_ndims(const GBOX* gbox)
292 {
293  int dims = 2;
294  if ( FLAGS_GET_GEODETIC(gbox->flags) )
295  return 3;
296  if ( FLAGS_GET_Z(gbox->flags) )
297  dims++;
298  if ( FLAGS_GET_M(gbox->flags) )
299  dims++;
300  return dims;
301 }
302 
308 static int
309 text_p_get_mode(const text *txt)
310 {
311  int mode = 2;
312  char *modestr;
313  if (VARSIZE(txt) - VARHDRSZ <= 0)
314  return mode;
315  modestr = (char*)VARDATA(txt);
316  if ( modestr[0] == 'N' )
317  mode = 0;
318  return mode;
319 }
320 
321 
/**
* Integer comparison callback for qsort().
* Returns 0 when the two pointed-to ints are equal, 1 when the
* first is larger and -1 when the first is smaller.
*/
static int
cmp_int (const void *a, const void *b)
{
	const int lhs = *(const int *)a;
	const int rhs = *(const int *)b;

	/* (greater) - (less) yields exactly 1, 0 or -1 without overflow */
	return (lhs > rhs) - (lhs < rhs);
}
338 
343 static int
344 range_quintile(int *vals, int nvals)
345 {
346  qsort(vals, nvals, sizeof(int), cmp_int);
347  return vals[4*nvals/5] - vals[nvals/5];
348 }
349 
/**
* Sum the first nvals entries of an array of doubles.
* Returns 0 when nvals is zero or negative.
*/
static double
total_double(const double *vals, int nvals)
{
	int i;
	/*
	* Accumulate in double: a float accumulator drops low-order
	* digits of the running total even though both the inputs and
	* the return value are doubles.
	*/
	double total = 0.0;

	for ( i = 0; i < nvals; i++ )
		total += vals[i];

	return total;
}
364 
365 #if POSTGIS_DEBUG_LEVEL >= 3
366 
/**
* Sum the first nvals entries of an array of ints.
* Returns 0 when nvals is zero or negative.
*/
static int
total_int(const int *vals, int nvals)
{
	int sum = 0;
	int pos = 0;

	while ( pos < nvals )
	{
		sum += vals[pos];
		pos++;
	}

	return sum;
}
381 
/**
* Arithmetic mean of the first nvals entries of an array of ints:
* the integer total converted to double, divided by nvals.
*/
static double
avg(const int *vals, int nvals)
{
	const double sum = (double)total_int(vals, nvals);
	const double count = (double)nvals;

	return sum / count;
}
391 
/**
* Population standard deviation of the first nvals entries of an
* array of ints: the square root of the mean squared distance
* from the average.
*/
static double
stddev(const int *vals, int nvals)
{
	const double mean = avg(vals, nvals);
	double sum_sq = 0;
	int n = 0;

	/* Accumulate the squared distance of every value from the mean */
	while ( n < nvals )
	{
		const double delta = mean - (double)(vals[n]);
		sum_sq += delta * delta;
		n++;
	}

	return sqrt(sum_sq / nvals);
}
410 #endif /* POSTGIS_DEBUG_LEVEL >= 3 */
411 
416 static int
417 nd_stats_value_index(const ND_STATS *stats, int *indexes)
418 {
419  int d;
420  int accum = 1, vdx = 0;
421 
422  /* Calculate the index into the 1-d values array that the (i,j,k,l) */
423  /* n-d histogram coordinate implies. */
424  /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
425  for ( d = 0; d < (int)(stats->ndims); d++ )
426  {
427  int size = (int)(stats->size[d]);
428  if ( indexes[d] < 0 || indexes[d] >= size )
429  {
430  POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
431  return -1;
432  }
433  vdx += indexes[d] * accum;
434  accum *= size;
435  }
436  return vdx;
437 }
438 
442 static char*
443 nd_box_to_json(const ND_BOX *nd_box, int ndims)
444 {
445  char *rv;
446  int i;
448 
449  stringbuffer_append(sb, "{\"min\":[");
450  for ( i = 0; i < ndims; i++ )
451  {
452  if ( i ) stringbuffer_append(sb, ",");
453  stringbuffer_aprintf(sb, "%.6g", nd_box->min[i]);
454  }
455  stringbuffer_append(sb, "],\"max\":[");
456  for ( i = 0; i < ndims; i++ )
457  {
458  if ( i ) stringbuffer_append(sb, ",");
459  stringbuffer_aprintf(sb, "%.6g", nd_box->max[i]);
460  }
461  stringbuffer_append(sb, "]}");
462 
465  return rv;
466 }
467 
468 
473 static char*
474 nd_stats_to_json(const ND_STATS *nd_stats)
475 {
476  char *json_extent, *str;
477  int d;
479  int ndims = (int)roundf(nd_stats->ndims);
480 
481  stringbuffer_append(sb, "{");
482  stringbuffer_aprintf(sb, "\"ndims\":%d,", ndims);
483 
484  /* Size */
485  stringbuffer_append(sb, "\"size\":[");
486  for ( d = 0; d < ndims; d++ )
487  {
488  if ( d ) stringbuffer_append(sb, ",");
489  stringbuffer_aprintf(sb, "%d", (int)roundf(nd_stats->size[d]));
490  }
491  stringbuffer_append(sb, "],");
492 
493  /* Extent */
494  json_extent = nd_box_to_json(&(nd_stats->extent), ndims);
495  stringbuffer_aprintf(sb, "\"extent\":%s,", json_extent);
496  pfree(json_extent);
497 
498  stringbuffer_aprintf(sb, "\"table_features\":%d,", (int)roundf(nd_stats->table_features));
499  stringbuffer_aprintf(sb, "\"sample_features\":%d,", (int)roundf(nd_stats->sample_features));
500  stringbuffer_aprintf(sb, "\"not_null_features\":%d,", (int)roundf(nd_stats->not_null_features));
501  stringbuffer_aprintf(sb, "\"histogram_features\":%d,", (int)roundf(nd_stats->histogram_features));
502  stringbuffer_aprintf(sb, "\"histogram_cells\":%d,", (int)roundf(nd_stats->histogram_cells));
503  stringbuffer_aprintf(sb, "\"cells_covered\":%d", (int)roundf(nd_stats->cells_covered));
504  stringbuffer_append(sb, "}");
505 
506  str = stringbuffer_getstringcopy(sb);
508  return str;
509 }
510 
511 
517 // static char*
518 // nd_stats_to_grid(const ND_STATS *stats)
519 // {
520 // char *rv;
521 // int j, k;
522 // int sizex = (int)roundf(stats->size[0]);
523 // int sizey = (int)roundf(stats->size[1]);
524 // stringbuffer_t *sb = stringbuffer_create();
525 //
526 // for ( k = 0; k < sizey; k++ )
527 // {
528 // for ( j = 0; j < sizex; j++ )
529 // {
530 // stringbuffer_aprintf(sb, "%3d ", (int)roundf(stats->value[j + k*sizex]));
531 // }
532 // stringbuffer_append(sb, "\n");
533 // }
534 //
535 // rv = stringbuffer_getstringcopy(sb);
536 // stringbuffer_destroy(sb);
537 // return rv;
538 // }
539 
540 
542 static int
543 nd_box_merge(const ND_BOX *source, ND_BOX *target)
544 {
545  int d;
546  for ( d = 0; d < ND_DIMS; d++ )
547  {
548  target->min[d] = Min(target->min[d], source->min[d]);
549  target->max[d] = Max(target->max[d], source->max[d]);
550  }
551  return true;
552 }
553 
555 static int
557 {
558  memset(a, 0, sizeof(ND_BOX));
559  return true;
560 }
561 
567 static int
569 {
570  int d;
571  for ( d = 0; d < ND_DIMS; d++ )
572  {
573  a->min[d] = FLT_MAX;
574  a->max[d] = -1 * FLT_MAX;
575  }
576  return true;
577 }
578 
580 static void
581 nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
582 {
583  int d = 0;
584  POSTGIS_DEBUGF(3, " %s", gbox_to_string(gbox));
585 
586  nd_box_init(nd_box);
587  nd_box->min[d] = gbox->xmin;
588  nd_box->max[d] = gbox->xmax;
589  d++;
590  nd_box->min[d] = gbox->ymin;
591  nd_box->max[d] = gbox->ymax;
592  d++;
593  if ( FLAGS_GET_GEODETIC(gbox->flags) )
594  {
595  nd_box->min[d] = gbox->zmin;
596  nd_box->max[d] = gbox->zmax;
597  return;
598  }
599  if ( FLAGS_GET_Z(gbox->flags) )
600  {
601  nd_box->min[d] = gbox->zmin;
602  nd_box->max[d] = gbox->zmax;
603  d++;
604  }
605  if ( FLAGS_GET_M(gbox->flags) )
606  {
607  nd_box->min[d] = gbox->mmin;
608  nd_box->max[d] = gbox->mmax;
609  d++;
610  }
611  return;
612 }
613 
617 static int
618 nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
619 {
620  int d;
621  for ( d = 0; d < ndims; d++ )
622  {
623  if ( (a->min[d] > b->max[d]) || (a->max[d] < b->min[d]) )
624  return false;
625  }
626  return true;
627 }
628 
632 static int
633 nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
634 {
635  int d;
636  for ( d = 0; d < ndims; d++ )
637  {
638  if ( ! ((a->min[d] < b->min[d]) && (a->max[d] > b->max[d])) )
639  return false;
640  }
641  return true;
642 }
643 
648 static int
649 nd_box_expand(ND_BOX *nd_box, double expansion_factor)
650 {
651  int d;
652  double size;
653  for ( d = 0; d < ND_DIMS; d++ )
654  {
655  size = nd_box->max[d] - nd_box->min[d];
656  /* Avoid expanding boxes that are either too wide or too narrow*/
657  if (size < MIN_DIMENSION_WIDTH || size > MAX_DIMENSION_WIDTH)
658  continue;
659  nd_box->min[d] -= size * expansion_factor / 2;
660  nd_box->max[d] += size * expansion_factor / 2;
661  }
662  return true;
663 }
664 
669 static inline int
670 nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
671 {
672  int d;
673 
674  POSTGIS_DEBUGF(4, " nd_box: %s", nd_box_to_json(nd_box, nd_stats->ndims));
675 
676  /* Initialize ibox */
677  memset(nd_ibox, 0, sizeof(ND_IBOX));
678 
679  /* In each dimension... */
680  for ( d = 0; d < nd_stats->ndims; d++ )
681  {
682  double smin = nd_stats->extent.min[d];
683  double smax = nd_stats->extent.max[d];
684  double width = smax - smin;
685 
686  if (width < MIN_DIMENSION_WIDTH)
687  {
688  nd_ibox->min[d] = nd_ibox->max[d] = nd_stats->extent.min[d];
689  }
690  else
691  {
692  int size = (int)roundf(nd_stats->size[d]);
693 
694  /* ... find cells the box overlaps with in this dimension */
695  nd_ibox->min[d] = floor(size * (nd_box->min[d] - smin) / width);
696  nd_ibox->max[d] = floor(size * (nd_box->max[d] - smin) / width);
697 
698  POSTGIS_DEBUGF(5, " stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
699  POSTGIS_DEBUGF(5, " overlap: dim %d: (%d, %d)", d, nd_ibox->min[d], nd_ibox->max[d]);
700 
701  /* Push any out-of range values into range */
702  nd_ibox->min[d] = Max(nd_ibox->min[d], 0);
703  nd_ibox->max[d] = Min(nd_ibox->max[d], size - 1);
704  }
705  }
706  return true;
707 }
708 
712 static inline double
713 nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
714 {
715  int d;
716  bool covered = true;
717  double ivol = 1.0;
718  double vol2 = 1.0;
719  double vol1 = 1.0;
720 
721  for ( d = 0 ; d < ndims; d++ )
722  {
723  if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
724  return 0.0; /* Disjoint */
725 
726  if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
727  covered = false;
728  }
729 
730  if ( covered )
731  return 1.0;
732 
733  for ( d = 0; d < ndims; d++ )
734  {
735  double width1 = b1->max[d] - b1->min[d];
736  double width2 = b2->max[d] - b2->min[d];
737  double imin, imax, iwidth;
738 
739  vol1 *= width1;
740  vol2 *= width2;
741 
742  imin = Max(b1->min[d], b2->min[d]);
743  imax = Min(b1->max[d], b2->max[d]);
744  iwidth = imax - imin;
745  iwidth = Max(0.0, iwidth);
746 
747  ivol *= iwidth;
748  }
749 
750  if ( vol2 == 0.0 )
751  return vol2;
752 
753  return ivol / vol2;
754 }
755 
756 /* How many bins shall we use in figuring out the distribution? */
757 #define NUM_BINS 50
758 
774 static int
775 nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
776 {
777  int d, i, k, range;
778  int counts[NUM_BINS];
779  double smin, smax; /* Spatial min, spatial max */
780  double swidth; /* Spatial width of dimension */
781 #if POSTGIS_DEBUG_LEVEL >= 3
782  double average, sdev, sdev_ratio;
783 #endif
784  int bmin, bmax; /* Bin min, bin max */
785  const ND_BOX *ndb;
786 
787  /* For each dimension... */
788  for ( d = 0; d < ndims; d++ )
789  {
790  /* Initialize counts for this dimension */
791  memset(counts, 0, sizeof(counts));
792 
793  smin = extent->min[d];
794  smax = extent->max[d];
795  swidth = smax - smin;
796 
797  /* Don't try and calculate distribution of overly narrow */
798  /* or overly wide dimensions. Here we're being pretty geographical, */
799  /* expecting "normal" planar or geographic coordinates. */
800  /* Otherwise we have to "handle" +/- Inf bounded features and */
801  /* the assumptions needed for that are as bad as this hack. */
802  if ( swidth < MIN_DIMENSION_WIDTH || swidth > MAX_DIMENSION_WIDTH )
803  {
804  distribution[d] = 0;
805  continue;
806  }
807 
808  /* Sum up the overlaps of each feature with the dimensional bins */
809  for ( i = 0; i < num_boxes; i++ )
810  {
811  double minoffset, maxoffset;
812 
813  /* Skip null entries */
814  ndb = nd_boxes[i];
815  if ( ! ndb ) continue;
816 
817  /* Where does box fall relative to the working range */
818  minoffset = ndb->min[d] - smin;
819  maxoffset = ndb->max[d] - smin;
820 
821  /* Skip boxes that are outside our working range */
822  if ( minoffset < 0 || minoffset > swidth ||
823  maxoffset < 0 || maxoffset > swidth )
824  {
825  continue;
826  }
827 
828  /* What bins does this range correspond to? */
829  bmin = floor(NUM_BINS * minoffset / swidth);
830  bmax = floor(NUM_BINS * maxoffset / swidth);
831 
832  /* Should only happen when maxoffset==swidth */
833  bmax = bmax >= NUM_BINS ? NUM_BINS-1 : bmax;
834 
835  POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
836 
837  /* Increment the counts in all the bins this feature overlaps */
838  for ( k = bmin; k <= bmax; k++ )
839  {
840  counts[k] += 1;
841  }
842 
843  }
844 
845  /* How dispersed is the distribution of features across bins? */
846  range = range_quintile(counts, NUM_BINS);
847 
848 #if POSTGIS_DEBUG_LEVEL >= 3
849  average = avg(counts, NUM_BINS);
850  sdev = stddev(counts, NUM_BINS);
851  sdev_ratio = sdev/average;
852 
853  POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
854  POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
855  POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
856  POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
857 #endif
858 
859  distribution[d] = range;
860  }
861 
862  return true;
863 }
864 
870 static inline int
871 nd_increment(ND_IBOX *ibox, int ndims, int *counter)
872 {
873  int d = 0;
874 
875  while ( d < ndims )
876  {
877  if ( counter[d] < ibox->max[d] )
878  {
879  counter[d] += 1;
880  break;
881  }
882  counter[d] = ibox->min[d];
883  d++;
884  }
885  /* That's it, cannot increment any more! */
886  if ( d == ndims )
887  return false;
888 
889  /* Increment complete! */
890  return true;
891 }
892 
893 static ND_STATS*
894 pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
895 {
896  int stats_kind = STATISTIC_KIND_ND;
897  int rv;
898  ND_STATS *nd_stats;
899 
900  /* If we're in 2D mode, set the kind appropriately */
901  if ( mode == 2 ) stats_kind = STATISTIC_KIND_2D;
902 
903  /* Then read the geom status histogram from that */
904 
905 #if POSTGIS_PGSQL_VERSION < 100
906  {
907  float4 *floatptr;
908  int nvalues;
909 
910  rv = get_attstatsslot(stats_tuple, 0, 0, stats_kind, InvalidOid,
911  NULL, NULL, NULL, &floatptr, &nvalues);
912 
913  if ( ! rv ) {
914  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
915  return NULL;
916  }
917 
918  /* Clone the stats here so we can release the attstatsslot immediately */
919  nd_stats = palloc(sizeof(float) * nvalues);
920  memcpy(nd_stats, floatptr, sizeof(float) * nvalues);
921 
922  /* Clean up */
923  free_attstatsslot(0, NULL, 0, floatptr, nvalues);
924  }
925 #else /* PostgreSQL 10 or higher */
926  {
927  AttStatsSlot sslot;
928  rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
929  ATTSTATSSLOT_NUMBERS);
930  if ( ! rv ) {
931  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
932  return NULL;
933  }
934 
935  /* Clone the stats here so we can release the attstatsslot immediately */
936  nd_stats = palloc(sizeof(float4) * sslot.nnumbers);
937  memcpy(nd_stats, sslot.numbers, sizeof(float4) * sslot.nnumbers);
938 
939  free_attstatsslot(&sslot);
940  }
941 #endif
942 
943  return nd_stats;
944 }
945 
950 static ND_STATS*
951 pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
952 {
953  HeapTuple stats_tuple = NULL;
954  ND_STATS *nd_stats;
955 
956  /* First pull the stats tuple for the whole tree */
957  if ( ! only_parent )
958  {
959  POSTGIS_DEBUGF(2, "searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
960  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(true));
961  if ( stats_tuple )
962  POSTGIS_DEBUGF(2, "found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
963  }
964  /* Fall-back to main table stats only, if not found for whole tree or explicitly ignored */
965  if ( only_parent || ! stats_tuple )
966  {
967  POSTGIS_DEBUGF(2, "searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
968  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(false));
969  if ( stats_tuple )
970  POSTGIS_DEBUGF(2, "found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
971  }
972  if ( ! stats_tuple )
973  {
974  POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
975  return NULL;
976  }
977 
978  nd_stats = pg_nd_stats_from_tuple(stats_tuple, mode);
979  ReleaseSysCache(stats_tuple);
980  if ( ! nd_stats )
981  {
982  POSTGIS_DEBUGF(2,
983  "histogram for attribute %d of table \"%s\" does not exist?",
984  att_num, get_rel_name(table_oid));
985  }
986 
987  return nd_stats;
988 }
989 
998 static ND_STATS*
999 pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
1000 {
1001  const char *att_name = text_to_cstring(att_text);
1002  AttrNumber att_num;
1003 
1004  /* We know the name? Look up the num */
1005  if ( att_text )
1006  {
1007  /* Get the attribute number */
1008  att_num = get_attnum(table_oid, att_name);
1009  if ( ! att_num ) {
1010  elog(ERROR, "attribute \"%s\" does not exist", att_name);
1011  return NULL;
1012  }
1013  }
1014  else
1015  {
1016  elog(ERROR, "attribute name is null");
1017  return NULL;
1018  }
1019 
1020  return pg_get_nd_stats(table_oid, att_num, mode, only_parent);
1021 }
1022 
1036 static float8
1038 {
1039  int ncells1, ncells2;
1040  int ndims1, ndims2, ndims;
1041  double ntuples_max;
1042  double ntuples_not_null1, ntuples_not_null2;
1043 
1044  ND_BOX extent1, extent2;
1045  ND_IBOX ibox1, ibox2;
1046  int at1[ND_DIMS];
1047  int at2[ND_DIMS];
1048  double min1[ND_DIMS];
1049  double width1[ND_DIMS];
1050  double cellsize1[ND_DIMS];
1051  int size2[ND_DIMS];
1052  double min2[ND_DIMS];
1053  double width2[ND_DIMS];
1054  double cellsize2[ND_DIMS];
1055  int size1[ND_DIMS];
1056  int d;
1057  double val = 0;
1058  float8 selectivity;
1059 
1060  /* Drop out on null inputs */
1061  if ( ! ( s1 && s2 ) )
1062  {
1063  elog(NOTICE, " estimate_join_selectivity called with null inputs");
1064  return FALLBACK_ND_SEL;
1065  }
1066 
1067  /* We need to know how many cells each side has... */
1068  ncells1 = (int)roundf(s1->histogram_cells);
1069  ncells2 = (int)roundf(s2->histogram_cells);
1070 
1071  /* ...so that we can drive the summation loop with the smaller histogram. */
1072  if ( ncells1 > ncells2 )
1073  {
1074  const ND_STATS *stats_tmp = s1;
1075  s1 = s2;
1076  s2 = stats_tmp;
1077  }
1078 
1079  POSTGIS_DEBUGF(3, "s1: %s", nd_stats_to_json(s1));
1080  POSTGIS_DEBUGF(3, "s2: %s", nd_stats_to_json(s2));
1081 
1082  /* Re-read that info after the swap */
1083  ncells1 = (int)roundf(s1->histogram_cells);
1084  ncells2 = (int)roundf(s2->histogram_cells);
1085 
1086  /* Q: What's the largest possible join size these relations can create? */
1087  /* A: The product of the # of non-null rows in each relation. */
1088  ntuples_not_null1 = s1->table_features * (s1->not_null_features / s1->sample_features);
1089  ntuples_not_null2 = s2->table_features * (s2->not_null_features / s2->sample_features);
1090  ntuples_max = ntuples_not_null1 * ntuples_not_null2;
1091 
1092  /* Get the ndims as ints */
1093  ndims1 = (int)roundf(s1->ndims);
1094  ndims2 = (int)roundf(s2->ndims);
1095  ndims = Max(ndims1, ndims2);
1096 
1097  /* Get the extents */
1098  extent1 = s1->extent;
1099  extent2 = s2->extent;
1100 
1101  /* If relation stats do not intersect, join is very very selective. */
1102  if ( ! nd_box_intersects(&extent1, &extent2, ndims) )
1103  {
1104  POSTGIS_DEBUG(3, "relation stats do not intersect, returning 0");
1105  PG_RETURN_FLOAT8(0.0);
1106  }
1107 
1108  /*
1109  * First find the index range of the part of the smaller
1110  * histogram that overlaps the larger one.
1111  */
1112  if ( ! nd_box_overlap(s1, &extent2, &ibox1) )
1113  {
1114  POSTGIS_DEBUG(3, "could not calculate overlap of relations");
1115  PG_RETURN_FLOAT8(FALLBACK_ND_JOINSEL);
1116  }
1117 
1118  /* Initialize counters / constants on s1 */
1119  for ( d = 0; d < ndims1; d++ )
1120  {
1121  at1[d] = ibox1.min[d];
1122  min1[d] = s1->extent.min[d];
1123  width1[d] = s1->extent.max[d] - s1->extent.min[d];
1124  size1[d] = (int)roundf(s1->size[d]);
1125  cellsize1[d] = width1[d] / size1[d];
1126  }
1127 
1128  /* Initialize counters / constants on s2 */
1129  for ( d = 0; d < ndims2; d++ )
1130  {
1131  min2[d] = s2->extent.min[d];
1132  width2[d] = s2->extent.max[d] - s2->extent.min[d];
1133  size2[d] = (int)roundf(s2->size[d]);
1134  cellsize2[d] = width2[d] / size2[d];
1135  }
1136 
1137  /* For each affected cell of s1... */
1138  do
1139  {
1140  double val1;
1141  /* Construct the bounds of this cell */
1142  ND_BOX nd_cell1;
1143  nd_box_init(&nd_cell1);
1144  for ( d = 0; d < ndims1; d++ )
1145  {
1146  nd_cell1.min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1147  nd_cell1.max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1148  }
1149 
1150  /* Find the cells of s2 that cell1 overlaps.. */
1151  nd_box_overlap(s2, &nd_cell1, &ibox2);
1152 
1153  /* Initialize counter */
1154  for ( d = 0; d < ndims2; d++ )
1155  {
1156  at2[d] = ibox2.min[d];
1157  }
1158 
1159  POSTGIS_DEBUGF(3, "at1 %d,%d %s", at1[0], at1[1], nd_box_to_json(&nd_cell1, ndims1));
1160 
1161  /* Get the value at this cell */
1162  val1 = s1->value[nd_stats_value_index(s1, at1)];
1163 
1164  /* For each overlapped cell of s2... */
1165  do
1166  {
1167  double ratio2;
1168  double val2;
1169 
1170  /* Construct the bounds of this cell */
1171  ND_BOX nd_cell2;
1172  nd_box_init(&nd_cell2);
1173  for ( d = 0; d < ndims2; d++ )
1174  {
1175  nd_cell2.min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1176  nd_cell2.max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1177  }
1178 
1179  POSTGIS_DEBUGF(3, " at2 %d,%d %s", at2[0], at2[1], nd_box_to_json(&nd_cell2, ndims2));
1180 
1181  /* Calculate overlap ratio of the cells */
1182  ratio2 = nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1183 
1184  /* Multiply the cell counts, scaled by overlap ratio */
1185  val2 = s2->value[nd_stats_value_index(s2, at2)];
1186  POSTGIS_DEBUGF(3, " val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1187  val += val1 * (val2 * ratio2);
1188  }
1189  while ( nd_increment(&ibox2, ndims2, at2) );
1190 
1191  }
1192  while( nd_increment(&ibox1, ndims1, at1) );
1193 
1194  POSTGIS_DEBUGF(3, "val of histogram = %g", val);
1195 
1196  /*
1197  * In order to compare our total cell count "val" to the
1198  * ntuples_max, we need to scale val up to reflect a full
1199  * table estimate. So, multiply by ratio of table size to
1200  * sample size.
1201  */
1202  val *= (s1->table_features / s1->sample_features);
1203  val *= (s2->table_features / s2->sample_features);
1204 
1205  POSTGIS_DEBUGF(3, "val scaled to full table size = %g", val);
1206 
1207  /*
1208  * Because the cell counts are over-determined due to
1209  * double counting of features that overlap multiple cells
1210  * (see the compute_gserialized_stats routine)
1211  * we also have to scale our cell count "val" *down*
1212  * to adjust for the double counting.
1213  */
1214 // val /= (s1->cells_covered / s1->histogram_features);
1215 // val /= (s2->cells_covered / s2->histogram_features);
1216 
1217  /*
1218  * Finally, the selectivity is the estimated number of
1219  * rows to be returned divided by the maximum possible
1220  * number of rows that can be returned.
1221  */
1222  selectivity = val / ntuples_max;
1223 
1224  /* Guard against over-estimates and crazy numbers :) */
1225  if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1226  {
1227  selectivity = DEFAULT_ND_JOINSEL;
1228  }
1229  else if ( selectivity > 1.0 )
1230  {
1231  selectivity = 1.0;
1232  }
1233 
1234  return selectivity;
1235 }
1236 
1242 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
1243 {
1244  PG_RETURN_DATUM(DirectFunctionCall5(
1246  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1247  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1248  Int32GetDatum(0) /* ND mode */
1249  ));
1250 }
1251 
1257 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
1258 {
1259  PG_RETURN_DATUM(DirectFunctionCall5(
1261  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1262  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1263  Int32GetDatum(2) /* 2D mode */
1264  ));
1265 }
1266 
1276 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
1277 {
1278  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1279  /* Oid operator = PG_GETARG_OID(1); */
1280  List *args = (List *) PG_GETARG_POINTER(2);
1281  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1282  int mode = PG_GETARG_INT32(4);
1283 
1284  Node *arg1, *arg2;
1285  Var *var1, *var2;
1286  Oid relid1, relid2;
1287 
1288  ND_STATS *stats1, *stats2;
1289  float8 selectivity;
1290 
1291  /* Only respond to an inner join/unknown context join */
1292  if (jointype != JOIN_INNER)
1293  {
1294  elog(DEBUG1, "%s: jointype %d not supported", __func__, jointype);
1295  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1296  }
1297 
1298  /* Find Oids of the geometry columns we are working with */
1299  arg1 = (Node*) linitial(args);
1300  arg2 = (Node*) lsecond(args);
1301  var1 = (Var*) arg1;
1302  var2 = (Var*) arg2;
1303 
1304  /* We only do column joins right now, no functional joins */
1305  /* TODO: handle g1 && ST_Expand(g2) */
1306  if (!IsA(arg1, Var) || !IsA(arg2, Var))
1307  {
1308  elog(DEBUG1, "%s called with arguments that are not column references", __func__);
1309  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1310  }
1311 
1312  /* What are the Oids of our tables/relations? */
1313  relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
1314  relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
1315 
1316  POSTGIS_DEBUGF(3, "using relations \"%s\" Oid(%d), \"%s\" Oid(%d)",
1317  get_rel_name(relid1) ? get_rel_name(relid1) : "NULL", relid1, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2);
1318 
1319  /* Pull the stats from the stats system. */
1320  stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
1321  stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
1322 
1323  /* If we can't get stats, we have to stop here! */
1324  if ( ! stats1 )
1325  {
1326  POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid1) ? get_rel_name(relid1) : "NULL" , relid1);
1327  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1328  }
1329  else if ( ! stats2 )
1330  {
1331  POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2);
1332  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1333  }
1334 
1335  selectivity = estimate_join_selectivity(stats1, stats2);
1336  POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
1337 
1338  pfree(stats1);
1339  pfree(stats2);
1340  PG_RETURN_FLOAT8(selectivity);
1341 }
1342 
1343 
1344 
1345 
1364 static void
1365 compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1366  int sample_rows, double total_rows, int mode)
1367 {
1368  MemoryContext old_context;
1369  int d, i; /* Counters */
1370  int notnull_cnt = 0; /* # not null rows in the sample */
1371  int null_cnt = 0; /* # null rows in the sample */
1372  int histogram_features = 0; /* # rows that actually got counted in the histogram */
1373 
1374  ND_STATS *nd_stats; /* Our histogram */
1375  size_t nd_stats_size; /* Size to allocate */
1376 
1377  double total_width = 0; /* # of bytes used by sample */
1378  double total_sample_volume = 0; /* Area/volume coverage of the sample */
1379  double total_cell_count = 0; /* # of cells in histogram affected by sample */
1380 
1381  ND_BOX sum; /* Sum of extents of sample boxes */
1382  ND_BOX avg; /* Avg of extents of sample boxes */
1383  ND_BOX stddev; /* StdDev of extents of sample boxes */
1384 
1385  const ND_BOX **sample_boxes; /* ND_BOXes for each of the sample features */
1386  ND_BOX sample_extent; /* Extent of the raw sample */
1387  int histo_size[ND_DIMS]; /* histogram nrows, ncols, etc */
1388  ND_BOX histo_extent; /* Spatial extent of the histogram */
1389  ND_BOX histo_extent_new; /* Temporary variable */
1390  int histo_cells_target; /* Number of cells we will shoot for, given the stats target */
1391  int histo_cells; /* Number of cells in the histogram */
1392  int histo_cells_new = 1; /* Temporary variable */
1393 
1394  int ndims = 2; /* Dimensionality of the sample */
1395  int histo_ndims = 0; /* Dimensionality of the histogram */
1396  double sample_distribution[ND_DIMS]; /* How homogeneous is distribution of sample in each axis? */
1397  double total_distribution; /* Total of sample_distribution */
1398 
1399  int stats_slot; /* What slot is this data going into? (2D vs ND) */
1400  int stats_kind; /* And this is what? (2D vs ND) */
1401 
1402  /* Initialize sum and stddev */
1403  nd_box_init(&sum);
1404  nd_box_init(&stddev);
1405  nd_box_init(&avg);
1406  nd_box_init(&histo_extent);
1407  nd_box_init(&histo_extent_new);
1408 
1409  /*
1410  * This is where gserialized_analyze_nd
1411  * should put its' custom parameters.
1412  */
1413  /* void *mystats = stats->extra_data; */
1414 
1415  POSTGIS_DEBUG(2, "compute_gserialized_stats called");
1416  POSTGIS_DEBUGF(3, " # sample_rows: %d", sample_rows);
1417  POSTGIS_DEBUGF(3, " estimate of total_rows: %.6g", total_rows);
1418 
1419  /*
1420  * We might need less space, but don't think
1421  * its worth saving...
1422  */
1423  sample_boxes = palloc(sizeof(ND_BOX*) * sample_rows);
1424 
1425  /*
1426  * First scan:
1427  * o read boxes
1428  * o find dimensionality of the sample
1429  * o find extent of the sample
1430  * o count null-infinite/not-null values
1431  * o compute total_width
1432  * o compute total features's box area (for avgFeatureArea)
1433  * o sum features box coordinates (for standard deviation)
1434  */
1435  for ( i = 0; i < sample_rows; i++ )
1436  {
1437  Datum datum;
1438  GSERIALIZED *geom;
1439  GBOX gbox;
1440  ND_BOX *nd_box;
1441  bool is_null;
1442  bool is_copy;
1443 
1444  datum = fetchfunc(stats, i, &is_null);
1445 
1446  /* Skip all NULLs. */
1447  if ( is_null )
1448  {
1449  POSTGIS_DEBUGF(4, " skipped null geometry %d", i);
1450  null_cnt++;
1451  continue;
1452  }
1453 
1454  /* Read the bounds from the gserialized. */
1455  geom = (GSERIALIZED *)PG_DETOAST_DATUM(datum);
1456  is_copy = VARATT_IS_EXTENDED(datum);
1457  if ( LW_FAILURE == gserialized_get_gbox_p(geom, &gbox) )
1458  {
1459  /* Skip empties too. */
1460  POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
1461  continue;
1462  }
1463 
1464  /* If we're in 2D mode, zero out the higher dimensions for "safety" */
1465  if ( mode == 2 )
1466  gbox.zmin = gbox.zmax = gbox.mmin = gbox.mmax = 0.0;
1467 
1468  /* Check bounds for validity (finite and not NaN) */
1469  if ( ! gbox_is_valid(&gbox) )
1470  {
1471  POSTGIS_DEBUGF(3, " skipped infinite/nan geometry %d", i);
1472  continue;
1473  }
1474 
1475  /*
1476  * In N-D mode, set the ndims to the maximum dimensionality found
1477  * in the sample. Otherwise, leave at ndims == 2.
1478  */
1479  if ( mode != 2 )
1480  ndims = Max(gbox_ndims(&gbox), ndims);
1481 
1482  /* Convert gbox to n-d box */
1483  nd_box = palloc(sizeof(ND_BOX));
1484  nd_box_from_gbox(&gbox, nd_box);
1485 
1486  /* Cache n-d bounding box */
1487  sample_boxes[notnull_cnt] = nd_box;
1488 
1489  /* Initialize sample extent before merging first entry */
1490  if ( ! notnull_cnt )
1491  nd_box_init_bounds(&sample_extent);
1492 
1493  /* Add current sample to overall sample extent */
1494  nd_box_merge(nd_box, &sample_extent);
1495 
1496  /* How many bytes does this sample use? */
1497  total_width += VARSIZE(geom);
1498 
1499  /* Add bounds coordinates to sums for stddev calculation */
1500  for ( d = 0; d < ndims; d++ )
1501  {
1502  sum.min[d] += nd_box->min[d];
1503  sum.max[d] += nd_box->max[d];
1504  }
1505 
1506  /* Increment our "good feature" count */
1507  notnull_cnt++;
1508 
1509  /* Free up memory if our sample geometry was copied */
1510  if ( is_copy )
1511  pfree(geom);
1512 
1513  /* Give backend a chance of interrupting us */
1514  vacuum_delay_point();
1515  }
1516 
1517  /*
1518  * We'll build a histogram having stats->attr->attstattarget cells
1519  * on each side, within reason... we'll use ndims*10000 as the
1520  * maximum number of cells.
1521  * Also, if we're sampling a relatively small table, we'll try to ensure that
1522  * we have an average of 5 features for each cell so the histogram isn't
1523  * so sparse.
1524  */
1525  histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
1526  histo_cells_target = Min(histo_cells_target, ndims * 10000);
1527  histo_cells_target = Min(histo_cells_target, (int)(total_rows/5));
1528  POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
1529  POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
1530 
1531  /* If there's no useful features, we can't work out stats */
1532  if ( ! notnull_cnt )
1533  {
1534  elog(NOTICE, "no non-null/empty features, unable to compute statistics");
1535  stats->stats_valid = false;
1536  return;
1537  }
1538 
1539  POSTGIS_DEBUGF(3, " sample_extent: %s", nd_box_to_json(&sample_extent, ndims));
1540 
1541  /*
1542  * Second scan:
1543  * o compute standard deviation
1544  */
1545  for ( d = 0; d < ndims; d++ )
1546  {
1547  /* Calculate average bounds values */
1548  avg.min[d] = sum.min[d] / notnull_cnt;
1549  avg.max[d] = sum.max[d] / notnull_cnt;
1550 
1551  /* Calculate standard deviation for this dimension bounds */
1552  for ( i = 0; i < notnull_cnt; i++ )
1553  {
1554  const ND_BOX *ndb = sample_boxes[i];
1555  stddev.min[d] += (ndb->min[d] - avg.min[d]) * (ndb->min[d] - avg.min[d]);
1556  stddev.max[d] += (ndb->max[d] - avg.max[d]) * (ndb->max[d] - avg.max[d]);
1557  }
1558  stddev.min[d] = sqrt(stddev.min[d] / notnull_cnt);
1559  stddev.max[d] = sqrt(stddev.max[d] / notnull_cnt);
1560 
1561  /* Histogram bounds for this dimension bounds is avg +/- SDFACTOR * stdev */
1562  histo_extent.min[d] = Max(avg.min[d] - SDFACTOR * stddev.min[d], sample_extent.min[d]);
1563  histo_extent.max[d] = Min(avg.max[d] + SDFACTOR * stddev.max[d], sample_extent.max[d]);
1564  }
1565 
1566  /*
1567  * Third scan:
1568  * o skip hard deviants
1569  * o compute new histogram box
1570  */
1571  nd_box_init_bounds(&histo_extent_new);
1572  for ( i = 0; i < notnull_cnt; i++ )
1573  {
1574  const ND_BOX *ndb = sample_boxes[i];
1575  /* Skip any hard deviants (boxes entirely outside our histo_extent */
1576  if ( ! nd_box_intersects(&histo_extent, ndb, ndims) )
1577  {
1578  POSTGIS_DEBUGF(4, " feature %d is a hard deviant, skipped", i);
1579  sample_boxes[i] = NULL;
1580  continue;
1581  }
1582  /* Expand our new box to fit all the other features. */
1583  nd_box_merge(ndb, &histo_extent_new);
1584  }
1585  /*
1586  * Expand the box slightly (1%) to avoid edge effects
1587  * with objects that are on the boundary
1588  */
1589  nd_box_expand(&histo_extent_new, 0.01);
1590  histo_extent = histo_extent_new;
1591 
1592  /*
1593  * How should we allocate our histogram cells to the
1594  * different dimensions? We can't do it by raw dimensional width,
1595  * because in x/y/z space, the z can have different units
1596  * from the x/y. Similarly for x/y/t space.
1597  * So, we instead calculate how much features overlap
1598  * each other in their dimension to figure out which
1599  * dimensions have useful selectivity characteristics (more
1600  * variability in density) and therefor would find
1601  * more cells useful (to distinguish between dense places and
1602  * homogeneous places).
1603  */
1604  nd_box_array_distribution(sample_boxes, notnull_cnt, &histo_extent, ndims,
1605  sample_distribution);
1606 
1607  /*
1608  * The sample_distribution array now tells us how spread out the
1609  * data is in each dimension, so we use that data to allocate
1610  * the histogram cells we have available.
1611  * At this point, histo_cells_target is the approximate target number
1612  * of cells.
1613  */
1614 
1615  /*
1616  * Some dimensions have basically a uniform distribution, we want
1617  * to allocate no cells to those dimensions, only to dimensions
1618  * that have some interesting differences in data distribution.
1619  * Here we count up the number of interesting dimensions
1620  */
1621  for ( d = 0; d < ndims; d++ )
1622  {
1623  if ( sample_distribution[d] > 0 )
1624  histo_ndims++;
1625  }
1626 
1627  if ( histo_ndims == 0 )
1628  {
1629  /* Special case: all our dimensions had low variability! */
1630  /* We just divide the cells up evenly */
1631  POSTGIS_DEBUG(3, " special case: no axes have variability");
1632  histo_cells_new = 1;
1633  for ( d = 0; d < ndims; d++ )
1634  {
1635  histo_size[d] = 1 + (int)pow((double)histo_cells_target, 1/(double)ndims);
1636  POSTGIS_DEBUGF(3, " histo_size[d]: %d", histo_size[d]);
1637  histo_cells_new *= histo_size[d];
1638  }
1639  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1640  }
1641  else
1642  {
1643  /*
1644  * We're going to express the amount of variability in each dimension
1645  * as a proportion of the total variability and allocate cells in that
1646  * dimension relative to that proportion.
1647  */
1648  POSTGIS_DEBUG(3, " allocating histogram axes based on axis variability");
1649  total_distribution = total_double(sample_distribution, ndims); /* First get the total */
1650  POSTGIS_DEBUGF(3, " total_distribution: %.8g", total_distribution);
1651  histo_cells_new = 1; /* For the number of cells in the final histogram */
1652  for ( d = 0; d < ndims; d++ )
1653  {
1654  if ( sample_distribution[d] == 0 ) /* Uninteresting dimensions don't get any room */
1655  {
1656  histo_size[d] = 1;
1657  }
1658  else /* Interesting dimension */
1659  {
1660  /* How does this dims variability compare to the total? */
1661  float edge_ratio = (float)sample_distribution[d] / (float)total_distribution;
1662  /*
1663  * Scale the target cells number by the # of dims and ratio,
1664  * then take the appropriate root to get the estimated number of cells
1665  * on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
1666  */
1667  histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
1668  /* If something goes awry, just give this dim one slot */
1669  if ( ! histo_size[d] )
1670  histo_size[d] = 1;
1671  }
1672  histo_cells_new *= histo_size[d];
1673  }
1674  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1675  }
1676 
1677  /* Update histo_cells to the actual number of cells we need to allocate */
1678  histo_cells = histo_cells_new;
1679  POSTGIS_DEBUGF(3, " histo_cells: %d", histo_cells);
1680 
1681  /*
1682  * Create the histogram (ND_STATS) in the stats memory context
1683  */
1684  old_context = MemoryContextSwitchTo(stats->anl_context);
1685  nd_stats_size = sizeof(ND_STATS) + ((histo_cells - 1) * sizeof(float4));
1686  nd_stats = palloc(nd_stats_size);
1687  memset(nd_stats, 0, nd_stats_size); /* Initialize all values to 0 */
1688  MemoryContextSwitchTo(old_context);
1689 
1690  /* Initialize the #ND_STATS objects */
1691  nd_stats->ndims = ndims;
1692  nd_stats->extent = histo_extent;
1693  nd_stats->sample_features = sample_rows;
1694  nd_stats->table_features = total_rows;
1695  nd_stats->not_null_features = notnull_cnt;
1696  /* Copy in the histogram dimensions */
1697  for ( d = 0; d < ndims; d++ )
1698  nd_stats->size[d] = histo_size[d];
1699 
1700  /*
1701  * Fourth scan:
1702  * o fill histogram values with the proportion of
1703  * features' bbox overlaps: a feature's bvol
1704  * can fully overlap (1) or partially overlap
1705  * (fraction of 1) an histogram cell.
1706  *
1707  * Note that we are filling each cell with the "portion of
1708  * the feature's box that overlaps the cell". So, if we sum
1709  * up the values in the histogram, we could get the
1710  * histogram feature count.
1711  *
1712  */
1713  for ( i = 0; i < notnull_cnt; i++ )
1714  {
1715  const ND_BOX *nd_box;
1716  ND_IBOX nd_ibox;
1717  int at[ND_DIMS];
1718  int d;
1719  double num_cells = 0;
1720  double tmp_volume = 1.0;
1721  double min[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1722  double max[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1723  double cellsize[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1724 
1725  nd_box = sample_boxes[i];
1726  if ( ! nd_box ) continue; /* Skip Null'ed out hard deviants */
1727 
1728  /* Give backend a chance of interrupting us */
1729  vacuum_delay_point();
1730 
1731  /* Find the cells that overlap with this box and put them into the ND_IBOX */
1732  nd_box_overlap(nd_stats, nd_box, &nd_ibox);
1733  memset(at, 0, sizeof(int)*ND_DIMS);
1734 
1735  POSTGIS_DEBUGF(3, " feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1736  nd_ibox.min[0], nd_ibox.min[1], nd_ibox.min[2], nd_ibox.min[3],
1737  nd_ibox.max[0], nd_ibox.max[1], nd_ibox.max[2], nd_ibox.max[3]);
1738 
1739  for ( d = 0; d < nd_stats->ndims; d++ )
1740  {
1741  /* Initialize the starting values */
1742  at[d] = nd_ibox.min[d];
1743  min[d] = nd_stats->extent.min[d];
1744  max[d] = nd_stats->extent.max[d];
1745  cellsize[d] = (max[d] - min[d])/(nd_stats->size[d]);
1746 
1747  /* What's the volume (area) of this feature's box? */
1748  tmp_volume *= (nd_box->max[d] - nd_box->min[d]);
1749  }
1750 
1751  /* Add feature volume (area) to our total */
1752  total_sample_volume += tmp_volume;
1753 
1754  /*
1755  * Move through all the overlaped histogram cells values and
1756  * add the box overlap proportion to them.
1757  */
1758  do
1759  {
1760  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1761  double ratio;
1762  /* Create a box for this histogram cell */
1763  for ( d = 0; d < nd_stats->ndims; d++ )
1764  {
1765  nd_cell.min[d] = min[d] + (at[d]+0) * cellsize[d];
1766  nd_cell.max[d] = min[d] + (at[d]+1) * cellsize[d];
1767  }
1768 
1769  /*
1770  * If a feature box is completely inside one cell the ratio will be
1771  * 1.0. If a feature box is 50% in two cells, each cell will get
1772  * 0.5 added on.
1773  */
1774  ratio = nd_box_ratio(&nd_cell, nd_box, nd_stats->ndims);
1775  nd_stats->value[nd_stats_value_index(nd_stats, at)] += ratio;
1776  num_cells += ratio;
1777  POSTGIS_DEBUGF(3, " ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1778  POSTGIS_DEBUGF(3, " at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1779  }
1780  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1781 
1782  /* Keep track of overall number of overlaps counted */
1783  total_cell_count += num_cells;
1784  /* How many features have we added to this histogram? */
1785  histogram_features++;
1786  }
1787 
1788  POSTGIS_DEBUGF(3, " histogram_features: %d", histogram_features);
1789  POSTGIS_DEBUGF(3, " sample_rows: %d", sample_rows);
1790  POSTGIS_DEBUGF(3, " table_rows: %.6g", total_rows);
1791 
1792  /* Error out if we got no sample information */
1793  if ( ! histogram_features )
1794  {
1795  POSTGIS_DEBUG(3, " no stats have been gathered");
1796  elog(NOTICE, " no features lie in the stats histogram, invalid stats");
1797  stats->stats_valid = false;
1798  return;
1799  }
1800 
1801  nd_stats->histogram_features = histogram_features;
1802  nd_stats->histogram_cells = histo_cells;
1803  nd_stats->cells_covered = total_cell_count;
1804 
1805  /* Put this histogram data into the right slot/kind */
1806  if ( mode == 2 )
1807  {
1808  stats_slot = STATISTIC_SLOT_2D;
1809  stats_kind = STATISTIC_KIND_2D;
1810  }
1811  else
1812  {
1813  stats_slot = STATISTIC_SLOT_ND;
1814  stats_kind = STATISTIC_KIND_ND;
1815  }
1816 
1817  /* Write the statistics data */
1818  stats->stakind[stats_slot] = stats_kind;
1819  stats->staop[stats_slot] = InvalidOid;
1820  stats->stanumbers[stats_slot] = (float4*)nd_stats;
1821  stats->numnumbers[stats_slot] = nd_stats_size/sizeof(float4);
1822  stats->stanullfrac = (float4)null_cnt/sample_rows;
1823  stats->stawidth = total_width/notnull_cnt;
1824  stats->stadistinct = -1.0;
1825  stats->stats_valid = true;
1826 
1827  POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1828  POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1829  POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
1830  POSTGIS_DEBUGF(3, " out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1831  POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
1832  POSTGIS_DEBUG (3, " out: distinct values: all (no check done)");
1833  POSTGIS_DEBUGF(3, " out: %s", nd_stats_to_json(nd_stats));
1834  /*
1835  POSTGIS_DEBUGF(3, " out histogram:\n%s", nd_stats_to_grid(nd_stats));
1836  */
1837 
1838  return;
1839 }
1840 
1841 
1859 static void
1860 compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1861  int sample_rows, double total_rows)
1862 {
1863  /* 2D Mode */
1864  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
1865  /* ND Mode */
1866  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 0);
1867 }
1868 
1869 
1898 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
1899 {
1900  VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1901  Form_pg_attribute attr = stats->attr;
1902 
1903  POSTGIS_DEBUG(2, "gserialized_analyze_nd called");
1904 
1905  /* If the attstattarget column is negative, use the default value */
1906  /* NB: it is okay to scribble on stats->attr since it's a copy */
1907  if (attr->attstattarget < 0)
1908  attr->attstattarget = default_statistics_target;
1909 
1910  POSTGIS_DEBUGF(3, " attribute stat target: %d", attr->attstattarget);
1911 
1912  /* Setup the minimum rows and the algorithm function.
1913  * 300 matches the default value set in
1914  * postgresql/src/backend/commands/analyze.c */
1915  stats->minrows = 300 * stats->attr->attstattarget;
1916  stats->compute_stats = compute_gserialized_stats;
1917 
1918  POSTGIS_DEBUGF(3, " minrows: %d", stats->minrows);
1919 
1920  /* Indicate we are done successfully */
1921  PG_RETURN_BOOL(true);
1922 }
1923 
1936 static float8
1937 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
1938 {
1939  int d; /* counter */
1940  float8 selectivity;
1941  ND_BOX nd_box;
1942  ND_IBOX nd_ibox;
1943  int at[ND_DIMS];
1944  double cell_size[ND_DIMS];
1945  double min[ND_DIMS];
1946  double max[ND_DIMS];
1947  double total_count = 0.0;
1948  int ndims_max;
1949 
1950  /* Calculate the overlap of the box on the histogram */
1951  if ( ! nd_stats )
1952  {
1953  elog(NOTICE, " estimate_selectivity called with null input");
1954  return FALLBACK_ND_SEL;
1955  }
1956 
1957  ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1958 
1959  /* Initialize nd_box. */
1960  nd_box_from_gbox(box, &nd_box);
1961 
1962  /*
1963  * To return 2D stats on an ND sample, we need to make the
1964  * 2D box cover the full range of the other dimensions in the
1965  * histogram.
1966  */
1967  POSTGIS_DEBUGF(3, " mode: %d", mode);
1968  if ( mode == 2 )
1969  {
1970  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1971  ndims_max = 2;
1972  }
1973 
1974  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1975  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1976 
1977  /*
1978  * Search box completely misses histogram extent?
1979  * We have to intersect in all N dimensions or else we have
1980  * zero interaction under the &&& operator. It's important
1981  * to short circuit in this case, as some of the tests below
1982  * will return junk results when run on non-intersecting inputs.
1983  */
1984  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1985  {
1986  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1987  return 0.0;
1988  }
1989 
1990  /* Search box completely contains histogram extent! */
1991  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1992  {
1993  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1994  return 1.0;
1995  }
1996 
1997  /* Calculate the overlap of the box on the histogram */
1998  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
1999  {
2000  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
2001  return FALLBACK_ND_SEL;
2002  }
2003 
2004  /* Work out some measurements of the histogram */
2005  for ( d = 0; d < nd_stats->ndims; d++ )
2006  {
2007  /* Cell size in each dim */
2008  min[d] = nd_stats->extent.min[d];
2009  max[d] = nd_stats->extent.max[d];
2010  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
2011  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2012 
2013  /* Initialize the counter */
2014  at[d] = nd_ibox.min[d];
2015  }
2016 
2017  /* Move through all the overlap values and sum them */
2018  do
2019  {
2020  float cell_count, ratio;
2021  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2022 
2023  /* We have to pro-rate partially overlapped cells. */
2024  for ( d = 0; d < nd_stats->ndims; d++ )
2025  {
2026  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2027  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2028  }
2029 
2030  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2031  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2032 
2033  /* Add the pro-rated count for this cell to the overall total */
2034  total_count += cell_count * ratio;
2035  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2036  }
2037  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2038 
2039  /* Scale by the number of features in our histogram to get the proportion */
2040  selectivity = total_count / nd_stats->histogram_features;
2041 
2042  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2043  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2044  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2045  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2046 
2047  /* Prevent rounding overflows */
2048  if (selectivity > 1.0) selectivity = 1.0;
2049  else if (selectivity < 0.0) selectivity = 0.0;
2050 
2051  return selectivity;
2052 }
2053 
2054 
2055 
2061 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
2062 {
2063  Oid table_oid = PG_GETARG_OID(0);
2064  text *att_text = PG_GETARG_TEXT_P(1);
2065  ND_STATS *nd_stats;
2066  char *str;
2067  text *json;
2068  int mode = 2; /* default to 2D mode */
2069  bool only_parent = false; /* default to whole tree stats */
2070 
2071  /* Check if we've been asked to not use 2d mode */
2072  if ( ! PG_ARGISNULL(2) )
2073  mode = text_p_get_mode(PG_GETARG_TEXT_P(2));
2074 
2075  /* Retrieve the stats object */
2076  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, only_parent);
2077  if ( ! nd_stats )
2078  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2079 
2080  /* Convert to JSON */
2081  str = nd_stats_to_json(nd_stats);
2082  json = cstring_to_text(str);
2083  pfree(str);
2084  pfree(nd_stats);
2085  PG_RETURN_TEXT_P(json);
2086 }
2087 
2088 
2094 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
2095 {
2096  Oid table_oid = PG_GETARG_OID(0);
2097  text *att_text = PG_GETARG_TEXT_P(1);
2098  Datum geom_datum = PG_GETARG_DATUM(2);
2099  GBOX gbox; /* search box read from gserialized datum */
2100  float8 selectivity = 0;
2101  ND_STATS *nd_stats;
2102  int mode = 2; /* 2D mode by default */
2103 
2104  /* Check if we've been asked to not use 2d mode */
2105  if ( ! PG_ARGISNULL(3) )
2106  mode = text_p_get_mode(PG_GETARG_TEXT_P(3));
2107 
2108  /* Retrieve the stats object */
2109  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, false);
2110 
2111  if ( ! nd_stats )
2112  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2113 
2114  /* Calculate the gbox */
2115  if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
2116  elog(ERROR, "unable to calculate bounding box from geometry");
2117 
2118  POSTGIS_DEBUGF(3, " %s", gbox_to_string(&gbox));
2119 
2120  /* Do the estimation */
2121  selectivity = estimate_selectivity(&gbox, nd_stats, mode);
2122 
2123  pfree(nd_stats);
2124  PG_RETURN_FLOAT8(selectivity);
2125 }
2126 
2127 
2133 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
2134 {
2135  Oid table_oid1 = PG_GETARG_OID(0);
2136  text *att_text1 = PG_GETARG_TEXT_P(1);
2137  Oid table_oid2 = PG_GETARG_OID(2);
2138  text *att_text2 = PG_GETARG_TEXT_P(3);
2139  ND_STATS *nd_stats1, *nd_stats2;
2140  float8 selectivity = 0;
2141  int mode = 2; /* 2D mode by default */
2142 
2143 
2144  /* Retrieve the stats object */
2145  nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode, false);
2146  nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode, false);
2147 
2148  if ( ! nd_stats1 )
2149  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text_to_cstring(att_text1));
2150 
2151  if ( ! nd_stats2 )
2152  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid2), text_to_cstring(att_text2));
2153 
2154  /* Check if we've been asked to not use 2d mode */
2155  if ( ! PG_ARGISNULL(4) )
2156  {
2157  text *modetxt = PG_GETARG_TEXT_P(4);
2158  char *modestr = text_to_cstring(modetxt);
2159  if ( modestr[0] == 'N' )
2160  mode = 0;
2161  }
2162 
2163  /* Do the estimation */
2164  selectivity = estimate_join_selectivity(nd_stats1, nd_stats2);
2165 
2166  pfree(nd_stats1);
2167  pfree(nd_stats2);
2168  PG_RETURN_FLOAT8(selectivity);
2169 }
2170 
2176 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
2177 {
2178  PG_RETURN_DATUM(DirectFunctionCall5(
2180  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2181  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2182  Int32GetDatum(2) /* 2-D mode */
2183  ));
2184 }
2185 
2191 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
2192 {
2193  PG_RETURN_DATUM(DirectFunctionCall5(
2195  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2196  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2197  Int32GetDatum(0) /* N-D mode */
2198  ));
2199 }
2200 
2215 Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
2216 {
2217  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2218  /* Oid operator_oid = PG_GETARG_OID(1); */
2219  List *args = (List *) PG_GETARG_POINTER(2);
2220  /* int varRelid = PG_GETARG_INT32(3); */
2221  int mode = PG_GETARG_INT32(4);
2222 
2223  VariableStatData vardata;
2224  ND_STATS *nd_stats = NULL;
2225 
2226  Node *other;
2227  Var *self;
2228  GBOX search_box;
2229  float8 selectivity = 0;
2230 
2231  POSTGIS_DEBUG(2, "gserialized_gist_sel called");
2232 
2233  /*
2234  * TODO: This is a big one,
2235  * All this statistics code *only* tries to generate a valid
2236  * selectivity for && and &&&. That leaves all the other
2237  * geometry operators with bad stats! The selectivity
2238  * calculation should take account of the incoming operator
2239  * type and do the right thing.
2240  */
2241 
2242  /* Fail if not a binary opclause (probably shouldn't happen) */
2243  if (list_length(args) != 2)
2244  {
2245  POSTGIS_DEBUG(3, "gserialized_gist_sel: not a binary opclause");
2246  PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
2247  }
2248 
2249  /* Find the constant part */
2250  other = (Node *) linitial(args);
2251  if ( ! IsA(other, Const) )
2252  {
2253  self = (Var *)other;
2254  other = (Node *) lsecond(args);
2255  }
2256  else
2257  {
2258  self = (Var *) lsecond(args);
2259  }
2260 
2261  if ( ! IsA(other, Const) )
2262  {
2263  POSTGIS_DEBUG(3, " no constant arguments - returning a default selectivity");
2264  PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
2265  }
2266 
2267  /* Convert the constant to a BOX */
2268  if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
2269  {
2270  POSTGIS_DEBUG(3, "search box is EMPTY");
2271  PG_RETURN_FLOAT8(0.0);
2272  }
2273  POSTGIS_DEBUGF(4, " requested search box is: %s", gbox_to_string(&search_box));
2274 
2275  /* Get pg_statistic row */
2276  examine_variable(root, (Node*)self, 0, &vardata);
2277  if ( vardata.statsTuple ) {
2278  nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
2279  }
2280  ReleaseVariableStats(vardata);
2281 
2282  if ( ! nd_stats )
2283  {
2284  POSTGIS_DEBUG(3, " unable to load stats from syscache, not analyzed yet?");
2285  PG_RETURN_FLOAT8(FALLBACK_ND_SEL);
2286  }
2287 
2288  POSTGIS_DEBUGF(4, " got stats:\n%s", nd_stats_to_json(nd_stats));
2289 
2290  /* Do the estimation! */
2291  selectivity = estimate_selectivity(&search_box, nd_stats, mode);
2292  POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);
2293 
2294  pfree(nd_stats);
2295  PG_RETURN_FLOAT8(selectivity);
2296 }
2297 
2298 
2299 
2306 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
2307 {
2308  char *nsp = NULL;
2309  char *tbl = NULL;
2310  text *col = NULL;
2311  char *nsp_tbl = NULL;
2312  Oid tbl_oid, idx_oid;
2313  ND_STATS *nd_stats;
2314  GBOX *gbox = NULL;
2315  bool only_parent = false;
2316  int key_type;
2317 
2318  if ( PG_NARGS() == 4 )
2319  {
2320  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2321  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2322  col = PG_GETARG_TEXT_P(2);
2323  only_parent = PG_GETARG_BOOL(3);
2324  nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 6);
2325  sprintf(nsp_tbl, "\"%s\".\"%s\"", nsp, tbl);
2326  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2327  pfree(nsp_tbl);
2328  }
2329  else if ( PG_NARGS() == 3 )
2330  {
2331  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2332  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2333  col = PG_GETARG_TEXT_P(2);
2334  nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 6);
2335  sprintf(nsp_tbl, "\"%s\".\"%s\"", nsp, tbl);
2336  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2337  pfree(nsp_tbl);
2338  }
2339  else if ( PG_NARGS() == 2 )
2340  {
2341  tbl = text_to_cstring(PG_GETARG_TEXT_P(0));
2342  col = PG_GETARG_TEXT_P(1);
2343  nsp_tbl = palloc(strlen(tbl) + 3);
2344  sprintf(nsp_tbl, "\"%s\"", tbl);
2345  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2346  pfree(nsp_tbl);
2347  }
2348  else
2349  {
2350  elog(ERROR, "estimated_extent() called with wrong number of arguments");
2351  PG_RETURN_NULL();
2352  }
2353 
2354 #if 1
2355  /* Read the extent from the head of the spatial index, if there is one */
2356  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type);
2357  if (!idx_oid)
2358  elog(DEBUG2, "index for \"%s.%s\" does not exist", tbl, text_to_cstring(col));
2359  gbox = spatial_index_read_extent(idx_oid, key_type);
2360 #endif
2361 
2362  /* Fall back to reading the stats, if no index answer */
2363  if (!gbox)
2364  {
2365  /* Estimated extent only returns 2D bounds, so use mode 2 */
2366  nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2, only_parent);
2367 
2368  /* Error out on no stats */
2369  if ( ! nd_stats ) {
2370  elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, text_to_cstring(col));
2371  PG_RETURN_NULL();
2372  }
2373 
2374  /* Construct the box */
2375  gbox = palloc(sizeof(GBOX));
2376  FLAGS_SET_GEODETIC(gbox->flags, 0);
2377  FLAGS_SET_Z(gbox->flags, 0);
2378  FLAGS_SET_M(gbox->flags, 0);
2379  gbox->xmin = nd_stats->extent.min[0];
2380  gbox->xmax = nd_stats->extent.max[0];
2381  gbox->ymin = nd_stats->extent.min[1];
2382  gbox->ymax = nd_stats->extent.max[1];
2383  pfree(nd_stats);
2384  }
2385 
2386  PG_RETURN_POINTER(gbox);
2387 }
2388 
2396 Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
2397 {
2398  if ( PG_NARGS() == 3 )
2399  {
2400  PG_RETURN_DATUM(
2401  DirectFunctionCall3(gserialized_estimated_extent,
2402  PG_GETARG_DATUM(0),
2403  PG_GETARG_DATUM(1),
2404  PG_GETARG_DATUM(2)));
2405  }
2406  else if ( PG_NARGS() == 2 )
2407  {
2408  PG_RETURN_DATUM(
2409  DirectFunctionCall2(gserialized_estimated_extent,
2410  PG_GETARG_DATUM(0),
2411  PG_GETARG_DATUM(1)));
2412  }
2413 
2414  elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
2415  PG_RETURN_NULL();
2416 }
2417 
2418 /************************************************************************/
2419 
2420 static Oid
2421 table_get_spatial_index(Oid tbl_oid, text *col, int *key_type)
2422 {
2423  Relation tbl_rel;
2424  ListCell *lc;
2425  List *idx_list;
2426  Oid result = InvalidOid;
2427  char *colname = text_to_cstring(col);
2428 
2429  /* Lookup our spatial index key types */
2430  Oid b2d_oid = postgis_oid(BOX2DFOID);
2431  Oid gdx_oid = postgis_oid(BOX3DOID);
2432 
2433  if (!(b2d_oid && gdx_oid))
2434  return InvalidOid;
2435 
2436  tbl_rel = RelationIdGetRelation(tbl_oid);
2437  idx_list = RelationGetIndexList(tbl_rel);
2438  RelationClose(tbl_rel);
2439 
2440  /* For each index associated with this table... */
2441  foreach(lc, idx_list)
2442  {
2443  Form_pg_class idx_form;
2444  HeapTuple idx_tup;
2445  int idx_relam;
2446  Oid idx_oid = lfirst_oid(lc);
2447 
2448  idx_tup = SearchSysCache1(RELOID, ObjectIdGetDatum(idx_oid));
2449  if (!HeapTupleIsValid(idx_tup))
2450  elog(ERROR, "%s: unable to lookup index %u in syscache", __func__, idx_oid);
2451  idx_form = (Form_pg_class) GETSTRUCT(idx_tup);
2452  idx_relam = idx_form->relam;
2453  ReleaseSysCache(idx_tup);
2454 
2455  /* Does the index use a GIST access method? */
2456  if (idx_relam == GIST_AM_OID)
2457  {
2458  Form_pg_attribute att;
2459  Oid atttypid;
2460  /* Is the index on the column name we are looking for? */
2461  HeapTuple att_tup = SearchSysCache2(ATTNAME,
2462  ObjectIdGetDatum(idx_oid),
2463  PointerGetDatum(colname));
2464  if (!HeapTupleIsValid(att_tup))
2465  continue;
2466 
2467  att = (Form_pg_attribute) GETSTRUCT(att_tup);
2468  atttypid = att->atttypid;
2469  ReleaseSysCache(att_tup);
2470 
2471  /* Is the column actually spatial? */
2472  if (b2d_oid == atttypid || gdx_oid == atttypid)
2473  {
2474  /* Save result, clean up, and break out */
2475  result = idx_oid;
2476  if (key_type)
2477  *key_type = (atttypid == b2d_oid ? STATISTIC_SLOT_2D : STATISTIC_SLOT_ND);
2478  break;
2479  }
2480  }
2481  }
2482  return result;
2483 }
2484 
2485 static GBOX *
2486 spatial_index_read_extent(Oid idx_oid, int key_type)
2487 {
2488  BOX2DF *bounds_2df = NULL;
2489  GIDX *bounds_gidx = NULL;
2490  GBOX *gbox = NULL;
2491  Relation idx_rel;
2492  Buffer buffer;
2493  Page page;
2494  OffsetNumber offset;
2495  unsigned long offset_max;
2496 
2497  if (!idx_oid)
2498  return NULL;
2499 
2500  idx_rel = index_open(idx_oid, AccessShareLock);
2501  buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2502  page = (Page) BufferGetPage(buffer);
2503  offset = FirstOffsetNumber;
2504  offset_max = PageGetMaxOffsetNumber(page);
2505  while (offset <= offset_max)
2506  {
2507  ItemId iid = PageGetItemId(page, offset);
2508  IndexTuple ituple;
2509  if (!iid)
2510  {
2511  ReleaseBuffer(buffer);
2512  index_close(idx_rel, AccessShareLock);
2513  return NULL;
2514  }
2515  ituple = (IndexTuple) PageGetItem(page, iid);
2516  if (!GistTupleIsInvalid(ituple))
2517  {
2518  bool isnull;
2519  Datum idx_attr = index_getattr(ituple, 1, idx_rel->rd_att, &isnull);
2520  if (!isnull)
2521  {
2522  if (key_type == STATISTIC_SLOT_2D)
2523  {
2524  BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2525  if (bounds_2df)
2526  box2df_merge(bounds_2df, b);
2527  else
2528  bounds_2df = box2df_copy(b);
2529  }
2530  else
2531  {
2532  GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2533  if (bounds_gidx)
2534  gidx_merge(&bounds_gidx, b);
2535  else
2536  bounds_gidx = gidx_copy(b);
2537  }
2538  }
2539  }
2540  offset++;
2541  }
2542 
2543  ReleaseBuffer(buffer);
2544  index_close(idx_rel, AccessShareLock);
2545 
2546  if (key_type == STATISTIC_SLOT_2D && bounds_2df)
2547  {
2548  if (box2df_is_empty(bounds_2df))
2549  return NULL;
2550  gbox = gbox_new(0);
2551  box2df_to_gbox_p(bounds_2df, gbox);
2552  }
2553  else if (key_type == STATISTIC_SLOT_ND && bounds_gidx)
2554  {
2555  if (gidx_is_unknown(bounds_gidx))
2556  return NULL;
2557  gbox = gbox_new(0);
2558  gbox_from_gidx(bounds_gidx, gbox, 0);
2559  }
2560  else
2561  return NULL;
2562 
2563  return gbox;
2564 }
2565 
2566 /*
2567 CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
2568  RETURNS box2d
2569  AS '$libdir/postgis-2.5','_postgis_gserialized_index_extent'
2570  LANGUAGE 'c' STABLE STRICT;
2571 */
2572 
2574 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
2575 {
2576  GBOX *gbox = NULL;
2577  int key_type;
2578  Oid tbl_oid = PG_GETARG_DATUM(0);
2579  text *col = PG_GETARG_TEXT_P(1);
2580 
2581  Oid idx_oid = table_get_spatial_index(tbl_oid, col, &key_type);
2582  if (!idx_oid)
2583  PG_RETURN_NULL();
2584 
2585  gbox = spatial_index_read_extent(idx_oid, key_type);
2586  if (!gbox)
2587  PG_RETURN_NULL();
2588  else
2589  PG_RETURN_POINTER(gbox);
2590 }
2591 
GBOX * gbox_new(uint8_t flags)
Create a new gbox with the dimensionality indicated by the flags.
Definition: g_box.c:39
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition: g_box.c:204
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
Definition: g_box.c:399
int gserialized_get_gbox_p(const GSERIALIZED *g, GBOX *box)
Read the bounding box off a serialization and calculate one if it is not already there.
Definition: g_serialized.c:640
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static int range_quintile(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
#define NUM_BINS
#define STATISTIC_SLOT_ND
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type)
#define ND_DIMS
The maximum number of dimensions our code can handle.
#define STATISTIC_KIND_2D
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Create a printable view of the ND_STATS histogram.
#define DEFAULT_ND_JOINSEL
#define STATISTIC_KIND_ND
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
#define SDFACTOR
#define FALLBACK_ND_JOINSEL
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array.
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator?
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type)
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension,...
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
bool box2df_is_empty(const BOX2DF *a)
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
BOX2DF * box2df_copy(BOX2DF *b)
bool gidx_is_unknown(const GIDX *a)
GIDX * gidx_copy(GIDX *b)
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define LW_FAILURE
Definition: liblwgeom.h:79
#define FLAGS_GET_Z(flags)
Macros for manipulating the 'flags' byte.
Definition: liblwgeom.h:140
#define FLAGS_GET_M(flags)
Definition: liblwgeom.h:141
#define FLAGS_SET_GEODETIC(flags, value)
Definition: liblwgeom.h:149
#define FLAGS_SET_M(flags, value)
Definition: liblwgeom.h:147
#define FLAGS_SET_Z(flags, value)
Definition: liblwgeom.h:146
#define FLAGS_GET_GEODETIC(flags)
Definition: liblwgeom.h:143
This library is the generic geometry handling section of PostGIS.
args
Definition: ovdump.py:44
Datum buffer(PG_FUNCTION_ARGS)
char * text_to_cstring(const text *textptr)
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided.
Definition: stringbuffer.c:253
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
Definition: stringbuffer.c:35
void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
Definition: stringbuffer.c:134
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
Definition: stringbuffer.c:78
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string.
Definition: stringbuffer.c:160
double ymax
Definition: liblwgeom.h:298
double zmax
Definition: liblwgeom.h:300
double xmax
Definition: liblwgeom.h:296
double zmin
Definition: liblwgeom.h:299
double mmax
Definition: liblwgeom.h:302
double ymin
Definition: liblwgeom.h:297
double xmin
Definition: liblwgeom.h:295
double mmin
Definition: liblwgeom.h:301
uint8_t flags
Definition: liblwgeom.h:294
float4 max[ND_DIMS]
float4 min[ND_DIMS]
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
int max[ND_DIMS]
int min[ND_DIMS]
N-dimensional box index type.
float4 size[ND_DIMS]
N-dimensional statistics structure.