PostGIS  3.0.6dev-r@@SVN_REVISION@@
gserialized_estimate.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * PostGIS is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * PostGIS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18  *
19  **********************************************************************
20  *
21  * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
22  *
23  **********************************************************************/
24 
25 
26 
27 /**********************************************************************
28  THEORY OF OPERATION
29 
30 The ANALYZE command hooks to a callback (gserialized_analyze_nd) that
31 calculates (compute_gserialized_stats_mode) two histograms of occurrences of
32 features, one for the 2D domain (and the && operator) and one for the
33 ND domain (and the &&& operator).
34 
35 Queries in PostgreSQL call into the selectivity sub-system to find out
36 the relative effectiveness of different clauses in sub-setting
37 relations. Queries with constant arguments call gserialized_gist_sel,
38 queries with relations on both sides call gserialized_gist_joinsel.
39 
40 gserialized_gist_sel sums up the values in the histogram that overlap
41 the constant search box.
42 
43 gserialized_gist_joinsel sums up the product of the overlapping
44 cells in each relation's histogram.
45 
46 Depending on the operator and type, the mode of selectivity calculation
47 will be 2D or ND.
48 
49 - geometry && geometry ==> 2D
50 - geometry &&& geometry ==> ND
51 - geography && geography ==> ND
52 
53 The 2D mode is put in effect by retrieving the 2D histogram from the
54 statistics cache and then allowing the generic ND calculations to
55 go to work.
56 
57 TO DO: More testing and examination of the &&& operator and mixed
58 dimensionality cases. (2D geometry) &&& (3D column), etc.
59 
60 **********************************************************************/
61 
62 #include "postgres.h"
63 
64 #include "access/genam.h"
65 #include "access/gin.h"
66 #include "access/gist.h"
67 #include "access/gist_private.h"
68 #include "access/gistscan.h"
69 #include "utils/datum.h"
70 #include "access/heapam.h"
71 #include "catalog/index.h"
72 #include "catalog/pg_am.h"
73 #include "miscadmin.h"
74 #include "storage/lmgr.h"
75 #include "catalog/namespace.h"
76 #include "catalog/indexing.h"
77 #if PG_VERSION_NUM >= 100000
78 #include "utils/regproc.h"
79 #include "utils/varlena.h"
80 #endif
81 #include "utils/builtins.h"
82 #include "utils/datum.h"
83 #include "utils/snapmgr.h"
84 #include "utils/fmgroids.h"
85 #include "funcapi.h"
86 #include "access/heapam.h"
87 #include "catalog/pg_type.h"
88 #include "access/relscan.h"
89 
90 #include "executor/spi.h"
91 #include "fmgr.h"
92 #include "commands/vacuum.h"
93 #if PG_VERSION_NUM < 120000
94 #include "nodes/relation.h"
95 #else
96 #include "nodes/pathnodes.h"
97 #endif
98 #include "parser/parsetree.h"
99 #include "utils/array.h"
100 #include "utils/lsyscache.h"
101 #include "utils/builtins.h"
102 #include "utils/syscache.h"
103 #include "utils/rel.h"
104 #include "utils/selfuncs.h"
105 
106 #include "../postgis_config.h"
107 
108 #include "access/htup_details.h"
109 
110 #include "stringbuffer.h"
111 #include "liblwgeom.h"
112 #include "lwgeom_pg.h" /* For debugging macros. */
113 #include "gserialized_gist.h" /* For index common functions */
114 
115 #include <math.h>
116 #if HAVE_IEEEFP_H
117 #include <ieeefp.h>
118 #endif
119 #include <float.h>
120 #include <string.h>
121 #include <stdio.h>
122 #include <ctype.h>
123 
124 
125 /************************************************************************/
126 
127 
128 /* Prototypes */
129 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS);
130 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS);
131 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS);
132 Datum gserialized_gist_sel(PG_FUNCTION_ARGS);
133 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS);
134 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
135 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
136 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
137 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS);
138 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
139 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
140 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
141 
142 /* Local prototypes */
143 static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num);
144 static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type, int att_num);
145 
146 
147 /* Other prototypes */
148 float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
149 float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode);
150 
151 
152 /* Old Prototype */
153 Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
154 
155 /*
156  * Assign a number to the n-dimensional statistics kind
157  *
158  * tgl suggested:
159  *
160  * 1-100: reserved for assignment by the core Postgres project
161  * 100-199: reserved for assignment by PostGIS
162  * 200-9999: reserved for other globally-known stats kinds
163  * 10000-32767: reserved for private site-local use
164  */
165 #define STATISTIC_KIND_ND 102
166 #define STATISTIC_KIND_2D 103
167 #define STATISTIC_SLOT_ND 0
168 #define STATISTIC_SLOT_2D 1
169 
170 /*
171 * The SD factor restricts the side of the statistics histogram
172 * based on the standard deviation of the extent of the data.
173 * SDFACTOR is the number of standard deviations from the mean
174 * the histogram will extend.
175 */
176 #define SDFACTOR 3.25
177 
183 #define ND_DIMS 4
184 
191 #define MIN_DIMENSION_WIDTH 0.000000001
192 
197 #define MAX_DIMENSION_WIDTH 1.0E+20
198 
202 #define DEFAULT_ND_SEL 0.0001
203 #define DEFAULT_ND_JOINSEL 0.001
204 
208 #define FALLBACK_ND_SEL 0.2
209 #define FALLBACK_ND_JOINSEL 0.3
210 
216 typedef struct ND_BOX_T
217 {
218  float4 min[ND_DIMS];
219  float4 max[ND_DIMS];
221 
225 typedef struct ND_IBOX_T
226 {
227  int min[ND_DIMS];
228  int max[ND_DIMS];
230 
231 
238 typedef struct ND_STATS_T
239 {
240  /* Dimensionality of the histogram. */
241  float4 ndims;
242 
243  /* Size of n-d histogram in each dimension. */
244  float4 size[ND_DIMS];
245 
246  /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
248 
249  /* How many rows in the table itself? */
251 
252  /* How many rows were in the sample that built this histogram? */
254 
255  /* How many not-Null/Empty features were in the sample? */
257 
258  /* How many features actually got sampled in the histogram? */
260 
261  /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
263 
264  /* How many cells did those histogram features cover? */
265  /* Since we are pro-rating coverage, this number should */
266  /* now always equal histogram_features */
268 
269  /* Variable length # of floats for histogram */
270  float4 value[1];
272 
273 
274 
275 
282 static int
283 gbox_ndims(const GBOX* gbox)
284 {
285  int dims = 2;
286  if ( FLAGS_GET_GEODETIC(gbox->flags) )
287  return 3;
288  if ( FLAGS_GET_Z(gbox->flags) )
289  dims++;
290  if ( FLAGS_GET_M(gbox->flags) )
291  dims++;
292  return dims;
293 }
294 
300 static int
301 text_p_get_mode(const text *txt)
302 {
303  int mode = 2;
304  char *modestr;
305  if (VARSIZE_ANY_EXHDR(txt) <= 0)
306  return mode;
307  modestr = (char*)VARDATA(txt);
308  if ( modestr[0] == 'N' )
309  mode = 0;
310  return mode;
311 }
312 
313 
/**
 * qsort() comparator for int values: yields -1, 0 or 1 when the first
 * value is respectively less than, equal to, or greater than the second.
 */
static int
cmp_int (const void *a, const void *b)
{
	const int lhs = *(const int *)a;
	const int rhs = *(const int *)b;

	/* Difference of two 0/1 comparisons avoids any subtraction overflow */
	return (lhs > rhs) - (lhs < rhs);
}
330 
335 static int
336 range_quintile(int *vals, int nvals)
337 {
338  qsort(vals, nvals, sizeof(int), cmp_int);
339  return vals[4*nvals/5] - vals[nvals/5];
340 }
341 
/**
 * Sum the first nvals entries of an array of doubles.
 *
 * The accumulator is a double so that no precision is lost relative to
 * the inputs (a float accumulator silently drops low-order digits once
 * the running total grows large).
 *
 * @param vals  values to add up
 * @param nvals number of entries to read; zero or less yields 0.0
 * @return the sum of the values
 */
static double
total_double(const double *vals, int nvals)
{
	int i;
	double total = 0.0;

	for ( i = 0; i < nvals; i++ )
		total += vals[i];

	return total;
}
356 
357 #if POSTGIS_DEBUG_LEVEL >= 3
358 
/**
 * Add up the first nvals entries of an int array.
 */
static int
total_int(const int *vals, int nvals)
{
	int sum = 0;
	int k = 0;

	while ( k < nvals )
		sum += vals[k++];

	return sum;
}
373 
/**
 * Arithmetic mean of the first nvals entries of an int array, computed
 * as the integer total divided by the count in double precision.
 */
static double
avg(const int *vals, int nvals)
{
	const double sum = (double)total_int(vals, nvals);
	return sum / (double)nvals;
}
383 
/**
 * Population standard deviation of the first nvals entries of an int
 * array: the square root of the mean squared distance from the average.
 */
static double
stddev(const int *vals, int nvals)
{
	const double mean = avg(vals, nvals);
	double sum_sq = 0;
	int k;

	for ( k = 0; k < nvals; k++ )
	{
		const double delta = mean - (double)(vals[k]);
		sum_sq += delta * delta;
	}

	return sqrt(sum_sq / nvals);
}
402 #endif /* POSTGIS_DEBUG_LEVEL >= 3 */
403 
408 static int
409 nd_stats_value_index(const ND_STATS *stats, int *indexes)
410 {
411  int d;
412  int accum = 1, vdx = 0;
413 
414  /* Calculate the index into the 1-d values array that the (i,j,k,l) */
415  /* n-d histogram coordinate implies. */
416  /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
417  for ( d = 0; d < (int)(stats->ndims); d++ )
418  {
419  int size = (int)(stats->size[d]);
420  if ( indexes[d] < 0 || indexes[d] >= size )
421  {
422  POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
423  return -1;
424  }
425  vdx += indexes[d] * accum;
426  accum *= size;
427  }
428  return vdx;
429 }
430 
434 static char*
435 nd_box_to_json(const ND_BOX *nd_box, int ndims)
436 {
437  char *rv;
438  int i;
440 
441  stringbuffer_append(sb, "{\"min\":[");
442  for ( i = 0; i < ndims; i++ )
443  {
444  if ( i ) stringbuffer_append(sb, ",");
445  stringbuffer_aprintf(sb, "%.6g", nd_box->min[i]);
446  }
447  stringbuffer_append(sb, "],\"max\":[");
448  for ( i = 0; i < ndims; i++ )
449  {
450  if ( i ) stringbuffer_append(sb, ",");
451  stringbuffer_aprintf(sb, "%.6g", nd_box->max[i]);
452  }
453  stringbuffer_append(sb, "]}");
454 
457  return rv;
458 }
459 
460 
465 static char*
466 nd_stats_to_json(const ND_STATS *nd_stats)
467 {
468  char *json_extent, *str;
469  int d;
471  int ndims = (int)roundf(nd_stats->ndims);
472 
473  stringbuffer_append(sb, "{");
474  stringbuffer_aprintf(sb, "\"ndims\":%d,", ndims);
475 
476  /* Size */
477  stringbuffer_append(sb, "\"size\":[");
478  for ( d = 0; d < ndims; d++ )
479  {
480  if ( d ) stringbuffer_append(sb, ",");
481  stringbuffer_aprintf(sb, "%d", (int)roundf(nd_stats->size[d]));
482  }
483  stringbuffer_append(sb, "],");
484 
485  /* Extent */
486  json_extent = nd_box_to_json(&(nd_stats->extent), ndims);
487  stringbuffer_aprintf(sb, "\"extent\":%s,", json_extent);
488  pfree(json_extent);
489 
490  stringbuffer_aprintf(sb, "\"table_features\":%d,", (int)roundf(nd_stats->table_features));
491  stringbuffer_aprintf(sb, "\"sample_features\":%d,", (int)roundf(nd_stats->sample_features));
492  stringbuffer_aprintf(sb, "\"not_null_features\":%d,", (int)roundf(nd_stats->not_null_features));
493  stringbuffer_aprintf(sb, "\"histogram_features\":%d,", (int)roundf(nd_stats->histogram_features));
494  stringbuffer_aprintf(sb, "\"histogram_cells\":%d,", (int)roundf(nd_stats->histogram_cells));
495  stringbuffer_aprintf(sb, "\"cells_covered\":%d", (int)roundf(nd_stats->cells_covered));
496  stringbuffer_append(sb, "}");
497 
500  return str;
501 }
502 
503 
509 // static char*
510 // nd_stats_to_grid(const ND_STATS *stats)
511 // {
512 // char *rv;
513 // int j, k;
514 // int sizex = (int)roundf(stats->size[0]);
515 // int sizey = (int)roundf(stats->size[1]);
516 // stringbuffer_t *sb = stringbuffer_create();
517 //
518 // for ( k = 0; k < sizey; k++ )
519 // {
520 // for ( j = 0; j < sizex; j++ )
521 // {
522 // stringbuffer_aprintf(sb, "%3d ", (int)roundf(stats->value[j + k*sizex]));
523 // }
524 // stringbuffer_append(sb, "\n");
525 // }
526 //
527 // rv = stringbuffer_getstringcopy(sb);
528 // stringbuffer_destroy(sb);
529 // return rv;
530 // }
531 
532 
534 static int
535 nd_box_merge(const ND_BOX *source, ND_BOX *target)
536 {
537  int d;
538  for ( d = 0; d < ND_DIMS; d++ )
539  {
540  target->min[d] = Min(target->min[d], source->min[d]);
541  target->max[d] = Max(target->max[d], source->max[d]);
542  }
543  return true;
544 }
545 
547 static int
549 {
550  memset(a, 0, sizeof(ND_BOX));
551  return true;
552 }
553 
559 static int
561 {
562  int d;
563  for ( d = 0; d < ND_DIMS; d++ )
564  {
565  a->min[d] = FLT_MAX;
566  a->max[d] = -1 * FLT_MAX;
567  }
568  return true;
569 }
570 
572 static void
573 nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
574 {
575  int d = 0;
576  POSTGIS_DEBUGF(3, " %s", gbox_to_string(gbox));
577 
578  nd_box_init(nd_box);
579  nd_box->min[d] = gbox->xmin;
580  nd_box->max[d] = gbox->xmax;
581  d++;
582  nd_box->min[d] = gbox->ymin;
583  nd_box->max[d] = gbox->ymax;
584  d++;
585  if ( FLAGS_GET_GEODETIC(gbox->flags) )
586  {
587  nd_box->min[d] = gbox->zmin;
588  nd_box->max[d] = gbox->zmax;
589  return;
590  }
591  if ( FLAGS_GET_Z(gbox->flags) )
592  {
593  nd_box->min[d] = gbox->zmin;
594  nd_box->max[d] = gbox->zmax;
595  d++;
596  }
597  if ( FLAGS_GET_M(gbox->flags) )
598  {
599  nd_box->min[d] = gbox->mmin;
600  nd_box->max[d] = gbox->mmax;
601  d++;
602  }
603  return;
604 }
605 
609 static int
610 nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
611 {
612  int d;
613  for ( d = 0; d < ndims; d++ )
614  {
615  if ( (a->min[d] > b->max[d]) || (a->max[d] < b->min[d]) )
616  return false;
617  }
618  return true;
619 }
620 
624 static int
625 nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
626 {
627  int d;
628  for ( d = 0; d < ndims; d++ )
629  {
630  if ( ! ((a->min[d] < b->min[d]) && (a->max[d] > b->max[d])) )
631  return false;
632  }
633  return true;
634 }
635 
640 static int
641 nd_box_expand(ND_BOX *nd_box, double expansion_factor)
642 {
643  int d;
644  double size;
645  for ( d = 0; d < ND_DIMS; d++ )
646  {
647  size = nd_box->max[d] - nd_box->min[d];
648  /* Avoid expanding boxes that are either too wide or too narrow*/
649  if (size < MIN_DIMENSION_WIDTH || size > MAX_DIMENSION_WIDTH)
650  continue;
651  nd_box->min[d] -= size * expansion_factor / 2;
652  nd_box->max[d] += size * expansion_factor / 2;
653  }
654  return true;
655 }
656 
661 static inline int
662 nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
663 {
664  int d;
665 
666  POSTGIS_DEBUGF(4, " nd_box: %s", nd_box_to_json(nd_box, nd_stats->ndims));
667 
668  /* Initialize ibox */
669  memset(nd_ibox, 0, sizeof(ND_IBOX));
670 
671  /* In each dimension... */
672  for ( d = 0; d < nd_stats->ndims; d++ )
673  {
674  double smin = nd_stats->extent.min[d];
675  double smax = nd_stats->extent.max[d];
676  double width = smax - smin;
677 
678  if (width < MIN_DIMENSION_WIDTH)
679  {
680  nd_ibox->min[d] = nd_ibox->max[d] = nd_stats->extent.min[d];
681  }
682  else
683  {
684  int size = (int)roundf(nd_stats->size[d]);
685 
686  /* ... find cells the box overlaps with in this dimension */
687  nd_ibox->min[d] = floor(size * (nd_box->min[d] - smin) / width);
688  nd_ibox->max[d] = floor(size * (nd_box->max[d] - smin) / width);
689 
690  POSTGIS_DEBUGF(5, " stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
691  POSTGIS_DEBUGF(5, " overlap: dim %d: (%d, %d)", d, nd_ibox->min[d], nd_ibox->max[d]);
692 
693  /* Push any out-of range values into range */
694  nd_ibox->min[d] = Max(nd_ibox->min[d], 0);
695  nd_ibox->max[d] = Min(nd_ibox->max[d], size - 1);
696  }
697  }
698  return true;
699 }
700 
704 static inline double
705 nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
706 {
707  int d;
708  bool covered = true;
709  double ivol = 1.0;
710  double vol2 = 1.0;
711  double vol1 = 1.0;
712 
713  for ( d = 0 ; d < ndims; d++ )
714  {
715  if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
716  return 0.0; /* Disjoint */
717 
718  if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
719  covered = false;
720  }
721 
722  if ( covered )
723  return 1.0;
724 
725  for ( d = 0; d < ndims; d++ )
726  {
727  double width1 = b1->max[d] - b1->min[d];
728  double width2 = b2->max[d] - b2->min[d];
729  double imin, imax, iwidth;
730 
731  vol1 *= width1;
732  vol2 *= width2;
733 
734  imin = Max(b1->min[d], b2->min[d]);
735  imax = Min(b1->max[d], b2->max[d]);
736  iwidth = imax - imin;
737  iwidth = Max(0.0, iwidth);
738 
739  ivol *= iwidth;
740  }
741 
742  if ( vol2 == 0.0 )
743  return vol2;
744 
745  return ivol / vol2;
746 }
747 
748 /* How many bins shall we use in figuring out the distribution? */
749 #define NUM_BINS 50
750 
766 static int
767 nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
768 {
769  int d, i, k, range;
770  int counts[NUM_BINS];
771  double smin, smax; /* Spatial min, spatial max */
772  double swidth; /* Spatial width of dimension */
773 #if POSTGIS_DEBUG_LEVEL >= 3
774  double average, sdev, sdev_ratio;
775 #endif
776  int bmin, bmax; /* Bin min, bin max */
777  const ND_BOX *ndb;
778 
779  /* For each dimension... */
780  for ( d = 0; d < ndims; d++ )
781  {
782  /* Initialize counts for this dimension */
783  memset(counts, 0, sizeof(counts));
784 
785  smin = extent->min[d];
786  smax = extent->max[d];
787  swidth = smax - smin;
788 
789  /* Don't try and calculate distribution of overly narrow */
790  /* or overly wide dimensions. Here we're being pretty geographical, */
791  /* expecting "normal" planar or geographic coordinates. */
792  /* Otherwise we have to "handle" +/- Inf bounded features and */
793  /* the assumptions needed for that are as bad as this hack. */
794  if ( swidth < MIN_DIMENSION_WIDTH || swidth > MAX_DIMENSION_WIDTH )
795  {
796  distribution[d] = 0;
797  continue;
798  }
799 
800  /* Sum up the overlaps of each feature with the dimensional bins */
801  for ( i = 0; i < num_boxes; i++ )
802  {
803  double minoffset, maxoffset;
804 
805  /* Skip null entries */
806  ndb = nd_boxes[i];
807  if ( ! ndb ) continue;
808 
809  /* Where does box fall relative to the working range */
810  minoffset = ndb->min[d] - smin;
811  maxoffset = ndb->max[d] - smin;
812 
813  /* Skip boxes that our outside our working range */
814  if ( minoffset < 0 || minoffset > swidth ||
815  maxoffset < 0 || maxoffset > swidth )
816  {
817  continue;
818  }
819 
820  /* What bins does this range correspond to? */
821  bmin = floor(NUM_BINS * minoffset / swidth);
822  bmax = floor(NUM_BINS * maxoffset / swidth);
823 
824  /* Should only happen when maxoffset==swidth */
825  if (bmax >= NUM_BINS)
826  bmax = NUM_BINS-1;
827 
828  POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
829 
830  /* Increment the counts in all the bins this feature overlaps */
831  for ( k = bmin; k <= bmax; k++ )
832  {
833  counts[k] += 1;
834  }
835 
836  }
837 
838  /* How dispersed is the distribution of features across bins? */
839  range = range_quintile(counts, NUM_BINS);
840 
841 #if POSTGIS_DEBUG_LEVEL >= 3
842  average = avg(counts, NUM_BINS);
843  sdev = stddev(counts, NUM_BINS);
844  sdev_ratio = sdev/average;
845 
846  POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
847  POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
848  POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
849  POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
850 #endif
851 
852  distribution[d] = range;
853  }
854 
855  return true;
856 }
857 
863 static inline int
864 nd_increment(ND_IBOX *ibox, int ndims, int *counter)
865 {
866  int d = 0;
867 
868  while ( d < ndims )
869  {
870  if ( counter[d] < ibox->max[d] )
871  {
872  counter[d] += 1;
873  break;
874  }
875  counter[d] = ibox->min[d];
876  d++;
877  }
878  /* That's it, cannot increment any more! */
879  if ( d == ndims )
880  return false;
881 
882  /* Increment complete! */
883  return true;
884 }
885 
886 static ND_STATS*
887 pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
888 {
889  int stats_kind = STATISTIC_KIND_ND;
890  int rv;
891  ND_STATS *nd_stats;
892 
893  /* If we're in 2D mode, set the kind appropriately */
894  if ( mode == 2 ) stats_kind = STATISTIC_KIND_2D;
895 
896  /* Then read the geom status histogram from that */
897 
898 #if POSTGIS_PGSQL_VERSION < 100
899  {
900  float4 *floatptr;
901  int nvalues;
902 
903  rv = get_attstatsslot(stats_tuple, 0, 0, stats_kind, InvalidOid,
904  NULL, NULL, NULL, &floatptr, &nvalues);
905 
906  if ( ! rv ) {
907  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
908  return NULL;
909  }
910 
911  /* Clone the stats here so we can release the attstatsslot immediately */
912  nd_stats = palloc(sizeof(float) * nvalues);
913  memcpy(nd_stats, floatptr, sizeof(float) * nvalues);
914 
915  /* Clean up */
916  free_attstatsslot(0, NULL, 0, floatptr, nvalues);
917  }
918 #else /* PostgreSQL 10 or higher */
919  {
920  AttStatsSlot sslot;
921  rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
922  ATTSTATSSLOT_NUMBERS);
923  if ( ! rv ) {
924  POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
925  return NULL;
926  }
927 
928  /* Clone the stats here so we can release the attstatsslot immediately */
929  nd_stats = palloc(sizeof(float4) * sslot.nnumbers);
930  memcpy(nd_stats, sslot.numbers, sizeof(float4) * sslot.nnumbers);
931 
932  free_attstatsslot(&sslot);
933  }
934 #endif
935 
936  return nd_stats;
937 }
938 
943 static ND_STATS*
944 pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
945 {
946  HeapTuple stats_tuple = NULL;
947  ND_STATS *nd_stats;
948 
949  /* First pull the stats tuple for the whole tree */
950  if ( ! only_parent )
951  {
952  POSTGIS_DEBUGF(2, "searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
953  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(true));
954  if ( stats_tuple )
955  POSTGIS_DEBUGF(2, "found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
956  }
957  /* Fall-back to main table stats only, if not found for whole tree or explicitly ignored */
958  if ( only_parent || ! stats_tuple )
959  {
960  POSTGIS_DEBUGF(2, "searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
961  stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(false));
962  if ( stats_tuple )
963  POSTGIS_DEBUGF(2, "found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
964  }
965  if ( ! stats_tuple )
966  {
967  POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
968  return NULL;
969  }
970 
971  nd_stats = pg_nd_stats_from_tuple(stats_tuple, mode);
972  ReleaseSysCache(stats_tuple);
973  if ( ! nd_stats )
974  {
975  POSTGIS_DEBUGF(2,
976  "histogram for attribute %d of table \"%s\" does not exist?",
977  att_num, get_rel_name(table_oid));
978  }
979 
980  return nd_stats;
981 }
982 
991 static ND_STATS*
992 pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
993 {
994  const char *att_name = text_to_cstring(att_text);
995  AttrNumber att_num;
996 
997  /* We know the name? Look up the num */
998  if ( att_text )
999  {
1000  /* Get the attribute number */
1001  att_num = get_attnum(table_oid, att_name);
1002  if ( ! att_num ) {
1003  elog(ERROR, "attribute \"%s\" does not exist", att_name);
1004  return NULL;
1005  }
1006  }
1007  else
1008  {
1009  elog(ERROR, "attribute name is null");
1010  return NULL;
1011  }
1012 
1013  return pg_get_nd_stats(table_oid, att_num, mode, only_parent);
1014 }
1015 
1029 static float8
1031 {
1032  int ncells1, ncells2;
1033  int ndims1, ndims2, ndims;
1034  double ntuples_max;
1035  double ntuples_not_null1, ntuples_not_null2;
1036 
1037  ND_BOX extent1, extent2;
1038  ND_IBOX ibox1, ibox2;
1039  int at1[ND_DIMS];
1040  int at2[ND_DIMS];
1041  double min1[ND_DIMS];
1042  double width1[ND_DIMS];
1043  double cellsize1[ND_DIMS];
1044  int size2[ND_DIMS];
1045  double min2[ND_DIMS];
1046  double width2[ND_DIMS];
1047  double cellsize2[ND_DIMS];
1048  int size1[ND_DIMS];
1049  int d;
1050  double val = 0;
1051  float8 selectivity;
1052 
1053  /* Drop out on null inputs */
1054  if ( ! ( s1 && s2 ) )
1055  {
1056  elog(NOTICE, " estimate_join_selectivity called with null inputs");
1057  return FALLBACK_ND_SEL;
1058  }
1059 
1060  /* We need to know how many cells each side has... */
1061  ncells1 = (int)roundf(s1->histogram_cells);
1062  ncells2 = (int)roundf(s2->histogram_cells);
1063 
1064  /* ...so that we can drive the summation loop with the smaller histogram. */
1065  if ( ncells1 > ncells2 )
1066  {
1067  const ND_STATS *stats_tmp = s1;
1068  s1 = s2;
1069  s2 = stats_tmp;
1070  }
1071 
1072  POSTGIS_DEBUGF(3, "s1: %s", nd_stats_to_json(s1));
1073  POSTGIS_DEBUGF(3, "s2: %s", nd_stats_to_json(s2));
1074 
1075  /* Re-read that info after the swap */
1076  ncells1 = (int)roundf(s1->histogram_cells);
1077  ncells2 = (int)roundf(s2->histogram_cells);
1078 
1079  /* Q: What's the largest possible join size these relations can create? */
1080  /* A: The product of the # of non-null rows in each relation. */
1081  ntuples_not_null1 = s1->table_features * (s1->not_null_features / s1->sample_features);
1082  ntuples_not_null2 = s2->table_features * (s2->not_null_features / s2->sample_features);
1083  ntuples_max = ntuples_not_null1 * ntuples_not_null2;
1084 
1085  /* Get the ndims as ints */
1086  ndims1 = (int)roundf(s1->ndims);
1087  ndims2 = (int)roundf(s2->ndims);
1088  ndims = Max(ndims1, ndims2);
1089 
1090  /* Get the extents */
1091  extent1 = s1->extent;
1092  extent2 = s2->extent;
1093 
1094  /* If relation stats do not intersect, join is very very selective. */
1095  if ( ! nd_box_intersects(&extent1, &extent2, ndims) )
1096  {
1097  POSTGIS_DEBUG(3, "relation stats do not intersect, returning 0");
1098  PG_RETURN_FLOAT8(0.0);
1099  }
1100 
1101  /*
1102  * First find the index range of the part of the smaller
1103  * histogram that overlaps the larger one.
1104  */
1105  if ( ! nd_box_overlap(s1, &extent2, &ibox1) )
1106  {
1107  POSTGIS_DEBUG(3, "could not calculate overlap of relations");
1108  PG_RETURN_FLOAT8(FALLBACK_ND_JOINSEL);
1109  }
1110 
1111  /* Initialize counters / constants on s1 */
1112  for ( d = 0; d < ndims1; d++ )
1113  {
1114  at1[d] = ibox1.min[d];
1115  min1[d] = s1->extent.min[d];
1116  width1[d] = s1->extent.max[d] - s1->extent.min[d];
1117  size1[d] = (int)roundf(s1->size[d]);
1118  cellsize1[d] = width1[d] / size1[d];
1119  }
1120 
1121  /* Initialize counters / constants on s2 */
1122  for ( d = 0; d < ndims2; d++ )
1123  {
1124  min2[d] = s2->extent.min[d];
1125  width2[d] = s2->extent.max[d] - s2->extent.min[d];
1126  size2[d] = (int)roundf(s2->size[d]);
1127  cellsize2[d] = width2[d] / size2[d];
1128  }
1129 
1130  /* For each affected cell of s1... */
1131  do
1132  {
1133  double val1;
1134  /* Construct the bounds of this cell */
1135  ND_BOX nd_cell1;
1136  nd_box_init(&nd_cell1);
1137  for ( d = 0; d < ndims1; d++ )
1138  {
1139  nd_cell1.min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1140  nd_cell1.max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1141  }
1142 
1143  /* Find the cells of s2 that cell1 overlaps.. */
1144  nd_box_overlap(s2, &nd_cell1, &ibox2);
1145 
1146  /* Initialize counter */
1147  for ( d = 0; d < ndims2; d++ )
1148  {
1149  at2[d] = ibox2.min[d];
1150  }
1151 
1152  POSTGIS_DEBUGF(3, "at1 %d,%d %s", at1[0], at1[1], nd_box_to_json(&nd_cell1, ndims1));
1153 
1154  /* Get the value at this cell */
1155  val1 = s1->value[nd_stats_value_index(s1, at1)];
1156 
1157  /* For each overlapped cell of s2... */
1158  do
1159  {
1160  double ratio2;
1161  double val2;
1162 
1163  /* Construct the bounds of this cell */
1164  ND_BOX nd_cell2;
1165  nd_box_init(&nd_cell2);
1166  for ( d = 0; d < ndims2; d++ )
1167  {
1168  nd_cell2.min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1169  nd_cell2.max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1170  }
1171 
1172  POSTGIS_DEBUGF(3, " at2 %d,%d %s", at2[0], at2[1], nd_box_to_json(&nd_cell2, ndims2));
1173 
1174  /* Calculate overlap ratio of the cells */
1175  ratio2 = nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1176 
1177  /* Multiply the cell counts, scaled by overlap ratio */
1178  val2 = s2->value[nd_stats_value_index(s2, at2)];
1179  POSTGIS_DEBUGF(3, " val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1180  val += val1 * (val2 * ratio2);
1181  }
1182  while ( nd_increment(&ibox2, ndims2, at2) );
1183 
1184  }
1185  while( nd_increment(&ibox1, ndims1, at1) );
1186 
1187  POSTGIS_DEBUGF(3, "val of histogram = %g", val);
1188 
1189  /*
1190  * In order to compare our total cell count "val" to the
1191  * ntuples_max, we need to scale val up to reflect a full
1192  * table estimate. So, multiply by ratio of table size to
1193  * sample size.
1194  */
1195  val *= (s1->table_features / s1->sample_features);
1196  val *= (s2->table_features / s2->sample_features);
1197 
1198  POSTGIS_DEBUGF(3, "val scaled to full table size = %g", val);
1199 
1200  /*
1201  * Because the cell counts are over-determined due to
1202  * double counting of features that overlap multiple cells
1203  * (see the compute_gserialized_stats routine)
1204  * we also have to scale our cell count "val" *down*
1205  * to adjust for the double counting.
1206  */
1207 // val /= (s1->cells_covered / s1->histogram_features);
1208 // val /= (s2->cells_covered / s2->histogram_features);
1209 
1210  /*
1211  * Finally, the selectivity is the estimated number of
1212  * rows to be returned divided by the maximum possible
1213  * number of rows that can be returned.
1214  */
1215  selectivity = val / ntuples_max;
1216 
1217  /* Guard against over-estimates and crazy numbers :) */
1218  if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1219  {
1220  selectivity = DEFAULT_ND_JOINSEL;
1221  }
1222  else if ( selectivity > 1.0 )
1223  {
1224  selectivity = 1.0;
1225  }
1226 
1227  return selectivity;
1228 }
1229 
1235 Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
1236 {
1237  PG_RETURN_DATUM(DirectFunctionCall5(
1239  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1240  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1241  Int32GetDatum(0) /* ND mode */
1242  ));
1243 }
1244 
1250 Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
1251 {
1252  PG_RETURN_DATUM(DirectFunctionCall5(
1254  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1255  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1256  Int32GetDatum(2) /* 2D mode */
1257  ));
1258 }
1259 
1260 double
1261 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
1262 {
1263  float8 selectivity;
1264  Oid relid1, relid2;
1265  ND_STATS *stats1, *stats2;
1266  Node *arg1 = (Node*) linitial(args);
1267  Node *arg2 = (Node*) lsecond(args);
1268  Var *var1 = (Var*) arg1;
1269  Var *var2 = (Var*) arg2;
1270 
1271  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1272 
1273  /* We only do column joins right now, no functional joins */
1274  /* TODO: handle g1 && ST_Expand(g2) */
1275  if (!IsA(arg1, Var) || !IsA(arg2, Var))
1276  {
1277  POSTGIS_DEBUGF(1, "%s called with arguments that are not column references", __func__);
1278  return DEFAULT_ND_JOINSEL;
1279  }
1280 
1281  /* What are the Oids of our tables/relations? */
1282  relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
1283  relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
1284 
1285  /* Pull the stats from the stats system. */
1286  stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
1287  stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
1288 
1289  /* If we can't get stats, we have to stop here! */
1290  if (!stats1)
1291  {
1292  POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1293  return DEFAULT_ND_JOINSEL;
1294  }
1295  else if (!stats2)
1296  {
1297  POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1298  return DEFAULT_ND_JOINSEL;
1299  }
1300 
1301  selectivity = estimate_join_selectivity(stats1, stats2);
1302  POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
1303  pfree(stats1);
1304  pfree(stats2);
1305  return selectivity;
1306 }
1307 
1317 Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
1318 {
1319  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1320  /* Oid operator = PG_GETARG_OID(1); */
1321  List *args = (List *) PG_GETARG_POINTER(2);
1322  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1323  int mode = PG_GETARG_INT32(4);
1324 
1325  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1326 
1327  /* Check length of args and punt on > 2 */
1328  if (list_length(args) != 2)
1329  {
1330  POSTGIS_DEBUGF(2, "%s: got nargs == %d", __func__, list_length(args));
1331  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1332  }
1333 
1334  /* Only respond to an inner join/unknown context join */
1335  if (jointype != JOIN_INNER)
1336  {
1337  POSTGIS_DEBUGF(1, "%s: jointype %d not supported", __func__, jointype);
1338  PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1339  }
1340 
1341  PG_RETURN_FLOAT8(gserialized_joinsel_internal(root, args, jointype, mode));
1342 }
1343 
1344 
1345 
1346 
1365 static void
1366 compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1367  int sample_rows, double total_rows, int mode)
1368 {
1369  MemoryContext old_context;
1370  int d, i; /* Counters */
1371  int notnull_cnt = 0; /* # not null rows in the sample */
1372  int null_cnt = 0; /* # null rows in the sample */
1373  int histogram_features = 0; /* # rows that actually got counted in the histogram */
1374 
1375  ND_STATS *nd_stats; /* Our histogram */
1376  size_t nd_stats_size; /* Size to allocate */
1377 
1378  double total_width = 0; /* # of bytes used by sample */
1379  double total_sample_volume = 0; /* Area/volume coverage of the sample */
1380  double total_cell_count = 0; /* # of cells in histogram affected by sample */
1381 
1382  ND_BOX sum; /* Sum of extents of sample boxes */
1383  ND_BOX avg; /* Avg of extents of sample boxes */
1384  ND_BOX stddev; /* StdDev of extents of sample boxes */
1385 
1386  const ND_BOX **sample_boxes; /* ND_BOXes for each of the sample features */
1387  ND_BOX sample_extent; /* Extent of the raw sample */
1388  int histo_size[ND_DIMS]; /* histogram nrows, ncols, etc */
1389  ND_BOX histo_extent; /* Spatial extent of the histogram */
1390  ND_BOX histo_extent_new; /* Temporary variable */
1391  int histo_cells_target; /* Number of cells we will shoot for, given the stats target */
1392  int histo_cells; /* Number of cells in the histogram */
1393  int histo_cells_new = 1; /* Temporary variable */
1394 
1395  int ndims = 2; /* Dimensionality of the sample */
1396  int histo_ndims = 0; /* Dimensionality of the histogram */
1397  double sample_distribution[ND_DIMS]; /* How homogeneous is distribution of sample in each axis? */
1398  double total_distribution; /* Total of sample_distribution */
1399 
1400  int stats_slot; /* What slot is this data going into? (2D vs ND) */
1401  int stats_kind; /* And this is what? (2D vs ND) */
1402 
1403  /* Initialize sum and stddev */
1404  nd_box_init(&sum);
1405  nd_box_init(&stddev);
1406  nd_box_init(&avg);
1407  nd_box_init(&histo_extent);
1408  nd_box_init(&histo_extent_new);
1409 
1410  /*
1411  * This is where gserialized_analyze_nd
1412  * should put its' custom parameters.
1413  */
1414  /* void *mystats = stats->extra_data; */
1415 
1416  POSTGIS_DEBUG(2, "compute_gserialized_stats called");
1417  POSTGIS_DEBUGF(3, " # sample_rows: %d", sample_rows);
1418  POSTGIS_DEBUGF(3, " estimate of total_rows: %.6g", total_rows);
1419 
1420  /*
1421  * We might need less space, but don't think
1422  * its worth saving...
1423  */
1424  sample_boxes = palloc(sizeof(ND_BOX*) * sample_rows);
1425 
1426  /*
1427  * First scan:
1428  * o read boxes
1429  * o find dimensionality of the sample
1430  * o find extent of the sample
1431  * o count null-infinite/not-null values
1432  * o compute total_width
1433  * o compute total features's box area (for avgFeatureArea)
1434  * o sum features box coordinates (for standard deviation)
1435  */
1436  for ( i = 0; i < sample_rows; i++ )
1437  {
1438  Datum datum;
1439  GSERIALIZED *geom;
1440  GBOX gbox;
1441  ND_BOX *nd_box;
1442  bool is_null;
1443  bool is_copy;
1444 
1445  datum = fetchfunc(stats, i, &is_null);
1446 
1447  /* Skip all NULLs. */
1448  if ( is_null )
1449  {
1450  POSTGIS_DEBUGF(4, " skipped null geometry %d", i);
1451  null_cnt++;
1452  continue;
1453  }
1454 
1455  /* Read the bounds from the gserialized. */
1456  geom = (GSERIALIZED *)PG_DETOAST_DATUM(datum);
1457  is_copy = VARATT_IS_EXTENDED(datum);
1458  if ( LW_FAILURE == gserialized_get_gbox_p(geom, &gbox) )
1459  {
1460  /* Skip empties too. */
1461  POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
1462  continue;
1463  }
1464 
1465  /* If we're in 2D mode, zero out the higher dimensions for "safety" */
1466  if ( mode == 2 )
1467  gbox.zmin = gbox.zmax = gbox.mmin = gbox.mmax = 0.0;
1468 
1469  /* Check bounds for validity (finite and not NaN) */
1470  if ( ! gbox_is_valid(&gbox) )
1471  {
1472  POSTGIS_DEBUGF(3, " skipped infinite/nan geometry %d", i);
1473  continue;
1474  }
1475 
1476  /*
1477  * In N-D mode, set the ndims to the maximum dimensionality found
1478  * in the sample. Otherwise, leave at ndims == 2.
1479  */
1480  if ( mode != 2 )
1481  ndims = Max(gbox_ndims(&gbox), ndims);
1482 
1483  /* Convert gbox to n-d box */
1484  nd_box = palloc(sizeof(ND_BOX));
1485  nd_box_from_gbox(&gbox, nd_box);
1486 
1487  /* Cache n-d bounding box */
1488  sample_boxes[notnull_cnt] = nd_box;
1489 
1490  /* Initialize sample extent before merging first entry */
1491  if ( ! notnull_cnt )
1492  nd_box_init_bounds(&sample_extent);
1493 
1494  /* Add current sample to overall sample extent */
1495  nd_box_merge(nd_box, &sample_extent);
1496 
1497  /* How many bytes does this sample use? */
1498  total_width += VARSIZE(geom);
1499 
1500  /* Add bounds coordinates to sums for stddev calculation */
1501  for ( d = 0; d < ndims; d++ )
1502  {
1503  sum.min[d] += nd_box->min[d];
1504  sum.max[d] += nd_box->max[d];
1505  }
1506 
1507  /* Increment our "good feature" count */
1508  notnull_cnt++;
1509 
1510  /* Free up memory if our sample geometry was copied */
1511  if ( is_copy )
1512  pfree(geom);
1513 
1514  /* Give backend a chance of interrupting us */
1515  vacuum_delay_point();
1516  }
1517 
1518  /*
1519  * We'll build a histogram having stats->attr->attstattarget cells
1520  * on each side, within reason... we'll use ndims*10000 as the
1521  * maximum number of cells.
1522  * Also, if we're sampling a relatively small table, we'll try to ensure that
1523  * we have an average of 5 features for each cell so the histogram isn't
1524  * so sparse.
1525  */
1526  histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
1527  histo_cells_target = Min(histo_cells_target, ndims * 10000);
1528  histo_cells_target = Min(histo_cells_target, (int)(total_rows/5));
1529  POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
1530  POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
1531 
1532  /* If there's no useful features, we can't work out stats */
1533  if ( ! notnull_cnt )
1534  {
1535  Oid relation_oid = stats->attr->attrelid;
1536  char *relation_name = get_rel_name(relation_oid);
1537  elog(NOTICE,
1538  "PostGIS: Unable to compute statistics for \"%s.%s\": No non-null/empty features",
1539  relation_name ? relation_name : "(NULL)",
1540  stats->attr->attname.data);
1541  stats->stats_valid = false;
1542  return;
1543  }
1544 
1545  POSTGIS_DEBUGF(3, " sample_extent: %s", nd_box_to_json(&sample_extent, ndims));
1546 
1547  /*
1548  * Second scan:
1549  * o compute standard deviation
1550  */
1551  for ( d = 0; d < ndims; d++ )
1552  {
1553  /* Calculate average bounds values */
1554  avg.min[d] = sum.min[d] / notnull_cnt;
1555  avg.max[d] = sum.max[d] / notnull_cnt;
1556 
1557  /* Calculate standard deviation for this dimension bounds */
1558  for ( i = 0; i < notnull_cnt; i++ )
1559  {
1560  const ND_BOX *ndb = sample_boxes[i];
1561  stddev.min[d] += (ndb->min[d] - avg.min[d]) * (ndb->min[d] - avg.min[d]);
1562  stddev.max[d] += (ndb->max[d] - avg.max[d]) * (ndb->max[d] - avg.max[d]);
1563  }
1564  stddev.min[d] = sqrt(stddev.min[d] / notnull_cnt);
1565  stddev.max[d] = sqrt(stddev.max[d] / notnull_cnt);
1566 
1567  /* Histogram bounds for this dimension bounds is avg +/- SDFACTOR * stdev */
1568  histo_extent.min[d] = Max(avg.min[d] - SDFACTOR * stddev.min[d], sample_extent.min[d]);
1569  histo_extent.max[d] = Min(avg.max[d] + SDFACTOR * stddev.max[d], sample_extent.max[d]);
1570  }
1571 
1572  /*
1573  * Third scan:
1574  * o skip hard deviants
1575  * o compute new histogram box
1576  */
1577  nd_box_init_bounds(&histo_extent_new);
1578  for ( i = 0; i < notnull_cnt; i++ )
1579  {
1580  const ND_BOX *ndb = sample_boxes[i];
1581  /* Skip any hard deviants (boxes entirely outside our histo_extent */
1582  if ( ! nd_box_intersects(&histo_extent, ndb, ndims) )
1583  {
1584  POSTGIS_DEBUGF(4, " feature %d is a hard deviant, skipped", i);
1585  sample_boxes[i] = NULL;
1586  continue;
1587  }
1588  /* Expand our new box to fit all the other features. */
1589  nd_box_merge(ndb, &histo_extent_new);
1590  }
1591  /*
1592  * Expand the box slightly (1%) to avoid edge effects
1593  * with objects that are on the boundary
1594  */
1595  nd_box_expand(&histo_extent_new, 0.01);
1596  histo_extent = histo_extent_new;
1597 
1598  /*
1599  * How should we allocate our histogram cells to the
1600  * different dimensions? We can't do it by raw dimensional width,
1601  * because in x/y/z space, the z can have different units
1602  * from the x/y. Similarly for x/y/t space.
1603  * So, we instead calculate how much features overlap
1604  * each other in their dimension to figure out which
1605  * dimensions have useful selectivity characteristics (more
1606  * variability in density) and therefor would find
1607  * more cells useful (to distinguish between dense places and
1608  * homogeneous places).
1609  */
1610  nd_box_array_distribution(sample_boxes, notnull_cnt, &histo_extent, ndims,
1611  sample_distribution);
1612 
1613  /*
1614  * The sample_distribution array now tells us how spread out the
1615  * data is in each dimension, so we use that data to allocate
1616  * the histogram cells we have available.
1617  * At this point, histo_cells_target is the approximate target number
1618  * of cells.
1619  */
1620 
1621  /*
1622  * Some dimensions have basically a uniform distribution, we want
1623  * to allocate no cells to those dimensions, only to dimensions
1624  * that have some interesting differences in data distribution.
1625  * Here we count up the number of interesting dimensions
1626  */
1627  for ( d = 0; d < ndims; d++ )
1628  {
1629  if ( sample_distribution[d] > 0 )
1630  histo_ndims++;
1631  }
1632 
1633  if ( histo_ndims == 0 )
1634  {
1635  /* Special case: all our dimensions had low variability! */
1636  /* We just divide the cells up evenly */
1637  POSTGIS_DEBUG(3, " special case: no axes have variability");
1638  histo_cells_new = 1;
1639  for ( d = 0; d < ndims; d++ )
1640  {
1641  histo_size[d] = (int)pow((double)histo_cells_target, 1/(double)ndims);
1642  if ( ! histo_size[d] )
1643  histo_size[d] = 1;
1644  POSTGIS_DEBUGF(3, " histo_size[d]: %d", histo_size[d]);
1645  histo_cells_new *= histo_size[d];
1646  }
1647  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1648  }
1649  else
1650  {
1651  /*
1652  * We're going to express the amount of variability in each dimension
1653  * as a proportion of the total variability and allocate cells in that
1654  * dimension relative to that proportion.
1655  */
1656  POSTGIS_DEBUG(3, " allocating histogram axes based on axis variability");
1657  total_distribution = total_double(sample_distribution, ndims); /* First get the total */
1658  POSTGIS_DEBUGF(3, " total_distribution: %.8g", total_distribution);
1659  histo_cells_new = 1; /* For the number of cells in the final histogram */
1660  for ( d = 0; d < ndims; d++ )
1661  {
1662  if ( sample_distribution[d] == 0 ) /* Uninteresting dimensions don't get any room */
1663  {
1664  histo_size[d] = 1;
1665  }
1666  else /* Interesting dimension */
1667  {
1668  /* How does this dims variability compare to the total? */
1669  float edge_ratio = (float)sample_distribution[d] / (float)total_distribution;
1670  /*
1671  * Scale the target cells number by the # of dims and ratio,
1672  * then take the appropriate root to get the estimated number of cells
1673  * on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
1674  */
1675  histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
1676  /* If something goes awry, just give this dim one slot */
1677  if ( ! histo_size[d] )
1678  histo_size[d] = 1;
1679  }
1680  histo_cells_new *= histo_size[d];
1681  }
1682  POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1683  }
1684 
1685  /* Update histo_cells to the actual number of cells we need to allocate */
1686  histo_cells = histo_cells_new;
1687  POSTGIS_DEBUGF(3, " histo_cells: %d", histo_cells);
1688 
1689  /*
1690  * Create the histogram (ND_STATS) in the stats memory context
1691  */
1692  old_context = MemoryContextSwitchTo(stats->anl_context);
1693  nd_stats_size = sizeof(ND_STATS) + ((histo_cells - 1) * sizeof(float4));
1694  nd_stats = palloc(nd_stats_size);
1695  memset(nd_stats, 0, nd_stats_size); /* Initialize all values to 0 */
1696  MemoryContextSwitchTo(old_context);
1697 
1698  /* Initialize the #ND_STATS objects */
1699  nd_stats->ndims = ndims;
1700  nd_stats->extent = histo_extent;
1701  nd_stats->sample_features = sample_rows;
1702  nd_stats->table_features = total_rows;
1703  nd_stats->not_null_features = notnull_cnt;
1704  /* Copy in the histogram dimensions */
1705  for ( d = 0; d < ndims; d++ )
1706  nd_stats->size[d] = histo_size[d];
1707 
1708  /*
1709  * Fourth scan:
1710  * o fill histogram values with the proportion of
1711  * features' bbox overlaps: a feature's bvol
1712  * can fully overlap (1) or partially overlap
1713  * (fraction of 1) an histogram cell.
1714  *
1715  * Note that we are filling each cell with the "portion of
1716  * the feature's box that overlaps the cell". So, if we sum
1717  * up the values in the histogram, we could get the
1718  * histogram feature count.
1719  *
1720  */
1721  for ( i = 0; i < notnull_cnt; i++ )
1722  {
1723  const ND_BOX *nd_box;
1724  ND_IBOX nd_ibox;
1725  int at[ND_DIMS];
1726  int d;
1727  double num_cells = 0;
1728  double tmp_volume = 1.0;
1729  double min[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1730  double max[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1731  double cellsize[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1732 
1733  nd_box = sample_boxes[i];
1734  if ( ! nd_box ) continue; /* Skip Null'ed out hard deviants */
1735 
1736  /* Give backend a chance of interrupting us */
1737  vacuum_delay_point();
1738 
1739  /* Find the cells that overlap with this box and put them into the ND_IBOX */
1740  nd_box_overlap(nd_stats, nd_box, &nd_ibox);
1741  memset(at, 0, sizeof(int)*ND_DIMS);
1742 
1743  POSTGIS_DEBUGF(3, " feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1744  nd_ibox.min[0], nd_ibox.min[1], nd_ibox.min[2], nd_ibox.min[3],
1745  nd_ibox.max[0], nd_ibox.max[1], nd_ibox.max[2], nd_ibox.max[3]);
1746 
1747  for ( d = 0; d < nd_stats->ndims; d++ )
1748  {
1749  /* Initialize the starting values */
1750  at[d] = nd_ibox.min[d];
1751  min[d] = nd_stats->extent.min[d];
1752  max[d] = nd_stats->extent.max[d];
1753  cellsize[d] = (max[d] - min[d])/(nd_stats->size[d]);
1754 
1755  /* What's the volume (area) of this feature's box? */
1756  tmp_volume *= (nd_box->max[d] - nd_box->min[d]);
1757  }
1758 
1759  /* Add feature volume (area) to our total */
1760  total_sample_volume += tmp_volume;
1761 
1762  /*
1763  * Move through all the overlaped histogram cells values and
1764  * add the box overlap proportion to them.
1765  */
1766  do
1767  {
1768  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1769  double ratio;
1770  /* Create a box for this histogram cell */
1771  for ( d = 0; d < nd_stats->ndims; d++ )
1772  {
1773  nd_cell.min[d] = min[d] + (at[d]+0) * cellsize[d];
1774  nd_cell.max[d] = min[d] + (at[d]+1) * cellsize[d];
1775  }
1776 
1777  /*
1778  * If a feature box is completely inside one cell the ratio will be
1779  * 1.0. If a feature box is 50% in two cells, each cell will get
1780  * 0.5 added on.
1781  */
1782  ratio = nd_box_ratio(&nd_cell, nd_box, nd_stats->ndims);
1783  nd_stats->value[nd_stats_value_index(nd_stats, at)] += ratio;
1784  num_cells += ratio;
1785  POSTGIS_DEBUGF(3, " ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1786  POSTGIS_DEBUGF(3, " at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1787  }
1788  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1789 
1790  /* Keep track of overall number of overlaps counted */
1791  total_cell_count += num_cells;
1792  /* How many features have we added to this histogram? */
1793  histogram_features++;
1794  }
1795 
1796  POSTGIS_DEBUGF(3, " histogram_features: %d", histogram_features);
1797  POSTGIS_DEBUGF(3, " sample_rows: %d", sample_rows);
1798  POSTGIS_DEBUGF(3, " table_rows: %.6g", total_rows);
1799 
1800  /* Error out if we got no sample information */
1801  if ( ! histogram_features )
1802  {
1803  POSTGIS_DEBUG(3, " no stats have been gathered");
1804  elog(NOTICE, " no features lie in the stats histogram, invalid stats");
1805  stats->stats_valid = false;
1806  return;
1807  }
1808 
1809  nd_stats->histogram_features = histogram_features;
1810  nd_stats->histogram_cells = histo_cells;
1811  nd_stats->cells_covered = total_cell_count;
1812 
1813  /* Put this histogram data into the right slot/kind */
1814  if ( mode == 2 )
1815  {
1816  stats_slot = STATISTIC_SLOT_2D;
1817  stats_kind = STATISTIC_KIND_2D;
1818  }
1819  else
1820  {
1821  stats_slot = STATISTIC_SLOT_ND;
1822  stats_kind = STATISTIC_KIND_ND;
1823  }
1824 
1825  /* Write the statistics data */
1826  stats->stakind[stats_slot] = stats_kind;
1827  stats->staop[stats_slot] = InvalidOid;
1828  stats->stanumbers[stats_slot] = (float4*)nd_stats;
1829  stats->numnumbers[stats_slot] = nd_stats_size/sizeof(float4);
1830  stats->stanullfrac = (float4)null_cnt/sample_rows;
1831  stats->stawidth = total_width/notnull_cnt;
1832  stats->stadistinct = -1.0;
1833  stats->stats_valid = true;
1834 
1835  POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1836  POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1837  POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
1838  POSTGIS_DEBUGF(3, " out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1839  POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
1840  POSTGIS_DEBUG (3, " out: distinct values: all (no check done)");
1841  POSTGIS_DEBUGF(3, " out: %s", nd_stats_to_json(nd_stats));
1842  /*
1843  POSTGIS_DEBUGF(3, " out histogram:\n%s", nd_stats_to_grid(nd_stats));
1844  */
1845 
1846  return;
1847 }
1848 
1849 
1867 static void
1868 compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1869  int sample_rows, double total_rows)
1870 {
1871  /* 2D Mode */
1872  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
1873 
1874  if (stats->stats_valid)
1875  {
1876  /* ND Mode: Only computed if 2D was computed too (not NULL and valid) */
1877  compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 0);
1878  }
1879 }
1880 
1881 
1910 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
1911 {
1912  VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1913  Form_pg_attribute attr = stats->attr;
1914 
1915  POSTGIS_DEBUG(2, "gserialized_analyze_nd called");
1916 
1917  /* If the attstattarget column is negative, use the default value */
1918  /* NB: it is okay to scribble on stats->attr since it's a copy */
1919  if (attr->attstattarget < 0)
1920  attr->attstattarget = default_statistics_target;
1921 
1922  POSTGIS_DEBUGF(3, " attribute stat target: %d", attr->attstattarget);
1923 
1924  /* Setup the minimum rows and the algorithm function.
1925  * 300 matches the default value set in
1926  * postgresql/src/backend/commands/analyze.c */
1927  stats->minrows = 300 * stats->attr->attstattarget;
1928  stats->compute_stats = compute_gserialized_stats;
1929 
1930  POSTGIS_DEBUGF(3, " minrows: %d", stats->minrows);
1931 
1932  /* Indicate we are done successfully */
1933  PG_RETURN_BOOL(true);
1934 }
1935 
1948 static float8
1949 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
1950 {
1951  int d; /* counter */
1952  float8 selectivity;
1953  ND_BOX nd_box;
1954  ND_IBOX nd_ibox;
1955  int at[ND_DIMS];
1956  double cell_size[ND_DIMS];
1957  double min[ND_DIMS];
1958  double max[ND_DIMS];
1959  double total_count = 0.0;
1960  int ndims_max;
1961 
1962  /* Calculate the overlap of the box on the histogram */
1963  if ( ! nd_stats )
1964  {
1965  elog(NOTICE, " estimate_selectivity called with null input");
1966  return FALLBACK_ND_SEL;
1967  }
1968 
1969  ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1970 
1971  /* Initialize nd_box. */
1972  nd_box_from_gbox(box, &nd_box);
1973 
1974  /*
1975  * To return 2D stats on an ND sample, we need to make the
1976  * 2D box cover the full range of the other dimensions in the
1977  * histogram.
1978  */
1979  POSTGIS_DEBUGF(3, " mode: %d", mode);
1980  if ( mode == 2 )
1981  {
1982  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1983  ndims_max = 2;
1984  }
1985 
1986  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1987  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1988 
1989  /*
1990  * Search box completely misses histogram extent?
1991  * We have to intersect in all N dimensions or else we have
1992  * zero interaction under the &&& operator. It's important
1993  * to short circuit in this case, as some of the tests below
1994  * will return junk results when run on non-intersecting inputs.
1995  */
1996  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1997  {
1998  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1999  return 0.0;
2000  }
2001 
2002  /* Search box completely contains histogram extent! */
2003  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
2004  {
2005  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
2006  return 1.0;
2007  }
2008 
2009  /* Calculate the overlap of the box on the histogram */
2010  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
2011  {
2012  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
2013  return FALLBACK_ND_SEL;
2014  }
2015 
2016  /* Work out some measurements of the histogram */
2017  for ( d = 0; d < nd_stats->ndims; d++ )
2018  {
2019  /* Cell size in each dim */
2020  min[d] = nd_stats->extent.min[d];
2021  max[d] = nd_stats->extent.max[d];
2022  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
2023  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2024 
2025  /* Initialize the counter */
2026  at[d] = nd_ibox.min[d];
2027  }
2028 
2029  /* Move through all the overlap values and sum them */
2030  do
2031  {
2032  float cell_count, ratio;
2033  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2034 
2035  /* We have to pro-rate partially overlapped cells. */
2036  for ( d = 0; d < nd_stats->ndims; d++ )
2037  {
2038  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2039  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2040  }
2041 
2042  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2043  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2044 
2045  /* Add the pro-rated count for this cell to the overall total */
2046  total_count += cell_count * ratio;
2047  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2048  }
2049  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2050 
2051  /* Scale by the number of features in our histogram to get the proportion */
2052  selectivity = total_count / nd_stats->histogram_features;
2053 
2054  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2055  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2056  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2057  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2058 
2059  /* Prevent rounding overflows */
2060  if (selectivity > 1.0) selectivity = 1.0;
2061  else if (selectivity < 0.0) selectivity = 0.0;
2062 
2063  return selectivity;
2064 }
2065 
2066 
2067 
2073 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
2074 {
2075  Oid table_oid = PG_GETARG_OID(0);
2076  text *att_text = PG_GETARG_TEXT_P(1);
2077  ND_STATS *nd_stats;
2078  char *str;
2079  text *json;
2080  int mode = 2; /* default to 2D mode */
2081  bool only_parent = false; /* default to whole tree stats */
2082 
2083  /* Check if we've been asked to not use 2d mode */
2084  if ( ! PG_ARGISNULL(2) )
2085  mode = text_p_get_mode(PG_GETARG_TEXT_P(2));
2086 
2087  /* Retrieve the stats object */
2088  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, only_parent);
2089  if ( ! nd_stats )
2090  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2091 
2092  /* Convert to JSON */
2093  str = nd_stats_to_json(nd_stats);
2094  json = cstring_to_text(str);
2095  pfree(str);
2096  pfree(nd_stats);
2097  PG_RETURN_TEXT_P(json);
2098 }
2099 
2100 
2106 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
2107 {
2108  Oid table_oid = PG_GETARG_OID(0);
2109  text *att_text = PG_GETARG_TEXT_P(1);
2110  Datum geom_datum = PG_GETARG_DATUM(2);
2111  GBOX gbox; /* search box read from gserialized datum */
2112  float8 selectivity = 0;
2113  ND_STATS *nd_stats;
2114  int mode = 2; /* 2D mode by default */
2115 
2116  /* Check if we've been asked to not use 2d mode */
2117  if ( ! PG_ARGISNULL(3) )
2118  mode = text_p_get_mode(PG_GETARG_TEXT_P(3));
2119 
2120  /* Retrieve the stats object */
2121  nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, false);
2122 
2123  if ( ! nd_stats )
2124  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
2125 
2126  /* Calculate the gbox */
2127  if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
2128  elog(ERROR, "unable to calculate bounding box from geometry");
2129 
2130  POSTGIS_DEBUGF(3, " %s", gbox_to_string(&gbox));
2131 
2132  /* Do the estimation */
2133  selectivity = estimate_selectivity(&gbox, nd_stats, mode);
2134 
2135  pfree(nd_stats);
2136  PG_RETURN_FLOAT8(selectivity);
2137 }
2138 
2139 
2145 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
2146 {
2147  Oid table_oid1 = PG_GETARG_OID(0);
2148  text *att_text1 = PG_GETARG_TEXT_P(1);
2149  Oid table_oid2 = PG_GETARG_OID(2);
2150  text *att_text2 = PG_GETARG_TEXT_P(3);
2151  ND_STATS *nd_stats1, *nd_stats2;
2152  float8 selectivity = 0;
2153  int mode = 2; /* 2D mode by default */
2154 
2155 
2156  /* Retrieve the stats object */
2157  nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode, false);
2158  nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode, false);
2159 
2160  if ( ! nd_stats1 )
2161  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text_to_cstring(att_text1));
2162 
2163  if ( ! nd_stats2 )
2164  elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid2), text_to_cstring(att_text2));
2165 
2166  /* Check if we've been asked to not use 2d mode */
2167  if ( ! PG_ARGISNULL(4) )
2168  {
2169  text *modetxt = PG_GETARG_TEXT_P(4);
2170  char *modestr = text_to_cstring(modetxt);
2171  if ( modestr[0] == 'N' )
2172  mode = 0;
2173  }
2174 
2175  /* Do the estimation */
2176  selectivity = estimate_join_selectivity(nd_stats1, nd_stats2);
2177 
2178  pfree(nd_stats1);
2179  pfree(nd_stats2);
2180  PG_RETURN_FLOAT8(selectivity);
2181 }
2182 
2188 Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
2189 {
2190  PG_RETURN_DATUM(DirectFunctionCall5(
2192  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2193  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2194  Int32GetDatum(2) /* 2-D mode */
2195  ));
2196 }
2197 
2203 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
2204 {
2205  PG_RETURN_DATUM(DirectFunctionCall5(
2207  PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2208  PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2209  Int32GetDatum(0) /* N-D mode */
2210  ));
2211 }
2212 
2213 
2228 float8
2229 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
2230 {
2231  VariableStatData vardata;
2232  Node *other = NULL;
2233  bool varonleft;
2234  ND_STATS *nd_stats = NULL;
2235 
2236  GBOX search_box;
2237  float8 selectivity = 0;
2238  Const *otherConst;
2239 
2240  POSTGIS_DEBUGF(2, "%s: entered function", __func__);
2241 
2242  if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft))
2243  {
2244  POSTGIS_DEBUGF(2, "%s: could not find vardata", __func__);
2245  return DEFAULT_ND_SEL;
2246  }
2247 
2248  if (!IsA(other, Const))
2249  {
2250  ReleaseVariableStats(vardata);
2251  POSTGIS_DEBUGF(2, "%s: no constant argument, returning default selectivity %g", __func__, DEFAULT_ND_SEL);
2252  return DEFAULT_ND_SEL;
2253  }
2254 
2255  otherConst = (Const*)other;
2256  if ((!otherConst) || otherConst->constisnull)
2257  {
2258  ReleaseVariableStats(vardata);
2259  POSTGIS_DEBUGF(2, "%s: constant argument is NULL", __func__);
2260  return DEFAULT_ND_SEL;
2261  }
2262 
2263  if (!gserialized_datum_get_gbox_p(otherConst->constvalue, &search_box))
2264  {
2265  ReleaseVariableStats(vardata);
2266  POSTGIS_DEBUGF(2, "%s: search box is EMPTY", __func__);
2267  return 0.0;
2268  }
2269 
2270  if (!vardata.statsTuple)
2271  {
2272  POSTGIS_DEBUGF(1, "%s: no statistics available on table. Empty? Need to ANALYZE?", __func__);
2273  return DEFAULT_ND_SEL;
2274  }
2275 
2276  nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
2277  ReleaseVariableStats(vardata);
2278  selectivity = estimate_selectivity(&search_box, nd_stats, mode);
2279  pfree(nd_stats);
2280  return selectivity;
2281 }
2282 
2284 Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
2285 {
2286  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2287  // Oid operator_oid = PG_GETARG_OID(1);
2288  List *args = (List *) PG_GETARG_POINTER(2);
2289  int varRelid = PG_GETARG_INT32(3);
2290  int mode = PG_GETARG_INT32(4);
2291  float8 selectivity = gserialized_sel_internal(root, args, varRelid, mode);
2292  POSTGIS_DEBUGF(2, "%s: selectivity is %g", __func__, selectivity);
2293  PG_RETURN_FLOAT8(selectivity);
2294 }
2295 
2296 
2297 
2304 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
2305 {
2306  char *nsp = NULL;
2307  char *tbl = NULL;
2308  text *col = NULL;
2309  char *nsp_tbl = NULL;
2310  Oid tbl_oid, idx_oid = 0;
2311  ND_STATS *nd_stats;
2312  GBOX *gbox = NULL;
2313  bool only_parent = false;
2314  int key_type, att_num;
2315  size_t sz;
2316 
2317  /* We need to initialize the internal cache to access it later via postgis_oid() */
2318  postgis_initialize_cache(fcinfo);
2319 
2320  if ( PG_NARGS() == 4 )
2321  {
2322  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2323  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2324  col = PG_GETARG_TEXT_P(2);
2325  only_parent = PG_GETARG_BOOL(3);
2326  sz = strlen(nsp) + strlen(tbl) + 6;
2327  nsp_tbl = palloc(sz);
2328  snprintf(nsp_tbl, sz, "\"%s\".\"%s\"", nsp, tbl);
2329  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2330  pfree(nsp_tbl);
2331  }
2332  else if ( PG_NARGS() == 3 )
2333  {
2334  nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2335  tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2336  col = PG_GETARG_TEXT_P(2);
2337  sz = strlen(nsp) + strlen(tbl) + 6;
2338  nsp_tbl = palloc(sz);
2339  snprintf(nsp_tbl, sz, "\"%s\".\"%s\"", nsp, tbl);
2340  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2341  pfree(nsp_tbl);
2342  }
2343  else if ( PG_NARGS() == 2 )
2344  {
2345  tbl = text_to_cstring(PG_GETARG_TEXT_P(0));
2346  col = PG_GETARG_TEXT_P(1);
2347  sz = strlen(tbl) + 3;
2348  nsp_tbl = palloc(sz);
2349  snprintf(nsp_tbl, sz, "\"%s\"", tbl);
2350  tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2351  pfree(nsp_tbl);
2352  }
2353  else
2354  {
2355  elog(ERROR, "estimated_extent() called with wrong number of arguments");
2356  PG_RETURN_NULL();
2357  }
2358 
2359  /* Read the extent from the head of the spatial index, if there is one */
2360 
2361  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type, &att_num);
2362  if (idx_oid)
2363  {
2364  /* TODO: how about only_parent ? */
2365  gbox = spatial_index_read_extent(idx_oid, key_type, att_num);
2366  POSTGIS_DEBUGF(2, "index for \"%s.%s\" exists, reading gbox from there", tbl, text_to_cstring(col));
2367  if ( ! gbox ) PG_RETURN_NULL();
2368  }
2369  else
2370  {
2371  POSTGIS_DEBUGF(2, "index for \"%s.%s\" does not exist", tbl, text_to_cstring(col));
2372 
2373  /* Fall back to reading the stats, if no index is found */
2374 
2375  /* Estimated extent only returns 2D bounds, so use mode 2 */
2376  nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2, only_parent);
2377 
2378  /* Error out on no stats */
2379  if ( ! nd_stats ) {
2380  elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, text_to_cstring(col));
2381  PG_RETURN_NULL();
2382  }
2383 
2384  /* Construct the box */
2385  gbox = palloc(sizeof(GBOX));
2386  FLAGS_SET_GEODETIC(gbox->flags, 0);
2387  FLAGS_SET_Z(gbox->flags, 0);
2388  FLAGS_SET_M(gbox->flags, 0);
2389  gbox->xmin = nd_stats->extent.min[0];
2390  gbox->xmax = nd_stats->extent.max[0];
2391  gbox->ymin = nd_stats->extent.min[1];
2392  gbox->ymax = nd_stats->extent.max[1];
2393  pfree(nd_stats);
2394  }
2395 
2396  PG_RETURN_POINTER(gbox);
2397 }
2398 
2406 Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
2407 {
2408  if ( PG_NARGS() == 3 )
2409  {
2410  PG_RETURN_DATUM(
2411  DirectFunctionCall3(gserialized_estimated_extent,
2412  PG_GETARG_DATUM(0),
2413  PG_GETARG_DATUM(1),
2414  PG_GETARG_DATUM(2)));
2415  }
2416  else if ( PG_NARGS() == 2 )
2417  {
2418  PG_RETURN_DATUM(
2419  DirectFunctionCall2(gserialized_estimated_extent,
2420  PG_GETARG_DATUM(0),
2421  PG_GETARG_DATUM(1)));
2422  }
2423 
2424  elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
2425  PG_RETURN_NULL();
2426 }
2427 
2428 /************************************************************************/
2429 
2430 static Oid
2431 table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num)
2432 {
2433  Relation tbl_rel;
2434  ListCell *lc;
2435  List *idx_list;
2436  Oid result = InvalidOid;
2437  char *colname = text_to_cstring(col);
2438 
2439  /* Lookup our spatial index key types */
2440  Oid b2d_oid = postgis_oid(BOX2DFOID);
2441  Oid gdx_oid = postgis_oid(BOX3DOID);
2442 
2443  if (!(b2d_oid && gdx_oid))
2444  return InvalidOid;
2445 
2446  tbl_rel = RelationIdGetRelation(tbl_oid);
2447  idx_list = RelationGetIndexList(tbl_rel);
2448  RelationClose(tbl_rel);
2449 
2450  /* For each index associated with this table... */
2451  foreach(lc, idx_list)
2452  {
2453  Form_pg_class idx_form;
2454  HeapTuple idx_tup;
2455  int idx_relam;
2456  Oid idx_oid = lfirst_oid(lc);
2457 
2458  idx_tup = SearchSysCache1(RELOID, ObjectIdGetDatum(idx_oid));
2459  if (!HeapTupleIsValid(idx_tup))
2460  elog(ERROR, "%s: unable to lookup index %u in syscache", __func__, idx_oid);
2461  idx_form = (Form_pg_class) GETSTRUCT(idx_tup);
2462  idx_relam = idx_form->relam;
2463  ReleaseSysCache(idx_tup);
2464 
2465  /* Does the index use a GIST access method? */
2466  if (idx_relam == GIST_AM_OID)
2467  {
2468  Form_pg_attribute att;
2469  Oid atttypid;
2470  int attnum;
2471  /* Is the index on the column name we are looking for? */
2472  HeapTuple att_tup = SearchSysCache2(ATTNAME,
2473  ObjectIdGetDatum(idx_oid),
2474  PointerGetDatum(colname));
2475  if (!HeapTupleIsValid(att_tup))
2476  continue;
2477 
2478  att = (Form_pg_attribute) GETSTRUCT(att_tup);
2479  atttypid = att->atttypid;
2480  attnum = att->attnum;
2481  ReleaseSysCache(att_tup);
2482 
2483  /* Is the column actually spatial? */
2484  if (b2d_oid == atttypid || gdx_oid == atttypid)
2485  {
2486  /* Save result, clean up, and break out */
2487  result = idx_oid;
2488  if (att_num)
2489  *att_num = attnum;
2490  if (key_type)
2491  *key_type = (atttypid == b2d_oid ? STATISTIC_SLOT_2D : STATISTIC_SLOT_ND);
2492  break;
2493  }
2494  }
2495  }
2496  return result;
2497 }
2498 
2499 static GBOX *
2500 spatial_index_read_extent(Oid idx_oid, int key_type, int att_num)
2501 {
2502  BOX2DF *bounds_2df = NULL;
2503  GIDX *bounds_gidx = NULL;
2504  GBOX *gbox = NULL;
2505  Relation idx_rel;
2506  Buffer buffer;
2507  Page page;
2508  OffsetNumber offset;
2509  unsigned long offset_max;
2510 
2511  if (!idx_oid)
2512  return NULL;
2513 
2514  idx_rel = index_open(idx_oid, AccessShareLock);
2515  buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2516  page = (Page) BufferGetPage(buffer);
2517  offset = FirstOffsetNumber;
2518  offset_max = PageGetMaxOffsetNumber(page);
2519  while (offset <= offset_max)
2520  {
2521  ItemId iid = PageGetItemId(page, offset);
2522  IndexTuple ituple;
2523  if (!iid)
2524  {
2525  ReleaseBuffer(buffer);
2526  index_close(idx_rel, AccessShareLock);
2527  return NULL;
2528  }
2529  ituple = (IndexTuple) PageGetItem(page, iid);
2530  if (!GistTupleIsInvalid(ituple))
2531  {
2532  bool isnull;
2533  Datum idx_attr = index_getattr(ituple, att_num, idx_rel->rd_att, &isnull);
2534  if (!isnull)
2535  {
2536  if (key_type == STATISTIC_SLOT_2D)
2537  {
2538  BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2539  if (bounds_2df)
2540  box2df_merge(bounds_2df, b);
2541  else
2542  bounds_2df = box2df_copy(b);
2543  }
2544  else
2545  {
2546  GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2547  if (bounds_gidx)
2548  gidx_merge(&bounds_gidx, b);
2549  else
2550  bounds_gidx = gidx_copy(b);
2551  }
2552  }
2553  }
2554  offset++;
2555  }
2556 
2557  ReleaseBuffer(buffer);
2558  index_close(idx_rel, AccessShareLock);
2559 
2560  if (key_type == STATISTIC_SLOT_2D && bounds_2df)
2561  {
2562  if (box2df_is_empty(bounds_2df))
2563  return NULL;
2564  gbox = gbox_new(0);
2565  box2df_to_gbox_p(bounds_2df, gbox);
2566  }
2567  else if (key_type == STATISTIC_SLOT_ND && bounds_gidx)
2568  {
2569  if (gidx_is_unknown(bounds_gidx))
2570  return NULL;
2571  gbox = gbox_new(0);
2572  gbox_from_gidx(bounds_gidx, gbox, 0);
2573  }
2574  else
2575  return NULL;
2576 
2577  return gbox;
2578 }
2579 
2580 /*
2581 CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
2582  RETURNS box2d
2583  AS '$libdir/postgis-2.5','_postgis_gserialized_index_extent'
2584  LANGUAGE 'c' STABLE STRICT;
2585 */
2586 
2588 Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
2589 {
2590  GBOX *gbox = NULL;
2591  int key_type;
2592  int att_num;
2593  Oid tbl_oid = PG_GETARG_DATUM(0);
2594  text *col = PG_GETARG_TEXT_P(1);
2595  Oid idx_oid;
2596 
2597  if(!tbl_oid)
2598  PG_RETURN_NULL();
2599 
2600  /* We need to initialize the internal cache to access it later via postgis_oid() */
2601  postgis_initialize_cache(fcinfo);
2602 
2603  idx_oid = table_get_spatial_index(tbl_oid, col, &key_type, &att_num);
2604  if (!idx_oid)
2605  PG_RETURN_NULL();
2606 
2607  gbox = spatial_index_read_extent(idx_oid, key_type, att_num);
2608  if (!gbox)
2609  PG_RETURN_NULL();
2610  else
2611  PG_RETURN_POINTER(gbox);
2612 }
2613 
GBOX * gbox_new(lwflags_t flags)
Create a new gbox with the dimensionality indicated by the flags.
Definition: gbox.c:32
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition: gbox.c:197
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
Definition: gbox.c:392
int gserialized_get_gbox_p(const GSERIALIZED *g, GBOX *gbox)
Read the box from the GSERIALIZED or calculate it if necessary.
Definition: gserialized.c:65
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static int range_quintile(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
#define NUM_BINS
#define STATISTIC_SLOT_ND
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
#define ND_DIMS
The maximum number of dimensions our code can handle.
#define STATISTIC_KIND_2D
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Create a printable view of the ND_STATS histogram.
#define DEFAULT_ND_JOINSEL
#define STATISTIC_KIND_ND
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
#define SDFACTOR
#define FALLBACK_ND_JOINSEL
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type, int att_num)
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
This function should return an estimation of the number of rows returned by a query involving an over...
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num)
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array.
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator?
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension,...
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
bool box2df_is_empty(const BOX2DF *a)
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
BOX2DF * box2df_copy(BOX2DF *b)
bool gidx_is_unknown(const GIDX *a)
GIDX * gidx_copy(GIDX *b)
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define LW_FAILURE
Definition: liblwgeom.h:110
#define FLAGS_GET_Z(flags)
Definition: liblwgeom.h:179
#define FLAGS_GET_M(flags)
Definition: liblwgeom.h:180
#define FLAGS_SET_GEODETIC(flags, value)
Definition: liblwgeom.h:189
#define FLAGS_SET_M(flags, value)
Definition: liblwgeom.h:187
#define FLAGS_SET_Z(flags, value)
Definition: liblwgeom.h:186
#define FLAGS_GET_GEODETIC(flags)
Definition: liblwgeom.h:182
This library is the generic geometry handling section of PostGIS.
#define str(s)
args
Definition: ovdump.py:45
Datum buffer(PG_FUNCTION_ARGS)
char * text_to_cstring(const text *textptr)
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided.
Definition: stringbuffer.c:217
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
Definition: stringbuffer.c:33
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
Definition: stringbuffer.c:76
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string.
Definition: stringbuffer.c:124
static void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
Definition: stringbuffer.h:88
double ymax
Definition: liblwgeom.h:343
double zmax
Definition: liblwgeom.h:345
double xmax
Definition: liblwgeom.h:341
double zmin
Definition: liblwgeom.h:344
double mmax
Definition: liblwgeom.h:347
double ymin
Definition: liblwgeom.h:342
double xmin
Definition: liblwgeom.h:340
double mmin
Definition: liblwgeom.h:346
lwflags_t flags
Definition: liblwgeom.h:339
float4 max[ND_DIMS]
float4 min[ND_DIMS]
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
int max[ND_DIMS]
int min[ND_DIMS]
N-dimensional box index type.
float4 size[ND_DIMS]
N-dimensional statistics structure.