PostGIS 3.7.0dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches
gserialized_estimate.c
Go to the documentation of this file.
1/**********************************************************************
2 *
3 * PostGIS - Spatial Types for PostgreSQL
4 * http://postgis.net
5 *
6 * PostGIS is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * PostGIS is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18 *
19 **********************************************************************
20 *
21 * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
22 * Copyright 2025 (C) Darafei Praliaskouski <me@komzpa.net>
23 *
24 **********************************************************************/
25
26/**********************************************************************
27 THEORY OF OPERATION
28
29The ANALYZE command hooks to a callback (gserialized_analyze_nd) that
30calculates (compute_gserialized_stats_mode) two histograms of occurrences of
31features: one for the 2D domain (and the && operator) and one for the
32ND domain (and the &&& operator).
33
34Queries in PostgreSQL call into the selectivity sub-system to find out
35the relative effectiveness of different clauses in sub-setting
36relations. Queries with constant arguments call gserialized_gist_sel,
37queries with relations on both sides call gserialized_gist_joinsel.
38
39gserialized_gist_sel sums up the values in the histogram that overlap
40the constant search box.
41
42gserialized_gist_joinsel sums up the product of the overlapping
43cells in each relation's histogram.
44
45Depending on the operator and type, the mode of selectivity calculation
46will be 2D or ND.
47
48- geometry && geometry ==> 2D
49- geometry &&& geometry ==> ND
50- geography && geography ==> ND
51
52The 2D mode is put in effect by retrieving the 2D histogram from the
53statistics cache and then allowing the generic ND calculations to
54go to work.
55
56TO DO: More testing and examination of the &&& operator and mixed
57dimensionality cases. (2D geometry) &&& (3D column), etc.
58
59**********************************************************************/
60
61#include "postgres.h"
62
63#include "access/genam.h"
64#include "access/gin.h"
65#include "access/gist.h"
66#include "access/gist_private.h"
67#include "access/gistscan.h"
68#include "access/detoast.h" /* For toast_raw_datum_size */
69#include "utils/datum.h"
70#include "access/heapam.h"
71#include "catalog/index.h"
72#include "catalog/pg_am.h"
73#include "miscadmin.h"
74#include "storage/lmgr.h"
75#include "catalog/namespace.h"
76#include "catalog/indexing.h"
77
78#include "utils/regproc.h"
79#include "utils/varlena.h"
80
81#include "utils/builtins.h"
82#include "utils/datum.h"
83#include "utils/snapmgr.h"
84#include "utils/fmgroids.h"
85#include "funcapi.h"
86#include "access/heapam.h"
87#include "catalog/pg_type.h"
88#include "access/relscan.h"
89
90#include "executor/spi.h"
91#include "fmgr.h"
92#include "commands/vacuum.h"
93#include "nodes/pathnodes.h"
94
95#include "parser/parsetree.h"
96#include "utils/array.h"
97#include "utils/lsyscache.h"
98#include "utils/builtins.h"
99#include "utils/syscache.h"
100#include "utils/rel.h"
101#include "utils/selfuncs.h"
102
103#include "../postgis_config.h"
104
105#include "access/htup_details.h"
106
107#include "stringbuffer.h"
108#include "liblwgeom.h"
109#include "lwgeodetic.h"
110#include "lwgeom_pg.h" /* For debugging macros. */
111#include "gserialized_gist.h" /* For index common functions */
113
114#include <math.h>
115#include <limits.h>
116#if HAVE_IEEEFP_H
117#include <ieeefp.h>
118#endif
119#include <float.h>
120#include <string.h>
121#include <stdio.h>
122#include <ctype.h>
123
124
125/************************************************************************/
126
127
128/* Prototypes */
129Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS);
130Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS);
131Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS);
132Datum gserialized_gist_sel(PG_FUNCTION_ARGS);
133Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS);
134Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
135Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
136Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
137Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS);
138Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
139Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
140Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
141
142/* Local prototypes */
143static Oid table_get_spatial_index(Oid tbl_oid, int16 attnum, int *key_type, int16 *idx_attnum);
144static GBOX *spatial_index_read_extent(Oid idx_oid, int idx_att_num, int key_type);
145
146/* Other prototypes */
147float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
148float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode);
149
150/* Old Prototype */
151Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
152
153/*
154 * Assign a number to the n-dimensional statistics kind
155 *
156 * tgl suggested:
157 *
158 * 1-100: reserved for assignment by the core Postgres project
159 * 100-199: reserved for assignment by PostGIS
160 * 200-9999: reserved for other globally-known stats kinds
161 * 10000-32767: reserved for private site-local use
162 */
163#define STATISTIC_KIND_ND 102
164#define STATISTIC_KIND_2D 103
165
166/*
167 * Postgres does not pin its slots and uses them as they come.
168 * We need to preserve its Correlation for brin to work
169 * 0 may be MCV
170 * 1 may be Histogram
171 * 2 may be Correlation
172 * We take 3 and 4.
173 */
174#define STATISTIC_SLOT_ND 3
175#define STATISTIC_SLOT_2D 4
176
177/*
178* The SD factor restricts the side of the statistics histogram
179* based on the standard deviation of the extent of the data.
180* SDFACTOR is the number of standard deviations from the mean
181* the histogram will extend.
182*/
183#define SDFACTOR 3.25
184
191#define MIN_DIMENSION_WIDTH 0.000000001
192
197#define MAX_DIMENSION_WIDTH 1.0E+20
198
202#define DEFAULT_ND_SEL 0.0001
203#define DEFAULT_ND_JOINSEL 0.001
204
208#define FALLBACK_ND_SEL 0.2
209#define FALLBACK_ND_JOINSEL 0.3
210
211typedef struct {
212 /* Saved state from std_typanalyze() */
213 AnalyzeAttrComputeStatsFunc std_compute_stats;
216
223static int
224gbox_ndims(const GBOX* gbox)
225{
226 int dims = 2;
227 if ( FLAGS_GET_GEODETIC(gbox->flags) )
228 return 3;
229 if ( FLAGS_GET_Z(gbox->flags) )
230 dims++;
231 if ( FLAGS_GET_M(gbox->flags) )
232 dims++;
233 return dims;
234}
235
241static int
242text_p_get_mode(const text *txt)
243{
244 int mode = 2;
245 char *modestr;
246 if (VARSIZE_ANY_EXHDR(txt) <= 0)
247 return mode;
248 modestr = (char *)VARDATA(txt);
249 if (modestr[0] == 'N')
250 mode = 0;
251 return mode;
252}
253
/**
 * qsort()-style comparator for int values.
 *
 * @return -1, 0 or 1 when *a is respectively less than, equal to or
 *         greater than *b
 */
static int
cmp_int(const void *a, const void *b)
{
	const int lhs = *(const int *)a;
	const int rhs = *(const int *)b;

	/* Each comparison is 0 or 1, so the difference is exactly -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}
270
275// static int
276// range_quintile(int *vals, int nvals)
277// {
278// qsort(vals, nvals, sizeof(int), cmp_int);
279// return vals[4*nvals/5] - vals[nvals/5];
280// }
281
285static int
286range_full(int *vals, int nvals)
287{
288 qsort(vals, nvals, sizeof(int), cmp_int);
289 return vals[nvals-1] - vals[0];
290}
291
/**
 * Sum an array of doubles.
 *
 * @param vals  values to add up
 * @param nvals number of entries in vals; zero or less yields 0.0
 * @return the sum, accumulated in double precision
 */
static double
total_double(const double *vals, int nvals)
{
	int i;
	/* Accumulate in double: a float accumulator silently drops low-order bits */
	double total = 0.0;

	for (i = 0; i < nvals; i++)
		total += vals[i];

	return total;
}
306
307#if POSTGIS_DEBUG_LEVEL >= 3
308
/**
 * Sum an array of ints (debug-only helper).
 *
 * @return the sum of the first nvals entries; 0 when nvals is zero or less
 */
static int
total_int(const int *vals, int nvals)
{
	const int *cursor = vals;
	int remaining = nvals;
	int sum = 0;

	while (remaining-- > 0)
		sum += *cursor++;

	return sum;
}
323
/**
 * Arithmetic mean of an int array (debug-only helper).
 * The integer total is computed first and then divided in double precision.
 */
static double
avg(const int *vals, int nvals)
{
	const double sum = (double)total_int(vals, nvals);
	const double count = (double)nvals;

	return sum / count;
}
333
/**
 * Population standard deviation of an int array (debug-only helper):
 * the square root of the mean squared distance from the average.
 */
static double
stddev(const int *vals, int nvals)
{
	const double mean = avg(vals, nvals);
	double squared_sum = 0;
	int idx = 0;

	while (idx < nvals)
	{
		const double delta = mean - (double)(vals[idx]);
		squared_sum += delta * delta;
		idx++;
	}

	return sqrt(squared_sum / nvals);
}
352#endif /* POSTGIS_DEBUG_LEVEL >= 3 */
353
357static char*
358nd_box_to_json(const ND_BOX *nd_box, int ndims)
359{
360 char *rv;
361 int i;
363
364 stringbuffer_append(sb, "{\"min\":[");
365 for ( i = 0; i < ndims; i++ )
366 {
367 if ( i ) stringbuffer_append(sb, ",");
368 stringbuffer_aprintf(sb, "%.6g", nd_box->min[i]);
369 }
370 stringbuffer_append(sb, "],\"max\":[");
371 for ( i = 0; i < ndims; i++ )
372 {
373 if ( i ) stringbuffer_append(sb, ",");
374 stringbuffer_aprintf(sb, "%.6g", nd_box->max[i]);
375 }
376 stringbuffer_append(sb, "]}");
377
380 return rv;
381}
382
383
388static char*
390{
391 char *json_extent, *str;
392 int d;
394 int ndims = (int)roundf(nd_stats->ndims);
395
396 stringbuffer_append(sb, "{");
397 stringbuffer_aprintf(sb, "\"ndims\":%d,", ndims);
398
399 /* Size */
400 stringbuffer_append(sb, "\"size\":[");
401 for ( d = 0; d < ndims; d++ )
402 {
403 if ( d ) stringbuffer_append(sb, ",");
404 stringbuffer_aprintf(sb, "%d", (int)roundf(nd_stats->size[d]));
405 }
406 stringbuffer_append(sb, "],");
407
408 /* Extent */
409 json_extent = nd_box_to_json(&(nd_stats->extent), ndims);
410 stringbuffer_aprintf(sb, "\"extent\":%s,", json_extent);
411 pfree(json_extent);
412
413 stringbuffer_aprintf(sb, "\"table_features\":%d,", (int)roundf(nd_stats->table_features));
414 stringbuffer_aprintf(sb, "\"sample_features\":%d,", (int)roundf(nd_stats->sample_features));
415 stringbuffer_aprintf(sb, "\"not_null_features\":%d,", (int)roundf(nd_stats->not_null_features));
416 stringbuffer_aprintf(sb, "\"histogram_features\":%d,", (int)roundf(nd_stats->histogram_features));
417 stringbuffer_aprintf(sb, "\"histogram_cells\":%d,", (int)roundf(nd_stats->histogram_cells));
418 stringbuffer_aprintf(sb, "\"cells_covered\":%d", (int)roundf(nd_stats->cells_covered));
419 stringbuffer_append(sb, "}");
420
423 return str;
424}
425
426
432static char*
434{
435 char *rv;
436 int j, k;
437 int sizex = (int)roundf(stats->size[0]);
438 int sizey = (int)roundf(stats->size[1]);
440
441 for ( k = 0; k < sizey; k++ )
442 {
443 for ( j = 0; j < sizex; j++ )
444 {
445 stringbuffer_aprintf(sb, "%3d ", (int)roundf(stats->value[j + k*sizex]));
446 }
447 stringbuffer_append(sb, "\n");
448 }
449
452 return rv;
453}
454
455
457static int
458nd_box_merge(const ND_BOX *source, ND_BOX *target)
459{
460 int d;
461 for ( d = 0; d < ND_DIMS; d++ )
462 {
463 target->min[d] = Min(target->min[d], source->min[d]);
464 target->max[d] = Max(target->max[d], source->max[d]);
465 }
466 return true;
467}
468
470static int
472{
473 memset(a, 0, sizeof(ND_BOX));
474 return true;
475}
476
482static int
484{
485 int d;
486 for ( d = 0; d < ND_DIMS; d++ )
487 {
488 a->min[d] = FLT_MAX;
489 a->max[d] = -1 * FLT_MAX;
490 }
491 return true;
492}
493
495static void
496nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
497{
498 volatile int d = 0;
499 POSTGIS_DEBUGF(3, " %s", gbox_to_string(gbox));
500
501 nd_box_init(nd_box);
502 nd_box->min[d] = gbox->xmin;
503 nd_box->max[d] = gbox->xmax;
504 d++;
505 nd_box->min[d] = gbox->ymin;
506 nd_box->max[d] = gbox->ymax;
507 d++;
508 if ( FLAGS_GET_GEODETIC(gbox->flags) )
509 {
510 nd_box->min[d] = gbox->zmin;
511 nd_box->max[d] = gbox->zmax;
512 return;
513 }
514 if ( FLAGS_GET_Z(gbox->flags) )
515 {
516 nd_box->min[d] = gbox->zmin;
517 nd_box->max[d] = gbox->zmax;
518 d++;
519 }
520 if ( FLAGS_GET_M(gbox->flags) )
521 {
522 nd_box->min[d] = gbox->mmin;
523 nd_box->max[d] = gbox->mmax;
524 d++;
525 }
526 return;
527}
528
532static int
533nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
534{
535 int d;
536 for ( d = 0; d < ndims; d++ )
537 {
538 if ( (a->min[d] > b->max[d]) || (a->max[d] < b->min[d]) )
539 return false;
540 }
541 return true;
542}
543
547static int
548nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
549{
550 int d;
551 for ( d = 0; d < ndims; d++ )
552 {
553 if ( ! ((a->min[d] < b->min[d]) && (a->max[d] > b->max[d])) )
554 return false;
555 }
556 return true;
557}
558
563static int
564nd_box_expand(ND_BOX *nd_box, double expansion_factor)
565{
566 int d;
567 double size;
568 for ( d = 0; d < ND_DIMS; d++ )
569 {
570 size = nd_box->max[d] - nd_box->min[d];
571 /* Avoid expanding boxes that are either too wide or too narrow*/
572 if (size < MIN_DIMENSION_WIDTH || size > MAX_DIMENSION_WIDTH)
573 continue;
574 nd_box->min[d] -= size * expansion_factor / 2;
575 nd_box->max[d] += size * expansion_factor / 2;
576 }
577 return true;
578}
579
584static inline int
585nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
586{
587 int d;
588
589 POSTGIS_DEBUGF(4, " nd_box: %s", nd_box_to_json(nd_box, nd_stats->ndims));
590
591 /* Initialize ibox */
592 memset(nd_ibox, 0, sizeof(ND_IBOX));
593
594 /* In each dimension... */
595 for ( d = 0; d < nd_stats->ndims; d++ )
596 {
597 double smin = nd_stats->extent.min[d];
598 double smax = nd_stats->extent.max[d];
599 double width = smax - smin;
600
601 if (width < MIN_DIMENSION_WIDTH)
602 {
603 nd_ibox->min[d] = nd_ibox->max[d] = nd_stats->extent.min[d];
604 }
605 else
606 {
607 int size = (int)roundf(nd_stats->size[d]);
608
609 /* ... find cells the box overlaps with in this dimension */
610 nd_ibox->min[d] = floor(size * (nd_box->min[d] - smin) / width);
611 nd_ibox->max[d] = floor(size * (nd_box->max[d] - smin) / width);
612
613 POSTGIS_DEBUGF(5, " stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
614 POSTGIS_DEBUGF(5, " overlap: dim %d: (%d, %d)", d, nd_ibox->min[d], nd_ibox->max[d]);
615
616 /* Push any out-of range values into range */
617 nd_ibox->min[d] = Max(nd_ibox->min[d], 0);
618 nd_ibox->max[d] = Min(nd_ibox->max[d], size - 1);
619 }
620 }
621 return true;
622}
623
624/* How many bins shall we use in figuring out the distribution? */
625#define MAX_NUM_BINS 50
626#define BIN_MIN_SIZE 10
627
643static int
644nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
645{
646 int d, i, k, range;
647 int *counts;
648 double smin, smax; /* Spatial min, spatial max */
649 double swidth; /* Spatial width of dimension */
650#if POSTGIS_DEBUG_LEVEL >= 3
651 double average, sdev, sdev_ratio;
652#endif
653 int bmin, bmax; /* Bin min, bin max */
654 const ND_BOX *ndb;
655
656 int num_bins = Min(Max(2, num_boxes/BIN_MIN_SIZE), MAX_NUM_BINS);
657 counts = palloc0(num_bins * sizeof(int));
658
659 /* For each dimension... */
660 for ( d = 0; d < ndims; d++ )
661 {
662 /* Initialize counts for this dimension */
663 memset(counts, 0, num_bins * sizeof(int));
664
665
666 smin = extent->min[d];
667 smax = extent->max[d];
668 swidth = smax - smin;
669
670 /* Don't try and calculate distribution of overly narrow */
671 /* or overly wide dimensions. Here we're being pretty geographical, */
672 /* expecting "normal" planar or geographic coordinates. */
673 /* Otherwise we have to "handle" +/- Inf bounded features and */
674 /* the assumptions needed for that are as bad as this hack. */
675 if ( swidth < MIN_DIMENSION_WIDTH || swidth > MAX_DIMENSION_WIDTH )
676 {
677 distribution[d] = 0;
678 continue;
679 }
680
681 /* Sum up the overlaps of each feature with the dimensional bins */
682 for ( i = 0; i < num_boxes; i++ )
683 {
684 double minoffset, maxoffset;
685
686 /* Skip null entries */
687 ndb = nd_boxes[i];
688 if ( ! ndb ) continue;
689
690 /* Where does box fall relative to the working range */
691 minoffset = ndb->min[d] - smin;
692 maxoffset = ndb->max[d] - smin;
693
694 /* Skip boxes that our outside our working range */
695 if ( minoffset < 0 || minoffset > swidth ||
696 maxoffset < 0 || maxoffset > swidth )
697 {
698 continue;
699 }
700
701 /* What bins does this range correspond to? */
702 bmin = floor(num_bins * minoffset / swidth);
703 bmax = floor(num_bins * maxoffset / swidth);
704
705 /* Should only happen when maxoffset==swidth */
706 if (bmax >= num_bins)
707 bmax = num_bins-1;
708
709 POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
710
711 /* Increment the counts in all the bins this feature overlaps */
712 for ( k = bmin; k <= bmax; k++ )
713 {
714 counts[k] += 1;
715 }
716
717 }
718
719 /* How dispersed is the distribution of features across bins? */
720 // range = range_quintile(counts, num_bins);
721 range = range_full(counts, num_bins);
722
723#if POSTGIS_DEBUG_LEVEL >= 3
724 average = avg(counts, num_bins);
725 sdev = stddev(counts, num_bins);
726 sdev_ratio = sdev/average;
727
728 POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
729 POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
730 POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
731 POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
732#endif
733
734 distribution[d] = range;
735 }
736
737 pfree(counts);
738
739 return true;
740}
741
747static inline int
748nd_increment(ND_IBOX *ibox, int ndims, int *counter)
749{
750 int d = 0;
751
752 while (d < ndims)
753 {
754 if (counter[d] < ibox->max[d])
755 {
756 counter[d] += 1;
757 break;
758 }
759 counter[d] = ibox->min[d];
760 d++;
761 }
762 /* That's it, cannot increment any more! */
763 if (d == ndims)
764 return false;
765
766 /* Increment complete! */
767 return true;
768}
769
770static ND_STATS*
771pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
772{
773 int stats_kind = STATISTIC_KIND_ND;
774 int rv;
775 ND_STATS *nd_stats;
776
777 /* If we're in 2D mode, set the kind appropriately */
778 if ( mode == 2 ) stats_kind = STATISTIC_KIND_2D;
779
780 /* Then read the geom status histogram from that */
781 {
782 AttStatsSlot sslot;
783 rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
784 ATTSTATSSLOT_NUMBERS);
785 if ( ! rv ) {
786 POSTGIS_DEBUGF(2, "no slot of kind %d in stats tuple", stats_kind);
787 return NULL;
788 }
789
790 /* Clone the stats here so we can release the attstatsslot immediately */
791 nd_stats = palloc(sizeof(float4) * sslot.nnumbers);
792 memcpy(nd_stats, sslot.numbers, sizeof(float4) * sslot.nnumbers);
793
794 free_attstatsslot(&sslot);
795 }
796 return nd_stats;
797}
798
803static ND_STATS*
804pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
805{
806 HeapTuple stats_tuple = NULL;
807 ND_STATS *nd_stats;
808
809 /* First pull the stats tuple for the whole tree */
810 if ( ! only_parent )
811 {
812 POSTGIS_DEBUGF(2, "searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
813 stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(true));
814 if ( stats_tuple )
815 POSTGIS_DEBUGF(2, "found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
816 }
817 /* Fall-back to main table stats only, if not found for whole tree or explicitly ignored */
818 if ( only_parent || ! stats_tuple )
819 {
820 POSTGIS_DEBUGF(2, "searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
821 stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(false));
822 if ( stats_tuple )
823 POSTGIS_DEBUGF(2, "found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
824 }
825 if ( ! stats_tuple )
826 {
827 POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL");
828 return NULL;
829 }
830
831 nd_stats = pg_nd_stats_from_tuple(stats_tuple, mode);
832 ReleaseSysCache(stats_tuple);
833 if ( ! nd_stats )
834 {
835 POSTGIS_DEBUGF(2,
836 "histogram for attribute %d of table \"%s\" does not exist?",
837 att_num, get_rel_name(table_oid));
838 }
839
840 return nd_stats;
841}
842
851static ND_STATS*
852pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
853{
854 const char *att_name = text_to_cstring(att_text);
855 AttrNumber att_num;
856
857 /* We know the name? Look up the num */
858 if ( att_text )
859 {
860 /* Get the attribute number */
861 att_num = get_attnum(table_oid, att_name);
862 if ( ! att_num ) {
863 elog(ERROR, "attribute \"%s\" does not exist", att_name);
864 return NULL;
865 }
866 }
867 else
868 {
869 elog(ERROR, "attribute name is null");
870 return NULL;
871 }
872
873 return pg_get_nd_stats(table_oid, att_num, mode, only_parent);
874}
875
889static float8
891{
892 int ncells1, ncells2;
893 int ndims1, ndims2, ndims;
894 double ntuples_max;
895 double ntuples_not_null1, ntuples_not_null2;
896
897 ND_BOX extent1, extent2;
898 ND_IBOX ibox1, ibox2;
899 int at1[ND_DIMS];
900 int at2[ND_DIMS];
901 double min1[ND_DIMS];
902 double width1[ND_DIMS];
903 double cellsize1[ND_DIMS];
904 int size2[ND_DIMS];
905 double min2[ND_DIMS];
906 double width2[ND_DIMS];
907 double cellsize2[ND_DIMS];
908 int size1[ND_DIMS];
909 int d;
910 double val = 0;
911 float8 selectivity;
912
913 /* Drop out on null inputs */
914 if ( ! ( s1 && s2 ) )
915 {
916 elog(NOTICE, " estimate_join_selectivity called with null inputs");
917 return FALLBACK_ND_SEL;
918 }
919
920 /* We need to know how many cells each side has... */
921 ncells1 = (int)roundf(s1->histogram_cells);
922 ncells2 = (int)roundf(s2->histogram_cells);
923
924 /* ...so that we can drive the summation loop with the smaller histogram. */
925 if ( ncells1 > ncells2 )
926 {
927 const ND_STATS *stats_tmp = s1;
928 s1 = s2;
929 s2 = stats_tmp;
930 }
931
932 POSTGIS_DEBUGF(3, "s1: %s", nd_stats_to_json(s1));
933 POSTGIS_DEBUGF(3, "s2: %s", nd_stats_to_json(s2));
934
935 /* Re-read that info after the swap */
936 ncells1 = (int)roundf(s1->histogram_cells);
937 ncells2 = (int)roundf(s2->histogram_cells);
938
939 /* Q: What's the largest possible join size these relations can create? */
940 /* A: The product of the # of non-null rows in each relation. */
941 ntuples_not_null1 = s1->table_features * ((double)s1->not_null_features / s1->sample_features);
942 ntuples_not_null2 = s2->table_features * ((double)s2->not_null_features / s2->sample_features);
943 ntuples_max = ntuples_not_null1 * ntuples_not_null2;
944
945 /* Get the ndims as ints */
946 ndims1 = (int)roundf(s1->ndims);
947 ndims2 = (int)roundf(s2->ndims);
948 ndims = Max(ndims1, ndims2);
949
950 /* Get the extents */
951 extent1 = s1->extent;
952 extent2 = s2->extent;
953
954 /* If relation stats do not intersect, join is very very selective. */
955 if ( ! nd_box_intersects(&extent1, &extent2, ndims) )
956 {
957 POSTGIS_DEBUG(3, "relation stats do not intersect, returning 0");
958 PG_RETURN_FLOAT8(0.0);
959 }
960
961 /*
962 * First find the index range of the part of the smaller
963 * histogram that overlaps the larger one.
964 */
965 if ( ! nd_box_overlap(s1, &extent2, &ibox1) )
966 {
967 POSTGIS_DEBUG(3, "could not calculate overlap of relations");
968 PG_RETURN_FLOAT8(FALLBACK_ND_JOINSEL);
969 }
970
971 /* Initialize counters / constants on s1 */
972 for ( d = 0; d < ndims1; d++ )
973 {
974 at1[d] = ibox1.min[d];
975 min1[d] = s1->extent.min[d];
976 width1[d] = s1->extent.max[d] - s1->extent.min[d];
977 size1[d] = (int)roundf(s1->size[d]);
978 cellsize1[d] = width1[d] / size1[d];
979 }
980
981 /* Initialize counters / constants on s2 */
982 for ( d = 0; d < ndims2; d++ )
983 {
984 min2[d] = s2->extent.min[d];
985 width2[d] = s2->extent.max[d] - s2->extent.min[d];
986 size2[d] = (int)roundf(s2->size[d]);
987 cellsize2[d] = width2[d] / size2[d];
988 }
989
990 /* For each affected cell of s1... */
991 do
992 {
993 double val1;
994 /* Construct the bounds of this cell */
995 ND_BOX nd_cell1;
996 nd_box_init(&nd_cell1);
997 for ( d = 0; d < ndims1; d++ )
998 {
999 nd_cell1.min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1000 nd_cell1.max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1001 }
1002
1003 /* Find the cells of s2 that cell1 overlaps.. */
1004 nd_box_overlap(s2, &nd_cell1, &ibox2);
1005
1006 /* Initialize counter */
1007 for ( d = 0; d < ndims2; d++ )
1008 {
1009 at2[d] = ibox2.min[d];
1010 }
1011
1012 POSTGIS_DEBUGF(3, "at1 %d,%d %s", at1[0], at1[1], nd_box_to_json(&nd_cell1, ndims1));
1013
1014 /* Get the value at this cell */
1015 val1 = s1->value[nd_stats_value_index(s1, at1)];
1016
1017 /* For each overlapped cell of s2... */
1018 do
1019 {
1020 double ratio2;
1021 double val2;
1022
1023 /* Construct the bounds of this cell */
1024 ND_BOX nd_cell2;
1025 nd_box_init(&nd_cell2);
1026 for ( d = 0; d < ndims2; d++ )
1027 {
1028 nd_cell2.min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1029 nd_cell2.max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1030 }
1031
1032 POSTGIS_DEBUGF(3, " at2 %d,%d %s", at2[0], at2[1], nd_box_to_json(&nd_cell2, ndims2));
1033
1034 /* Calculate overlap ratio of the cells */
1035 ratio2 = nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1036
1037 /* Multiply the cell counts, scaled by overlap ratio */
1038 val2 = s2->value[nd_stats_value_index(s2, at2)];
1039 POSTGIS_DEBUGF(3, " val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1040 val += val1 * (val2 * ratio2);
1041 }
1042 while ( nd_increment(&ibox2, ndims2, at2) );
1043
1044 }
1045 while( nd_increment(&ibox1, ndims1, at1) );
1046
1047 POSTGIS_DEBUGF(3, "val of histogram = %g", val);
1048
1049 /*
1050 * In order to compare our total cell count "val" to the
1051 * ntuples_max, we need to scale val up to reflect a full
1052 * table estimate. So, multiply by ratio of table size to
1053 * sample size.
1054 */
1055 val *= (s1->table_features / s1->sample_features);
1056 val *= (s2->table_features / s2->sample_features);
1057
1058 POSTGIS_DEBUGF(3, "val scaled to full table size = %g", val);
1059
1060 /*
1061 * Because the cell counts are over-determined due to
1062 * double counting of features that overlap multiple cells
1063 * (see the compute_gserialized_stats routine)
1064 * we also have to scale our cell count "val" *down*
1065 * to adjust for the double counting.
1066 */
1067// val /= (s1->cells_covered / s1->histogram_features);
1068// val /= (s2->cells_covered / s2->histogram_features);
1069
1070 /*
1071 * Finally, the selectivity is the estimated number of
1072 * rows to be returned divided by the maximum possible
1073 * number of rows that can be returned.
1074 */
1075 selectivity = val / ntuples_max;
1076
1077 /* Guard against over-estimates and crazy numbers :) */
1078 if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1079 {
1080 selectivity = DEFAULT_ND_JOINSEL;
1081 }
1082 else if ( selectivity > 1.0 )
1083 {
1084 selectivity = 1.0;
1085 }
1086
1087 return selectivity;
1088}
1089
1095Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
1096{
1097 PG_RETURN_DATUM(DirectFunctionCall5(
1099 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1100 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1101 Int32GetDatum(0) /* ND mode */
1102 ));
1103}
1104
1110Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
1111{
1112 PG_RETURN_DATUM(DirectFunctionCall5(
1114 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1115 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1116 Int32GetDatum(2) /* 2D mode */
1117 ));
1118}
1119
1120double
1121gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
1122{
1123 float8 selectivity;
1124 Oid relid1, relid2;
1125 ND_STATS *stats1, *stats2;
1126 Node *arg1 = (Node*) linitial(args);
1127 Node *arg2 = (Node*) lsecond(args);
1128 Var *var1 = (Var*) arg1;
1129 Var *var2 = (Var*) arg2;
1130
1131 POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1132
1133 /* We only do column joins right now, no functional joins */
1134 /* TODO: handle g1 && ST_Expand(g2) */
1135 if (!IsA(arg1, Var) || !IsA(arg2, Var))
1136 {
1137 POSTGIS_DEBUGF(1, "%s called with arguments that are not column references", __func__);
1138 return DEFAULT_ND_JOINSEL;
1139 }
1140
1141 /* What are the Oids of our tables/relations? */
1142 relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
1143 relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
1144
1145 /* Pull the stats from the stats system. */
1146 stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
1147 stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
1148
1149 /* If we can't get stats, we have to stop here! */
1150 if (!stats1)
1151 {
1152 POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1153 return DEFAULT_ND_JOINSEL;
1154 }
1155 else if (!stats2)
1156 {
1157 POSTGIS_DEBUGF(2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
1158 return DEFAULT_ND_JOINSEL;
1159 }
1160
1161 selectivity = estimate_join_selectivity(stats1, stats2);
1162 POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
1163 pfree(stats1);
1164 pfree(stats2);
1165 return selectivity;
1166}
1167
1177Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
1178{
1179 PlannerInfo *root = (PlannerInfo *)PG_GETARG_POINTER(0);
1180 /* Oid operator = PG_GETARG_OID(1); */
1181 List *args = (List *)PG_GETARG_POINTER(2);
1182 JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1183 int mode = PG_GETARG_INT32(4);
1184
1185 POSTGIS_DEBUGF(2, "%s: entered function", __func__);
1186
1187 /* Check length of args and punt on > 2 */
1188 if (list_length(args) != 2)
1189 {
1190 POSTGIS_DEBUGF(2, "%s: got nargs == %d", __func__, list_length(args));
1191 PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1192 }
1193
1194 /* Only respond to an inner join/unknown context join */
1195 if (jointype != JOIN_INNER)
1196 {
1197 POSTGIS_DEBUGF(1, "%s: jointype %d not supported", __func__, jointype);
1198 PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
1199 }
1200
1201 PG_RETURN_FLOAT8(gserialized_joinsel_internal(root, args, jointype, mode));
1202}
1203
1222static void
1223compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1224 int sample_rows, double total_rows, int mode)
1225{
1226 MemoryContext old_context;
1227 int d, i; /* Counters */
1228 int notnull_cnt = 0; /* # not null rows in the sample */
1229 int null_cnt = 0; /* # null rows in the sample */
1230 int histogram_features = 0; /* # rows that actually got counted in the histogram */
1231
1232 ND_STATS *nd_stats; /* Our histogram */
1233 size_t nd_stats_size; /* Size to allocate */
1234
1235 double total_width = 0; /* # of bytes used by sample */
1236 double total_cell_count = 0; /* # of cells in histogram affected by sample */
1237
1238 ND_BOX sum; /* Sum of extents of sample boxes */
1239 ND_BOX avg; /* Avg of extents of sample boxes */
1240 ND_BOX stddev; /* StdDev of extents of sample boxes */
1241
1242 const ND_BOX **sample_boxes; /* ND_BOXes for each of the sample features */
1243 ND_BOX sample_extent; /* Extent of the raw sample */
1244 int histo_size[ND_DIMS]; /* histogram nrows, ncols, etc */
1245 ND_BOX histo_extent; /* Spatial extent of the histogram */
1246 ND_BOX histo_extent_new; /* Temporary variable */
1247 int histo_cells_target; /* Number of cells we will shoot for, given the stats target */
1248 int histo_cells; /* Number of cells in the histogram */
1249 int histo_cells_new = 1; /* Temporary variable */
1250
1251 int ndims = 2; /* Dimensionality of the sample */
1252 int histo_ndims = 0; /* Dimensionality of the histogram */
1253 double sample_distribution[ND_DIMS]; /* How homogeneous is distribution of sample in each axis? */
1254 double total_distribution; /* Total of sample_distribution */
1255
1256 int stats_slot; /* What slot is this data going into? (2D vs ND) */
1257 int stats_kind; /* And this is what? (2D vs ND) */
1258
1259 /* Initialize sum and stddev */
1260 nd_box_init(&sum);
1261 nd_box_init(&stddev);
1262 nd_box_init(&avg);
1263 nd_box_init(&histo_extent);
1264 nd_box_init(&histo_extent_new);
1265
1266 /*
1267 * This is where gserialized_analyze_nd
1268 * should put its' custom parameters.
1269 */
1270 /* void *mystats = stats->extra_data; */
1271
1272 POSTGIS_DEBUG(2, "compute_gserialized_stats called");
1273 POSTGIS_DEBUGF(3, " # sample_rows: %d", sample_rows);
1274 POSTGIS_DEBUGF(3, " estimate of total_rows: %.6g", total_rows);
1275
1276 /*
1277 * We might need less space, but don't think
1278 * its worth saving...
1279 */
1280 sample_boxes = palloc(sizeof(ND_BOX*) * sample_rows);
1281
1282 /*
1283 * First scan:
1284 * o read boxes
1285 * o find dimensionality of the sample
1286 * o find extent of the sample
1287 * o count null-infinite/not-null values
1288 * o compute total_width
1289 * o compute total features's box area (for avgFeatureArea)
1290 * o sum features box coordinates (for standard deviation)
1291 */
1292 for ( i = 0; i < sample_rows; i++ )
1293 {
1294 Datum datum;
1295 GBOX gbox = {0};
1296 ND_BOX *nd_box;
1297 bool is_null;
1298
1299 datum = fetchfunc(stats, i, &is_null);
1300
1301 /* Skip all NULLs. */
1302 if ( is_null )
1303 {
1304 POSTGIS_DEBUGF(4, " skipped null geometry %d", i);
1305 null_cnt++;
1306 continue;
1307 }
1308
1309 /* Read the bounds from the gserialized. */
1310 if (LW_FAILURE == gserialized_datum_get_gbox_p(datum, &gbox))
1311 {
1312 /* Skip empties too. */
1313 POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
1314 continue;
1315 }
1316
1317 /* If we're in 2D mode, zero out the higher dimensions for "safety" */
1318 if ( mode == 2 )
1319 gbox.zmin = gbox.zmax = gbox.mmin = gbox.mmax = 0.0;
1320
1321 /* Check bounds for validity (finite and not NaN) */
1322 if ( ! gbox_is_valid(&gbox) )
1323 {
1324 POSTGIS_DEBUGF(3, " skipped infinite/nan geometry %d", i);
1325 continue;
1326 }
1327
1328 /*
1329 * In N-D mode, set the ndims to the maximum dimensionality found
1330 * in the sample. Otherwise, leave at ndims == 2.
1331 */
1332 if ( mode != 2 )
1333 ndims = Max(gbox_ndims(&gbox), ndims);
1334
1335 /* Convert gbox to n-d box */
1336 nd_box = palloc(sizeof(ND_BOX));
1337 nd_box_from_gbox(&gbox, nd_box);
1338
1339 /* Cache n-d bounding box */
1340 sample_boxes[notnull_cnt] = nd_box;
1341
1342 /* Initialize sample extent before merging first entry */
1343 if ( ! notnull_cnt )
1344 nd_box_init_bounds(&sample_extent);
1345
1346 /* Add current sample to overall sample extent */
1347 nd_box_merge(nd_box, &sample_extent);
1348
1349 /* How many bytes does this sample use? */
1350 total_width += toast_raw_datum_size(datum);
1351
1352 /* Add bounds coordinates to sums for stddev calculation */
1353 for ( d = 0; d < ndims; d++ )
1354 {
1355 sum.min[d] += nd_box->min[d];
1356 sum.max[d] += nd_box->max[d];
1357 }
1358
1359 /* Increment our "good feature" count */
1360 notnull_cnt++;
1361
1362 /* Give backend a chance of interrupting us */
1363#if POSTGIS_PGSQL_VERSION >= 180
1364 vacuum_delay_point(true);
1365#else
1366 vacuum_delay_point();
1367#endif
1368 }
1369
1370#if POSTGIS_PGSQL_VERSION >= 170
1371 POSTGIS_DEBUGF(3, " stats->attstattarget: %d", stats->attstattarget);
1372 histo_cells_target = histogram_cell_budget(total_rows, ndims, stats->attstattarget);
1373#else
1374 POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
1375 histo_cells_target = histogram_cell_budget(total_rows, ndims, stats->attr->attstattarget);
1376#endif
1377 POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
1378
1379 /* If there's no useful features, we can't work out stats */
1380 if ( ! notnull_cnt )
1381 {
1382 stats->stats_valid = false;
1383 return;
1384 }
1385
1386 POSTGIS_DEBUGF(3, " sample_extent: %s", nd_box_to_json(&sample_extent, ndims));
1387
1388 /*
1389 * Second scan:
1390 * o compute standard deviation
1391 */
1392 for ( d = 0; d < ndims; d++ )
1393 {
1394 /* Calculate average bounds values */
1395 avg.min[d] = sum.min[d] / notnull_cnt;
1396 avg.max[d] = sum.max[d] / notnull_cnt;
1397
1398 /* Calculate standard deviation for this dimension bounds */
1399 for ( i = 0; i < notnull_cnt; i++ )
1400 {
1401 const ND_BOX *ndb = sample_boxes[i];
1402 stddev.min[d] += (ndb->min[d] - avg.min[d]) * (ndb->min[d] - avg.min[d]);
1403 stddev.max[d] += (ndb->max[d] - avg.max[d]) * (ndb->max[d] - avg.max[d]);
1404 }
1405 stddev.min[d] = sqrt(stddev.min[d] / notnull_cnt);
1406 stddev.max[d] = sqrt(stddev.max[d] / notnull_cnt);
1407
1408 /* Histogram bounds for this dimension bounds is avg +/- SDFACTOR * stdev */
1409 histo_extent.min[d] = Max(avg.min[d] - SDFACTOR * stddev.min[d], sample_extent.min[d]);
1410 histo_extent.max[d] = Min(avg.max[d] + SDFACTOR * stddev.max[d], sample_extent.max[d]);
1411 }
1412
1413 /*
1414 * Third scan:
1415 * o skip hard deviants
1416 * o compute new histogram box
1417 */
1418 nd_box_init_bounds(&histo_extent_new);
1419 for ( i = 0; i < notnull_cnt; i++ )
1420 {
1421 const ND_BOX *ndb = sample_boxes[i];
1422 /* Skip any hard deviants (boxes entirely outside our histo_extent */
1423 if ( ! nd_box_intersects(&histo_extent, ndb, ndims) )
1424 {
1425 POSTGIS_DEBUGF(4, " feature %d is a hard deviant, skipped", i);
1426 sample_boxes[i] = NULL;
1427 continue;
1428 }
1429 /* Expand our new box to fit all the other features. */
1430 nd_box_merge(ndb, &histo_extent_new);
1431 }
1432 /*
1433 * Expand the box slightly (1%) to avoid edge effects
1434 * with objects that are on the boundary
1435 */
1436 nd_box_expand(&histo_extent_new, 0.01);
1437 histo_extent = histo_extent_new;
1438
1439 /*
1440 * How should we allocate our histogram cells to the
1441 * different dimensions? We can't do it by raw dimensional width,
1442 * because in x/y/z space, the z can have different units
1443 * from the x/y. Similarly for x/y/t space.
1444 * So, we instead calculate how much features overlap
1445 * each other in their dimension to figure out which
1446 * dimensions have useful selectivity characteristics (more
1447 * variability in density) and therefore would find
1448 * more cells useful (to distinguish between dense places and
1449 * homogeneous places).
1450 */
1451 nd_box_array_distribution(sample_boxes, notnull_cnt, &histo_extent, ndims,
1452 sample_distribution);
1453
1454 /*
1455 * The sample_distribution array now tells us how spread out the
1456 * data is in each dimension, so we use that data to allocate
1457 * the histogram cells we have available.
1458 * At this point, histo_cells_target is the approximate target number
1459 * of cells.
1460 */
1461
1462 /*
1463 * Some dimensions have basically a uniform distribution, we want
1464 * to allocate no cells to those dimensions, only to dimensions
1465 * that have some interesting differences in data distribution.
1466 * Here we count up the number of interesting dimensions
1467 */
1468 for ( d = 0; d < ndims; d++ )
1469 {
1470 if ( sample_distribution[d] > 0 )
1471 histo_ndims++;
1472 }
1473
1474 if ( histo_ndims == 0 )
1475 {
1476 /* Special case: all our dimensions had low variability! */
1477 /* We just divide the cells up evenly */
1478 POSTGIS_DEBUG(3, " special case: no axes have variability");
1479 histo_cells_new = 1;
1480 for ( d = 0; d < ndims; d++ )
1481 {
1482 histo_size[d] = (int)pow((double)histo_cells_target, 1/(double)ndims);
1483 if ( ! histo_size[d] )
1484 histo_size[d] = 1;
1485 POSTGIS_DEBUGF(3, " histo_size[d]: %d", histo_size[d]);
1486 histo_cells_new *= histo_size[d];
1487 }
1488 POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1489 }
1490 else
1491 {
1492 /*
1493 * We're going to express the amount of variability in each dimension
1494 * as a proportion of the total variability and allocate cells in that
1495 * dimension relative to that proportion.
1496 */
1497 POSTGIS_DEBUG(3, " allocating histogram axes based on axis variability");
1498 total_distribution = total_double(sample_distribution, ndims); /* First get the total */
1499 POSTGIS_DEBUGF(3, " total_distribution: %.8g", total_distribution);
1500 histo_cells_new = 1; /* For the number of cells in the final histogram */
1501 for ( d = 0; d < ndims; d++ )
1502 {
1503 if ( sample_distribution[d] == 0 ) /* Uninteresting dimensions don't get any room */
1504 {
1505 histo_size[d] = 1;
1506 }
1507 else /* Interesting dimension */
1508 {
1509 /* How does this dims variability compare to the total? */
1510 float edge_ratio = (float)sample_distribution[d] / (float)total_distribution;
1511 /*
1512 * Scale the target cells number by the # of dims and ratio,
1513 * then take the appropriate root to get the estimated number of cells
1514 * on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
1515 * The dedicated helper clamps pathological floating point inputs so we
1516 * do not resurrect the NaN propagation reported in #5959 on amd64.
1517 */
1518 histo_size[d] = histogram_axis_cells(histo_cells_target, histo_ndims, edge_ratio);
1519 }
1520 histo_cells_new *= histo_size[d];
1521 }
1522 POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
1523 }
1524
1525 /* Update histo_cells to the actual number of cells we need to allocate */
1526 histo_cells = histo_cells_new;
1527 POSTGIS_DEBUGF(3, " histo_cells: %d", histo_cells);
1528
1529 /*
1530 * Create the histogram (ND_STATS) in the stats memory context
1531 */
1532 old_context = MemoryContextSwitchTo(stats->anl_context);
1533 nd_stats_size = sizeof(ND_STATS) + ((histo_cells - 1) * sizeof(float4));
1534 nd_stats = palloc(nd_stats_size);
1535 memset(nd_stats, 0, nd_stats_size); /* Initialize all values to 0 */
1536 MemoryContextSwitchTo(old_context);
1537
1538 /* Initialize the #ND_STATS objects */
1539 nd_stats->ndims = ndims;
1540 nd_stats->extent = histo_extent;
1541 nd_stats->sample_features = sample_rows;
1542 nd_stats->table_features = total_rows;
1543 nd_stats->not_null_features = notnull_cnt;
1544 /* Copy in the histogram dimensions */
1545 for ( d = 0; d < ndims; d++ )
1546 nd_stats->size[d] = histo_size[d];
1547
1548 /*
1549 * Fourth scan:
1550 * o fill histogram values with the proportion of
1551 * features' bbox overlaps: a feature's bvol
1552 * can fully overlap (1) or partially overlap
1553 * (fraction of 1) an histogram cell.
1554 *
1555 * Note that we are filling each cell with the "portion of
1556 * the feature's box that overlaps the cell". So, if we sum
1557 * up the values in the histogram, we could get the
1558 * histogram feature count.
1559 *
1560 */
1561 for ( i = 0; i < notnull_cnt; i++ )
1562 {
1563 const ND_BOX *nd_box;
1564 ND_IBOX nd_ibox;
1565 int at[ND_DIMS];
1566 double num_cells = 0;
1567 double min[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1568 double max[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1569 double cellsize[ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1570
1571 nd_box = sample_boxes[i];
1572 if ( ! nd_box ) continue; /* Skip Null'ed out hard deviants */
1573
1574 /* Give backend a chance of interrupting us */
1575#if POSTGIS_PGSQL_VERSION >= 180
1576 vacuum_delay_point(true);
1577#else
1578 vacuum_delay_point();
1579#endif
1580
1581 /* Find the cells that overlap with this box and put them into the ND_IBOX */
1582 nd_box_overlap(nd_stats, nd_box, &nd_ibox);
1583 memset(at, 0, sizeof(int)*ND_DIMS);
1584
1585 POSTGIS_DEBUGF(3, " feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1586 nd_ibox.min[0], nd_ibox.min[1], nd_ibox.min[2], nd_ibox.min[3],
1587 nd_ibox.max[0], nd_ibox.max[1], nd_ibox.max[2], nd_ibox.max[3]);
1588
1589 for ( d = 0; d < nd_stats->ndims; d++ )
1590 {
1591 /* Initialize the starting values */
1592 at[d] = nd_ibox.min[d];
1593 min[d] = nd_stats->extent.min[d];
1594 max[d] = nd_stats->extent.max[d];
1595 cellsize[d] = (max[d] - min[d])/(nd_stats->size[d]);
1596 }
1597
1598 /*
1599 * Move through all the overlapped histogram cells values and
1600 * add the box overlap proportion to them.
1601 */
1602 do
1603 {
1604 ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1605 double ratio;
1606 /* Create a box for this histogram cell */
1607 for ( d = 0; d < nd_stats->ndims; d++ )
1608 {
1609 nd_cell.min[d] = min[d] + (at[d]+0) * cellsize[d];
1610 nd_cell.max[d] = min[d] + (at[d]+1) * cellsize[d];
1611 }
1612
1613 /*
1614 * If a feature box is completely inside one cell the ratio will be
1615 * 1.0. If a feature box is 50% in two cells, each cell will get
1616 * 0.5 added on.
1617 */
1618 ratio = nd_box_ratio(&nd_cell, nd_box, nd_stats->ndims);
1619 nd_stats->value[nd_stats_value_index(nd_stats, at)] += ratio;
1620 num_cells += ratio;
1621 POSTGIS_DEBUGF(3, " ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1622 POSTGIS_DEBUGF(3, " at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1623 }
1624 while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1625
1626 /* Keep track of overall number of overlaps counted */
1627 total_cell_count += num_cells;
1628 /* How many features have we added to this histogram? */
1629 histogram_features++;
1630 }
1631
1632 POSTGIS_DEBUGF(3, " histogram_features: %d", histogram_features);
1633 POSTGIS_DEBUGF(3, " sample_rows: %d", sample_rows);
1634 POSTGIS_DEBUGF(3, " table_rows: %.6g", total_rows);
1635
1636 /* Error out if we got no sample information */
1637 if ( ! histogram_features )
1638 {
1639 POSTGIS_DEBUG(3, " no stats have been gathered");
1640 elog(NOTICE, " no features lie in the stats histogram, invalid stats");
1641 stats->stats_valid = false;
1642 return;
1643 }
1644
1645 nd_stats->histogram_features = histogram_features;
1646 nd_stats->histogram_cells = histo_cells;
1647 nd_stats->cells_covered = total_cell_count;
1648
1649 /* Put this histogram data into the right slot/kind */
1650 if ( mode == 2 )
1651 {
1652 stats_slot = STATISTIC_SLOT_2D;
1653 stats_kind = STATISTIC_KIND_2D;
1654 }
1655 else
1656 {
1657 stats_slot = STATISTIC_SLOT_ND;
1658 stats_kind = STATISTIC_KIND_ND;
1659 }
1660
1661 /* Write the statistics data */
1662 stats->stakind[stats_slot] = stats_kind;
1663 stats->staop[stats_slot] = InvalidOid;
1664 stats->stanumbers[stats_slot] = (float4*)nd_stats;
1665 stats->numnumbers[stats_slot] = nd_stats_size/sizeof(float4);
1666 stats->stanullfrac = (float4)null_cnt/sample_rows;
1667 stats->stawidth = total_width/notnull_cnt;
1668 stats->stadistinct = -1.0;
1669 stats->stats_valid = true;
1670
1671 POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1672 POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1673 POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
1674 POSTGIS_DEBUGF(3, " out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1675 POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
1676 POSTGIS_DEBUG (3, " out: distinct values: all (no check done)");
1677 POSTGIS_DEBUGF(3, " out: %s", nd_stats_to_json(nd_stats));
1678 /*
1679 POSTGIS_DEBUGF(3, " out histogram:\n%s", nd_stats_to_grid(nd_stats));
1680 */
1681
1682 return;
1683}
1701static void
1702compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1703 int sample_rows, double total_rows)
1704{
1705 GserializedAnalyzeExtraData *extra_data = (GserializedAnalyzeExtraData *)stats->extra_data;
1706 /* Call standard statistics calculation routine to fill in correlation for BRIN to work */
1707 stats->extra_data = extra_data->std_extra_data;
1708 extra_data->std_compute_stats(stats, fetchfunc, sample_rows, total_rows);
1709 stats->extra_data = extra_data;
1710
1711 /* 2D Mode */
1712 compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
1713
1714 if (stats->stats_valid)
1715 {
1716 /* ND Mode: Only computed if 2D was computed too (not NULL and valid) */
1717 compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 0);
1718 }
1719}
1720
1748Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
1749{
1750 VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1751 GserializedAnalyzeExtraData *extra_data =
1753
1754 /* Ask for standard analyze to fill in as much as possible */
1755 if (!std_typanalyze(stats))
1756 PG_RETURN_BOOL(false);
1757
1758 /* Save old compute_stats and extra_data for scalar statistics ... */
1759 extra_data->std_compute_stats = stats->compute_stats;
1760 extra_data->std_extra_data = stats->extra_data;
1761 /* ... and replace with our info */
1762 stats->compute_stats = compute_gserialized_stats;
1763 stats->extra_data = extra_data;
1764
1765 /* Indicate we are done successfully */
1766 PG_RETURN_BOOL(true);
1767}
1768
1781static float8
1782estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
1783{
1784 int d; /* counter */
1785 float8 selectivity;
1786 ND_BOX nd_box;
1787 ND_IBOX nd_ibox;
1788 int at[ND_DIMS];
1789 double cell_size[ND_DIMS];
1790 double min[ND_DIMS];
1791 double max[ND_DIMS];
1792 double total_count = 0.0;
1793 int ndims_max;
1794
1795 /* Calculate the overlap of the box on the histogram */
1796 if ( ! nd_stats )
1797 {
1798 elog(NOTICE, " estimate_selectivity called with null input");
1799 return FALLBACK_ND_SEL;
1800 }
1801
1802 ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1803
1804 /* Initialize nd_box. */
1805 nd_box_from_gbox(box, &nd_box);
1806
1807 /*
1808 * To return 2D stats on an ND sample, we need to make the
1809 * 2D box cover the full range of the other dimensions in the
1810 * histogram.
1811 */
1812 POSTGIS_DEBUGF(3, " mode: %d", mode);
1813 if ( mode == 2 )
1814 {
1815 POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1816 ndims_max = 2;
1817 }
1818
1819 POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1820 POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1821
1822 // elog(DEBUG1, "out histogram:\n%s", nd_stats_to_grid(nd_stats));
1823
1824 /*
1825 * Search box completely misses histogram extent?
1826 * We have to intersect in all N dimensions or else we have
1827 * zero interaction under the &&& operator. It's important
1828 * to short circuit in this case, as some of the tests below
1829 * will return junk results when run on non-intersecting inputs.
1830 */
1831 if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1832 {
1833 POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1834 return 0.0;
1835 }
1836
1837 /* Search box completely contains histogram extent! */
1838 if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1839 {
1840 POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1841 return 1.0;
1842 }
1843
1844 /* Calculate the overlap of the box on the histogram */
1845 if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
1846 {
1847 POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
1848 return FALLBACK_ND_SEL;
1849 }
1850
1851 /* Work out some measurements of the histogram */
1852 for ( d = 0; d < nd_stats->ndims; d++ )
1853 {
1854 /* Cell size in each dim */
1855 min[d] = nd_stats->extent.min[d];
1856 max[d] = nd_stats->extent.max[d];
1857 cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
1858 POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
1859
1860 /* Initialize the counter */
1861 at[d] = nd_ibox.min[d];
1862 }
1863
1864 /* Move through all the overlap values and sum them */
1865 do
1866 {
1867 float cell_count, ratio;
1868 ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1869
1870 /* We have to pro-rate partially overlapped cells. */
1871 for ( d = 0; d < nd_stats->ndims; d++ )
1872 {
1873 nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
1874 nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
1875 }
1876
1877 ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
1878 cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
1879
1880 /* Add the pro-rated count for this cell to the overall total */
1881 total_count += (double)cell_count * ratio;
1882 POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
1883 }
1884 while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1885
1886 /* Scale by the number of features in our histogram to get the proportion */
1887 selectivity = total_count / nd_stats->histogram_features;
1888
1889 POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
1890 POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
1891 POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
1892 POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
1893
1894 /* Prevent rounding overflows */
1895 if (selectivity > 1.0) selectivity = 1.0;
1896 else if (selectivity < 0.0) selectivity = 0.0;
1897
1898 return selectivity;
1899}
1900
1901
1902
1908Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
1909{
1910 Oid table_oid = PG_GETARG_OID(0);
1911 text *att_text = PG_GETARG_TEXT_P(1);
1912 ND_STATS *nd_stats;
1913 char *str;
1914 text *json;
1915 int mode = 2; /* default to 2D mode */
1916 bool only_parent = false; /* default to whole tree stats */
1917
1918 /* Check if we've been asked to not use 2d mode */
1919 if ( ! PG_ARGISNULL(2) )
1920 mode = text_p_get_mode(PG_GETARG_TEXT_P(2));
1921
1922 /* Retrieve the stats object */
1923 nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, only_parent);
1924 if ( ! nd_stats )
1925 elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
1926
1927 /* Convert to JSON */
1928 elog(DEBUG1, "stats grid:\n%s", nd_stats_to_grid(nd_stats));
1929 str = nd_stats_to_json(nd_stats);
1930 json = cstring_to_text(str);
1931 pfree(str);
1932 pfree(nd_stats);
1933
1934 PG_RETURN_TEXT_P(json);
1935}
1936
1937
1943Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
1944{
1945 Oid table_oid = PG_GETARG_OID(0);
1946 text *att_text = PG_GETARG_TEXT_P(1);
1947 Datum geom_datum = PG_GETARG_DATUM(2);
1948 GBOX gbox; /* search box read from gserialized datum */
1949 float8 selectivity = 0;
1950 ND_STATS *nd_stats;
1951 int mode = 2; /* 2D mode by default */
1952
1953 /* Check if we've been asked to not use 2d mode */
1954 if ( ! PG_ARGISNULL(3) )
1955 mode = text_p_get_mode(PG_GETARG_TEXT_P(3));
1956
1957 /* Retrieve the stats object */
1958 nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, false);
1959
1960 if ( ! nd_stats )
1961 elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
1962
1963 /* Calculate the gbox */
1964 if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
1965 elog(ERROR, "unable to calculate bounding box from geometry");
1966
1967 POSTGIS_DEBUGF(3, " %s", gbox_to_string(&gbox));
1968
1969 /* Do the estimation */
1970 selectivity = estimate_selectivity(&gbox, nd_stats, mode);
1971
1972 pfree(nd_stats);
1973 PG_RETURN_FLOAT8(selectivity);
1974}
1975
1976
1982Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
1983{
1984 Oid table_oid1 = PG_GETARG_OID(0);
1985 text *att_text1 = PG_GETARG_TEXT_P(1);
1986 Oid table_oid2 = PG_GETARG_OID(2);
1987 text *att_text2 = PG_GETARG_TEXT_P(3);
1988 ND_STATS *nd_stats1, *nd_stats2;
1989 float8 selectivity = 0;
1990 int mode = 2; /* 2D mode by default */
1991
1992
1993 /* Retrieve the stats object */
1994 nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode, false);
1995 nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode, false);
1996
1997 if ( ! nd_stats1 )
1998 elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text_to_cstring(att_text1));
1999
2000 if ( ! nd_stats2 )
2001 elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid2), text_to_cstring(att_text2));
2002
2003 /* Check if we've been asked to not use 2d mode */
2004 if ( ! PG_ARGISNULL(4) )
2005 {
2006 text *modetxt = PG_GETARG_TEXT_P(4);
2007 char *modestr = text_to_cstring(modetxt);
2008 if ( modestr[0] == 'N' )
2009 mode = 0;
2010 }
2011
2012 /* Do the estimation */
2013 selectivity = estimate_join_selectivity(nd_stats1, nd_stats2);
2014
2015 pfree(nd_stats1);
2016 pfree(nd_stats2);
2017 PG_RETURN_FLOAT8(selectivity);
2018}
2019
2025Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
2026{
2027 PG_RETURN_DATUM(DirectFunctionCall5(
2029 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2030 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2031 Int32GetDatum(2) /* 2-D mode */
2032 ));
2033}
2034
2040Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
2041{
2042 PG_RETURN_DATUM(DirectFunctionCall5(
2044 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2045 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2046 Int32GetDatum(0) /* N-D mode */
2047 ));
2048}
2049
2050
2065float8
2066gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
2067{
2068 VariableStatData vardata;
2069 Node *other = NULL;
2070 bool varonleft;
2071 ND_STATS *nd_stats = NULL;
2072
2073 GBOX search_box;
2074 float8 selectivity = 0;
2075 Const *otherConst;
2076
2077 POSTGIS_DEBUGF(2, "%s: entered function", __func__);
2078
2079 if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft))
2080 {
2081 POSTGIS_DEBUGF(2, "%s: could not find vardata", __func__);
2082 return DEFAULT_ND_SEL;
2083 }
2084
2085 if (!IsA(other, Const))
2086 {
2087 ReleaseVariableStats(vardata);
2088 POSTGIS_DEBUGF(2, "%s: no constant argument, returning default selectivity %g", __func__, DEFAULT_ND_SEL);
2089 return DEFAULT_ND_SEL;
2090 }
2091
2092 otherConst = (Const*)other;
2093 if ((!otherConst) || otherConst->constisnull)
2094 {
2095 ReleaseVariableStats(vardata);
2096 POSTGIS_DEBUGF(2, "%s: constant argument is NULL", __func__);
2097 return DEFAULT_ND_SEL;
2098 }
2099
2100 if (!gserialized_datum_get_gbox_p(otherConst->constvalue, &search_box))
2101 {
2102 ReleaseVariableStats(vardata);
2103 POSTGIS_DEBUGF(2, "%s: search box is EMPTY", __func__);
2104 return 0.0;
2105 }
2106
2107 if (!vardata.statsTuple)
2108 {
2109 POSTGIS_DEBUGF(1, "%s: no statistics available on table. Empty? Need to ANALYZE?", __func__);
2110 return DEFAULT_ND_SEL;
2111 }
2112
2113 nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
2114 ReleaseVariableStats(vardata);
2115 selectivity = estimate_selectivity(&search_box, nd_stats, mode);
2116 if (nd_stats)
2117 pfree(nd_stats);
2118
2119 return selectivity;
2120}
2121
2123Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
2124{
2125 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2126 // Oid operator_oid = PG_GETARG_OID(1);
2127 List *args = (List *) PG_GETARG_POINTER(2);
2128 int varRelid = PG_GETARG_INT32(3);
2129 int mode = PG_GETARG_INT32(4);
2130 float8 selectivity = gserialized_sel_internal(root, args, varRelid, mode);
2131 POSTGIS_DEBUGF(2, "%s: selectivity is %g", __func__, selectivity);
2132 PG_RETURN_FLOAT8(selectivity);
2133}
2134
2135/************************************************************************/
2136
2137
2138/*
2139 * Given an index and table column, confirm the
2140 * index was built on that column, and return the
2141 * corresponding index attribute for that column.
2142 */
2143static int16
2144index_has_attr(Oid index_oid, Oid table_oid, int16 table_attnum)
2145{
2146 HeapTuple index_tuple;
2147 Form_pg_index index_form;
2148 int16 index_attnum = InvalidAttrNumber;
2149
2150 /* Check if the index is on the desired column */
2151 index_tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid));
2152 if (!HeapTupleIsValid(index_tuple))
2153 elog(ERROR, "cache lookup failed for index %u", index_oid);
2154
2155 index_form = (Form_pg_index) GETSTRUCT(index_tuple);
2156
2157 /* Something went wrong, this index isn't on our table of interest */
2158 if (index_form->indrelid != table_oid)
2159 elog(ERROR, "table=%u and index=%u are not related", table_oid, index_oid);
2160
2161 /* Check if the attnum is in the indkey array */
2162 for (int16 i = 0; i < (int16)(index_form->indkey.dim1); i++)
2163 {
2164 if (index_form->indkey.values[i] == table_attnum)
2165 {
2166 index_attnum = i+1;
2167 break;
2168 }
2169 }
2170 ReleaseSysCache(index_tuple);
2171 return index_attnum;
2172}
2173
2174
2175/*
2176 * Given an index return the access method.
2177 * (We only work with GIST access method.)
2178 */
2179static int
2180index_get_am(Oid index_oid)
2181{
2182 int index_am;
2183 Form_pg_class index_rel_form;
2184 HeapTuple index_rel_tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(index_oid));
2185
2186 if (!HeapTupleIsValid(index_rel_tuple))
2187 elog(ERROR, "cache lookup failed for index %u", index_oid);
2188
2189 index_rel_form = (Form_pg_class) GETSTRUCT(index_rel_tuple);
2190 index_am = index_rel_form->relam;
2191 ReleaseSysCache(index_rel_tuple);
2192 return index_am;
2193}
2194
2195
2196/*
2197 * Given an index and index attribute, lookup the
2198 * key type (box2df or gidx) of that index column.
2199 */
2200static int
2201index_get_keytype (Oid index_oid, int16 index_attnum)
2202{
2203 Oid atttypid = InvalidOid;
2204 Form_pg_attribute att_form;
2205
2206 /* Get the key type for the index key? */
2207 HeapTuple att_tuple = SearchSysCache2(ATTNUM,
2208 ObjectIdGetDatum(index_oid),
2209 Int16GetDatum(index_attnum));
2210
2211 if (!HeapTupleIsValid(att_tuple))
2212 elog(ERROR, "cache lookup failed for index %u attribute %d", index_oid, index_attnum);
2213
2214 att_form = (Form_pg_attribute) GETSTRUCT(att_tuple);
2215 atttypid = att_form->atttypid;
2216 ReleaseSysCache(att_tuple);
2217 return atttypid;
2218}
2219
2220
2221/*
2222 * Given a table and attribute number, find any
2223 * "spatial index" of that attribute. For our purposes
2224 * a spatial index is one we can read the top page of,
2225 * namely a geometry or geography column, with
2226 * a GIST index having either a gidx or box2df key.
2227 */
2228static Oid
2229table_get_spatial_index(Oid table_oid, int16 attnum, int *key_type, int16 *idx_attnum)
2230{
2231 Relation table_rel;
2232 List *index_list;
2233 ListCell *lc;
2234
2235 /* Lookup our spatial index key types */
2236 Oid b2d_oid = postgis_oid(BOX2DFOID);
2237 Oid gdx_oid = postgis_oid(GIDXOID);
2238
2239 if (!(b2d_oid && gdx_oid))
2240 return InvalidOid;
2241
2242 /* Read a list of all indexes on this table */
2243 table_rel = RelationIdGetRelation(table_oid);
2244 index_list = RelationGetIndexList(table_rel);
2245 RelationClose(table_rel);
2246
2247 /* For each index associated with this table... */
2248 foreach(lc, index_list)
2249 {
2250 Oid index_oid = lfirst_oid(lc);
2251 Oid atttypid;
2252
2253 /* Is our attribute indexed by this index? */
2254 *idx_attnum = index_has_attr(index_oid, table_oid, attnum);
2255
2256 /* No, move on */
2257 if (*idx_attnum == InvalidAttrNumber)
2258 continue;
2259
2260 /* We only handle GIST spatial indexes */
2261 if (index_get_am(index_oid) != GIST_AM_OID)
2262 continue;
2263
2264 /* Is the column actually spatial? */
2265 /* Only if it uses our spatial key types */
2266 atttypid = index_get_keytype (index_oid, *idx_attnum);
2267 if (atttypid == b2d_oid || atttypid == gdx_oid)
2268 {
2269 /* Spatial key found in this index! */
2270 *key_type = (atttypid == b2d_oid ? STATISTIC_KIND_2D : STATISTIC_KIND_ND);
2271 return index_oid;
2272 }
2273 }
2274 return InvalidOid;
2275}
2276
2277/*
2278 * Given an index and indexed attribute, look up
2279 * the keys in the top page of the index, and using
2280 * the appropriate key type, return a box that is the
2281 * union of all those keys.
2282 */
2283static GBOX *
2284spatial_index_read_extent(Oid idx_oid, int idx_att_num, int key_type)
2285{
2286 BOX2DF *bounds_2df = NULL;
2287 GIDX *bounds_gidx = NULL;
2288 GBOX *gbox = NULL;
2289 Relation idx_rel;
2290 Buffer buffer;
2291 Page page;
2292 unsigned long offset;
2293 unsigned long offset_max;
2294
2295 if (!idx_oid)
2296 return NULL;
2297
2298 idx_rel = index_open(idx_oid, AccessShareLock);
2299 buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2300 page = (Page) BufferGetPage(buffer);
2301 offset = FirstOffsetNumber;
2302 offset_max = PageGetMaxOffsetNumber(page);
2303 while (offset <= offset_max)
2304 {
2305 ItemId iid = PageGetItemId(page, offset);
2306 IndexTuple ituple;
2307 if (!iid)
2308 {
2309 ReleaseBuffer(buffer);
2310 index_close(idx_rel, AccessShareLock);
2311 return NULL;
2312 }
2313 ituple = (IndexTuple) PageGetItem(page, iid);
2314 if (!GistTupleIsInvalid(ituple))
2315 {
2316 bool isnull;
2317 Datum idx_attr = index_getattr(ituple, idx_att_num, idx_rel->rd_att, &isnull);
2318 if (!isnull)
2319 {
2320 if (key_type == STATISTIC_KIND_2D)
2321 {
2322 BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2323 if (bounds_2df)
2324 box2df_merge(bounds_2df, b);
2325 else
2326 bounds_2df = box2df_copy(b);
2327 }
2328 else
2329 {
2330 GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2331 if (bounds_gidx)
2332 gidx_merge(&bounds_gidx, b);
2333 else
2334 bounds_gidx = gidx_copy(b);
2335 }
2336 }
2337 }
2338 offset++;
2339 }
2340
2341 ReleaseBuffer(buffer);
2342 index_close(idx_rel, AccessShareLock);
2343
2344 if (key_type == STATISTIC_KIND_2D && bounds_2df)
2345 {
2346 if (box2df_is_empty(bounds_2df))
2347 return NULL;
2348 gbox = gbox_new(0);
2349 box2df_to_gbox_p(bounds_2df, gbox);
2350 }
2351 else if (key_type == STATISTIC_KIND_ND && bounds_gidx)
2352 {
2353 lwflags_t flags = 0;
2354 if (gidx_is_unknown(bounds_gidx))
2355 return NULL;
2356 FLAGS_SET_Z(flags, GIDX_NDIMS(bounds_gidx) > 2);
2357 FLAGS_SET_M(flags, GIDX_NDIMS(bounds_gidx) > 3);
2358 gbox = gbox_new(flags);
2359 gbox_from_gidx(bounds_gidx, gbox, flags);
2360 }
2361 else
2362 return NULL;
2363
2364 return gbox;
2365}
2366
2367/*
2368CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
2369 RETURNS box2d
2370 AS '$libdir/postgis-2.5','_postgis_gserialized_index_extent'
2371 LANGUAGE 'c' STABLE STRICT;
2372*/
2373
2376{
2377 GBOX *gbox = NULL;
2378 int key_type;
2379 int16 att_num, idx_att_num = InvalidAttrNumber;
2380 Oid tbl_oid = PG_GETARG_DATUM(0);
2381 char *col = text_to_cstring(PG_GETARG_TEXT_P(1));
2382 Oid idx_oid;
2383
2384 if(!tbl_oid)
2385 PG_RETURN_NULL();
2386
2387 /* We need to initialize the internal cache to access it later via postgis_oid() */
2388 postgis_initialize_cache();
2389
2390 att_num = get_attnum(tbl_oid, col);
2391 if (att_num == InvalidAttrNumber)
2392 PG_RETURN_NULL();
2393
2394 idx_oid = table_get_spatial_index(tbl_oid, att_num, &key_type, &idx_att_num);
2395 if (!idx_oid)
2396 PG_RETURN_NULL();
2397
2398 gbox = spatial_index_read_extent(idx_oid, idx_att_num, key_type);
2399 if (!gbox)
2400 PG_RETURN_NULL();
2401 else
2402 PG_RETURN_POINTER(gbox);
2403}
2404
2405
2406/*
2407 * Given a table and column name, look up the attribute number
2408 * and type of that column.
2409 */
2410static bool
2411get_attnum_attypid(Oid table_oid, const char *col, int16 *attnum, Oid *atttypid)
2412{
2413 HeapTuple att_tuple;
2414 Form_pg_attribute att;
2415
2416 if (!attnum || !atttypid)
2417 elog(ERROR, "%s got null input parameters", __func__);
2418
2419 /* Is the index on the column name we are looking for? */
2420 att_tuple = SearchSysCache2(ATTNAME,
2421 ObjectIdGetDatum(table_oid),
2422 PointerGetDatum(col));
2423
2424 if (!HeapTupleIsValid(att_tuple))
2425 return false;
2426
2427 att = (Form_pg_attribute) GETSTRUCT(att_tuple);
2428 *atttypid = att->atttypid;
2429 *attnum = att->attnum;
2430 ReleaseSysCache(att_tuple);
2431 return true;
2432}
2433
2434
2441Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
2442{
2443 text *coltxt = NULL;
2444 char *col = NULL;
2445 int16 attnum, idx_attnum;
2446 Oid atttypid = InvalidOid;
2447 char nsp_tbl[2*NAMEDATALEN+6];
2448 char *tbl = NULL;
2449 Oid tbl_oid, idx_oid = 0;
2450 ND_STATS *nd_stats;
2451 GBOX *gbox = NULL;
2452 bool only_parent = false;
2453 int key_type;
2454 Oid geographyOid = postgis_oid(GEOGRAPHYOID);
2455 Oid geometryOid = postgis_oid(GEOMETRYOID);
2456
2457 /* We need to initialize the internal cache to access it later via postgis_oid() */
2458 postgis_initialize_cache();
2459
2460 if (PG_NARGS() < 2 || PG_NARGS() > 4)
2461 elog(ERROR, "ST_EstimatedExtent() called with wrong number of arguments");
2462
2463 if ( PG_NARGS() == 4 )
2464 {
2465 only_parent = PG_GETARG_BOOL(3);
2466 }
2467 if ( PG_NARGS() >= 3 )
2468 {
2469 char *nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2470 tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2471 coltxt = PG_GETARG_TEXT_P(2);
2472 snprintf(nsp_tbl, sizeof(nsp_tbl), "\"%s\".\"%s\"", nsp, tbl);
2473 }
2474 if ( PG_NARGS() == 2 )
2475 {
2476 tbl = text_to_cstring(PG_GETARG_TEXT_P(0));
2477 coltxt = PG_GETARG_TEXT_P(1);
2478 snprintf(nsp_tbl, sizeof(nsp_tbl), "\"%s\"", tbl);
2479 }
2480
2481 /* Parse the namespace/table strings and lookup in system catalogs */
2482 tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2483 if (!tbl_oid)
2484 elog(ERROR, "cannot lookup table %s", nsp_tbl);
2485
2486 /* Get the attribute number and type from the column name */
2487 col = text_to_cstring(coltxt);
2488 if (!get_attnum_attypid(tbl_oid, col, &attnum, &atttypid))
2489 elog(ERROR, "column %s.\"%s\" does not exist", nsp_tbl, col);
2490
2491 /* We can only do estimates on geograpy and geometry */
2492 if ((atttypid != geographyOid) && (atttypid != geometryOid))
2493 {
2494 elog(ERROR, "column %s.\"%s\" must be a geometry or geography", nsp_tbl, col);
2495 }
2496
2497 /* Read the extent from the head of the spatial index */
2498 /* works if there is a spatial index */
2499 idx_oid = table_get_spatial_index(tbl_oid, attnum, &key_type, &idx_attnum);
2500 if (idx_oid != InvalidOid)
2501 {
2502 /* TODO: how about only_parent ? */
2503 gbox = spatial_index_read_extent(idx_oid, idx_attnum, key_type);
2504 elog(DEBUG3, "index for %s.\"%s\" exists, reading gbox from there", nsp_tbl, col);
2505 if (!gbox) PG_RETURN_NULL();
2506 }
2507 /* Read the extent from the stats tables, */
2508 /* works if ANALYZE has been run */
2509 else
2510 {
2511 int stats_mode = 2;
2512 elog(DEBUG3, "index for %s.\"%s\" does not exist", nsp_tbl, col);
2513
2514 /* For a geography column, we need the XYZ geocentric bounds */
2515 if (atttypid == geographyOid)
2516 stats_mode = 3;
2517
2518 /* ND stats include an extent for the histogram */
2519 nd_stats = pg_get_nd_stats_by_name(tbl_oid, coltxt, stats_mode, only_parent);
2520
2521 /* Error out on no stats */
2522 if (!nd_stats)
2523 {
2524 elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, col);
2525 PG_RETURN_NULL();
2526 }
2527
2528 /* Construct the box */
2529 gbox = gbox_new(0);
2530 gbox->xmin = nd_stats->extent.min[0];
2531 gbox->xmax = nd_stats->extent.max[0];
2532 gbox->ymin = nd_stats->extent.min[1];
2533 gbox->ymax = nd_stats->extent.max[1];
2534 if (stats_mode != 2)
2535 {
2536 FLAGS_SET_Z(gbox->flags, 1);
2537 gbox->zmin = nd_stats->extent.min[2];
2538 gbox->zmax = nd_stats->extent.max[2];
2539 }
2540
2541 pfree(nd_stats);
2542 }
2543
2544 /* Convert geocentric geography box into a planar box */
2545 /* that users understand */
2546 if (atttypid == geographyOid)
2547 {
2548 GBOX *gbox_planar = gbox_new(0);
2549 gbox_geocentric_get_gbox_cartesian(gbox, gbox_planar);
2550 PG_RETURN_POINTER(gbox_planar);
2551 }
2552 else
2553 PG_RETURN_POINTER(gbox);
2554}
2555
2556/*
2557 * Legacy prototype for Estimated_Extent()
2558 */
2560Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
2561{
2562 if ( PG_NARGS() == 3 )
2563 {
2564 PG_RETURN_DATUM(
2565 DirectFunctionCall3(gserialized_estimated_extent,
2566 PG_GETARG_DATUM(0),
2567 PG_GETARG_DATUM(1),
2568 PG_GETARG_DATUM(2)));
2569 }
2570 else if ( PG_NARGS() == 2 )
2571 {
2572 PG_RETURN_DATUM(
2573 DirectFunctionCall2(gserialized_estimated_extent,
2574 PG_GETARG_DATUM(0),
2575 PG_GETARG_DATUM(1)));
2576 }
2577
2578 elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
2579 PG_RETURN_NULL();
2580}
GBOX * gbox_new(lwflags_t flags)
Create a new gbox with the dimensionality indicated by the flags.
Definition gbox.c:32
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
Definition gbox.c:404
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
Definition gbox.c:197
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
static int index_get_keytype(Oid index_oid, int16 index_attnum)
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
#define STATISTIC_SLOT_ND
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
static int range_full(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
#define STATISTIC_KIND_2D
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Expand the bounds of target to include source.
#define DEFAULT_ND_JOINSEL
#define STATISTIC_KIND_ND
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
static bool get_attnum_attypid(Oid table_oid, const char *col, int16 *attnum, Oid *atttypid)
#define SDFACTOR
static GBOX * spatial_index_read_extent(Oid idx_oid, int idx_att_num, int key_type)
#define FALLBACK_ND_JOINSEL
#define MAX_NUM_BINS
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
This function should return an estimation of the number of rows returned by a query involving an over...
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
static Oid table_get_spatial_index(Oid tbl_oid, int16 attnum, int *key_type, int16 *idx_attnum)
static int index_get_am(Oid index_oid)
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
#define BIN_MIN_SIZE
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator?
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
static char * nd_stats_to_grid(const ND_STATS *stats)
Create a printable view of the ND_STATS histogram.
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension,...
static int16 index_has_attr(Oid index_oid, Oid table_oid, int16 table_attnum)
struct ND_STATS_T ND_STATS
static int histogram_axis_cells(int histo_cells_target, int histo_ndims, double edge_ratio)
static double nd_box_ratio(const ND_BOX *cover, const ND_BOX *target, int ndims)
static int histogram_cell_budget(double total_rows, int ndims, int attstattarget)
static int nd_stats_value_index(const ND_STATS *stats, const int *indexes)
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
bool box2df_is_empty(const BOX2DF *a)
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
int gserialized_datum_get_gbox_p(Datum gsdatum, GBOX *gbox)
Given a GSERIALIZED datum, as quickly as possible (peeking into the top of the memory) return the gbo...
BOX2DF * box2df_copy(BOX2DF *b)
bool gidx_is_unknown(const GIDX *a)
GIDX * gidx_copy(GIDX *b)
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define LW_FAILURE
Definition liblwgeom.h:96
uint16_t lwflags_t
Definition liblwgeom.h:299
#define FLAGS_GET_Z(flags)
Definition liblwgeom.h:165
#define FLAGS_GET_M(flags)
Definition liblwgeom.h:166
#define FLAGS_SET_M(flags, value)
Definition liblwgeom.h:173
#define FLAGS_SET_Z(flags, value)
Definition liblwgeom.h:172
#define FLAGS_GET_GEODETIC(flags)
Definition liblwgeom.h:168
This library is the generic geometry handling section of PostGIS.
int gbox_geocentric_get_gbox_cartesian(const GBOX *gbox_geocentric, GBOX *gbox_planar)
#define str(s)
Datum buffer(PG_FUNCTION_ARGS)
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided.
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string.
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
static void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
double ymax
Definition liblwgeom.h:357
double zmax
Definition liblwgeom.h:359
double xmax
Definition liblwgeom.h:355
double zmin
Definition liblwgeom.h:358
double mmax
Definition liblwgeom.h:361
double ymin
Definition liblwgeom.h:356
double xmin
Definition liblwgeom.h:354
double mmin
Definition liblwgeom.h:360
lwflags_t flags
Definition liblwgeom.h:353
AnalyzeAttrComputeStatsFunc std_compute_stats