63#include "access/genam.h"
64#include "access/gin.h"
65#include "access/gist.h"
66#include "access/gist_private.h"
67#include "access/gistscan.h"
68#if PG_VERSION_NUM < 130000
69#include "access/tuptoaster.h"
71#include "access/detoast.h"
73#include "utils/datum.h"
74#include "access/heapam.h"
75#include "catalog/index.h"
76#include "catalog/pg_am.h"
78#include "storage/lmgr.h"
79#include "catalog/namespace.h"
80#include "catalog/indexing.h"
82#include "utils/regproc.h"
83#include "utils/varlena.h"
85#include "utils/builtins.h"
86#include "utils/datum.h"
87#include "utils/snapmgr.h"
88#include "utils/fmgroids.h"
90#include "access/heapam.h"
91#include "catalog/pg_type.h"
92#include "access/relscan.h"
94#include "executor/spi.h"
96#include "commands/vacuum.h"
97#include "nodes/pathnodes.h"
99#include "parser/parsetree.h"
100#include "utils/array.h"
101#include "utils/lsyscache.h"
102#include "utils/builtins.h"
103#include "utils/syscache.h"
104#include "utils/rel.h"
105#include "utils/selfuncs.h"
107#include "../postgis_config.h"
109#include "access/htup_details.h"
114#include "lwgeom_pg.h"
115#include "gserialized_gist.h"
167#define STATISTIC_KIND_ND 102
168#define STATISTIC_KIND_2D 103
178#define STATISTIC_SLOT_ND 3
179#define STATISTIC_SLOT_2D 4
195#define MIN_DIMENSION_WIDTH 0.000000001
201#define MAX_DIMENSION_WIDTH 1.0E+20
206#define DEFAULT_ND_SEL 0.0001
207#define DEFAULT_ND_JOINSEL 0.001
212#define FALLBACK_ND_SEL 0.2
213#define FALLBACK_ND_JOINSEL 0.3
250 if (VARSIZE_ANY_EXHDR(txt) <= 0)
252 modestr = (
char *)VARDATA(txt);
253 if (modestr[0] ==
'N')
264 int ia = *((
const int*)a);
265 int ib = *((
const int*)b);
292 qsort(vals, nvals,
sizeof(
int),
cmp_int);
293 return vals[nvals-1] - vals[0];
305 for (i = 0; i < nvals; i++)
311#if POSTGIS_DEBUG_LEVEL >= 3
317total_int(
const int *vals,
int nvals)
322 for ( i = 0; i < nvals; i++ )
332avg(
const int *vals,
int nvals)
334 int t = total_int(vals, nvals);
335 return (
double)t / (double)nvals;
342stddev(
const int *vals,
int nvals)
346 double mean = avg(vals, nvals);
349 for ( i = 0; i < nvals; i++ )
351 double v = (double)(vals[i]);
352 sigma2 += (mean - v) * (mean - v);
354 return sqrt(sigma2 / nvals);
369 for ( i = 0; i < ndims; i++ )
375 for ( i = 0; i < ndims; i++ )
395 char *json_extent, *
str;
398 int ndims = (int)roundf(nd_stats->
ndims);
405 for ( d = 0; d < ndims; d++ )
441 int sizex = (int)roundf(stats->
size[0]);
442 int sizey = (int)roundf(stats->
size[1]);
445 for ( k = 0; k < sizey; k++ )
447 for ( j = 0; j < sizex; j++ )
465 for ( d = 0; d <
ND_DIMS; d++ )
467 target->
min[d] = Min(target->
min[d], source->
min[d]);
468 target->
max[d] = Max(target->
max[d], source->
max[d]);
477 memset(a, 0,
sizeof(
ND_BOX));
490 for ( d = 0; d <
ND_DIMS; d++ )
493 a->
max[d] = -1 * FLT_MAX;
540 for ( d = 0; d < ndims; d++ )
542 if ( (a->
min[d] > b->
max[d]) || (a->
max[d] < b->
min[d]) )
555 for ( d = 0; d < ndims; d++ )
557 if ( ! ((a->
min[d] < b->
min[d]) && (a->
max[d] > b->
max[d])) )
572 for ( d = 0; d <
ND_DIMS; d++ )
574 size = nd_box->
max[d] - nd_box->
min[d];
578 nd_box->
min[d] -= size * expansion_factor / 2;
579 nd_box->
max[d] += size * expansion_factor / 2;
596 memset(nd_ibox, 0,
sizeof(
ND_IBOX));
599 for ( d = 0; d < nd_stats->
ndims; d++ )
603 double width = smax - smin;
611 int size = (int)roundf(nd_stats->
size[d]);
614 nd_ibox->
min[d] = floor(size * (nd_box->
min[d] - smin) / width);
615 nd_ibox->
max[d] = floor(size * (nd_box->
max[d] - smin) / width);
617 POSTGIS_DEBUGF(5,
" stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
618 POSTGIS_DEBUGF(5,
" overlap: dim %d: (%d, %d)", d, nd_ibox->
min[d], nd_ibox->
max[d]);
621 nd_ibox->
min[d] = Max(nd_ibox->
min[d], 0);
622 nd_ibox->
max[d] = Min(nd_ibox->
max[d], size - 1);
629#define MAX_NUM_BINS 50
630#define BIN_MIN_SIZE 10
654#if POSTGIS_DEBUG_LEVEL >= 3
655 double average, sdev, sdev_ratio;
661 counts = palloc0(num_bins *
sizeof(
int));
664 for ( d = 0; d < ndims; d++ )
667 memset(counts, 0, num_bins *
sizeof(
int));
670 smin = extent->
min[d];
671 smax = extent->
max[d];
672 swidth = smax - smin;
686 for ( i = 0; i < num_boxes; i++ )
688 double minoffset, maxoffset;
692 if ( ! ndb )
continue;
695 minoffset = ndb->
min[d] - smin;
696 maxoffset = ndb->
max[d] - smin;
699 if ( minoffset < 0 || minoffset > swidth ||
700 maxoffset < 0 || maxoffset > swidth )
706 bmin = floor(num_bins * minoffset / swidth);
707 bmax = floor(num_bins * maxoffset / swidth);
710 if (bmax >= num_bins)
713 POSTGIS_DEBUGF(4,
" dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
716 for ( k = bmin; k <= bmax; k++ )
727#if POSTGIS_DEBUG_LEVEL >= 3
728 average = avg(counts, num_bins);
729 sdev = stddev(counts, num_bins);
730 sdev_ratio = sdev/average;
732 POSTGIS_DEBUGF(3,
" dimension %d: range = %d", d, range);
733 POSTGIS_DEBUGF(3,
" dimension %d: average = %.6g", d, average);
734 POSTGIS_DEBUGF(3,
" dimension %d: stddev = %.6g", d, sdev);
735 POSTGIS_DEBUGF(3,
" dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
738 distribution[d] = range;
758 if (counter[d] < ibox->
max[d])
763 counter[d] = ibox->
min[d];
787 rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
788 ATTSTATSSLOT_NUMBERS);
790 POSTGIS_DEBUGF(2,
"no slot of kind %d in stats tuple", stats_kind);
795 nd_stats = palloc(
sizeof(float4) * sslot.nnumbers);
796 memcpy(nd_stats, sslot.numbers,
sizeof(float4) * sslot.nnumbers);
798 free_attstatsslot(&sslot);
810 HeapTuple stats_tuple = NULL;
816 POSTGIS_DEBUGF(2,
"searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
817 stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(
true));
819 POSTGIS_DEBUGF(2,
"found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
822 if ( only_parent || ! stats_tuple )
824 POSTGIS_DEBUGF(2,
"searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
825 stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(
false));
827 POSTGIS_DEBUGF(2,
"found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
831 POSTGIS_DEBUGF(2,
"stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
836 ReleaseSysCache(stats_tuple);
840 "histogram for attribute %d of table \"%s\" does not exist?",
841 att_num, get_rel_name(table_oid));
858 const char *att_name = text_to_cstring(att_text);
865 att_num = get_attnum(table_oid, att_name);
867 elog(ERROR,
"attribute \"%s\" does not exist", att_name);
873 elog(ERROR,
"attribute name is null");
896 int ncells1, ncells2;
897 int ndims1, ndims2, ndims;
899 double ntuples_not_null1, ntuples_not_null2;
918 if ( ! ( s1 && s2 ) )
920 elog(NOTICE,
" estimate_join_selectivity called with null inputs");
929 if ( ncells1 > ncells2 )
947 ntuples_max = ntuples_not_null1 * ntuples_not_null2;
950 ndims1 = (int)roundf(s1->
ndims);
951 ndims2 = (int)roundf(s2->
ndims);
952 ndims = Max(ndims1, ndims2);
961 POSTGIS_DEBUG(3,
"relation stats do not intersect, returning 0");
962 PG_RETURN_FLOAT8(0.0);
971 POSTGIS_DEBUG(3,
"could not calculate overlap of relations");
976 for ( d = 0; d < ndims1; d++ )
978 at1[d] = ibox1.
min[d];
981 size1[d] = (int)roundf(s1->
size[d]);
982 cellsize1[d] = width1[d] / size1[d];
986 for ( d = 0; d < ndims2; d++ )
990 size2[d] = (int)roundf(s2->
size[d]);
991 cellsize2[d] = width2[d] / size2[d];
1001 for ( d = 0; d < ndims1; d++ )
1003 nd_cell1.
min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1004 nd_cell1.
max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1011 for ( d = 0; d < ndims2; d++ )
1013 at2[d] = ibox2.
min[d];
1016 POSTGIS_DEBUGF(3,
"at1 %d,%d %s", at1[0], at1[1],
nd_box_to_json(&nd_cell1, ndims1));
1030 for ( d = 0; d < ndims2; d++ )
1032 nd_cell2.
min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1033 nd_cell2.
max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1036 POSTGIS_DEBUGF(3,
" at2 %d,%d %s", at2[0], at2[1],
nd_box_to_json(&nd_cell2, ndims2));
1039 ratio2 =
nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1043 POSTGIS_DEBUGF(3,
" val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1044 val += val1 * (val2 * ratio2);
1051 POSTGIS_DEBUGF(3,
"val of histogram = %g", val);
1062 POSTGIS_DEBUGF(3,
"val scaled to full table size = %g", val);
1079 selectivity = val / ntuples_max;
1082 if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1086 else if ( selectivity > 1.0 )
1101 PG_RETURN_DATUM(DirectFunctionCall5(
1103 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1104 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1116 PG_RETURN_DATUM(DirectFunctionCall5(
1118 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1119 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1130 Node *arg1 = (Node*) linitial(args);
1131 Node *arg2 = (Node*) lsecond(args);
1132 Var *var1 = (Var*) arg1;
1133 Var *var2 = (Var*) arg2;
1135 POSTGIS_DEBUGF(2,
"%s: entered function", __func__);
1139 if (!IsA(arg1, Var) || !IsA(arg2, Var))
1141 POSTGIS_DEBUGF(1,
"%s called with arguments that are not column references", __func__);
1146 relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
1147 relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
1156 POSTGIS_DEBUGF(2,
"%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) :
"NULL");
1161 POSTGIS_DEBUGF(2,
"%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) :
"NULL");
1166 POSTGIS_DEBUGF(2,
"got selectivity %g", selectivity);
1183 PlannerInfo *root = (PlannerInfo *)PG_GETARG_POINTER(0);
1185 List *args = (List *)PG_GETARG_POINTER(2);
1186 JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1187 int mode = PG_GETARG_INT32(4);
1189 POSTGIS_DEBUGF(2,
"%s: entered function", __func__);
1192 if (list_length(args) != 2)
1194 POSTGIS_DEBUGF(2,
"%s: got nargs == %d", __func__, list_length(args));
1199 if (jointype != JOIN_INNER)
1201 POSTGIS_DEBUGF(1,
"%s: jointype %d not supported", __func__, jointype);
1228 int sample_rows,
double total_rows,
int mode)
1230 MemoryContext old_context;
1232 int notnull_cnt = 0;
1234 int histogram_features = 0;
1237 size_t nd_stats_size;
1239 double total_width = 0;
1240 double total_cell_count = 0;
1246 const ND_BOX **sample_boxes;
1251 int histo_cells_target;
1253 int histo_cells_new = 1;
1256 int histo_ndims = 0;
1257 double sample_distribution[
ND_DIMS];
1258 double total_distribution;
1276 POSTGIS_DEBUG(2,
"compute_gserialized_stats called");
1277 POSTGIS_DEBUGF(3,
" # sample_rows: %d", sample_rows);
1278 POSTGIS_DEBUGF(3,
" estimate of total_rows: %.6g", total_rows);
1284 sample_boxes = palloc(
sizeof(
ND_BOX*) * sample_rows);
1296 for ( i = 0; i < sample_rows; i++ )
1303 datum = fetchfunc(stats, i, &is_null);
1308 POSTGIS_DEBUGF(4,
" skipped null geometry %d", i);
1317 POSTGIS_DEBUGF(3,
" skipped empty geometry %d", i);
1328 POSTGIS_DEBUGF(3,
" skipped infinite/nan geometry %d", i);
1340 nd_box = palloc(
sizeof(
ND_BOX));
1344 sample_boxes[notnull_cnt] = nd_box;
1347 if ( ! notnull_cnt )
1354 total_width += toast_raw_datum_size(datum);
1357 for ( d = 0; d < ndims; d++ )
1359 sum.
min[d] += nd_box->
min[d];
1360 sum.
max[d] += nd_box->
max[d];
1367#if POSTGIS_PGSQL_VERSION >= 180
1368 vacuum_delay_point(
true);
1370 vacuum_delay_point();
1374#if POSTGIS_PGSQL_VERSION >= 170
1375 POSTGIS_DEBUGF(3,
" stats->attstattarget: %d", stats->attstattarget);
1378 POSTGIS_DEBUGF(3,
" stats->attr->attstattarget: %d", stats->attr->attstattarget);
1381 POSTGIS_DEBUGF(3,
" target # of histogram cells: %d", histo_cells_target);
1384 if ( ! notnull_cnt )
1386 stats->stats_valid =
false;
1390 POSTGIS_DEBUGF(3,
" sample_extent: %s",
nd_box_to_json(&sample_extent, ndims));
1396 for ( d = 0; d < ndims; d++ )
1399 avg.
min[d] = sum.
min[d] / notnull_cnt;
1400 avg.
max[d] = sum.
max[d] / notnull_cnt;
1403 for ( i = 0; i < notnull_cnt; i++ )
1405 const ND_BOX *ndb = sample_boxes[i];
1406 stddev.
min[d] += (ndb->
min[d] - avg.
min[d]) * (ndb->
min[d] - avg.
min[d]);
1407 stddev.
max[d] += (ndb->
max[d] - avg.
max[d]) * (ndb->
max[d] - avg.
max[d]);
1409 stddev.
min[d] = sqrt(stddev.
min[d] / notnull_cnt);
1410 stddev.
max[d] = sqrt(stddev.
max[d] / notnull_cnt);
1423 for ( i = 0; i < notnull_cnt; i++ )
1425 const ND_BOX *ndb = sample_boxes[i];
1429 POSTGIS_DEBUGF(4,
" feature %d is a hard deviant, skipped", i);
1430 sample_boxes[i] = NULL;
1441 histo_extent = histo_extent_new;
1456 sample_distribution);
1472 for ( d = 0; d < ndims; d++ )
1474 if ( sample_distribution[d] > 0 )
1478 if ( histo_ndims == 0 )
1482 POSTGIS_DEBUG(3,
" special case: no axes have variability");
1483 histo_cells_new = 1;
1484 for ( d = 0; d < ndims; d++ )
1486 histo_size[d] = (int)pow((
double)histo_cells_target, 1/(double)ndims);
1487 if ( ! histo_size[d] )
1489 POSTGIS_DEBUGF(3,
" histo_size[d]: %d", histo_size[d]);
1490 histo_cells_new *= histo_size[d];
1492 POSTGIS_DEBUGF(3,
" histo_cells_new: %d", histo_cells_new);
1501 POSTGIS_DEBUG(3,
" allocating histogram axes based on axis variability");
1502 total_distribution =
total_double(sample_distribution, ndims);
1503 POSTGIS_DEBUGF(3,
" total_distribution: %.8g", total_distribution);
1504 histo_cells_new = 1;
1505 for ( d = 0; d < ndims; d++ )
1507 if ( sample_distribution[d] == 0 )
1514 float edge_ratio = (float)sample_distribution[d] / (
float)total_distribution;
1524 histo_cells_new *= histo_size[d];
1526 POSTGIS_DEBUGF(3,
" histo_cells_new: %d", histo_cells_new);
1530 histo_cells = histo_cells_new;
1531 POSTGIS_DEBUGF(3,
" histo_cells: %d", histo_cells);
1536 old_context = MemoryContextSwitchTo(stats->anl_context);
1537 nd_stats_size =
sizeof(
ND_STATS) + ((histo_cells - 1) *
sizeof(float4));
1538 nd_stats = palloc(nd_stats_size);
1539 memset(nd_stats, 0, nd_stats_size);
1540 MemoryContextSwitchTo(old_context);
1543 nd_stats->
ndims = ndims;
1544 nd_stats->
extent = histo_extent;
1549 for ( d = 0; d < ndims; d++ )
1550 nd_stats->
size[d] = histo_size[d];
1565 for ( i = 0; i < notnull_cnt; i++ )
1570 double num_cells = 0;
1571 double min[
ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1572 double max[
ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1573 double cellsize[
ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1575 nd_box = sample_boxes[i];
1576 if ( ! nd_box )
continue;
1579#if POSTGIS_PGSQL_VERSION >= 180
1580 vacuum_delay_point(
true);
1582 vacuum_delay_point();
1587 memset(at, 0,
sizeof(
int)*
ND_DIMS);
1589 POSTGIS_DEBUGF(3,
" feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1590 nd_ibox.
min[0], nd_ibox.
min[1], nd_ibox.
min[2], nd_ibox.
min[3],
1591 nd_ibox.
max[0], nd_ibox.
max[1], nd_ibox.
max[2], nd_ibox.
max[3]);
1593 for ( d = 0; d < nd_stats->
ndims; d++ )
1596 at[d] = nd_ibox.
min[d];
1599 cellsize[d] = (max[d] - min[d])/(nd_stats->
size[d]);
1608 ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1611 for ( d = 0; d < nd_stats->
ndims; d++ )
1613 nd_cell.
min[d] = min[d] + (at[d]+0) * cellsize[d];
1614 nd_cell.
max[d] = min[d] + (at[d]+1) * cellsize[d];
1625 POSTGIS_DEBUGF(3,
" ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1626 POSTGIS_DEBUGF(3,
" at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1631 total_cell_count += num_cells;
1633 histogram_features++;
1636 POSTGIS_DEBUGF(3,
" histogram_features: %d", histogram_features);
1637 POSTGIS_DEBUGF(3,
" sample_rows: %d", sample_rows);
1638 POSTGIS_DEBUGF(3,
" table_rows: %.6g", total_rows);
1641 if ( ! histogram_features )
1643 POSTGIS_DEBUG(3,
" no stats have been gathered");
1644 elog(NOTICE,
" no features lie in the stats histogram, invalid stats");
1645 stats->stats_valid =
false;
1666 stats->stakind[stats_slot] = stats_kind;
1667 stats->staop[stats_slot] = InvalidOid;
1668 stats->stanumbers[stats_slot] = (float4*)nd_stats;
1669 stats->numnumbers[stats_slot] = nd_stats_size/
sizeof(float4);
1670 stats->stanullfrac = (float4)null_cnt/sample_rows;
1671 stats->stawidth = total_width/notnull_cnt;
1672 stats->stadistinct = -1.0;
1673 stats->stats_valid =
true;
1675 POSTGIS_DEBUGF(3,
" out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1676 POSTGIS_DEBUGF(3,
" out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1677 POSTGIS_DEBUGF(3,
" out: slot 0: numnumbers %d", stats->numnumbers[0]);
1678 POSTGIS_DEBUGF(3,
" out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1679 POSTGIS_DEBUGF(3,
" out: average width: %d bytes", stats->stawidth);
1680 POSTGIS_DEBUG (3,
" out: distinct values: all (no check done)");
1707 int sample_rows,
double total_rows)
1713 stats->extra_data = extra_data;
1718 if (stats->stats_valid)
1754 VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1759 if (!std_typanalyze(stats))
1760 PG_RETURN_BOOL(
false);
1767 stats->extra_data = extra_data;
1770 PG_RETURN_BOOL(
true);
1796 double total_count = 0.0;
1802 elog(NOTICE,
" estimate_selectivity called with null input");
1816 POSTGIS_DEBUGF(3,
" mode: %d", mode);
1819 POSTGIS_DEBUG(3,
" in 2d mode, stripping the computation down to 2d");
1837 POSTGIS_DEBUG(3,
" search box does not overlap histogram, returning 0");
1844 POSTGIS_DEBUG(3,
" search box contains histogram, returning 1");
1851 POSTGIS_DEBUG(3,
" search box overlap with stats histogram failed");
1856 for ( d = 0; d < nd_stats->
ndims; d++ )
1861 cell_size[d] = (max[d] - min[d]) / nd_stats->
size[d];
1862 POSTGIS_DEBUGF(3,
" cell_size[%d] : %.9g", d, cell_size[d]);
1865 at[d] = nd_ibox.
min[d];
1871 float cell_count, ratio;
1872 ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1875 for ( d = 0; d < nd_stats->
ndims; d++ )
1877 nd_cell.
min[d] = min[d] + (at[d]+0) * cell_size[d];
1878 nd_cell.
max[d] = min[d] + (at[d]+1) * cell_size[d];
1885 total_count += (double)cell_count * ratio;
1886 POSTGIS_DEBUGF(4,
" cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
1893 POSTGIS_DEBUGF(3,
" nd_stats->histogram_features = %f", nd_stats->
histogram_features);
1894 POSTGIS_DEBUGF(3,
" nd_stats->histogram_cells = %f", nd_stats->
histogram_cells);
1895 POSTGIS_DEBUGF(3,
" sum(overlapped histogram cells) = %f", total_count);
1896 POSTGIS_DEBUGF(3,
" selectivity = %f", selectivity);
1899 if (selectivity > 1.0) selectivity = 1.0;
1900 else if (selectivity < 0.0) selectivity = 0.0;
1914 Oid table_oid = PG_GETARG_OID(0);
1915 text *att_text = PG_GETARG_TEXT_P(1);
1920 bool only_parent =
false;
1923 if ( ! PG_ARGISNULL(2) )
1929 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
1934 json = cstring_to_text(
str);
1938 PG_RETURN_TEXT_P(json);
1949 Oid table_oid = PG_GETARG_OID(0);
1950 text *att_text = PG_GETARG_TEXT_P(1);
1951 Datum geom_datum = PG_GETARG_DATUM(2);
1953 float8 selectivity = 0;
1958 if ( ! PG_ARGISNULL(3) )
1965 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text_to_cstring(att_text));
1969 elog(ERROR,
"unable to calculate bounding box from geometry");
1977 PG_RETURN_FLOAT8(selectivity);
1988 Oid table_oid1 = PG_GETARG_OID(0);
1989 text *att_text1 = PG_GETARG_TEXT_P(1);
1990 Oid table_oid2 = PG_GETARG_OID(2);
1991 text *att_text2 = PG_GETARG_TEXT_P(3);
1993 float8 selectivity = 0;
2002 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text_to_cstring(att_text1));
2005 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid2), text_to_cstring(att_text2));
2008 if ( ! PG_ARGISNULL(4) )
2010 text *modetxt = PG_GETARG_TEXT_P(4);
2011 char *modestr = text_to_cstring(modetxt);
2012 if ( modestr[0] ==
'N' )
2021 PG_RETURN_FLOAT8(selectivity);
2031 PG_RETURN_DATUM(DirectFunctionCall5(
2033 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2034 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2046 PG_RETURN_DATUM(DirectFunctionCall5(
2048 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2049 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2072 VariableStatData vardata;
2078 float8 selectivity = 0;
2081 POSTGIS_DEBUGF(2,
"%s: entered function", __func__);
2083 if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft))
2085 POSTGIS_DEBUGF(2,
"%s: could not find vardata", __func__);
2089 if (!IsA(other, Const))
2091 ReleaseVariableStats(vardata);
2092 POSTGIS_DEBUGF(2,
"%s: no constant argument, returning default selectivity %g", __func__,
DEFAULT_ND_SEL);
2096 otherConst = (Const*)other;
2097 if ((!otherConst) || otherConst->constisnull)
2099 ReleaseVariableStats(vardata);
2100 POSTGIS_DEBUGF(2,
"%s: constant argument is NULL", __func__);
2106 ReleaseVariableStats(vardata);
2107 POSTGIS_DEBUGF(2,
"%s: search box is EMPTY", __func__);
2111 if (!vardata.statsTuple)
2113 POSTGIS_DEBUGF(1,
"%s: no statistics available on table. Empty? Need to ANALYZE?", __func__);
2118 ReleaseVariableStats(vardata);
2129 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2131 List *args = (List *) PG_GETARG_POINTER(2);
2132 int varRelid = PG_GETARG_INT32(3);
2133 int mode = PG_GETARG_INT32(4);
2135 POSTGIS_DEBUGF(2,
"%s: selectivity is %g", __func__, selectivity);
2136 PG_RETURN_FLOAT8(selectivity);
2150 HeapTuple index_tuple;
2151 Form_pg_index index_form;
2152 int16 index_attnum = InvalidAttrNumber;
2155 index_tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid));
2156 if (!HeapTupleIsValid(index_tuple))
2157 elog(ERROR,
"cache lookup failed for index %u", index_oid);
2159 index_form = (Form_pg_index) GETSTRUCT(index_tuple);
2162 if (index_form->indrelid != table_oid)
2163 elog(ERROR,
"table=%u and index=%u are not related", table_oid, index_oid);
2166 for (int16 i = 0; i < (int16)(index_form->indkey.dim1); i++)
2168 if (index_form->indkey.values[i] == table_attnum)
2174 ReleaseSysCache(index_tuple);
2175 return index_attnum;
2187 Form_pg_class index_rel_form;
2188 HeapTuple index_rel_tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(index_oid));
2190 if (!HeapTupleIsValid(index_rel_tuple))
2191 elog(ERROR,
"cache lookup failed for index %u", index_oid);
2193 index_rel_form = (Form_pg_class) GETSTRUCT(index_rel_tuple);
2194 index_am = index_rel_form->relam;
2195 ReleaseSysCache(index_rel_tuple);
2207 Oid atttypid = InvalidOid;
2208 Form_pg_attribute att_form;
2211 HeapTuple att_tuple = SearchSysCache2(ATTNUM,
2212 ObjectIdGetDatum(index_oid),
2213 Int16GetDatum(index_attnum));
2215 if (!HeapTupleIsValid(att_tuple))
2216 elog(ERROR,
"cache lookup failed for index %u attribute %d", index_oid, index_attnum);
2218 att_form = (Form_pg_attribute) GETSTRUCT(att_tuple);
2219 atttypid = att_form->atttypid;
2220 ReleaseSysCache(att_tuple);
2240 Oid b2d_oid = postgis_oid(BOX2DFOID);
2241 Oid gdx_oid = postgis_oid(GIDXOID);
2243 if (!(b2d_oid && gdx_oid))
2247 table_rel = RelationIdGetRelation(table_oid);
2248 index_list = RelationGetIndexList(table_rel);
2249 RelationClose(table_rel);
2252 foreach(lc, index_list)
2254 Oid index_oid = lfirst_oid(lc);
2261 if (*idx_attnum == InvalidAttrNumber)
2271 if (atttypid == b2d_oid || atttypid == gdx_oid)
2290 BOX2DF *bounds_2df = NULL;
2291 GIDX *bounds_gidx = NULL;
2296 unsigned long offset;
2297 unsigned long offset_max;
2302 idx_rel = index_open(idx_oid, AccessShareLock);
2303 buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2304 page = (Page) BufferGetPage(
buffer);
2305 offset = FirstOffsetNumber;
2306 offset_max = PageGetMaxOffsetNumber(page);
2307 while (offset <= offset_max)
2309 ItemId iid = PageGetItemId(page, offset);
2314 index_close(idx_rel, AccessShareLock);
2317 ituple = (IndexTuple) PageGetItem(page, iid);
2318 if (!GistTupleIsInvalid(ituple))
2321 Datum idx_attr = index_getattr(ituple, idx_att_num, idx_rel->rd_att, &isnull);
2326 BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2334 GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2346 index_close(idx_rel, AccessShareLock);
2363 gbox_from_gidx(bounds_gidx, gbox, flags);
2383 int16 att_num, idx_att_num = InvalidAttrNumber;
2384 Oid tbl_oid = PG_GETARG_DATUM(0);
2385 char *col = text_to_cstring(PG_GETARG_TEXT_P(1));
2392 postgis_initialize_cache();
2394 att_num = get_attnum(tbl_oid, col);
2395 if (att_num == InvalidAttrNumber)
2406 PG_RETURN_POINTER(gbox);
2417 HeapTuple att_tuple;
2418 Form_pg_attribute att;
2420 if (!attnum || !atttypid)
2421 elog(ERROR,
"%s got null input parameters", __func__);
2424 att_tuple = SearchSysCache2(ATTNAME,
2425 ObjectIdGetDatum(table_oid),
2426 PointerGetDatum(col));
2428 if (!HeapTupleIsValid(att_tuple))
2431 att = (Form_pg_attribute) GETSTRUCT(att_tuple);
2432 *atttypid = att->atttypid;
2433 *attnum = att->attnum;
2434 ReleaseSysCache(att_tuple);
2447 text *coltxt = NULL;
2449 int16 attnum, idx_attnum;
2450 Oid atttypid = InvalidOid;
2451 char nsp_tbl[2*NAMEDATALEN+6];
2453 Oid tbl_oid, idx_oid = 0;
2456 bool only_parent =
false;
2458 Oid geographyOid = postgis_oid(GEOGRAPHYOID);
2459 Oid geometryOid = postgis_oid(GEOMETRYOID);
2462 postgis_initialize_cache();
2464 if (PG_NARGS() < 2 || PG_NARGS() > 4)
2465 elog(ERROR,
"ST_EstimatedExtent() called with wrong number of arguments");
2467 if ( PG_NARGS() == 4 )
2469 only_parent = PG_GETARG_BOOL(3);
2471 if ( PG_NARGS() >= 3 )
2473 char *nsp = text_to_cstring(PG_GETARG_TEXT_P(0));
2474 tbl = text_to_cstring(PG_GETARG_TEXT_P(1));
2475 coltxt = PG_GETARG_TEXT_P(2);
2476 snprintf(nsp_tbl,
sizeof(nsp_tbl),
"\"%s\".\"%s\"", nsp, tbl);
2478 if ( PG_NARGS() == 2 )
2480 tbl = text_to_cstring(PG_GETARG_TEXT_P(0));
2481 coltxt = PG_GETARG_TEXT_P(1);
2482 snprintf(nsp_tbl,
sizeof(nsp_tbl),
"\"%s\"", tbl);
2486 tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2488 elog(ERROR,
"cannot lookup table %s", nsp_tbl);
2491 col = text_to_cstring(coltxt);
2493 elog(ERROR,
"column %s.\"%s\" does not exist", nsp_tbl, col);
2496 if ((atttypid != geographyOid) && (atttypid != geometryOid))
2498 elog(ERROR,
"column %s.\"%s\" must be a geometry or geography", nsp_tbl, col);
2504 if (idx_oid != InvalidOid)
2508 elog(DEBUG3,
"index for %s.\"%s\" exists, reading gbox from there", nsp_tbl, col);
2509 if (!gbox) PG_RETURN_NULL();
2516 elog(DEBUG3,
"index for %s.\"%s\" does not exist", nsp_tbl, col);
2519 if (atttypid == geographyOid)
2528 elog(WARNING,
"stats for \"%s.%s\" do not exist", tbl, col);
2538 if (stats_mode != 2)
2550 if (atttypid == geographyOid)
2554 PG_RETURN_POINTER(gbox_planar);
2557 PG_RETURN_POINTER(gbox);
2566 if ( PG_NARGS() == 3 )
2572 PG_GETARG_DATUM(2)));
2574 else if ( PG_NARGS() == 2 )
2579 PG_GETARG_DATUM(1)));
2582 elog(ERROR,
"geometry_estimated_extent() called with wrong number of arguments");
GBOX * gbox_new(lwflags_t flags)
Create a new gbox with the dimensionality indicated by the flags.
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
static int index_get_keytype(Oid index_oid, int16 index_attnum)
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
#define STATISTIC_SLOT_ND
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
static int range_full(int *vals, int nvals)
The difference between the largest and smallest values in the array, the full range.
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
#define STATISTIC_KIND_2D
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Expand the bounds of target to include source.
#define DEFAULT_ND_JOINSEL
#define STATISTIC_KIND_ND
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
static bool get_attnum_attypid(Oid table_oid, const char *col, int16 *attnum, Oid *atttypid)
static GBOX * spatial_index_read_extent(Oid idx_oid, int idx_att_num, int key_type)
#define FALLBACK_ND_JOINSEL
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
This function should return an estimation of the number of rows returned by a query involving an over...
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
static Oid table_get_spatial_index(Oid tbl_oid, int16 attnum, int *key_type, int16 *idx_attnum)
static int index_get_am(Oid index_oid)
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator?
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
static char * nd_stats_to_grid(const ND_STATS *stats)
Create a printable view of the ND_STATS histogram.
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension,...
static int16 index_has_attr(Oid index_oid, Oid table_oid, int16 table_attnum)
struct ND_STATS_T ND_STATS
static int histogram_axis_cells(int histo_cells_target, int histo_ndims, double edge_ratio)
static double nd_box_ratio(const ND_BOX *cover, const ND_BOX *target, int ndims)
static int histogram_cell_budget(double total_rows, int ndims, int attstattarget)
static int nd_stats_value_index(const ND_STATS *stats, const int *indexes)
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
bool box2df_is_empty(const BOX2DF *a)
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
int gserialized_datum_get_gbox_p(Datum gsdatum, GBOX *gbox)
Given a GSERIALIZED datum, as quickly as possible (peeking into the top of the memory) return the gbo...
BOX2DF * box2df_copy(BOX2DF *b)
bool gidx_is_unknown(const GIDX *a)
GIDX * gidx_copy(GIDX *b)
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define FLAGS_GET_Z(flags)
#define FLAGS_GET_M(flags)
#define FLAGS_SET_M(flags, value)
#define FLAGS_SET_Z(flags, value)
#define FLAGS_GET_GEODETIC(flags)
This library is the generic geometry handling section of PostGIS.
int gbox_geocentric_get_gbox_cartesian(const GBOX *gbox_geocentric, GBOX *gbox_planar)
Datum buffer(PG_FUNCTION_ARGS)
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided.
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string.
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
static void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
float4 histogram_features