64 #include "access/genam.h"
65 #include "access/gin.h"
66 #include "access/gist.h"
67 #include "access/gist_private.h"
68 #include "access/gistscan.h"
69 #include "utils/datum.h"
70 #include "access/heapam.h"
71 #include "catalog/index.h"
72 #include "catalog/pg_am.h"
73 #include "miscadmin.h"
74 #include "storage/lmgr.h"
75 #include "catalog/namespace.h"
76 #include "catalog/indexing.h"
77 #if PG_VERSION_NUM >= 100000
78 #include "utils/regproc.h"
79 #include "utils/varlena.h"
81 #include "utils/builtins.h"
82 #include "utils/datum.h"
83 #include "utils/snapmgr.h"
84 #include "utils/fmgroids.h"
86 #include "access/heapam.h"
87 #include "catalog/pg_type.h"
88 #include "access/relscan.h"
90 #include "executor/spi.h"
92 #include "commands/vacuum.h"
93 #if PG_VERSION_NUM < 120000
94 #include "nodes/relation.h"
96 #include "nodes/pathnodes.h"
98 #include "parser/parsetree.h"
99 #include "utils/array.h"
100 #include "utils/lsyscache.h"
101 #include "utils/builtins.h"
102 #include "utils/syscache.h"
103 #include "utils/rel.h"
104 #include "utils/selfuncs.h"
106 #include "../postgis_config.h"
108 #include "access/htup_details.h"
112 #include "lwgeom_pg.h"
113 #include "gserialized_gist.h"
165 #define STATISTIC_KIND_ND 102
166 #define STATISTIC_KIND_2D 103
167 #define STATISTIC_SLOT_ND 0
168 #define STATISTIC_SLOT_2D 1
176 #define SDFACTOR 3.25
191 #define MIN_DIMENSION_WIDTH 0.000000001
197 #define MAX_DIMENSION_WIDTH 1.0E+20
202 #define DEFAULT_ND_SEL 0.0001
203 #define DEFAULT_ND_JOINSEL 0.001
208 #define FALLBACK_ND_SEL 0.2
209 #define FALLBACK_ND_JOINSEL 0.3
305 if (VARSIZE_ANY_EXHDR(txt) <= 0)
307 modestr = (
char*)VARDATA(txt);
308 if ( modestr[0] ==
'N' )
320 int ia = *((
const int*)a);
321 int ib = *((
const int*)b);
338 qsort(vals, nvals,
sizeof(
int),
cmp_int);
339 return vals[4*nvals/5] - vals[nvals/5];
351 for ( i = 0; i < nvals; i++ )
357 #if POSTGIS_DEBUG_LEVEL >= 3
363 total_int(
const int *vals,
int nvals)
368 for ( i = 0; i < nvals; i++ )
378 avg(
const int *vals,
int nvals)
380 int t = total_int(vals, nvals);
381 return (
double)t / (double)nvals;
388 stddev(
const int *vals,
int nvals)
392 double mean = avg(vals, nvals);
395 for ( i = 0; i < nvals; i++ )
397 double v = (double)(vals[i]);
398 sigma2 += (mean - v) * (mean - v);
400 return sqrt(sigma2 / nvals);
412 int accum = 1, vdx = 0;
417 for ( d = 0; d < (int)(stats->
ndims); d++ )
419 int size = (int)(stats->
size[d]);
420 if ( indexes[d] < 0 || indexes[d] >= size )
422 POSTGIS_DEBUGF(3,
" bad index at (%d, %d)", indexes[0], indexes[1]);
425 vdx += indexes[d] * accum;
442 for ( i = 0; i < ndims; i++ )
448 for ( i = 0; i < ndims; i++ )
468 char *json_extent, *
str;
471 int ndims = (int)roundf(nd_stats->
ndims);
478 for ( d = 0; d < ndims; d++ )
538 for ( d = 0; d <
ND_DIMS; d++ )
540 target->
min[d] = Min(target->
min[d], source->
min[d]);
541 target->
max[d] = Max(target->
max[d], source->
max[d]);
550 memset(a, 0,
sizeof(
ND_BOX));
563 for ( d = 0; d <
ND_DIMS; d++ )
566 a->
max[d] = -1 * FLT_MAX;
613 for ( d = 0; d < ndims; d++ )
615 if ( (a->
min[d] > b->
max[d]) || (a->
max[d] < b->
min[d]) )
628 for ( d = 0; d < ndims; d++ )
630 if ( ! ((a->
min[d] < b->
min[d]) && (a->
max[d] > b->
max[d])) )
645 for ( d = 0; d <
ND_DIMS; d++ )
647 size = nd_box->
max[d] - nd_box->
min[d];
651 nd_box->
min[d] -= size * expansion_factor / 2;
652 nd_box->
max[d] += size * expansion_factor / 2;
669 memset(nd_ibox, 0,
sizeof(
ND_IBOX));
672 for ( d = 0; d < nd_stats->
ndims; d++ )
676 double width = smax - smin;
684 int size = (int)roundf(nd_stats->
size[d]);
687 nd_ibox->
min[d] = floor(size * (nd_box->
min[d] - smin) / width);
688 nd_ibox->
max[d] = floor(size * (nd_box->
max[d] - smin) / width);
690 POSTGIS_DEBUGF(5,
" stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
691 POSTGIS_DEBUGF(5,
" overlap: dim %d: (%d, %d)", d, nd_ibox->
min[d], nd_ibox->
max[d]);
694 nd_ibox->
min[d] = Max(nd_ibox->
min[d], 0);
695 nd_ibox->
max[d] = Min(nd_ibox->
max[d], size - 1);
713 for ( d = 0 ; d < ndims; d++ )
715 if ( b1->
max[d] <= b2->
min[d] || b1->
min[d] >= b2->
max[d] )
718 if ( b1->
min[d] > b2->
min[d] || b1->
max[d] < b2->
max[d] )
725 for ( d = 0; d < ndims; d++ )
727 double width1 = b1->
max[d] - b1->
min[d];
728 double width2 = b2->
max[d] - b2->
min[d];
729 double imin, imax, iwidth;
734 imin = Max(b1->
min[d], b2->
min[d]);
735 imax = Min(b1->
max[d], b2->
max[d]);
736 iwidth = imax - imin;
737 iwidth = Max(0.0, iwidth);
773 #if POSTGIS_DEBUG_LEVEL >= 3
774 double average, sdev, sdev_ratio;
780 for ( d = 0; d < ndims; d++ )
783 memset(counts, 0,
sizeof(counts));
785 smin = extent->
min[d];
786 smax = extent->
max[d];
787 swidth = smax - smin;
801 for ( i = 0; i < num_boxes; i++ )
803 double minoffset, maxoffset;
807 if ( ! ndb )
continue;
810 minoffset = ndb->
min[d] - smin;
811 maxoffset = ndb->
max[d] - smin;
814 if ( minoffset < 0 || minoffset > swidth ||
815 maxoffset < 0 || maxoffset > swidth )
821 bmin = floor(
NUM_BINS * minoffset / swidth);
822 bmax = floor(
NUM_BINS * maxoffset / swidth);
828 POSTGIS_DEBUGF(4,
" dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
831 for ( k = bmin; k <= bmax; k++ )
841 #if POSTGIS_DEBUG_LEVEL >= 3
844 sdev_ratio = sdev/average;
846 POSTGIS_DEBUGF(3,
" dimension %d: range = %d", d, range);
847 POSTGIS_DEBUGF(3,
" dimension %d: average = %.6g", d, average);
848 POSTGIS_DEBUGF(3,
" dimension %d: stddev = %.6g", d, sdev);
849 POSTGIS_DEBUGF(3,
" dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
852 distribution[d] = range;
870 if ( counter[d] < ibox->
max[d] )
875 counter[d] = ibox->
min[d];
898 #if POSTGIS_PGSQL_VERSION < 100
903 rv = get_attstatsslot(stats_tuple, 0, 0, stats_kind, InvalidOid,
904 NULL, NULL, NULL, &floatptr, &nvalues);
907 POSTGIS_DEBUGF(2,
"no slot of kind %d in stats tuple", stats_kind);
912 nd_stats = palloc(
sizeof(
float) * nvalues);
913 memcpy(nd_stats, floatptr,
sizeof(
float) * nvalues);
916 free_attstatsslot(0, NULL, 0, floatptr, nvalues);
921 rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
922 ATTSTATSSLOT_NUMBERS);
924 POSTGIS_DEBUGF(2,
"no slot of kind %d in stats tuple", stats_kind);
929 nd_stats = palloc(
sizeof(float4) * sslot.nnumbers);
930 memcpy(nd_stats, sslot.numbers,
sizeof(float4) * sslot.nnumbers);
932 free_attstatsslot(&sslot);
946 HeapTuple stats_tuple = NULL;
952 POSTGIS_DEBUGF(2,
"searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
953 stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(
true));
955 POSTGIS_DEBUGF(2,
"found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
958 if ( only_parent || ! stats_tuple )
960 POSTGIS_DEBUGF(2,
"searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
961 stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(
false));
963 POSTGIS_DEBUGF(2,
"found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
967 POSTGIS_DEBUGF(2,
"stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
972 ReleaseSysCache(stats_tuple);
976 "histogram for attribute %d of table \"%s\" does not exist?",
977 att_num, get_rel_name(table_oid));
1001 att_num = get_attnum(table_oid, att_name);
1003 elog(ERROR,
"attribute \"%s\" does not exist", att_name);
1009 elog(ERROR,
"attribute name is null");
1032 int ncells1, ncells2;
1033 int ndims1, ndims2, ndims;
1035 double ntuples_not_null1, ntuples_not_null2;
1054 if ( ! ( s1 && s2 ) )
1056 elog(NOTICE,
" estimate_join_selectivity called with null inputs");
1065 if ( ncells1 > ncells2 )
1083 ntuples_max = ntuples_not_null1 * ntuples_not_null2;
1086 ndims1 = (int)roundf(s1->
ndims);
1087 ndims2 = (int)roundf(s2->
ndims);
1088 ndims = Max(ndims1, ndims2);
1097 POSTGIS_DEBUG(3,
"relation stats do not intersect, returning 0");
1098 PG_RETURN_FLOAT8(0.0);
1107 POSTGIS_DEBUG(3,
"could not calculate overlap of relations");
1112 for ( d = 0; d < ndims1; d++ )
1114 at1[d] = ibox1.
min[d];
1117 size1[d] = (int)roundf(s1->
size[d]);
1118 cellsize1[d] = width1[d] / size1[d];
1122 for ( d = 0; d < ndims2; d++ )
1126 size2[d] = (int)roundf(s2->
size[d]);
1127 cellsize2[d] = width2[d] / size2[d];
1137 for ( d = 0; d < ndims1; d++ )
1139 nd_cell1.
min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1140 nd_cell1.
max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1147 for ( d = 0; d < ndims2; d++ )
1149 at2[d] = ibox2.
min[d];
1152 POSTGIS_DEBUGF(3,
"at1 %d,%d %s", at1[0], at1[1],
nd_box_to_json(&nd_cell1, ndims1));
1166 for ( d = 0; d < ndims2; d++ )
1168 nd_cell2.
min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1169 nd_cell2.
max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1172 POSTGIS_DEBUGF(3,
" at2 %d,%d %s", at2[0], at2[1],
nd_box_to_json(&nd_cell2, ndims2));
1175 ratio2 =
nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1179 POSTGIS_DEBUGF(3,
" val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1180 val += val1 * (val2 * ratio2);
1187 POSTGIS_DEBUGF(3,
"val of histogram = %g", val);
1198 POSTGIS_DEBUGF(3,
"val scaled to full table size = %g", val);
1215 selectivity = val / ntuples_max;
1218 if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1222 else if ( selectivity > 1.0 )
1237 PG_RETURN_DATUM(DirectFunctionCall5(
1239 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1240 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1252 PG_RETURN_DATUM(DirectFunctionCall5(
1254 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1255 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1266 Node *arg1 = (Node*) linitial(
args);
1267 Node *arg2 = (Node*) lsecond(
args);
1268 Var *var1 = (Var*) arg1;
1269 Var *var2 = (Var*) arg2;
1271 POSTGIS_DEBUGF(2,
"%s: entered function", __func__);
1275 if (!IsA(arg1, Var) || !IsA(arg2, Var))
1277 POSTGIS_DEBUGF(1,
"%s called with arguments that are not column references", __func__);
1282 relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
1283 relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
1292 POSTGIS_DEBUGF(2,
"%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) :
"NULL");
1297 POSTGIS_DEBUGF(2,
"%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) :
"NULL");
1302 POSTGIS_DEBUGF(2,
"got selectivity %g", selectivity);
1319 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1321 List *
args = (List *) PG_GETARG_POINTER(2);
1322 JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1323 int mode = PG_GETARG_INT32(4);
1325 POSTGIS_DEBUGF(2,
"%s: entered function", __func__);
1328 if (list_length(
args) != 2)
1330 POSTGIS_DEBUGF(2,
"%s: got nargs == %d", __func__, list_length(
args));
1335 if (jointype != JOIN_INNER)
1337 POSTGIS_DEBUGF(1,
"%s: jointype %d not supported", __func__, jointype);
1367 int sample_rows,
double total_rows,
int mode)
1369 MemoryContext old_context;
1371 int notnull_cnt = 0;
1373 int histogram_features = 0;
1376 size_t nd_stats_size;
1378 double total_width = 0;
1379 double total_sample_volume = 0;
1380 double total_cell_count = 0;
1386 const ND_BOX **sample_boxes;
1391 int histo_cells_target;
1393 int histo_cells_new = 1;
1396 int histo_ndims = 0;
1397 double sample_distribution[
ND_DIMS];
1398 double total_distribution;
1416 POSTGIS_DEBUG(2,
"compute_gserialized_stats called");
1417 POSTGIS_DEBUGF(3,
" # sample_rows: %d", sample_rows);
1418 POSTGIS_DEBUGF(3,
" estimate of total_rows: %.6g", total_rows);
1424 sample_boxes = palloc(
sizeof(
ND_BOX*) * sample_rows);
1436 for ( i = 0; i < sample_rows; i++ )
1445 datum = fetchfunc(stats, i, &is_null);
1450 POSTGIS_DEBUGF(4,
" skipped null geometry %d", i);
1457 is_copy = VARATT_IS_EXTENDED(datum);
1461 POSTGIS_DEBUGF(3,
" skipped empty geometry %d", i);
1472 POSTGIS_DEBUGF(3,
" skipped infinite/nan geometry %d", i);
1484 nd_box = palloc(
sizeof(
ND_BOX));
1488 sample_boxes[notnull_cnt] = nd_box;
1491 if ( ! notnull_cnt )
1498 total_width += VARSIZE(geom);
1501 for ( d = 0; d < ndims; d++ )
1503 sum.
min[d] += nd_box->
min[d];
1504 sum.
max[d] += nd_box->
max[d];
1515 vacuum_delay_point();
1526 histo_cells_target = (int)pow((
double)(stats->attr->attstattarget), (
double)ndims);
1527 histo_cells_target = Min(histo_cells_target, ndims * 10000);
1528 histo_cells_target = Min(histo_cells_target, (
int)(total_rows/5));
1529 POSTGIS_DEBUGF(3,
" stats->attr->attstattarget: %d", stats->attr->attstattarget);
1530 POSTGIS_DEBUGF(3,
" target # of histogram cells: %d", histo_cells_target);
1533 if ( ! notnull_cnt )
1535 Oid relation_oid = stats->attr->attrelid;
1536 char *relation_name = get_rel_name(relation_oid);
1538 "PostGIS: Unable to compute statistics for \"%s.%s\": No non-null/empty features",
1539 relation_name ? relation_name :
"(NULL)",
1540 stats->attr->attname.data);
1541 stats->stats_valid =
false;
1545 POSTGIS_DEBUGF(3,
" sample_extent: %s",
nd_box_to_json(&sample_extent, ndims));
1551 for ( d = 0; d < ndims; d++ )
1554 avg.
min[d] = sum.
min[d] / notnull_cnt;
1555 avg.
max[d] = sum.
max[d] / notnull_cnt;
1558 for ( i = 0; i < notnull_cnt; i++ )
1560 const ND_BOX *ndb = sample_boxes[i];
1561 stddev.
min[d] += (ndb->
min[d] - avg.
min[d]) * (ndb->
min[d] - avg.
min[d]);
1562 stddev.
max[d] += (ndb->
max[d] - avg.
max[d]) * (ndb->
max[d] - avg.
max[d]);
1564 stddev.
min[d] = sqrt(stddev.
min[d] / notnull_cnt);
1565 stddev.
max[d] = sqrt(stddev.
max[d] / notnull_cnt);
1578 for ( i = 0; i < notnull_cnt; i++ )
1580 const ND_BOX *ndb = sample_boxes[i];
1584 POSTGIS_DEBUGF(4,
" feature %d is a hard deviant, skipped", i);
1585 sample_boxes[i] = NULL;
1596 histo_extent = histo_extent_new;
1611 sample_distribution);
1627 for ( d = 0; d < ndims; d++ )
1629 if ( sample_distribution[d] > 0 )
1633 if ( histo_ndims == 0 )
1637 POSTGIS_DEBUG(3,
" special case: no axes have variability");
1638 histo_cells_new = 1;
1639 for ( d = 0; d < ndims; d++ )
1641 histo_size[d] = (int)pow((
double)histo_cells_target, 1/(double)ndims);
1642 if ( ! histo_size[d] )
1644 POSTGIS_DEBUGF(3,
" histo_size[d]: %d", histo_size[d]);
1645 histo_cells_new *= histo_size[d];
1647 POSTGIS_DEBUGF(3,
" histo_cells_new: %d", histo_cells_new);
1656 POSTGIS_DEBUG(3,
" allocating histogram axes based on axis variability");
1657 total_distribution =
total_double(sample_distribution, ndims);
1658 POSTGIS_DEBUGF(3,
" total_distribution: %.8g", total_distribution);
1659 histo_cells_new = 1;
1660 for ( d = 0; d < ndims; d++ )
1662 if ( sample_distribution[d] == 0 )
1669 float edge_ratio = (float)sample_distribution[d] / (
float)total_distribution;
1675 histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(
double)histo_ndims);
1677 if ( ! histo_size[d] )
1680 histo_cells_new *= histo_size[d];
1682 POSTGIS_DEBUGF(3,
" histo_cells_new: %d", histo_cells_new);
1686 histo_cells = histo_cells_new;
1687 POSTGIS_DEBUGF(3,
" histo_cells: %d", histo_cells);
1692 old_context = MemoryContextSwitchTo(stats->anl_context);
1693 nd_stats_size =
sizeof(
ND_STATS) + ((histo_cells - 1) *
sizeof(float4));
1694 nd_stats = palloc(nd_stats_size);
1695 memset(nd_stats, 0, nd_stats_size);
1696 MemoryContextSwitchTo(old_context);
1699 nd_stats->
ndims = ndims;
1700 nd_stats->
extent = histo_extent;
1705 for ( d = 0; d < ndims; d++ )
1706 nd_stats->
size[d] = histo_size[d];
1721 for ( i = 0; i < notnull_cnt; i++ )
1727 double num_cells = 0;
1728 double tmp_volume = 1.0;
1729 double min[
ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1730 double max[
ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1731 double cellsize[
ND_DIMS] = {0.0, 0.0, 0.0, 0.0};
1733 nd_box = sample_boxes[i];
1734 if ( ! nd_box )
continue;
1737 vacuum_delay_point();
1741 memset(at, 0,
sizeof(
int)*
ND_DIMS);
1743 POSTGIS_DEBUGF(3,
" feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1744 nd_ibox.
min[0], nd_ibox.
min[1], nd_ibox.
min[2], nd_ibox.
min[3],
1745 nd_ibox.
max[0], nd_ibox.
max[1], nd_ibox.
max[2], nd_ibox.
max[3]);
1747 for ( d = 0; d < nd_stats->
ndims; d++ )
1750 at[d] = nd_ibox.
min[d];
1753 cellsize[d] = (max[d] - min[d])/(nd_stats->
size[d]);
1756 tmp_volume *= (nd_box->
max[d] - nd_box->
min[d]);
1760 total_sample_volume += tmp_volume;
1768 ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1771 for ( d = 0; d < nd_stats->
ndims; d++ )
1773 nd_cell.
min[d] = min[d] + (at[d]+0) * cellsize[d];
1774 nd_cell.
max[d] = min[d] + (at[d]+1) * cellsize[d];
1785 POSTGIS_DEBUGF(3,
" ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1786 POSTGIS_DEBUGF(3,
" at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1791 total_cell_count += num_cells;
1793 histogram_features++;
1796 POSTGIS_DEBUGF(3,
" histogram_features: %d", histogram_features);
1797 POSTGIS_DEBUGF(3,
" sample_rows: %d", sample_rows);
1798 POSTGIS_DEBUGF(3,
" table_rows: %.6g", total_rows);
1801 if ( ! histogram_features )
1803 POSTGIS_DEBUG(3,
" no stats have been gathered");
1804 elog(NOTICE,
" no features lie in the stats histogram, invalid stats");
1805 stats->stats_valid =
false;
1826 stats->stakind[stats_slot] = stats_kind;
1827 stats->staop[stats_slot] = InvalidOid;
1828 stats->stanumbers[stats_slot] = (float4*)nd_stats;
1829 stats->numnumbers[stats_slot] = nd_stats_size/
sizeof(float4);
1830 stats->stanullfrac = (float4)null_cnt/sample_rows;
1831 stats->stawidth = total_width/notnull_cnt;
1832 stats->stadistinct = -1.0;
1833 stats->stats_valid =
true;
1835 POSTGIS_DEBUGF(3,
" out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1836 POSTGIS_DEBUGF(3,
" out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1837 POSTGIS_DEBUGF(3,
" out: slot 0: numnumbers %d", stats->numnumbers[0]);
1838 POSTGIS_DEBUGF(3,
" out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1839 POSTGIS_DEBUGF(3,
" out: average width: %d bytes", stats->stawidth);
1840 POSTGIS_DEBUG (3,
" out: distinct values: all (no check done)");
1869 int sample_rows,
double total_rows)
1874 if (stats->stats_valid)
1912 VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1913 Form_pg_attribute attr = stats->attr;
1915 POSTGIS_DEBUG(2,
"gserialized_analyze_nd called");
1919 if (attr->attstattarget < 0)
1920 attr->attstattarget = default_statistics_target;
1922 POSTGIS_DEBUGF(3,
" attribute stat target: %d", attr->attstattarget);
1927 stats->minrows = 300 * stats->attr->attstattarget;
1930 POSTGIS_DEBUGF(3,
" minrows: %d", stats->minrows);
1933 PG_RETURN_BOOL(
true);
1959 double total_count = 0.0;
1965 elog(NOTICE,
" estimate_selectivity called with null input");
1979 POSTGIS_DEBUGF(3,
" mode: %d", mode);
1982 POSTGIS_DEBUG(3,
" in 2d mode, stripping the computation down to 2d");
1998 POSTGIS_DEBUG(3,
" search box does not overlap histogram, returning 0");
2005 POSTGIS_DEBUG(3,
" search box contains histogram, returning 1");
2012 POSTGIS_DEBUG(3,
" search box overlap with stats histogram failed");
2017 for ( d = 0; d < nd_stats->
ndims; d++ )
2022 cell_size[d] = (max[d] - min[d]) / nd_stats->
size[d];
2023 POSTGIS_DEBUGF(3,
" cell_size[%d] : %.9g", d, cell_size[d]);
2026 at[d] = nd_ibox.
min[d];
2032 float cell_count, ratio;
2033 ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2036 for ( d = 0; d < nd_stats->
ndims; d++ )
2038 nd_cell.
min[d] = min[d] + (at[d]+0) * cell_size[d];
2039 nd_cell.
max[d] = min[d] + (at[d]+1) * cell_size[d];
2046 total_count += cell_count * ratio;
2047 POSTGIS_DEBUGF(4,
" cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2054 POSTGIS_DEBUGF(3,
" nd_stats->histogram_features = %f", nd_stats->
histogram_features);
2055 POSTGIS_DEBUGF(3,
" nd_stats->histogram_cells = %f", nd_stats->
histogram_cells);
2056 POSTGIS_DEBUGF(3,
" sum(overlapped histogram cells) = %f", total_count);
2057 POSTGIS_DEBUGF(3,
" selectivity = %f", selectivity);
2060 if (selectivity > 1.0) selectivity = 1.0;
2061 else if (selectivity < 0.0) selectivity = 0.0;
2075 Oid table_oid = PG_GETARG_OID(0);
2076 text *att_text = PG_GETARG_TEXT_P(1);
2081 bool only_parent =
false;
2084 if ( ! PG_ARGISNULL(2) )
2090 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid),
text_to_cstring(att_text));
2094 json = cstring_to_text(
str);
2097 PG_RETURN_TEXT_P(json);
2108 Oid table_oid = PG_GETARG_OID(0);
2109 text *att_text = PG_GETARG_TEXT_P(1);
2110 Datum geom_datum = PG_GETARG_DATUM(2);
2112 float8 selectivity = 0;
2117 if ( ! PG_ARGISNULL(3) )
2124 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid),
text_to_cstring(att_text));
2127 if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
2128 elog(ERROR,
"unable to calculate bounding box from geometry");
2136 PG_RETURN_FLOAT8(selectivity);
2147 Oid table_oid1 = PG_GETARG_OID(0);
2148 text *att_text1 = PG_GETARG_TEXT_P(1);
2149 Oid table_oid2 = PG_GETARG_OID(2);
2150 text *att_text2 = PG_GETARG_TEXT_P(3);
2152 float8 selectivity = 0;
2161 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid1),
text_to_cstring(att_text1));
2164 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid2),
text_to_cstring(att_text2));
2167 if ( ! PG_ARGISNULL(4) )
2169 text *modetxt = PG_GETARG_TEXT_P(4);
2171 if ( modestr[0] ==
'N' )
2180 PG_RETURN_FLOAT8(selectivity);
2190 PG_RETURN_DATUM(DirectFunctionCall5(
2192 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2193 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2205 PG_RETURN_DATUM(DirectFunctionCall5(
2207 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2208 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2231 VariableStatData vardata;
2237 float8 selectivity = 0;
2240 POSTGIS_DEBUGF(2,
"%s: entered function", __func__);
2242 if (!get_restriction_variable(root,
args, varRelid, &vardata, &other, &varonleft))
2244 POSTGIS_DEBUGF(2,
"%s: could not find vardata", __func__);
2248 if (!IsA(other, Const))
2250 ReleaseVariableStats(vardata);
2251 POSTGIS_DEBUGF(2,
"%s: no constant argument, returning default selectivity %g", __func__,
DEFAULT_ND_SEL);
2255 otherConst = (Const*)other;
2256 if ((!otherConst) || otherConst->constisnull)
2258 ReleaseVariableStats(vardata);
2259 POSTGIS_DEBUGF(2,
"%s: constant argument is NULL", __func__);
2263 if (!gserialized_datum_get_gbox_p(otherConst->constvalue, &search_box))
2265 ReleaseVariableStats(vardata);
2266 POSTGIS_DEBUGF(2,
"%s: search box is EMPTY", __func__);
2270 if (!vardata.statsTuple)
2272 POSTGIS_DEBUGF(1,
"%s: no statistics available on table. Empty? Need to ANALYZE?", __func__);
2277 ReleaseVariableStats(vardata);
2286 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2288 List *
args = (List *) PG_GETARG_POINTER(2);
2289 int varRelid = PG_GETARG_INT32(3);
2290 int mode = PG_GETARG_INT32(4);
2292 POSTGIS_DEBUGF(2,
"%s: selectivity is %g", __func__, selectivity);
2293 PG_RETURN_FLOAT8(selectivity);
2309 char *nsp_tbl = NULL;
2310 Oid tbl_oid, idx_oid = 0;
2313 bool only_parent =
false;
2314 int key_type, att_num;
2318 postgis_initialize_cache(fcinfo);
2320 if ( PG_NARGS() == 4 )
2324 col = PG_GETARG_TEXT_P(2);
2325 only_parent = PG_GETARG_BOOL(3);
2326 sz = strlen(nsp) + strlen(tbl) + 6;
2327 nsp_tbl = palloc(sz);
2328 snprintf(nsp_tbl, sz,
"\"%s\".\"%s\"", nsp, tbl);
2329 tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2332 else if ( PG_NARGS() == 3 )
2336 col = PG_GETARG_TEXT_P(2);
2337 sz = strlen(nsp) + strlen(tbl) + 6;
2338 nsp_tbl = palloc(sz);
2339 snprintf(nsp_tbl, sz,
"\"%s\".\"%s\"", nsp, tbl);
2340 tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2343 else if ( PG_NARGS() == 2 )
2346 col = PG_GETARG_TEXT_P(1);
2347 sz = strlen(tbl) + 3;
2348 nsp_tbl = palloc(sz);
2349 snprintf(nsp_tbl, sz,
"\"%s\"", tbl);
2350 tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2355 elog(ERROR,
"estimated_extent() called with wrong number of arguments");
2366 POSTGIS_DEBUGF(2,
"index for \"%s.%s\" exists, reading gbox from there", tbl,
text_to_cstring(col));
2367 if ( ! gbox ) PG_RETURN_NULL();
2371 POSTGIS_DEBUGF(2,
"index for \"%s.%s\" does not exist", tbl,
text_to_cstring(col));
2380 elog(WARNING,
"stats for \"%s.%s\" do not exist", tbl,
text_to_cstring(col));
2385 gbox = palloc(
sizeof(
GBOX));
2396 PG_RETURN_POINTER(gbox);
2408 if ( PG_NARGS() == 3 )
2414 PG_GETARG_DATUM(2)));
2416 else if ( PG_NARGS() == 2 )
2421 PG_GETARG_DATUM(1)));
2424 elog(ERROR,
"geometry_estimated_extent() called with wrong number of arguments");
2436 Oid result = InvalidOid;
2440 Oid b2d_oid = postgis_oid(BOX2DFOID);
2441 Oid gdx_oid = postgis_oid(BOX3DOID);
2443 if (!(b2d_oid && gdx_oid))
2446 tbl_rel = RelationIdGetRelation(tbl_oid);
2447 idx_list = RelationGetIndexList(tbl_rel);
2448 RelationClose(tbl_rel);
2451 foreach(lc, idx_list)
2453 Form_pg_class idx_form;
2456 Oid idx_oid = lfirst_oid(lc);
2458 idx_tup = SearchSysCache1(RELOID, ObjectIdGetDatum(idx_oid));
2459 if (!HeapTupleIsValid(idx_tup))
2460 elog(ERROR,
"%s: unable to lookup index %u in syscache", __func__, idx_oid);
2461 idx_form = (Form_pg_class) GETSTRUCT(idx_tup);
2462 idx_relam = idx_form->relam;
2463 ReleaseSysCache(idx_tup);
2466 if (idx_relam == GIST_AM_OID)
2468 Form_pg_attribute att;
2472 HeapTuple att_tup = SearchSysCache2(ATTNAME,
2473 ObjectIdGetDatum(idx_oid),
2474 PointerGetDatum(colname));
2475 if (!HeapTupleIsValid(att_tup))
2478 att = (Form_pg_attribute) GETSTRUCT(att_tup);
2479 atttypid = att->atttypid;
2480 attnum = att->attnum;
2481 ReleaseSysCache(att_tup);
2484 if (b2d_oid == atttypid || gdx_oid == atttypid)
2502 BOX2DF *bounds_2df = NULL;
2503 GIDX *bounds_gidx = NULL;
2508 OffsetNumber offset;
2509 unsigned long offset_max;
2514 idx_rel = index_open(idx_oid, AccessShareLock);
2515 buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
2516 page = (Page) BufferGetPage(
buffer);
2517 offset = FirstOffsetNumber;
2518 offset_max = PageGetMaxOffsetNumber(page);
2519 while (offset <= offset_max)
2521 ItemId iid = PageGetItemId(page, offset);
2526 index_close(idx_rel, AccessShareLock);
2529 ituple = (IndexTuple) PageGetItem(page, iid);
2530 if (!GistTupleIsInvalid(ituple))
2533 Datum idx_attr = index_getattr(ituple, att_num, idx_rel->rd_att, &isnull);
2538 BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
2546 GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
2558 index_close(idx_rel, AccessShareLock);
2572 gbox_from_gidx(bounds_gidx, gbox, 0);
2593 Oid tbl_oid = PG_GETARG_DATUM(0);
2594 text *col = PG_GETARG_TEXT_P(1);
2601 postgis_initialize_cache(fcinfo);
2611 PG_RETURN_POINTER(gbox);
GBOX * gbox_new(lwflags_t flags)
Create a new gbox with the dimensionality indicated by the flags.
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
int gserialized_get_gbox_p(const GSERIALIZED *g, GBOX *gbox)
Read the box from the GSERIALIZED or calculate it if necessary.
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static int range_quintile(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
#define STATISTIC_SLOT_ND
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
#define ND_DIMS
The maximum number of dimensions our code can handle.
#define STATISTIC_KIND_2D
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Expand the bounds of target to include source.
#define DEFAULT_ND_JOINSEL
#define STATISTIC_KIND_ND
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
#define FALLBACK_ND_JOINSEL
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type, int att_num)
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
This function should return an estimation of the number of rows returned by a query involving an over...
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type, int *att_num)
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array.
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator?
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension,...
void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
bool box2df_is_empty(const BOX2DF *a)
int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
BOX2DF * box2df_copy(BOX2DF *b)
bool gidx_is_unknown(const GIDX *a)
GIDX * gidx_copy(GIDX *b)
void gidx_merge(GIDX **b_union, GIDX *b_new)
#define FLAGS_GET_Z(flags)
#define FLAGS_GET_M(flags)
#define FLAGS_SET_GEODETIC(flags, value)
#define FLAGS_SET_M(flags, value)
#define FLAGS_SET_Z(flags, value)
#define FLAGS_GET_GEODETIC(flags)
This library is the generic geometry handling section of PostGIS.
Datum buffer(PG_FUNCTION_ARGS)
char * text_to_cstring(const text *textptr)
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided.
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string.
static void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
N-dimensional box index type.
float4 histogram_features
N-dimensional statistics structure.