63 #include "executor/spi.h" 65 #include "commands/vacuum.h" 66 #if PG_VERSION_NUM < 120000 67 #include "nodes/relation.h" 69 #include "nodes/pathnodes.h" 71 #include "parser/parsetree.h" 72 #include "utils/array.h" 73 #include "utils/lsyscache.h" 74 #include "utils/builtins.h" 75 #include "utils/syscache.h" 76 #include "utils/rel.h" 77 #include "utils/selfuncs.h" 79 #include "../postgis_config.h" 81 #if POSTGIS_PGSQL_VERSION >= 93 82 #include "access/htup_details.h" 87 #include "lwgeom_pg.h" 88 #include "gserialized_gist.h" 101 #ifndef HAVE_ISFINITE 102 # ifdef HAVE_GNU_ISFINITE 105 # define isfinite finite 136 #define STATISTIC_KIND_ND 102 137 #define STATISTIC_KIND_2D 103 138 #define STATISTIC_SLOT_ND 0 139 #define STATISTIC_SLOT_2D 1 147 #define SDFACTOR 3.25 162 #define MIN_DIMENSION_WIDTH 0.000000001 168 #define MAX_DIMENSION_WIDTH 1.0E+20 173 #define DEFAULT_ND_SEL 0.0001 174 #define DEFAULT_ND_JOINSEL 0.001 179 #define FALLBACK_ND_SEL 0.2 180 #define FALLBACK_ND_JOINSEL 0.3 276 if ( modestr[0] ==
'N' )
289 int ia = *((
const int*)a);
290 int ib = *((
const int*)b);
307 qsort(vals, nvals,
sizeof(
int),
cmp_int);
308 return vals[4*nvals/5] - vals[nvals/5];
320 for ( i = 0; i < nvals; i++ )
326 #if POSTGIS_DEBUG_LEVEL >= 3 332 total_int(
const int *vals,
int nvals)
337 for ( i = 0; i < nvals; i++ )
347 avg(
const int *vals,
int nvals)
349 int t = total_int(vals, nvals);
350 return (
double)t / (double)nvals;
357 stddev(
const int *vals,
int nvals)
361 double mean = avg(vals, nvals);
364 for ( i = 0; i < nvals; i++ )
366 double v = (double)(vals[i]);
367 sigma2 += (mean - v) * (mean - v);
369 return sqrt(sigma2 / nvals);
381 int accum = 1, vdx = 0;
386 for ( d = 0; d < (int)(stats->
ndims); d++ )
388 int size = (int)(stats->
size[d]);
389 if ( indexes[d] < 0 || indexes[d] >= size )
391 POSTGIS_DEBUGF(3,
" bad index at (%d, %d)", indexes[0], indexes[1]);
394 vdx += indexes[d] * accum;
411 for ( i = 0; i < ndims; i++ )
417 for ( i = 0; i < ndims; i++ )
437 char *json_extent, *str;
440 int ndims = (int)roundf(nd_stats->
ndims);
447 for ( d = 0; d < ndims; d++ )
507 for ( d = 0; d <
ND_DIMS; d++ )
509 target->
min[d] = Min(target->
min[d], source->
min[d]);
510 target->
max[d] = Max(target->
max[d], source->
max[d]);
519 memset(a, 0,
sizeof(
ND_BOX));
532 for ( d = 0; d <
ND_DIMS; d++ )
535 a->
max[d] = -1 * FLT_MAX;
582 for ( d = 0; d < ndims; d++ )
584 if ( (a->
min[d] > b->
max[d]) || (a->
max[d] < b->
min[d]) )
597 for ( d = 0; d < ndims; d++ )
599 if ( ! ((a->
min[d] < b->
min[d]) && (a->
max[d] > b->
max[d])) )
614 for ( d = 0; d <
ND_DIMS; d++ )
616 size = nd_box->
max[d] - nd_box->
min[d];
620 nd_box->
min[d] -= size * expansion_factor / 2;
621 nd_box->
max[d] += size * expansion_factor / 2;
638 memset(nd_ibox, 0,
sizeof(
ND_IBOX));
641 for ( d = 0; d < nd_stats->
ndims; d++ )
645 double width = smax - smin;
653 int size = (int)roundf(nd_stats->
size[d]);
656 nd_ibox->
min[d] = floor(size * (nd_box->
min[d] - smin) / width);
657 nd_ibox->
max[d] = floor(size * (nd_box->
max[d] - smin) / width);
659 POSTGIS_DEBUGF(5,
" stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
660 POSTGIS_DEBUGF(5,
" overlap: dim %d: (%d, %d)", d, nd_ibox->
min[d], nd_ibox->
max[d]);
663 nd_ibox->
min[d] = Max(nd_ibox->
min[d], 0);
664 nd_ibox->
max[d] = Min(nd_ibox->
max[d], size - 1);
682 for ( d = 0 ; d < ndims; d++ )
684 if ( b1->
max[d] <= b2->
min[d] || b1->
min[d] >= b2->
max[d] )
687 if ( b1->
min[d] > b2->
min[d] || b1->
max[d] < b2->
max[d] )
694 for ( d = 0; d < ndims; d++ )
696 double width1 = b1->
max[d] - b1->
min[d];
697 double width2 = b2->
max[d] - b2->
min[d];
698 double imin, imax, iwidth;
703 imin = Max(b1->
min[d], b2->
min[d]);
704 imax = Min(b1->
max[d], b2->
max[d]);
705 iwidth = imax - imin;
706 iwidth = Max(0.0, iwidth);
742 #if POSTGIS_DEBUG_LEVEL >= 3 743 double average, sdev, sdev_ratio;
749 for ( d = 0; d < ndims; d++ )
752 memset(counts, 0,
sizeof(counts));
754 smin = extent->
min[d];
755 smax = extent->
max[d];
756 swidth = smax - smin;
770 for ( i = 0; i < num_boxes; i++ )
772 double minoffset, maxoffset;
776 if ( ! ndb )
continue;
779 minoffset = ndb->
min[d] - smin;
780 maxoffset = ndb->
max[d] - smin;
783 if ( minoffset < 0 || minoffset > swidth ||
784 maxoffset < 0 || maxoffset > swidth )
790 bmin = floor(
NUM_BINS * minoffset / swidth);
791 bmax = floor(
NUM_BINS * maxoffset / swidth);
797 POSTGIS_DEBUGF(4,
" dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
800 for ( k = bmin; k <= bmax; k++ )
810 #if POSTGIS_DEBUG_LEVEL >= 3 813 sdev_ratio = sdev/average;
815 POSTGIS_DEBUGF(3,
" dimension %d: range = %d", d, range);
816 POSTGIS_DEBUGF(3,
" dimension %d: average = %.6g", d, average);
817 POSTGIS_DEBUGF(3,
" dimension %d: stddev = %.6g", d, sdev);
818 POSTGIS_DEBUGF(3,
" dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
821 distribution[d] = range;
839 if ( counter[d] < ibox->
max[d] )
844 counter[d] = ibox->
min[d];
867 #if POSTGIS_PGSQL_VERSION < 100 871 rv = get_attstatsslot(stats_tuple, 0, 0, stats_kind, InvalidOid,
872 NULL, NULL, NULL, &floatptr, &nvalues);
876 "no slot of kind %d in stats tuple", stats_kind);
881 nd_stats = palloc(
sizeof(
float) * nvalues);
882 memcpy(nd_stats, floatptr,
sizeof(
float) * nvalues);
885 free_attstatsslot(0, NULL, 0, floatptr, nvalues);
888 rv = get_attstatsslot(&sslot, stats_tuple, stats_kind, InvalidOid,
889 ATTSTATSSLOT_NUMBERS);
892 "no slot of kind %d in stats tuple", stats_kind);
897 nd_stats = palloc(
sizeof(float4) * sslot.nnumbers);
898 memcpy(nd_stats, sslot.numbers,
sizeof(float4) * sslot.nnumbers);
900 free_attstatsslot(&sslot);
913 HeapTuple stats_tuple = NULL;
919 POSTGIS_DEBUGF(2,
"searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
920 stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(
true));
922 POSTGIS_DEBUGF(2,
"found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
925 if ( only_parent || ! stats_tuple )
927 POSTGIS_DEBUGF(2,
"searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
928 stats_tuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(table_oid), Int16GetDatum(att_num), BoolGetDatum(
false));
930 POSTGIS_DEBUGF(2,
"found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
934 POSTGIS_DEBUGF(2,
"stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
939 ReleaseSysCache(stats_tuple);
943 "histogram for attribute %d of table \"%s\" does not exist?",
944 att_num, get_rel_name(table_oid));
968 att_num = get_attnum(table_oid, att_name);
970 elog(ERROR,
"attribute \"%s\" does not exist", att_name);
976 elog(ERROR,
"attribute name is null");
999 int ncells1, ncells2;
1000 int ndims1, ndims2, ndims;
1002 double ntuples_not_null1, ntuples_not_null2;
1021 if ( ! ( s1 && s2 ) )
1023 elog(NOTICE,
" estimate_join_selectivity called with null inputs");
1032 if ( ncells1 > ncells2 )
1050 ntuples_max = ntuples_not_null1 * ntuples_not_null2;
1053 ndims1 = (int)roundf(s1->
ndims);
1054 ndims2 = (int)roundf(s2->
ndims);
1055 ndims = Max(ndims1, ndims2);
1064 POSTGIS_DEBUG(3,
"relation stats do not intersect, returning 0");
1065 PG_RETURN_FLOAT8(0.0);
1074 POSTGIS_DEBUG(3,
"could not calculate overlap of relations");
1079 for ( d = 0; d < ndims1; d++ )
1081 at1[d] = ibox1.
min[d];
1084 size1[d] = (int)roundf(s1->
size[d]);
1085 cellsize1[d] = width1[d] / size1[d];
1089 for ( d = 0; d < ndims2; d++ )
1093 size2[d] = (int)roundf(s2->
size[d]);
1094 cellsize2[d] = width2[d] / size2[d];
1104 for ( d = 0; d < ndims1; d++ )
1106 nd_cell1.
min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1107 nd_cell1.
max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1114 for ( d = 0; d < ndims2; d++ )
1116 at2[d] = ibox2.
min[d];
1119 POSTGIS_DEBUGF(3,
"at1 %d,%d %s", at1[0], at1[1],
nd_box_to_json(&nd_cell1, ndims1));
1133 for ( d = 0; d < ndims2; d++ )
1135 nd_cell2.
min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1136 nd_cell2.
max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1139 POSTGIS_DEBUGF(3,
" at2 %d,%d %s", at2[0], at2[1],
nd_box_to_json(&nd_cell2, ndims2));
1142 ratio2 =
nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1146 POSTGIS_DEBUGF(3,
" val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1147 val += val1 * (val2 * ratio2);
1154 POSTGIS_DEBUGF(3,
"val of histogram = %g", val);
1165 POSTGIS_DEBUGF(3,
"val scaled to full table size = %g", val);
1182 selectivity = val / ntuples_max;
1185 if ( isnan(selectivity) || ! isfinite(selectivity) || selectivity < 0.0 )
1189 else if ( selectivity > 1.0 )
1204 PG_RETURN_DATUM(DirectFunctionCall5(
1206 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1207 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1219 PG_RETURN_DATUM(DirectFunctionCall5(
1221 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1222 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1238 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1240 List *
args = (List *) PG_GETARG_POINTER(2);
1241 JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1242 int mode = PG_GETARG_INT32(4);
1252 if (jointype != JOIN_INNER)
1254 elog(DEBUG1,
"%s: jointype %d not supported", __func__, jointype);
1259 arg1 = (Node*) linitial(args);
1260 arg2 = (Node*) lsecond(args);
1266 if (!IsA(arg1, Var) || !IsA(arg2, Var))
1268 elog(DEBUG1,
"%s called with arguments that are not column references", __func__);
1273 relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
1274 relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
1276 POSTGIS_DEBUGF(3,
"using relations \"%s\" Oid(%d), \"%s\" Oid(%d)",
1277 get_rel_name(relid1) ? get_rel_name(relid1) :
"NULL", relid1, get_rel_name(relid2) ? get_rel_name(relid2) :
"NULL", relid2);
1286 POSTGIS_DEBUGF(3,
"unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid1) ? get_rel_name(relid1) :
"NULL" , relid1);
1289 else if ( ! stats2 )
1291 POSTGIS_DEBUGF(3,
"unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid2) ? get_rel_name(relid2) :
"NULL", relid2);
1296 POSTGIS_DEBUGF(2,
"got selectivity %g", selectivity);
1300 PG_RETURN_FLOAT8(selectivity);
1326 int sample_rows,
double total_rows,
int mode)
1328 MemoryContext old_context;
1330 int notnull_cnt = 0;
1332 int histogram_features = 0;
1335 size_t nd_stats_size;
1337 double total_width = 0;
1338 double total_sample_volume = 0;
1339 double total_cell_count = 0;
1345 const ND_BOX **sample_boxes;
1350 int histo_cells_target;
1352 int histo_cells_new = 1;
1355 int histo_ndims = 0;
1356 double sample_distribution[
ND_DIMS];
1357 double total_distribution;
1375 POSTGIS_DEBUG(2,
"compute_gserialized_stats called");
1376 POSTGIS_DEBUGF(3,
" # sample_rows: %d", sample_rows);
1377 POSTGIS_DEBUGF(3,
" estimate of total_rows: %.6g", total_rows);
1383 sample_boxes = palloc(
sizeof(
ND_BOX*) * sample_rows);
1395 for ( i = 0; i < sample_rows; i++ )
1404 datum = fetchfunc(stats, i, &is_null);
1409 POSTGIS_DEBUGF(4,
" skipped null geometry %d", i);
1416 is_copy = VARATT_IS_EXTENDED(datum);
1420 POSTGIS_DEBUGF(3,
" skipped empty geometry %d", i);
1431 POSTGIS_DEBUGF(3,
" skipped infinite/nan geometry %d", i);
1443 nd_box = palloc(
sizeof(
ND_BOX));
1447 sample_boxes[notnull_cnt] = nd_box;
1450 if ( ! notnull_cnt )
1457 total_width += VARSIZE(geom);
1460 for ( d = 0; d < ndims; d++ )
1462 sum.
min[d] += nd_box->
min[d];
1463 sum.
max[d] += nd_box->
max[d];
1474 vacuum_delay_point();
1485 histo_cells_target = (int)pow((
double)(stats->attr->attstattarget), (
double)ndims);
1486 histo_cells_target = Min(histo_cells_target, ndims * 10000);
1487 histo_cells_target = Min(histo_cells_target, (
int)(total_rows/5));
1488 POSTGIS_DEBUGF(3,
" stats->attr->attstattarget: %d", stats->attr->attstattarget);
1489 POSTGIS_DEBUGF(3,
" target # of histogram cells: %d", histo_cells_target);
1492 if ( ! notnull_cnt )
1494 elog(NOTICE,
"no non-null/empty features, unable to compute statistics");
1495 stats->stats_valid =
false;
1499 POSTGIS_DEBUGF(3,
" sample_extent: %s",
nd_box_to_json(&sample_extent, ndims));
1505 for ( d = 0; d < ndims; d++ )
1508 avg.
min[d] = sum.
min[d] / notnull_cnt;
1509 avg.
max[d] = sum.
max[d] / notnull_cnt;
1512 for ( i = 0; i < notnull_cnt; i++ )
1514 const ND_BOX *ndb = sample_boxes[i];
1515 stddev.
min[d] += (ndb->
min[d] - avg.
min[d]) * (ndb->
min[d] - avg.
min[d]);
1516 stddev.
max[d] += (ndb->
max[d] - avg.
max[d]) * (ndb->
max[d] - avg.
max[d]);
1518 stddev.
min[d] = sqrt(stddev.
min[d] / notnull_cnt);
1519 stddev.
max[d] = sqrt(stddev.
max[d] / notnull_cnt);
1532 for ( i = 0; i < notnull_cnt; i++ )
1534 const ND_BOX *ndb = sample_boxes[i];
1538 POSTGIS_DEBUGF(4,
" feature %d is a hard deviant, skipped", i);
1539 sample_boxes[i] = NULL;
1550 histo_extent = histo_extent_new;
1565 sample_distribution);
1581 for ( d = 0; d < ndims; d++ )
1583 if ( sample_distribution[d] > 0 )
1587 if ( histo_ndims == 0 )
1591 POSTGIS_DEBUG(3,
" special case: no axes have variability");
1592 histo_cells_new = 1;
1593 for ( d = 0; d < ndims; d++ )
1595 histo_size[d] = 1 + (int)pow((
double)histo_cells_target, 1/(double)ndims);
1596 POSTGIS_DEBUGF(3,
" histo_size[d]: %d", histo_size[d]);
1597 histo_cells_new *= histo_size[d];
1599 POSTGIS_DEBUGF(3,
" histo_cells_new: %d", histo_cells_new);
1608 POSTGIS_DEBUG(3,
" allocating histogram axes based on axis variability");
1609 total_distribution =
total_double(sample_distribution, ndims);
1610 POSTGIS_DEBUGF(3,
" total_distribution: %.8g", total_distribution);
1611 histo_cells_new = 1;
1612 for ( d = 0; d < ndims; d++ )
1614 if ( sample_distribution[d] == 0 )
1621 float edge_ratio = (float)sample_distribution[d] / (
float)total_distribution;
1627 histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(
double)histo_ndims);
1629 if ( ! histo_size[d] )
1632 histo_cells_new *= histo_size[d];
1634 POSTGIS_DEBUGF(3,
" histo_cells_new: %d", histo_cells_new);
1638 histo_cells = histo_cells_new;
1639 POSTGIS_DEBUGF(3,
" histo_cells: %d", histo_cells);
1644 old_context = MemoryContextSwitchTo(stats->anl_context);
1645 nd_stats_size =
sizeof(
ND_STATS) + ((histo_cells - 1) *
sizeof(float4));
1646 nd_stats = palloc(nd_stats_size);
1647 memset(nd_stats, 0, nd_stats_size);
1648 MemoryContextSwitchTo(old_context);
1651 nd_stats->
ndims = ndims;
1652 nd_stats->
extent = histo_extent;
1657 for ( d = 0; d < ndims; d++ )
1658 nd_stats->
size[d] = histo_size[d];
1673 for ( i = 0; i < notnull_cnt; i++ )
1679 double num_cells = 0;
1680 double tmp_volume = 1.0;
1685 nd_box = sample_boxes[i];
1686 if ( ! nd_box )
continue;
1689 vacuum_delay_point();
1693 memset(at, 0,
sizeof(
int)*
ND_DIMS);
1695 POSTGIS_DEBUGF(3,
" feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1696 nd_ibox.
min[0], nd_ibox.
min[1], nd_ibox.
min[2], nd_ibox.
min[3],
1697 nd_ibox.
max[0], nd_ibox.
max[1], nd_ibox.
max[2], nd_ibox.
max[3]);
1699 for ( d = 0; d < nd_stats->
ndims; d++ )
1702 at[d] = nd_ibox.
min[d];
1705 cellsize[d] = (max[d] - min[d])/(nd_stats->
size[d]);
1708 tmp_volume *= (nd_box->
max[d] - nd_box->
min[d]);
1712 total_sample_volume += tmp_volume;
1723 for ( d = 0; d < nd_stats->
ndims; d++ )
1725 nd_cell.
min[d] = min[d] + (at[d]+0) * cellsize[d];
1726 nd_cell.
max[d] = min[d] + (at[d]+1) * cellsize[d];
1737 POSTGIS_DEBUGF(3,
" ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1738 POSTGIS_DEBUGF(3,
" at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1743 total_cell_count += num_cells;
1745 histogram_features++;
1748 POSTGIS_DEBUGF(3,
" histogram_features: %d", histogram_features);
1749 POSTGIS_DEBUGF(3,
" sample_rows: %d", sample_rows);
1750 POSTGIS_DEBUGF(3,
" table_rows: %.6g", total_rows);
1753 if ( ! histogram_features )
1755 POSTGIS_DEBUG(3,
" no stats have been gathered");
1756 elog(NOTICE,
" no features lie in the stats histogram, invalid stats");
1757 stats->stats_valid =
false;
1778 stats->stakind[stats_slot] = stats_kind;
1779 stats->staop[stats_slot] = InvalidOid;
1780 stats->stanumbers[stats_slot] = (float4*)nd_stats;
1781 stats->numnumbers[stats_slot] = nd_stats_size/
sizeof(float4);
1782 stats->stanullfrac = (float4)null_cnt/sample_rows;
1783 stats->stawidth = total_width/notnull_cnt;
1784 stats->stadistinct = -1.0;
1785 stats->stats_valid =
true;
1787 POSTGIS_DEBUGF(3,
" out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1788 POSTGIS_DEBUGF(3,
" out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1789 POSTGIS_DEBUGF(3,
" out: slot 0: numnumbers %d", stats->numnumbers[0]);
1790 POSTGIS_DEBUGF(3,
" out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1791 POSTGIS_DEBUGF(3,
" out: average width: %d bytes", stats->stawidth);
1792 POSTGIS_DEBUG (3,
" out: distinct values: all (no check done)");
1821 int sample_rows,
double total_rows)
1860 VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1861 Form_pg_attribute attr = stats->attr;
1863 POSTGIS_DEBUG(2,
"gserialized_analyze_nd called");
1867 if (attr->attstattarget < 0)
1868 attr->attstattarget = default_statistics_target;
1870 POSTGIS_DEBUGF(3,
" attribute stat target: %d", attr->attstattarget);
1875 stats->minrows = 300 * stats->attr->attstattarget;
1878 POSTGIS_DEBUGF(3,
" minrows: %d", stats->minrows);
1881 PG_RETURN_BOOL(
true);
1907 double total_count = 0.0;
1914 elog(NOTICE,
" estimate_selectivity called with null input");
1926 POSTGIS_DEBUGF(3,
" mode: %d", mode);
1929 POSTGIS_DEBUG(3,
" in 2d mode, stripping the computation down to 2d");
1945 POSTGIS_DEBUG(3,
" search box does not overlap histogram, returning 0");
1952 POSTGIS_DEBUG(3,
" search box contains histogram, returning 1");
1959 POSTGIS_DEBUG(3,
" search box overlap with stats histogram failed");
1964 for ( d = 0; d < nd_stats->
ndims; d++ )
1969 cell_size[d] = (max[d] - min[d]) / nd_stats->
size[d];
1970 POSTGIS_DEBUGF(3,
" cell_size[%d] : %.9g", d, cell_size[d]);
1973 at[d] = nd_ibox.
min[d];
1979 float cell_count, ratio;
1983 for ( d = 0; d < nd_stats->
ndims; d++ )
1985 nd_cell.
min[d] = min[d] + (at[d]+0) * cell_size[d];
1986 nd_cell.
max[d] = min[d] + (at[d]+1) * cell_size[d];
1993 total_count += cell_count * ratio;
1994 POSTGIS_DEBUGF(4,
" cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2001 POSTGIS_DEBUGF(3,
" nd_stats->histogram_features = %f", nd_stats->
histogram_features);
2002 POSTGIS_DEBUGF(3,
" nd_stats->histogram_cells = %f", nd_stats->
histogram_cells);
2003 POSTGIS_DEBUGF(3,
" sum(overlapped histogram cells) = %f", total_count);
2004 POSTGIS_DEBUGF(3,
" selectivity = %f", selectivity);
2007 if (selectivity > 1.0) selectivity = 1.0;
2008 else if (selectivity < 0.0) selectivity = 0.0;
2022 Oid table_oid = PG_GETARG_OID(0);
2023 text *att_text = PG_GETARG_TEXT_P(1);
2028 bool only_parent =
FALSE;
2031 if ( ! PG_ARGISNULL(2) )
2037 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid),
text2cstring(att_text));
2041 json = cstring2text(str);
2044 PG_RETURN_TEXT_P(json);
2055 Oid table_oid = PG_GETARG_OID(0);
2056 text *att_text = PG_GETARG_TEXT_P(1);
2057 Datum geom_datum = PG_GETARG_DATUM(2);
2059 float8 selectivity = 0;
2064 if ( ! PG_ARGISNULL(3) )
2071 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid),
text2cstring(att_text));
2074 if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
2075 elog(ERROR,
"unable to calculate bounding box from geometry");
2083 PG_RETURN_FLOAT8(selectivity);
2094 Oid table_oid1 = PG_GETARG_OID(0);
2095 text *att_text1 = PG_GETARG_TEXT_P(1);
2096 Oid table_oid2 = PG_GETARG_OID(2);
2097 text *att_text2 = PG_GETARG_TEXT_P(3);
2099 float8 selectivity = 0;
2108 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid1),
text2cstring(att_text1));
2111 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid2),
text2cstring(att_text2));
2114 if ( ! PG_ARGISNULL(4) )
2116 text *modetxt = PG_GETARG_TEXT_P(4);
2118 if ( modestr[0] ==
'N' )
2127 PG_RETURN_FLOAT8(selectivity);
2137 PG_RETURN_DATUM(DirectFunctionCall5(
2139 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2140 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2152 PG_RETURN_DATUM(DirectFunctionCall5(
2154 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2155 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2176 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2178 List *
args = (List *) PG_GETARG_POINTER(2);
2180 int mode = PG_GETARG_INT32(4);
2182 VariableStatData vardata;
2188 float8 selectivity = 0;
2190 POSTGIS_DEBUG(2,
"gserialized_gist_sel called");
2202 if (list_length(args) != 2)
2204 POSTGIS_DEBUG(3,
"gserialized_gist_sel: not a binary opclause");
2209 other = (Node *) linitial(args);
2210 if ( ! IsA(other, Const) )
2212 self = (Var *)other;
2213 other = (Node *) lsecond(args);
2217 self = (Var *) lsecond(args);
2220 if ( ! IsA(other, Const) )
2222 POSTGIS_DEBUG(3,
" no constant arguments - returning a default selectivity");
2227 if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
2229 POSTGIS_DEBUG(3,
"search box is EMPTY");
2230 PG_RETURN_FLOAT8(0.0);
2232 POSTGIS_DEBUGF(4,
" requested search box is: %s",
gbox_to_string(&search_box));
2235 examine_variable(root, (Node*)
self, 0, &vardata);
2236 if ( vardata.statsTuple ) {
2239 ReleaseVariableStats(vardata);
2243 POSTGIS_DEBUG(3,
" unable to load stats from syscache, not analyzed yet?");
2251 POSTGIS_DEBUGF(3,
" returning computed value: %f", selectivity);
2254 PG_RETURN_FLOAT8(selectivity);
2270 char *nsp_tbl = NULL;
2274 bool only_parent =
FALSE;
2276 if ( PG_NARGS() == 4 )
2280 col = PG_GETARG_TEXT_P(2);
2281 only_parent = PG_GETARG_BOOL(3);
2282 nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 6);
2283 sprintf(nsp_tbl,
"\"%s\".\"%s\"", nsp, tbl);
2284 tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2287 else if ( PG_NARGS() == 3 )
2291 col = PG_GETARG_TEXT_P(2);
2292 nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 6);
2293 sprintf(nsp_tbl,
"\"%s\".\"%s\"", nsp, tbl);
2294 tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2297 else if ( PG_NARGS() == 2 )
2300 col = PG_GETARG_TEXT_P(1);
2301 nsp_tbl = palloc(strlen(tbl) + 3);
2302 sprintf(nsp_tbl,
"\"%s\"", tbl);
2303 tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2308 elog(ERROR,
"estimated_extent() called with wrong number of arguments");
2317 elog(WARNING,
"stats for \"%s.%s\" do not exist", tbl,
text2cstring(col));
2322 gbox = palloc(
sizeof(
GBOX));
2332 PG_RETURN_POINTER(gbox);
2344 if ( PG_NARGS() == 3 )
2350 PG_GETARG_DATUM(2)));
2352 else if ( PG_NARGS() == 2 )
2357 PG_GETARG_DATUM(1)));
2360 elog(ERROR,
"geometry_estimated_extent() called with wrong number of arguments");
int gserialized_get_gbox_p(const GSERIALIZED *g, GBOX *box)
Read the bounding box off a serialization and calculate one if it is not already there.
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter) and a domain to increment it in (ibox), increment it by one...
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
#define DEFAULT_ND_JOINSEL
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
#define FLAGS_GET_GEODETIC(flags)
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string...
#define ND_DIMS
The maximum number of dimensions our code can handle.
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return TRUE if ND_BOX a contains b, false otherwise.
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
#define FALLBACK_ND_JOINSEL
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
#define FLAGS_SET_GEODETIC(flags, value)
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided...
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
#define FLAGS_SET_Z(flags, value)
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension...
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array...
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return TRUE if ND_BOX a overlaps b, false otherwise.
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
static ND_STATS * pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode)
char * text2cstring(const text *textptr)
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
N-dimensional box index type.
#define FLAGS_GET_Z(flags)
Macros for manipulating the 'flags' byte.
static int range_quintile(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator...
float4 histogram_features
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
#define STATISTIC_KIND_2D
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Expand the bounds of the target ND_BOX, dimension by dimension, so that it also encloses the source ND_BOX.
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
#define FLAGS_GET_M(flags)
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_ND
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
N-dimensional statistics structure.
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
Pull the stats object from the PgSQL system catalogs.
#define MAX_DIMENSION_WIDTH
Maximum width of a dimension that we'll bother trying to compute statistics on.
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
This library is the generic geometry handling section of PostGIS.
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
#define STATISTIC_KIND_ND
Assign a number to the n-dimensional statistics kind.
#define FLAGS_SET_M(flags, value)