49 #include "executor/spi.h"
51 #include "commands/vacuum.h"
52 #include "nodes/relation.h"
53 #include "parser/parsetree.h"
54 #include "utils/array.h"
55 #include "utils/lsyscache.h"
56 #include "utils/builtins.h"
57 #include "utils/syscache.h"
58 #include "utils/rel.h"
60 #include "../postgis_config.h"
62 #if POSTGIS_PGSQL_VERSION >= 93
63 #include "access/htup_details.h"
68 #include "lwgeom_pg.h"
69 #include "gserialized_gist.h"
107 #define STATISTIC_KIND_ND 102
108 #define STATISTIC_KIND_2D 103
109 #define STATISTIC_SLOT_ND 0
110 #define STATISTIC_SLOT_2D 1
118 #define SDFACTOR 3.25
133 #define MIN_DIMENSION_WIDTH 0.000000001
138 #define DEFAULT_ND_SEL 0.0001
139 #define DEFAULT_ND_JOINSEL 0.001
144 #define FALLBACK_ND_SEL 0.2
145 #define FALLBACK_ND_JOINSEL 0.3
241 if ( modestr[0] ==
'N' )
254 int ia = *((
const int*)a);
255 int ib = *((
const int*)b);
272 qsort(vals, nvals,
sizeof(
int),
cmp_int);
273 return vals[4*nvals/5] - vals[nvals/5];
285 for ( i = 0; i < nvals; i++ )
291 #if POSTGIS_DEBUG_LEVEL >= 3
297 total_int(
const int *vals,
int nvals)
302 for ( i = 0; i < nvals; i++ )
312 avg(
const int *vals,
int nvals)
314 int t = total_int(vals, nvals);
315 return (
double)t / (double)nvals;
322 stddev(
const int *vals,
int nvals)
326 double mean = avg(vals, nvals);
329 for ( i = 0; i < nvals; i++ )
331 double v = (double)(vals[i]);
332 sigma2 += (mean - v) * (mean - v);
334 return sqrt(sigma2 / nvals);
346 int accum = 1, vdx = 0;
351 for ( d = 0; d < (int)(stats->
ndims); d++ )
353 int size = (int)(stats->
size[d]);
354 if ( indexes[d] < 0 || indexes[d] >= size )
356 POSTGIS_DEBUGF(3,
" bad index at (%d, %d)", indexes[0], indexes[1]);
359 vdx += indexes[d] * accum;
376 for ( i = 0; i < ndims; i++ )
382 for ( i = 0; i < ndims; i++ )
402 char *json_extent, *str;
405 int ndims = (int)roundf(nd_stats->
ndims);
412 for ( d = 0; d < ndims; d++ )
472 for ( d = 0; d <
ND_DIMS; d++ )
474 target->
min[d] = Min(target->
min[d], source->
min[d]);
475 target->
max[d] = Max(target->
max[d], source->
max[d]);
484 memset(a, 0,
sizeof(
ND_BOX));
497 for ( d = 0; d <
ND_DIMS; d++ )
500 a->
max[d] = -1 * FLT_MAX;
547 for ( d = 0; d < ndims; d++ )
549 if ( (a->
min[d] > b->
max[d]) || (a->
max[d] < b->
min[d]) )
562 for ( d = 0; d < ndims; d++ )
564 if ( ! ((a->
min[d] < b->
min[d]) && (a->
max[d] > b->
max[d])) )
579 for ( d = 0; d <
ND_DIMS; d++ )
581 size = nd_box->
max[d] - nd_box->
min[d];
582 if ( size <= 0 )
continue;
583 nd_box->
min[d] -= size * expansion_factor / 2;
584 nd_box->
max[d] += size * expansion_factor / 2;
601 memset(nd_ibox, 0,
sizeof(
ND_IBOX));
604 for ( d = 0; d < nd_stats->
ndims; d++ )
608 double width = smax - smin;
609 int size = roundf(nd_stats->
size[d]);
612 nd_ibox->
min[d] = floor(size * (nd_box->
min[d] - smin) / width);
613 nd_ibox->
max[d] = floor(size * (nd_box->
max[d] - smin) / width);
615 POSTGIS_DEBUGF(5,
" stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
616 POSTGIS_DEBUGF(5,
" overlap: dim %d: (%d, %d)", d, nd_ibox->
min[d], nd_ibox->
max[d]);
619 nd_ibox->
min[d] = Max(nd_ibox->
min[d], 0);
620 nd_ibox->
max[d] = Min(nd_ibox->
max[d], size-1);
637 for ( d = 0 ; d < ndims; d++ )
639 if ( b1->
max[d] <= b2->
min[d] || b1->
min[d] >= b2->
max[d] )
642 if ( b1->
min[d] > b2->
min[d] || b1->
max[d] < b2->
max[d] )
649 for ( d = 0; d < ndims; d++ )
651 double width1 = b1->
max[d] - b1->
min[d];
652 double width2 = b2->
max[d] - b2->
min[d];
653 double imin, imax, iwidth;
658 imin = Max(b1->
min[d], b2->
min[d]);
659 imax = Min(b1->
max[d], b2->
max[d]);
660 iwidth = imax - imin;
661 iwidth = Max(0.0, iwidth);
692 static int num_bins = 50;
694 int counts[num_bins];
697 #if POSTGIS_DEBUG_LEVEL >= 3
698 double average, sdev, sdev_ratio;
704 for ( d = 0; d < ndims; d++ )
707 memset(counts, 0,
sizeof(
int)*num_bins);
709 smin = extent->
min[d];
710 smax = extent->
max[d];
711 swidth = smax - smin;
721 for ( i = 0; i < num_boxes; i++ )
723 double minoffset, maxoffset;
727 if ( ! ndb )
continue;
730 minoffset = ndb->
min[d] - smin;
731 maxoffset = ndb->
max[d] - smin;
734 if ( minoffset < 0 || minoffset > swidth ||
735 maxoffset < 0 || maxoffset > swidth )
741 bmin = num_bins * (minoffset) / swidth;
742 bmax = num_bins * (maxoffset) / swidth;
744 POSTGIS_DEBUGF(4,
" dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
747 for ( k = bmin; k <= bmax; k++ )
757 #if POSTGIS_DEBUG_LEVEL >= 3
758 average = avg(counts, num_bins);
759 sdev = stddev(counts, num_bins);
760 sdev_ratio = sdev/average;
762 POSTGIS_DEBUGF(3,
" dimension %d: range = %d", d, range);
763 POSTGIS_DEBUGF(3,
" dimension %d: average = %.6g", d, average);
764 POSTGIS_DEBUGF(3,
" dimension %d: stddev = %.6g", d, sdev);
765 POSTGIS_DEBUGF(3,
" dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
768 distribution[d] = range;
786 if ( counter[d] < ibox->
max[d] )
791 counter[d] = ibox->
min[d];
810 HeapTuple stats_tuple;
817 stats_tuple = SearchSysCache2(STATRELATT, table_oid, att_num);
820 POSTGIS_DEBUGF(2,
"stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) :
"NULL");
829 rv = get_attstatsslot(stats_tuple, 0, 0, stats_kind, InvalidOid, NULL, NULL, NULL, &floatptr, &nvalues);
832 ReleaseSysCache(stats_tuple);
833 POSTGIS_DEBUGF(2,
"histogram for \"%s\" does not exist?", get_rel_name(table_oid));
838 nd_stats = palloc(
sizeof(
float) * nvalues);
839 memcpy(nd_stats, floatptr,
sizeof(
float) * nvalues);
842 free_attstatsslot(0, NULL, 0, floatptr, nvalues);
843 ReleaseSysCache(stats_tuple);
864 att_num = get_attnum(table_oid, att_name);
866 elog(ERROR,
"attribute \"%s\" does not exist", att_name);
872 elog(ERROR,
"attribute name is null");
895 int ncells1, ncells2;
896 int ndims1, ndims2, ndims;
898 double ntuples_not_null1, ntuples_not_null2;
917 if ( ! ( s1 && s2 ) )
919 elog(NOTICE,
" estimate_join_selectivity called with null inputs");
928 if ( ncells1 > ncells2 )
946 ntuples_max = ntuples_not_null1 * ntuples_not_null2;
949 ndims1 = (int)roundf(s1->
ndims);
950 ndims2 = (int)roundf(s2->
ndims);
951 ndims = Max(ndims1, ndims2);
960 POSTGIS_DEBUG(3,
"relation stats do not intersect, returning 0");
961 PG_RETURN_FLOAT8(0.0);
970 POSTGIS_DEBUG(3,
"could not calculate overlap of relations");
975 for ( d = 0; d < ndims1; d++ )
977 at1[d] = ibox1.
min[d];
980 size1[d] = (int)roundf(s1->
size[d]);
981 cellsize1[d] = width1[d] / size1[d];
985 for ( d = 0; d < ndims2; d++ )
989 size2[d] = (int)roundf(s2->
size[d]);
990 cellsize2[d] = width2[d] / size2[d];
1000 for ( d = 0; d < ndims1; d++ )
1002 nd_cell1.
min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
1003 nd_cell1.
max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
1010 for ( d = 0; d < ndims2; d++ )
1012 at2[d] = ibox2.
min[d];
1015 POSTGIS_DEBUGF(3,
"at1 %d,%d %s", at1[0], at1[1],
nd_box_to_json(&nd_cell1, ndims1));
1029 for ( d = 0; d < ndims2; d++ )
1031 nd_cell2.
min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
1032 nd_cell2.
max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
1035 POSTGIS_DEBUGF(3,
" at2 %d,%d %s", at2[0], at2[1],
nd_box_to_json(&nd_cell2, ndims2));
1038 ratio2 =
nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
1042 POSTGIS_DEBUGF(3,
" val1 %.6g val2 %.6g ratio %.6g", val1, val2, ratio2);
1043 val += val1 * (val2 * ratio2);
1050 POSTGIS_DEBUGF(3,
"val of histogram = %g", val);
1061 POSTGIS_DEBUGF(3,
"val scaled to full table size = %g", val);
1078 selectivity = val / ntuples_max;
1081 if ( selectivity > 1.0 )
1094 PG_RETURN_DATUM(DirectFunctionCall5(
1096 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1097 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1109 PG_RETURN_DATUM(DirectFunctionCall5(
1111 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
1112 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
1128 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1130 List *args = (List *) PG_GETARG_POINTER(2);
1131 JoinType jointype = (JoinType) PG_GETARG_INT16(3);
1132 int mode = PG_GETARG_INT32(4);
1142 if (jointype != JOIN_INNER)
1144 elog(NOTICE,
"gserialized_gist_joinsel: jointype %d not supported", jointype);
1149 arg1 = (Node*) linitial(args);
1150 arg2 = (Node*) lsecond(args);
1156 if (!IsA(arg1, Var) || !IsA(arg2, Var))
1158 elog(DEBUG1,
"gserialized_gist_joinsel called with arguments that are not column references");
1163 relid1 = getrelid(var1->varno, root->parse->rtable);
1164 relid2 = getrelid(var2->varno, root->parse->rtable);
1166 POSTGIS_DEBUGF(3,
"using relations \"%s\" Oid(%d), \"%s\" Oid(%d)",
1167 get_rel_name(relid1) ? get_rel_name(relid1) :
"NULL", relid1, get_rel_name(relid2) ? get_rel_name(relid2) :
"NULL", relid2);
1176 POSTGIS_DEBUGF(3,
"unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid1) ? get_rel_name(relid1) :
"NULL" , relid1);
1179 else if ( ! stats2 )
1181 POSTGIS_DEBUGF(3,
"unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid2) ? get_rel_name(relid2) :
"NULL", relid2);
1186 POSTGIS_DEBUGF(2,
"got selectivity %g", selectivity);
1190 PG_RETURN_FLOAT8(selectivity);
1216 int sample_rows,
double total_rows,
int mode)
1218 MemoryContext old_context;
1220 int notnull_cnt = 0;
1222 int histogram_features = 0;
1225 size_t nd_stats_size;
1227 double total_width = 0;
1228 double total_sample_volume = 0;
1229 double total_cell_count = 0;
1235 const ND_BOX **sample_boxes;
1240 int histo_cells_target;
1242 int histo_cells_new = 1;
1245 int histo_ndims = 0;
1246 double sample_distribution[
ND_DIMS];
1247 double total_distribution;
1262 POSTGIS_DEBUG(2,
"compute_gserialized_stats called");
1263 POSTGIS_DEBUGF(3,
" # sample_rows: %d", sample_rows);
1264 POSTGIS_DEBUGF(3,
" estimate of total_rows: %.6g", total_rows);
1270 sample_boxes = palloc(
sizeof(
ND_BOX*) * sample_rows);
1282 for ( i = 0; i < sample_rows; i++ )
1290 datum = fetchfunc(stats, i, &is_null);
1295 POSTGIS_DEBUGF(4,
" skipped null geometry %d", i);
1305 POSTGIS_DEBUGF(3,
" skipped empty geometry %d", i);
1316 POSTGIS_DEBUGF(3,
" skipped infinite/nan geometry %d", i);
1328 nd_box = palloc(
sizeof(
ND_BOX));
1332 sample_boxes[notnull_cnt] = nd_box;
1335 if ( ! notnull_cnt )
1342 total_width += VARSIZE(geom);
1345 for ( d = 0; d < ndims; d++ )
1347 sum.
min[d] += nd_box->
min[d];
1348 sum.
max[d] += nd_box->
max[d];
1355 vacuum_delay_point();
1366 histo_cells_target = (int)pow((
double)(stats->attr->attstattarget), (
double)ndims);
1367 histo_cells_target = Min(histo_cells_target, ndims * 10000);
1368 histo_cells_target = Min(histo_cells_target, (
int)(total_rows/5));
1369 POSTGIS_DEBUGF(3,
" stats->attr->attstattarget: %d", stats->attr->attstattarget);
1370 POSTGIS_DEBUGF(3,
" target # of histogram cells: %d", histo_cells_target);
1373 if ( ! notnull_cnt )
1375 elog(NOTICE,
"no non-null/empty features, unable to compute statistics");
1376 stats->stats_valid =
false;
1380 POSTGIS_DEBUGF(3,
" sample_extent: %s",
nd_box_to_json(&sample_extent, ndims));
1386 for ( d = 0; d < ndims; d++ )
1389 avg.
min[d] = sum.
min[d] / notnull_cnt;
1390 avg.
max[d] = sum.
max[d] / notnull_cnt;
1393 for ( i = 0; i < notnull_cnt; i++ )
1395 const ND_BOX *ndb = sample_boxes[i];
1396 stddev.
min[d] += (ndb->
min[d] - avg.
min[d]) * (ndb->
min[d] - avg.
min[d]);
1397 stddev.
max[d] += (ndb->
max[d] - avg.
max[d]) * (ndb->
max[d] - avg.
max[d]);
1399 stddev.
min[d] = sqrt(stddev.
min[d] / notnull_cnt);
1400 stddev.
max[d] = sqrt(stddev.
max[d] / notnull_cnt);
1413 for ( i = 0; i < notnull_cnt; i++ )
1415 const ND_BOX *ndb = sample_boxes[i];
1419 POSTGIS_DEBUGF(4,
" feature %d is a hard deviant, skipped", i);
1420 sample_boxes[i] = NULL;
1431 histo_extent = histo_extent_new;
1446 sample_distribution);
1462 for ( d = 0; d < ndims; d++ )
1464 if ( sample_distribution[d] > 0 )
1468 if ( histo_ndims == 0 )
1472 POSTGIS_DEBUG(3,
" special case: no axes have variability");
1473 histo_cells_new = 1;
1474 for ( d = 0; d < ndims; d++ )
1476 histo_size[d] = 1 + (int)pow((
double)histo_cells_target, 1/(double)ndims);
1477 POSTGIS_DEBUGF(3,
" histo_size[d]: %d", histo_size[d]);
1478 histo_cells_new *= histo_size[d];
1480 POSTGIS_DEBUGF(3,
" histo_cells_new: %d", histo_cells_new);
1489 POSTGIS_DEBUG(3,
" allocating histogram axes based on axis variability");
1490 total_distribution =
total_double(sample_distribution, ndims);
1491 POSTGIS_DEBUGF(3,
" total_distribution: %.8g", total_distribution);
1492 histo_cells_new = 1;
1493 for ( d = 0; d < ndims; d++ )
1495 if ( sample_distribution[d] == 0 )
1502 float edge_ratio = (float)sample_distribution[d] / (
float)total_distribution;
1508 histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(
double)histo_ndims);
1510 if ( ! histo_size[d] )
1513 histo_cells_new *= histo_size[d];
1515 POSTGIS_DEBUGF(3,
" histo_cells_new: %d", histo_cells_new);
1519 histo_cells = histo_cells_new;
1520 POSTGIS_DEBUGF(3,
" histo_cells: %d", histo_cells);
1525 old_context = MemoryContextSwitchTo(stats->anl_context);
1526 nd_stats_size =
sizeof(
ND_STATS) + ((histo_cells - 1) *
sizeof(float4));
1527 nd_stats = palloc(nd_stats_size);
1528 memset(nd_stats, 0, nd_stats_size);
1529 MemoryContextSwitchTo(old_context);
1532 nd_stats->
ndims = ndims;
1533 nd_stats->
extent = histo_extent;
1538 for ( d = 0; d < ndims; d++ )
1539 nd_stats->
size[d] = histo_size[d];
1554 for ( i = 0; i < notnull_cnt; i++ )
1560 double num_cells = 0;
1561 double tmp_volume = 1.0;
1566 nd_box = sample_boxes[i];
1567 if ( ! nd_box )
continue;
1570 vacuum_delay_point();
1574 memset(at, 0,
sizeof(
int)*
ND_DIMS);
1576 POSTGIS_DEBUGF(3,
" feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i,
1577 nd_ibox.
min[0], nd_ibox.
min[1], nd_ibox.
min[2], nd_ibox.
min[3],
1578 nd_ibox.
max[0], nd_ibox.
max[1], nd_ibox.
max[2], nd_ibox.
max[3]);
1580 for ( d = 0; d < nd_stats->
ndims; d++ )
1583 at[d] = nd_ibox.
min[d];
1586 cellsize[d] = (max[d] - min[d])/(nd_stats->
size[d]);
1589 tmp_volume *= (nd_box->
max[d] - nd_box->
min[d]);
1593 total_sample_volume += tmp_volume;
1604 for ( d = 0; d < nd_stats->
ndims; d++ )
1606 nd_cell.
min[d] = min[d] + (at[d]+0) * cellsize[d];
1607 nd_cell.
max[d] = min[d] + (at[d]+1) * cellsize[d];
1618 POSTGIS_DEBUGF(3,
" ratio (%.8g) num_cells (%.8g)", ratio, num_cells);
1619 POSTGIS_DEBUGF(3,
" at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
1624 total_cell_count += num_cells;
1626 histogram_features++;
1629 POSTGIS_DEBUGF(3,
" histogram_features: %d", histogram_features);
1630 POSTGIS_DEBUGF(3,
" sample_rows: %d", sample_rows);
1631 POSTGIS_DEBUGF(3,
" table_rows: %.6g", total_rows);
1634 if ( ! histogram_features )
1636 POSTGIS_DEBUG(3,
" no stats have been gathered");
1637 elog(NOTICE,
" no features lie in the stats histogram, invalid stats");
1638 stats->stats_valid =
false;
1659 stats->stakind[stats_slot] = stats_kind;
1660 stats->staop[stats_slot] = InvalidOid;
1661 stats->stanumbers[stats_slot] = (float4*)nd_stats;
1662 stats->numnumbers[stats_slot] = nd_stats_size/
sizeof(float4);
1663 stats->stanullfrac = (float4)null_cnt/sample_rows;
1664 stats->stawidth = total_width/notnull_cnt;
1665 stats->stadistinct = -1.0;
1666 stats->stats_valid =
true;
1668 POSTGIS_DEBUGF(3,
" out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
1669 POSTGIS_DEBUGF(3,
" out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1670 POSTGIS_DEBUGF(3,
" out: slot 0: numnumbers %d", stats->numnumbers[0]);
1671 POSTGIS_DEBUGF(3,
" out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
1672 POSTGIS_DEBUGF(3,
" out: average width: %d bytes", stats->stawidth);
1673 POSTGIS_DEBUG (3,
" out: distinct values: all (no check done)");
1702 int sample_rows,
double total_rows)
1741 VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1742 Form_pg_attribute attr = stats->attr;
1744 POSTGIS_DEBUG(2,
"gserialized_analyze_nd called");
1748 if (attr->attstattarget < 0)
1749 attr->attstattarget = default_statistics_target;
1751 POSTGIS_DEBUGF(3,
" attribute stat target: %d", attr->attstattarget);
1754 stats->minrows = 300 * stats->attr->attstattarget;
1757 POSTGIS_DEBUGF(3,
" minrows: %d", stats->minrows);
1760 PG_RETURN_BOOL(
true);
1786 double total_count = 0.0;
1793 elog(NOTICE,
" estimate_selectivity called with null input");
1805 POSTGIS_DEBUGF(3,
" mode: %d", mode);
1808 POSTGIS_DEBUG(3,
" in 2d mode, stripping the computation down to 2d");
1824 POSTGIS_DEBUG(3,
" search box does not overlap histogram, returning 0");
1831 POSTGIS_DEBUG(3,
" search box contains histogram, returning 1");
1838 POSTGIS_DEBUG(3,
" search box overlap with stats histogram failed");
1843 for ( d = 0; d < nd_stats->
ndims; d++ )
1848 cell_size[d] = (max[d] - min[d]) / nd_stats->
size[d];
1849 POSTGIS_DEBUGF(3,
" cell_size[%d] : %.9g", d, cell_size[d]);
1852 at[d] = nd_ibox.
min[d];
1858 float cell_count, ratio;
1862 for ( d = 0; d < nd_stats->
ndims; d++ )
1864 nd_cell.
min[d] = min[d] + (at[d]+0) * cell_size[d];
1865 nd_cell.
max[d] = min[d] + (at[d]+1) * cell_size[d];
1872 total_count += cell_count * ratio;
1873 POSTGIS_DEBUGF(4,
" cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
1880 POSTGIS_DEBUGF(3,
" nd_stats->histogram_features = %f", nd_stats->
histogram_features);
1881 POSTGIS_DEBUGF(3,
" nd_stats->histogram_cells = %f", nd_stats->
histogram_cells);
1882 POSTGIS_DEBUGF(3,
" sum(overlapped histogram cells) = %f", total_count);
1883 POSTGIS_DEBUGF(3,
" selectivity = %f", selectivity);
1886 if (selectivity > 1.0) selectivity = 1.0;
1887 else if (selectivity < 0.0) selectivity = 0.0;
1901 Oid table_oid = PG_GETARG_OID(0);
1902 text *att_text = PG_GETARG_TEXT_P(1);
1909 if ( ! PG_ARGISNULL(2) )
1915 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid),
text2cstring(att_text));
1919 json = cstring2text(str);
1922 PG_RETURN_TEXT_P(json);
1933 Oid table_oid = PG_GETARG_OID(0);
1934 text *att_text = PG_GETARG_TEXT_P(1);
1935 Datum geom_datum = PG_GETARG_DATUM(2);
1937 float8 selectivity = 0;
1942 if ( ! PG_ARGISNULL(3) )
1949 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid),
text2cstring(att_text));
1952 if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
1953 elog(ERROR,
"unable to calculate bounding box from geometry");
1961 PG_RETURN_FLOAT8(selectivity);
1972 Oid table_oid1 = PG_GETARG_OID(0);
1973 text *att_text1 = PG_GETARG_TEXT_P(1);
1974 Oid table_oid2 = PG_GETARG_OID(2);
1975 text *att_text2 = PG_GETARG_TEXT_P(3);
1977 float8 selectivity = 0;
1986 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid1),
text2cstring(att_text1));
1989 elog(ERROR,
"stats for \"%s.%s\" do not exist", get_rel_name(table_oid2),
text2cstring(att_text2));
1992 if ( ! PG_ARGISNULL(4) )
1994 text *modetxt = PG_GETARG_TEXT_P(4);
1996 if ( modestr[0] ==
'N' )
2005 PG_RETURN_FLOAT8(selectivity);
2015 PG_RETURN_DATUM(DirectFunctionCall5(
2017 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2018 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2030 PG_RETURN_DATUM(DirectFunctionCall5(
2032 PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
2033 PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
2054 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2056 List *args = (List *) PG_GETARG_POINTER(2);
2058 int mode = PG_GETARG_INT32(4);
2066 float8 selectivity = 0;
2068 POSTGIS_DEBUG(2,
"gserialized_gist_sel called");
2080 if (list_length(args) != 2)
2082 POSTGIS_DEBUG(3,
"gserialized_gist_sel: not a binary opclause");
2087 other = (Node *) linitial(args);
2088 if ( ! IsA(other, Const) )
2090 self = (Var *)other;
2091 other = (Node *) lsecond(args);
2095 self = (Var *) lsecond(args);
2098 if ( ! IsA(other, Const) )
2100 POSTGIS_DEBUG(3,
" no constant arguments - returning a default selectivity");
2117 if ( ! IsA(
self, Var) )
2119 POSTGIS_DEBUG(3,
" no bare variable argument ? - returning a moderate selectivity");
2124 if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
2126 POSTGIS_DEBUG(3,
"search box is EMPTY");
2127 PG_RETURN_FLOAT8(0.0);
2129 POSTGIS_DEBUGF(4,
" requested search box is: %s",
gbox_to_string(&search_box));
2132 relid = getrelid(self->varno, root->parse->rtable);
2136 POSTGIS_DEBUG(3,
" unable to load stats from syscache, not analyzed yet?");
2143 POSTGIS_DEBUGF(3,
" returning computed value: %f", selectivity);
2146 PG_RETURN_FLOAT8(selectivity);
2162 char *nsp_tbl = NULL;
2167 if ( PG_NARGS() == 3 )
2171 col = PG_GETARG_TEXT_P(2);
2172 nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 6);
2173 sprintf(nsp_tbl,
"\"%s\".\"%s\"", nsp, tbl);
2174 tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2177 else if ( PG_NARGS() == 2 )
2180 col = PG_GETARG_TEXT_P(1);
2181 nsp_tbl = palloc(strlen(tbl) + 3);
2182 sprintf(nsp_tbl,
"\"%s\"", tbl);
2183 tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
2188 elog(ERROR,
"estimated_extent() called with wrong number of arguments");
2197 elog(ERROR,
"stats for \"%s.%s\" do not exist", tbl,
text2cstring(col));
2200 gbox = palloc(
sizeof(
GBOX));
2210 PG_RETURN_POINTER(gbox);
2222 if ( PG_NARGS() == 3 )
2228 PG_GETARG_DATUM(2)));
2230 else if ( PG_NARGS() == 2 )
2235 PG_GETARG_DATUM(1)));
2238 elog(ERROR,
"geometry_estimated_extent() called with wrong number of arguments");
int gserialized_get_gbox_p(const GSERIALIZED *g, GBOX *box)
Read the bounding box off a serialization and calculate one if it is not already there.
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one...
static int text_p_get_mode(const text *txt)
Utility function to see if the first letter of the mode argument is 'N'.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd)
For (geometry &&& geometry) and (geography && geography) we call into the N-D mode.
#define DEFAULT_ND_JOINSEL
static void compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generat...
stringbuffer_t * stringbuffer_create(void)
Allocate a new stringbuffer_t.
int gbox_is_valid(const GBOX *gbox)
Return false if any of the dimensions is NaN or infinite.
#define MIN_DIMENSION_WIDTH
Minimum width of a dimension that we'll bother trying to compute statistics on.
char * gbox_to_string(const GBOX *gbox)
Allocate a string representation of the GBOX, based on dimensionality of flags.
#define FLAGS_GET_GEODETIC(flags)
char * stringbuffer_getstringcopy(stringbuffer_t *s)
Returns a newly allocated string large enough to contain the current state of the string...
#define ND_DIMS
The maximum number of dimensions our code can handle.
static void compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
The gserialized_analyze_nd sets this function as a callback on the stats object when called by the AN...
Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return TRUE if ND_BOX a contains b, false otherwise.
Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_2D
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
struct ND_BOX_T ND_BOX
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
#define FALLBACK_ND_JOINSEL
Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
#define FLAGS_SET_GEODETIC(flags, value)
Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
static float8 estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_ST...
int stringbuffer_aprintf(stringbuffer_t *s, const char *fmt,...)
Appends a formatted string to the current string buffer, using the format and argument list provided...
static int nd_box_init(ND_BOX *a)
Zero out an ND_BOX.
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
#define FLAGS_SET_Z(flags, value)
Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
static int nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension...
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array...
Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return TRUE if ND_BOX a overlaps b, false otherwise.
static double total_double(const double *vals, int nvals)
Given double array, return sum of values.
char * text2cstring(const text *textptr)
Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
static int cmp_int(const void *a, const void *b)
Integer comparison function for qsort.
static int nd_box_init_bounds(ND_BOX *a)
Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins t...
N-dimensional box index type.
#define FLAGS_GET_Z(flags)
Macros for manipulating the 'flags' byte.
static int range_quintile(int *vals, int nvals)
The difference between the fourth and first quintile values, the "inter-quintile range".
Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
static float8 estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator...
static ND_STATS * pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode)
Pull the stats object from the PgSQL system catalogs.
float4 histogram_features
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
void stringbuffer_append(stringbuffer_t *s, const char *a)
Append the specified string to the stringbuffer_t.
#define STATISTIC_KIND_2D
struct ND_IBOX_T ND_IBOX
N-dimensional box index type.
void stringbuffer_destroy(stringbuffer_t *s)
Free the stringbuffer_t and all memory managed within it.
struct ND_STATS_T ND_STATS
N-dimensional statistics structure.
static int nd_box_merge(const ND_BOX *source, ND_BOX *target)
Expand the bounds of target to include source.
static int nd_box_expand(ND_BOX *nd_box, double expansion_factor)
Expand an ND_BOX ever so slightly.
static ND_STATS * pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode)
Pull the stats object from the PgSQL system catalogs.
#define FLAGS_GET_M(flags)
static char * nd_stats_to_json(const ND_STATS *nd_stats)
Convert an ND_STATS to a JSON representation for external use.
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
#define STATISTIC_SLOT_ND
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
N-dimensional statistics structure.
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
This library is the generic geometry handling section of PostGIS.
#define DEFAULT_ND_SEL
Default geometry selectivity factor.
#define STATISTIC_KIND_ND
Assign a number to the n-dimensional statistics kind.
#define FLAGS_SET_M(flags, value)