The selectivity is a float between 0 and 1 that estimates the proportion of the rows in the table that will be returned as a result of the search box.
To get our estimate, we "only" need to sum up, for each cell in the histogram, the cell's value multiplied by the proportion of that cell that falls within the search box, then divide by the number of features that generated the histogram.
1950{
1951 int d;
1952 float8 selectivity;
1959 double total_count = 0.0;
1960 int ndims_max;
1961
1962
1963 if ( ! nd_stats )
1964 {
1965 elog(NOTICE, " estimate_selectivity called with null input");
1967 }
1968
1970
1971
1973
1974
1975
1976
1977
1978
1979 POSTGIS_DEBUGF(3, " mode: %d", mode);
1980 if ( mode == 2 )
1981 {
1982 POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1983 ndims_max = 2;
1984 }
1985
1988
1989
1990
1991
1992
1993
1994
1995
1997 {
1998 POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1999 return 0.0;
2000 }
2001
2002
2004 {
2005 POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
2006 return 1.0;
2007 }
2008
2009
2011 {
2012 POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
2014 }
2015
2016
2017 for ( d = 0; d < nd_stats->
ndims; d++ )
2018 {
2019
2022 cell_size[d] = (max[d] - min[d]) / nd_stats->
size[d];
2023 POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2024
2025
2026 at[d] = nd_ibox.
min[d];
2027 }
2028
2029
2030 do
2031 {
2032 float cell_count, ratio;
2033 ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2034
2035
2036 for ( d = 0; d < nd_stats->
ndims; d++ )
2037 {
2038 nd_cell.
min[d] = min[d] + (at[d]+0) * cell_size[d];
2039 nd_cell.
max[d] = min[d] + (at[d]+1) * cell_size[d];
2040 }
2041
2044
2045
2046 total_count += cell_count * ratio;
2047 POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2048 }
2050
2051
2053
2054 POSTGIS_DEBUGF(3,
" nd_stats->histogram_features = %f", nd_stats->
histogram_features);
2055 POSTGIS_DEBUGF(3,
" nd_stats->histogram_cells = %f", nd_stats->
histogram_cells);
2056 POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2057 POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2058
2059
2060 if (selectivity > 1.0) selectivity = 1.0;
2061 else if (selectivity < 0.0) selectivity = 0.0;
2062
2063 return selectivity;
2064}
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
#define ND_DIMS
The maximum number of dimensions our code can handle.
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array.
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
N-dimensional box index type.
float4 histogram_features