PostGIS  2.5.7dev-r@@SVN_REVISION@@

◆ estimate_selectivity()

static float8 estimate_selectivity ( const GBOX box,
const ND_STATS nd_stats,
int  mode 
)
static

This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_STATS structure.

The selectivity is a float from 0-1, that estimates the proportion of the rows in the table that will be returned as a result of the search box.

To get our estimate, we need "only" sum up the values * the proportion of each cell in the histogram that falls within the search box, then divide by the number of features that generated the histogram.

Definition at line 1937 of file gserialized_estimate.c.

1938 {
1939  int d; /* counter */
1940  float8 selectivity;
1941  ND_BOX nd_box;
1942  ND_IBOX nd_ibox;
1943  int at[ND_DIMS];
1944  double cell_size[ND_DIMS];
1945  double min[ND_DIMS];
1946  double max[ND_DIMS];
1947  double total_count = 0.0;
1948  int ndims_max;
1949 
1950  /* Calculate the overlap of the box on the histogram */
1951  if ( ! nd_stats )
1952  {
1953  elog(NOTICE, " estimate_selectivity called with null input");
1954  return FALLBACK_ND_SEL;
1955  }
1956 
1957  ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1958 
1959  /* Initialize nd_box. */
1960  nd_box_from_gbox(box, &nd_box);
1961 
1962  /*
1963  * To return 2D stats on an ND sample, we need to make the
1964  * 2D box cover the full range of the other dimensions in the
1965  * histogram.
1966  */
1967  POSTGIS_DEBUGF(3, " mode: %d", mode);
1968  if ( mode == 2 )
1969  {
1970  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1971  ndims_max = 2;
1972  }
1973 
1974  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1975  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1976 
1977  /*
1978  * Search box completely misses histogram extent?
1979  * We have to intersect in all N dimensions or else we have
1980  * zero interaction under the &&& operator. It's important
1981  * to short circuit in this case, as some of the tests below
1982  * will return junk results when run on non-intersecting inputs.
1983  */
1984  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1985  {
1986  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1987  return 0.0;
1988  }
1989 
1990  /* Search box completely contains histogram extent! */
1991  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1992  {
1993  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1994  return 1.0;
1995  }
1996 
1997  /* Calculate the overlap of the box on the histogram */
1998  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
1999  {
2000  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
2001  return FALLBACK_ND_SEL;
2002  }
2003 
2004  /* Work out some measurements of the histogram */
2005  for ( d = 0; d < nd_stats->ndims; d++ )
2006  {
2007  /* Cell size in each dim */
2008  min[d] = nd_stats->extent.min[d];
2009  max[d] = nd_stats->extent.max[d];
2010  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
2011  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2012 
2013  /* Initialize the counter */
2014  at[d] = nd_ibox.min[d];
2015  }
2016 
2017  /* Move through all the overlap values and sum them */
2018  do
2019  {
2020  float cell_count, ratio;
2021  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2022 
2023  /* We have to pro-rate partially overlapped cells. */
2024  for ( d = 0; d < nd_stats->ndims; d++ )
2025  {
2026  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2027  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2028  }
2029 
2030  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2031  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2032 
2033  /* Add the pro-rated count for this cell to the overall total */
2034  total_count += cell_count * ratio;
2035  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2036  }
2037  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2038 
2039  /* Scale by the number of features in our histogram to get the proportion */
2040  selectivity = total_count / nd_stats->histogram_features;
2041 
2042  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2043  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2044  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2045  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2046 
2047  /* Prevent rounding overflows */
2048  if (selectivity > 1.0) selectivity = 1.0;
2049  else if (selectivity < 0.0) selectivity = 0.0;
2050 
2051  return selectivity;
2052 }
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
#define ND_DIMS
The maximum number of dimensions our code can handle.
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array.
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
float4 max[ND_DIMS]
float4 min[ND_DIMS]
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
int min[ND_DIMS]
N-dimensional box index type.
float4 size[ND_DIMS]

References ND_STATS_T::extent, FALLBACK_ND_SEL, gbox_ndims(), ND_STATS_T::histogram_cells, ND_STATS_T::histogram_features, ND_BOX_T::max, ND_BOX_T::min, ND_IBOX_T::min, nd_box_contains(), nd_box_from_gbox(), nd_box_intersects(), nd_box_overlap(), nd_box_ratio(), nd_box_to_json(), ND_DIMS, nd_increment(), nd_stats_value_index(), ND_STATS_T::ndims, ND_STATS_T::size, and ND_STATS_T::value.

Referenced by _postgis_gserialized_sel(), and gserialized_gist_sel().

Here is the call graph for this function:
Here is the caller graph for this function: