PostGIS  2.5.1dev-r@@SVN_REVISION@@

◆ estimate_selectivity()

static float8 estimate_selectivity ( const GBOX box,
const ND_STATS nd_stats,
int  mode 
)
static

This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_STATS structure.

The selectivity is a float from 0-1, that estimates the proportion of the rows in the table that will be returned as a result of the search box.

To get our estimate, we need "only" sum up the values * the proportion of each cell in the histogram that falls within the search box, then divide by the number of features that generated the histogram.

Definition at line 1929 of file gserialized_estimate.c.

References _postgis_gserialized_stats(), ND_STATS_T::extent, FALLBACK_ND_SEL, gbox_ndims(), ND_STATS_T::histogram_cells, ND_STATS_T::histogram_features, ND_BOX_T::max, ND_BOX_T::min, ND_IBOX_T::min, nd_box_contains(), nd_box_from_gbox(), nd_box_intersects(), nd_box_overlap(), nd_box_ratio(), nd_box_to_json(), ND_DIMS, nd_increment(), nd_stats_value_index(), ND_STATS_T::ndims, PG_FUNCTION_INFO_V1(), ND_STATS_T::size, and ND_STATS_T::value.

Referenced by _postgis_gserialized_sel(), and gserialized_gist_sel().

1930 {
1931  int d; /* counter */
1932  float8 selectivity;
1933  ND_BOX nd_box;
1934  ND_IBOX nd_ibox;
1935  int at[ND_DIMS];
1936  double cell_size[ND_DIMS];
1937  double min[ND_DIMS];
1938  double max[ND_DIMS];
1939  double total_count = 0.0;
1940  int ndims_max;
1941 
1942  /* Calculate the overlap of the box on the histogram */
1943  if ( ! nd_stats )
1944  {
1945  elog(NOTICE, " estimate_selectivity called with null input");
1946  return FALLBACK_ND_SEL;
1947  }
1948 
1949  ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1950 
1951  /* Initialize nd_box. */
1952  nd_box_from_gbox(box, &nd_box);
1953 
1954  /*
1955  * To return 2D stats on an ND sample, we need to make the
1956  * 2D box cover the full range of the other dimensions in the
1957  * histogram.
1958  */
1959  POSTGIS_DEBUGF(3, " mode: %d", mode);
1960  if ( mode == 2 )
1961  {
1962  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1963  ndims_max = 2;
1964  }
1965 
1966  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1967  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1968 
1969  /*
1970  * Search box completely misses histogram extent?
1971  * We have to intersect in all N dimensions or else we have
1972  * zero interaction under the &&& operator. It's important
1973  * to short circuit in this case, as some of the tests below
1974  * will return junk results when run on non-intersecting inputs.
1975  */
1976  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1977  {
1978  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1979  return 0.0;
1980  }
1981 
1982  /* Search box completely contains histogram extent! */
1983  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1984  {
1985  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1986  return 1.0;
1987  }
1988 
1989  /* Calculate the overlap of the box on the histogram */
1990  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
1991  {
1992  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
1993  return FALLBACK_ND_SEL;
1994  }
1995 
1996  /* Work out some measurements of the histogram */
1997  for ( d = 0; d < nd_stats->ndims; d++ )
1998  {
1999  /* Cell size in each dim */
2000  min[d] = nd_stats->extent.min[d];
2001  max[d] = nd_stats->extent.max[d];
2002  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
2003  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2004 
2005  /* Initialize the counter */
2006  at[d] = nd_ibox.min[d];
2007  }
2008 
2009  /* Move through all the overlap values and sum them */
2010  do
2011  {
2012  float cell_count, ratio;
2013  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2014 
2015  /* We have to pro-rate partially overlapped cells. */
2016  for ( d = 0; d < nd_stats->ndims; d++ )
2017  {
2018  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2019  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2020  }
2021 
2022  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2023  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2024 
2025  /* Add the pro-rated count for this cell to the overall total */
2026  total_count += cell_count * ratio;
2027  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2028  }
2029  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2030 
2031  /* Scale by the number of features in our histogram to get the proportion */
2032  selectivity = total_count / nd_stats->histogram_features;
2033 
2034  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2035  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2036  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2037  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2038 
2039  /* Prevent rounding overflows */
2040  if (selectivity > 1.0) selectivity = 1.0;
2041  else if (selectivity < 0.0) selectivity = 0.0;
2042 
2043  return selectivity;
2044 }
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one...
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
#define ND_DIMS
The maximum number of dimensions our code can handle.
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array...
float4 size[ND_DIMS]
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
int min[ND_DIMS]
N-dimensional box index type.
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
float4 max[ND_DIMS]
float4 min[ND_DIMS]
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
Here is the call graph for this function:
Here is the caller graph for this function: