PostGIS  3.0.6dev-r@@SVN_REVISION@@

◆ estimate_selectivity()

static float8 estimate_selectivity ( const GBOX * box,
const ND_STATS * nd_stats,
int  mode 
)
static

This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_STATS structure.

The selectivity is a float between 0 and 1 that estimates the proportion of the rows in the table that will be returned as a result of the search box.

To get our estimate, we "only" need to sum, over every histogram cell, the cell's value multiplied by the proportion of that cell that falls within the search box, and then divide the total by the number of features that generated the histogram.

Definition at line 1949 of file gserialized_estimate.c.

1950 { /* Estimates the 0..1 fraction of rows a search box selects, using the ND_STATS histogram. */
1951  int d; /* dimension loop counter */
1952  float8 selectivity;
1953  ND_BOX nd_box;
1954  ND_IBOX nd_ibox;
1955  int at[ND_DIMS]; /* n-d index of the histogram cell currently being visited */
1956  double cell_size[ND_DIMS];
1957  double min[ND_DIMS];
1958  double max[ND_DIMS];
1959  double total_count = 0.0; /* running sum of pro-rated cell values */
1960  int ndims_max;
1961 
1962  /* Without statistics there is nothing to estimate from: return the fallback value */
1963  if ( ! nd_stats )
1964  {
1965  elog(NOTICE, " estimate_selectivity called with null input");
1966  return FALLBACK_ND_SEL;
1967  }
1968 
1969  ndims_max = Max(nd_stats->ndims, gbox_ndims(box)); /* dimensions used by the intersects/contains tests below */
1970 
1971  /* Initialize nd_box from the search GBOX. */
1972  nd_box_from_gbox(box, &nd_box);
1973 
1974  /*
1975  * 2D mode on an ND sample: only ndims_max is reduced to 2 here, so
1976  * the intersects/contains tests ignore higher dimensions. NOTE(review):
1977  * the summing loops below still use nd_stats->ndims -- confirm intended.
1978  */
1979  POSTGIS_DEBUGF(3, " mode: %d", mode);
1980  if ( mode == 2 )
1981  {
1982  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1983  ndims_max = 2;
1984  }
1985 
1986  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1987  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1988 
1989  /*
1990  * Search box completely misses histogram extent?
1991  * We have to intersect in all N dimensions or else we have
1992  * zero interaction under the &&& operator. It's important
1993  * to short circuit in this case, as some of the tests below
1994  * will return junk results when run on non-intersecting inputs.
1995  */
1996  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1997  {
1998  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1999  return 0.0;
2000  }
2001 
2002  /* Search box completely contains histogram extent: every row is selected */
2003  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
2004  {
2005  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
2006  return 1.0;
2007  }
2008 
2009  /* Find the range of histogram cells the search box overlaps (result in nd_ibox) */
2010  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
2011  {
2012  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
2013  return FALLBACK_ND_SEL;
2014  }
2015 
2016  /* Work out some measurements of the histogram */
2017  for ( d = 0; d < nd_stats->ndims; d++ )
2018  {
2019  /* Cell size in each dim: histogram extent divided by the cell count along that dim */
2020  min[d] = nd_stats->extent.min[d];
2021  max[d] = nd_stats->extent.max[d];
2022  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
2023  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2024 
2025  /* Start the n-d cell counter at the low corner of the overlap range */
2026  at[d] = nd_ibox.min[d];
2027  }
2028 
2029  /* Move through all the overlap values and sum them */
2030  do
2031  {
2032  float cell_count, ratio;
2033  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2034 
2035  /* Build the bounds of the current cell so partially overlapped cells can be pro-rated. */
2036  for ( d = 0; d < nd_stats->ndims; d++ )
2037  {
2038  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2039  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2040  }
2041 
2042  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims); /* proportion of this cell covered by the search box */
2043  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)]; /* histogram value stored for this cell */
2044 
2045  /* Add the pro-rated count for this cell to the overall total */
2046  total_count += cell_count * ratio;
2047  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio); /* only the first two indexes are logged */
2048  }
2049  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) ); /* step `at` within nd_ibox; loop ends when nd_increment returns 0 */
2050 
2051  /* Scale by the number of features in our histogram to get the proportion. NOTE(review): no zero check on histogram_features is visible here -- confirm it cannot be 0 */
2052  selectivity = total_count / nd_stats->histogram_features;
2053 
2054  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2055  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2056  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2057  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2058 
2059  /* Clamp into [0,1] to guard against rounding overflows */
2060  if (selectivity > 1.0) selectivity = 1.0;
2061  else if (selectivity < 0.0) selectivity = 0.0;
2062 
2063  return selectivity;
2064 }
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
#define ND_DIMS
The maximum number of dimensions our code can handle.
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array.
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
float4 max[ND_DIMS]
float4 min[ND_DIMS]
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
int min[ND_DIMS]
N-dimensional box index type.
float4 size[ND_DIMS]

References ND_STATS_T::extent, FALLBACK_ND_SEL, gbox_ndims(), ND_STATS_T::histogram_cells, ND_STATS_T::histogram_features, ND_BOX_T::max, ND_BOX_T::min, ND_IBOX_T::min, nd_box_contains(), nd_box_from_gbox(), nd_box_intersects(), nd_box_overlap(), nd_box_ratio(), nd_box_to_json(), ND_DIMS, nd_increment(), nd_stats_value_index(), ND_STATS_T::ndims, ND_STATS_T::size, and ND_STATS_T::value.

Referenced by _postgis_gserialized_sel(), and gserialized_sel_internal().

Here is the call graph for this function:
Here is the caller graph for this function: