PostGIS  3.7.0dev-r@@SVN_REVISION@@

◆ estimate_selectivity()

static float8 estimate_selectivity ( const GBOX box,
const ND_STATS nd_stats,
int  mode 
)
static

This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_STATS structure.

The selectivity is a float from 0-1, that estimates the proportion of the rows in the table that will be returned as a result of the search box.

To get our estimate, we need "only" sum up the values * the proportion of each cell in the histogram that falls within the search box, then divide by the number of features that generated the histogram.

Definition at line 1940 of file gserialized_estimate.c.

1941 {
1942  int d; /* counter */
1943  float8 selectivity;
1944  ND_BOX nd_box;
1945  ND_IBOX nd_ibox;
1946  int at[ND_DIMS];
1947  double cell_size[ND_DIMS];
1948  double min[ND_DIMS];
1949  double max[ND_DIMS];
1950  double total_count = 0.0;
1951  int ndims_max;
1952 
1953  /* Calculate the overlap of the box on the histogram */
1954  if ( ! nd_stats )
1955  {
1956  elog(NOTICE, " estimate_selectivity called with null input");
1957  return FALLBACK_ND_SEL;
1958  }
1959 
1960  ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1961 
1962  /* Initialize nd_box. */
1963  nd_box_from_gbox(box, &nd_box);
1964 
1965  /*
1966  * To return 2D stats on an ND sample, we need to make the
1967  * 2D box cover the full range of the other dimensions in the
1968  * histogram.
1969  */
1970  POSTGIS_DEBUGF(3, " mode: %d", mode);
1971  if ( mode == 2 )
1972  {
1973  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1974  ndims_max = 2;
1975  }
1976 
1977  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1978  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1979 
1980  // elog(DEBUG1, "out histogram:\n%s", nd_stats_to_grid(nd_stats));
1981 
1982  /*
1983  * Search box completely misses histogram extent?
1984  * We have to intersect in all N dimensions or else we have
1985  * zero interaction under the &&& operator. It's important
1986  * to short circuit in this case, as some of the tests below
1987  * will return junk results when run on non-intersecting inputs.
1988  */
1989  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1990  {
1991  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1992  return 0.0;
1993  }
1994 
1995  /* Search box completely contains histogram extent! */
1996  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1997  {
1998  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1999  return 1.0;
2000  }
2001 
2002  /* Calculate the overlap of the box on the histogram */
2003  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
2004  {
2005  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
2006  return FALLBACK_ND_SEL;
2007  }
2008 
2009  /* Work out some measurements of the histogram */
2010  for ( d = 0; d < nd_stats->ndims; d++ )
2011  {
2012  /* Cell size in each dim */
2013  min[d] = nd_stats->extent.min[d];
2014  max[d] = nd_stats->extent.max[d];
2015  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
2016  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
2017 
2018  /* Initialize the counter */
2019  at[d] = nd_ibox.min[d];
2020  }
2021 
2022  /* Move through all the overlap values and sum them */
2023  do
2024  {
2025  float cell_count, ratio;
2026  ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
2027 
2028  /* We have to pro-rate partially overlapped cells. */
2029  for ( d = 0; d < nd_stats->ndims; d++ )
2030  {
2031  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
2032  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
2033  }
2034 
2035  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
2036  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
2037 
2038  /* Add the pro-rated count for this cell to the overall total */
2039  total_count += (double)cell_count * ratio;
2040  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
2041  }
2042  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
2043 
2044  /* Scale by the number of features in our histogram to get the proportion */
2045  selectivity = total_count / nd_stats->histogram_features;
2046 
2047  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
2048  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
2049  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
2050  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
2051 
2052  /* Prevent rounding overflows */
2053  if (selectivity > 1.0) selectivity = 1.0;
2054  else if (selectivity < 0.0) selectivity = 0.0;
2055 
2056  return selectivity;
2057 }
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
#define ND_DIMS
The maximum number of dimensions our code can handle.
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array.
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
float4 max[ND_DIMS]
float4 min[ND_DIMS]
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
int min[ND_DIMS]
N-dimensional box index type.
float4 size[ND_DIMS]

References ND_STATS_T::extent, FALLBACK_ND_SEL, gbox_ndims(), ND_STATS_T::histogram_cells, ND_STATS_T::histogram_features, ND_BOX_T::max, ND_BOX_T::min, ND_IBOX_T::min, nd_box_contains(), nd_box_from_gbox(), nd_box_intersects(), nd_box_overlap(), nd_box_ratio(), nd_box_to_json(), ND_DIMS, nd_increment(), nd_stats_value_index(), ND_STATS_T::ndims, ND_STATS_T::size, and ND_STATS_T::value.

Referenced by _postgis_gserialized_sel(), and gserialized_sel_internal().

Here is the call graph for this function:
Here is the caller graph for this function: