PostGIS  2.2.8dev-r@@SVN_REVISION@@

◆ estimate_selectivity()

static float8 estimate_selectivity ( const GBOX box,
const ND_STATS nd_stats,
int  mode 
)
static

This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_STATS structure.

The selectivity is a float from 0-1, that estimates the proportion of the rows in the table that will be returned as a result of the search box.

To get our estimate, we need "only" sum up the values * the proportion of each cell in the histogram that falls within the search box, then divide by the number of features that generated the histogram.

Definition at line 1802 of file gserialized_estimate.c.

References _postgis_gserialized_stats(), ND_STATS_T::extent, FALLBACK_ND_SEL, gbox_ndims(), ND_STATS_T::histogram_cells, ND_STATS_T::histogram_features, ND_BOX_T::max, ND_BOX_T::min, ND_IBOX_T::min, nd_box_contains(), nd_box_from_gbox(), nd_box_intersects(), nd_box_overlap(), nd_box_ratio(), nd_box_to_json(), ND_DIMS, nd_increment(), nd_stats_value_index(), ND_STATS_T::ndims, PG_FUNCTION_INFO_V1(), ND_STATS_T::size, and ND_STATS_T::value.

Referenced by _postgis_gserialized_sel(), and gserialized_gist_sel().

1803 {
1804  int d; /* counter */
1805  float8 selectivity;
1806  ND_BOX nd_box;
1807  ND_IBOX nd_ibox;
1808  int at[ND_DIMS];
1809  double cell_size[ND_DIMS];
1810  double min[ND_DIMS];
1811  double max[ND_DIMS];
1812  double total_count = 0.0;
1813  int ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1814 // int ndims_min = Min(nd_stats->ndims, gbox_ndims(box));
1815 
1816  /* Calculate the overlap of the box on the histogram */
1817  if ( ! nd_stats )
1818  {
1819  elog(NOTICE, " estimate_selectivity called with null input");
1820  return FALLBACK_ND_SEL;
1821  }
1822 
1823  /* Initialize nd_box. */
1824  nd_box_from_gbox(box, &nd_box);
1825 
1826  /*
1827  * To return 2D stats on an ND sample, we need to make the
1828  * 2D box cover the full range of the other dimensions in the
1829  * histogram.
1830  */
1831  POSTGIS_DEBUGF(3, " mode: %d", mode);
1832  if ( mode == 2 )
1833  {
1834  POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1835  ndims_max = 2;
1836  }
1837 
1838  POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1839  POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1840 
1841  /*
1842  * Search box completely misses histogram extent?
1843  * We have to intersect in all N dimensions or else we have
1844  * zero interaction under the &&& operator. It's important
1845  * to short circuit in this case, as some of the tests below
1846  * will return junk results when run on non-intersecting inputs.
1847  */
1848  if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1849  {
1850  POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1851  return 0.0;
1852  }
1853 
1854  /* Search box completely contains histogram extent! */
1855  if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1856  {
1857  POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1858  return 1.0;
1859  }
1860 
1861  /* Calculate the overlap of the box on the histogram */
1862  if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
1863  {
1864  POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
1865  return FALLBACK_ND_SEL;
1866  }
1867 
1868  /* Work out some measurements of the histogram */
1869  for ( d = 0; d < nd_stats->ndims; d++ )
1870  {
1871  /* Cell size in each dim */
1872  min[d] = nd_stats->extent.min[d];
1873  max[d] = nd_stats->extent.max[d];
1874  cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
1875  POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
1876 
1877  /* Initialize the counter */
1878  at[d] = nd_ibox.min[d];
1879  }
1880 
1881  /* Move through all the overlap values and sum them */
1882  do
1883  {
1884  float cell_count, ratio;
1885  ND_BOX nd_cell;
1886 
1887  /* We have to pro-rate partially overlapped cells. */
1888  for ( d = 0; d < nd_stats->ndims; d++ )
1889  {
1890  nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
1891  nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
1892  }
1893 
1894  ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
1895  cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
1896 
1897  /* Add the pro-rated count for this cell to the overall total */
1898  total_count += cell_count * ratio;
1899  POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
1900  }
1901  while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1902 
1903  /* Scale by the number of features in our histogram to get the proportion */
1904  selectivity = total_count / nd_stats->histogram_features;
1905 
1906  POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
1907  POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
1908  POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
1909  POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
1910 
1911  /* Prevent rounding overflows */
1912  if (selectivity > 1.0) selectivity = 1.0;
1913  else if (selectivity < 0.0) selectivity = 0.0;
1914 
1915  return selectivity;
1916 }
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one...
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
#define ND_DIMS
The maximum number of dimensions our code can handle.
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return TRUE if ND_BOX a contains b, false otherwise.
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
static int nd_stats_value_index(const ND_STATS *stats, int *indexes)
Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array...
float4 size[ND_DIMS]
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return TRUE if ND_BOX a overlaps b, false otherwise.
int min[ND_DIMS]
N-dimensional box index type.
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
float4 max[ND_DIMS]
float4 min[ND_DIMS]
static double nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
Returns the proportion of b2 that is covered by b1.
N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all ca...
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
Here is the call graph for this function:
Here is the caller graph for this function: