PostGIS 3.6.2dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches

◆ estimate_selectivity()

static float8 estimate_selectivity ( const GBOX box,
const ND_STATS nd_stats,
int  mode 
)
static

This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_STATS structure.

The selectivity is a float from 0-1, that estimates the proportion of the rows in the table that will be returned as a result of the search box.

To get our estimate, we need "only" sum up the values * the proportion of each cell in the histogram that falls within the search box, then divide by the number of features that generated the histogram.

Definition at line 1786 of file gserialized_estimate.c.

1787{
1788 int d; /* counter */
1789 float8 selectivity;
1790 ND_BOX nd_box;
1791 ND_IBOX nd_ibox;
1792 int at[ND_DIMS];
1793 double cell_size[ND_DIMS];
1794 double min[ND_DIMS];
1795 double max[ND_DIMS];
1796 double total_count = 0.0;
1797 int ndims_max;
1798
1799 /* Calculate the overlap of the box on the histogram */
1800 if ( ! nd_stats )
1801 {
1802 elog(NOTICE, " estimate_selectivity called with null input");
1803 return FALLBACK_ND_SEL;
1804 }
1805
1806 ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1807
1808 /* Initialize nd_box. */
1809 nd_box_from_gbox(box, &nd_box);
1810
1811 /*
1812 * To return 2D stats on an ND sample, we need to make the
1813 * 2D box cover the full range of the other dimensions in the
1814 * histogram.
1815 */
1816 POSTGIS_DEBUGF(3, " mode: %d", mode);
1817 if ( mode == 2 )
1818 {
1819 POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1820 ndims_max = 2;
1821 }
1822
1823 POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1824 POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1825
1826 // elog(DEBUG1, "out histogram:\n%s", nd_stats_to_grid(nd_stats));
1827
1828 /*
1829 * Search box completely misses histogram extent?
1830 * We have to intersect in all N dimensions or else we have
1831 * zero interaction under the &&& operator. It's important
1832 * to short circuit in this case, as some of the tests below
1833 * will return junk results when run on non-intersecting inputs.
1834 */
1835 if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1836 {
1837 POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1838 return 0.0;
1839 }
1840
1841 /* Search box completely contains histogram extent! */
1842 if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1843 {
1844 POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1845 return 1.0;
1846 }
1847
1848 /* Calculate the overlap of the box on the histogram */
1849 if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
1850 {
1851 POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
1852 return FALLBACK_ND_SEL;
1853 }
1854
1855 /* Work out some measurements of the histogram */
1856 for ( d = 0; d < nd_stats->ndims; d++ )
1857 {
1858 /* Cell size in each dim */
1859 min[d] = nd_stats->extent.min[d];
1860 max[d] = nd_stats->extent.max[d];
1861 cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
1862 POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
1863
1864 /* Initialize the counter */
1865 at[d] = nd_ibox.min[d];
1866 }
1867
1868 /* Move through all the overlap values and sum them */
1869 do
1870 {
1871 float cell_count, ratio;
1872 ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1873
1874 /* We have to pro-rate partially overlapped cells. */
1875 for ( d = 0; d < nd_stats->ndims; d++ )
1876 {
1877 nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
1878 nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
1879 }
1880
1881 ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
1882 cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
1883
1884 /* Add the pro-rated count for this cell to the overall total */
1885 total_count += (double)cell_count * ratio;
1886 POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
1887 }
1888 while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1889
1890 /* Scale by the number of features in our histogram to get the proportion */
1891 selectivity = total_count / nd_stats->histogram_features;
1892
1893 POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
1894 POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
1895 POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
1896 POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
1897
1898 /* Prevent rounding overflows */
1899 if (selectivity > 1.0) selectivity = 1.0;
1900 else if (selectivity < 0.0) selectivity = 0.0;
1901
1902 return selectivity;
1903}
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *cover, const ND_BOX *target, int ndims)
static int nd_stats_value_index(const ND_STATS *stats, const int *indexes)

References ND_STATS_T::extent, FALLBACK_ND_SEL, gbox_ndims(), ND_STATS_T::histogram_cells, ND_STATS_T::histogram_features, ND_BOX_T::max, ND_BOX_T::min, ND_IBOX_T::min, nd_box_contains(), nd_box_from_gbox(), nd_box_intersects(), nd_box_overlap(), nd_box_ratio(), nd_box_to_json(), ND_DIMS, nd_increment(), nd_stats_value_index(), ND_STATS_T::ndims, ND_STATS_T::size, and ND_STATS_T::value.

Referenced by _postgis_gserialized_sel(), and gserialized_sel_internal().

Here is the call graph for this function:
Here is the caller graph for this function: