PostGIS 3.7.0dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches

◆ estimate_selectivity()

static float8 estimate_selectivity ( const GBOX box,
const ND_STATS nd_stats,
int  mode 
)
static

This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_STATS structure.

The selectivity is a float from 0-1, that estimates the proportion of the rows in the table that will be returned as a result of the search box.

To get our estimate, we need "only" sum up the values * the proportion of each cell in the histogram that falls within the search box, then divide by the number of features that generated the histogram.

Definition at line 1782 of file gserialized_estimate.c.

1783{
1784 int d; /* counter */
1785 float8 selectivity;
1786 ND_BOX nd_box;
1787 ND_IBOX nd_ibox;
1788 int at[ND_DIMS];
1789 double cell_size[ND_DIMS];
1790 double min[ND_DIMS];
1791 double max[ND_DIMS];
1792 double total_count = 0.0;
1793 int ndims_max;
1794
1795 /* Calculate the overlap of the box on the histogram */
1796 if ( ! nd_stats )
1797 {
1798 elog(NOTICE, " estimate_selectivity called with null input");
1799 return FALLBACK_ND_SEL;
1800 }
1801
1802 ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
1803
1804 /* Initialize nd_box. */
1805 nd_box_from_gbox(box, &nd_box);
1806
1807 /*
1808 * To return 2D stats on an ND sample, we need to make the
1809 * 2D box cover the full range of the other dimensions in the
1810 * histogram.
1811 */
1812 POSTGIS_DEBUGF(3, " mode: %d", mode);
1813 if ( mode == 2 )
1814 {
1815 POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
1816 ndims_max = 2;
1817 }
1818
1819 POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
1820 POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
1821
1822 // elog(DEBUG1, "out histogram:\n%s", nd_stats_to_grid(nd_stats));
1823
1824 /*
1825 * Search box completely misses histogram extent?
1826 * We have to intersect in all N dimensions or else we have
1827 * zero interaction under the &&& operator. It's important
1828 * to short circuit in this case, as some of the tests below
1829 * will return junk results when run on non-intersecting inputs.
1830 */
1831 if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
1832 {
1833 POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
1834 return 0.0;
1835 }
1836
1837 /* Search box completely contains histogram extent! */
1838 if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
1839 {
1840 POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
1841 return 1.0;
1842 }
1843
1844 /* Calculate the overlap of the box on the histogram */
1845 if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
1846 {
1847 POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
1848 return FALLBACK_ND_SEL;
1849 }
1850
1851 /* Work out some measurements of the histogram */
1852 for ( d = 0; d < nd_stats->ndims; d++ )
1853 {
1854 /* Cell size in each dim */
1855 min[d] = nd_stats->extent.min[d];
1856 max[d] = nd_stats->extent.max[d];
1857 cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
1858 POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
1859
1860 /* Initialize the counter */
1861 at[d] = nd_ibox.min[d];
1862 }
1863
1864 /* Move through all the overlap values and sum them */
1865 do
1866 {
1867 float cell_count, ratio;
1868 ND_BOX nd_cell = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
1869
1870 /* We have to pro-rate partially overlapped cells. */
1871 for ( d = 0; d < nd_stats->ndims; d++ )
1872 {
1873 nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
1874 nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
1875 }
1876
1877 ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
1878 cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
1879
1880 /* Add the pro-rated count for this cell to the overall total */
1881 total_count += (double)cell_count * ratio;
1882 POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);
1883 }
1884 while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
1885
1886 /* Scale by the number of features in our histogram to get the proportion */
1887 selectivity = total_count / nd_stats->histogram_features;
1888
1889 POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
1890 POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
1891 POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
1892 POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
1893
1894 /* Prevent rounding overflows */
1895 if (selectivity > 1.0) selectivity = 1.0;
1896 else if (selectivity < 0.0) selectivity = 0.0;
1897
1898 return selectivity;
1899}
static int nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a overlaps b, false otherwise.
static int nd_increment(ND_IBOX *ibox, int ndims, int *counter)
Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one,...
static int gbox_ndims(const GBOX *gbox)
Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other bo...
static char * nd_box_to_json(const ND_BOX *nd_box, int ndims)
Convert an ND_BOX to a JSON string for printing.
#define FALLBACK_ND_SEL
More modest fallback selectivity factor.
static int nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
Return true if ND_BOX a contains b, false otherwise.
static void nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
Set the values of an ND_BOX from a GBOX.
static int nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the high...
static double nd_box_ratio(const ND_BOX *cover, const ND_BOX *target, int ndims)
static int nd_stats_value_index(const ND_STATS *stats, const int *indexes)

References ND_STATS_T::extent, FALLBACK_ND_SEL, gbox_ndims(), ND_STATS_T::histogram_cells, ND_STATS_T::histogram_features, ND_BOX_T::max, ND_BOX_T::min, ND_IBOX_T::min, nd_box_contains(), nd_box_from_gbox(), nd_box_intersects(), nd_box_overlap(), nd_box_ratio(), nd_box_to_json(), ND_DIMS, nd_increment(), nd_stats_value_index(), ND_STATS_T::ndims, ND_STATS_T::size, and ND_STATS_T::value.

Referenced by _postgis_gserialized_sel(), and gserialized_sel_internal().

Here is the call graph for this function:
Here is the caller graph for this function: