PostGIS 3.6.2dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches

◆ gserialized_gist_picksplit_2d()

Datum gserialized_gist_picksplit_2d ( PG_FUNCTION_ARGS  )

PG!6 Abs was removed in favor of standard c lib

Definition at line 1760 of file gserialized_gist_2d.c.

1761{
1762 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
1763 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
1764 OffsetNumber i,
1765 maxoff;
1766 ConsiderSplitContext context;
1767 BOX2DF *box,
1768 *leftBox,
1769 *rightBox;
1770 int dim,
1771 commonEntriesCount;
1772 SplitInterval *intervalsLower,
1773 *intervalsUpper;
1774 CommonEntry *commonEntries;
1775 int nentries;
1776
1777 POSTGIS_DEBUG(3, "[GIST] 'picksplit' entered");
1778
1779 memset(&context, 0, sizeof(ConsiderSplitContext));
1780
1781 maxoff = entryvec->n - 1;
1782 nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1;
1783
1784 /* Allocate arrays for intervals along axes */
1785 intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1786 intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1787
1788 /*
1789 * Calculate the overall minimum bounding box over all the entries.
1790 */
1791 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1792 {
1793 box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1794 if (i == FirstOffsetNumber)
1795 context.boundingBox = *box;
1796 else
1797 adjustBox(&context.boundingBox, box);
1798 }
1799
1800 POSTGIS_DEBUGF(4, "boundingBox is %s", box2df_to_string(
1801 &context.boundingBox));
1802
1803 /*
1804 * Iterate over axes for optimal split searching.
1805 */
1806 context.first = true; /* nothing selected yet */
1807 for (dim = 0; dim < 2; dim++)
1808 {
1809 float leftUpper,
1810 rightLower;
1811 int i1,
1812 i2;
1813
1814 /* Project each entry as an interval on the selected axis. */
1815 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1816 {
1817 box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1818 if (dim == 0)
1819 {
1820 intervalsLower[i - FirstOffsetNumber].lower = box->xmin;
1821 intervalsLower[i - FirstOffsetNumber].upper = box->xmax;
1822 }
1823 else
1824 {
1825 intervalsLower[i - FirstOffsetNumber].lower = box->ymin;
1826 intervalsLower[i - FirstOffsetNumber].upper = box->ymax;
1827 }
1828 }
1829
1830 /*
1831 * Make two arrays of intervals: one sorted by lower bound and another
1832 * sorted by upper bound.
1833 */
1834 memcpy(intervalsUpper, intervalsLower,
1835 sizeof(SplitInterval) * nentries);
1836 qsort(intervalsLower, nentries, sizeof(SplitInterval),
1838 qsort(intervalsUpper, nentries, sizeof(SplitInterval),
1840
1841 /*----
1842 * The goal is to form a left and right interval, so that every entry
1843 * interval is contained by either left or right interval (or both).
1844 *
1845 * For example, with the intervals (0,1), (1,3), (2,3), (2,4):
1846 *
1847 * 0 1 2 3 4
1848 * +-+
1849 * +---+
1850 * +-+
1851 * +---+
1852 *
1853 * The left and right intervals are of the form (0,a) and (b,4).
1854 * We first consider splits where b is the lower bound of an entry.
1855 * We iterate through all entries, and for each b, calculate the
1856 * smallest possible a. Then we consider splits where a is the
1857 * upper bound of an entry, and for each a, calculate the greatest
1858 * possible b.
1859 *
1860 * In the above example, the first loop would consider splits:
1861 * b=0: (0,1)-(0,4)
1862 * b=1: (0,1)-(1,4)
1863 * b=2: (0,3)-(2,4)
1864 *
1865 * And the second loop:
1866 * a=1: (0,1)-(1,4)
1867 * a=3: (0,3)-(2,4)
1868 * a=4: (0,4)-(2,4)
1869 */
1870
1871 /*
1872 * Iterate over lower bound of right group, finding smallest possible
1873 * upper bound of left group.
1874 */
1875 i1 = 0;
1876 i2 = 0;
1877 rightLower = intervalsLower[i1].lower;
1878 leftUpper = intervalsUpper[i2].lower;
1879 while (true)
1880 {
1881 /*
1882 * Find next lower bound of right group.
1883 */
1884 while (i1 < nentries && (rightLower == intervalsLower[i1].lower ||
1885 isnan(intervalsLower[i1].lower)))
1886 {
1887 leftUpper = Max(leftUpper, intervalsLower[i1].upper);
1888 i1++;
1889 }
1890 if (i1 >= nentries)
1891 break;
1892 rightLower = intervalsLower[i1].lower;
1893
1894 /*
1895 * Find count of intervals which anyway should be placed to the
1896 * left group.
1897 */
1898 while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
1899 i2++;
1900
1901 /*
1902 * Consider found split.
1903 */
1904 g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2);
1905 }
1906
1907 /*
1908 * Iterate over upper bound of left group finding greatest possible
1909 * lower bound of right group.
1910 */
1911 i1 = nentries - 1;
1912 i2 = nentries - 1;
1913 rightLower = intervalsLower[i1].upper;
1914 leftUpper = intervalsUpper[i2].upper;
1915 while (true)
1916 {
1917 /*
1918 * Find next upper bound of left group.
1919 */
1920 while (i2 >= 0 && (leftUpper == intervalsUpper[i2].upper ||
1921 isnan(intervalsUpper[i2].upper)))
1922 {
1923 rightLower = Min(rightLower, intervalsUpper[i2].lower);
1924 i2--;
1925 }
1926 if (i2 < 0)
1927 break;
1928 leftUpper = intervalsUpper[i2].upper;
1929
1930 /*
1931 * Find count of intervals which anyway should be placed to the
1932 * right group.
1933 */
1934 while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
1935 i1--;
1936
1937 /*
1938 * Consider found split.
1939 */
1940 g_box_consider_split(&context, dim,
1941 rightLower, i1 + 1, leftUpper, i2 + 1);
1942 }
1943 }
1944
1945 /*
1946 * If we failed to find any acceptable splits, use trivial split.
1947 */
1948 if (context.first)
1949 {
1950 POSTGIS_DEBUG(4, "no acceptable splits, trivial split");
1951 fallbackSplit(entryvec, v);
1952 PG_RETURN_POINTER(v);
1953 }
1954
1955 /*
1956 * Ok, we have now selected the split across one axis.
1957 *
1958 * While considering the splits, we already determined that there will be
1959 * enough entries in both groups to reach the desired ratio, but we did
1960 * not memorize which entries go to which group. So determine that now.
1961 */
1962
1963 POSTGIS_DEBUGF(4, "split direction: %d", context.dim);
1964
1965 /* Allocate vectors for results */
1966 v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1967 v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1968 v->spl_nleft = 0;
1969 v->spl_nright = 0;
1970
1971 /* Allocate bounding boxes of left and right groups */
1972 leftBox = palloc0(sizeof(BOX2DF));
1973 rightBox = palloc0(sizeof(BOX2DF));
1974
1975 /*
1976 * Allocate an array for "common entries" - entries which can be placed to
1977 * either group without affecting overlap along selected axis.
1978 */
1979 commonEntriesCount = 0;
1980 commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
1981
1982 /* Helper macros to place an entry in the left or right group */
1983#define PLACE_LEFT(box, off) \
1984 do { \
1985 if (v->spl_nleft > 0) \
1986 adjustBox(leftBox, box); \
1987 else \
1988 *leftBox = *(box); \
1989 v->spl_left[v->spl_nleft++] = off; \
1990 } while(0)
1991
1992#define PLACE_RIGHT(box, off) \
1993 do { \
1994 if (v->spl_nright > 0) \
1995 adjustBox(rightBox, box); \
1996 else \
1997 *rightBox = *(box); \
1998 v->spl_right[v->spl_nright++] = off; \
1999 } while(0)
2000
2001 /*
2002 * Distribute entries which can be distributed unambiguously, and collect
2003 * common entries.
2004 */
2005 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
2006 {
2007 float lower,
2008 upper;
2009
2010 /*
2011 * Get upper and lower bounds along selected axis.
2012 */
2013 box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
2014 if (context.dim == 0)
2015 {
2016 lower = box->xmin;
2017 upper = box->xmax;
2018 }
2019 else
2020 {
2021 lower = box->ymin;
2022 upper = box->ymax;
2023 }
2024
2025 if (upper <= context.leftUpper || isnan(upper))
2026 {
2027 /* Fits to the left group */
2028 if (lower >= context.rightLower || isnan(lower))
2029 {
2030 /* Fits also to the right group, so "common entry" */
2031 commonEntries[commonEntriesCount++].index = i;
2032 }
2033 else
2034 {
2035 /* Doesn't fit to the right group, so join to the left group */
2036 PLACE_LEFT(box, i);
2037 }
2038 }
2039 else
2040 {
2041 /*
2042 * Each entry should fit on either left or right group. Since this
2043 * entry didn't fit on the left group, it better fit in the right
2044 * group.
2045 */
2046 Assert(lower >= context.rightLower);
2047
2048 /* Doesn't fit to the left group, so join to the right group */
2049 PLACE_RIGHT(box, i);
2050 }
2051 }
2052
2053 POSTGIS_DEBUGF(4, "leftBox is %s", box2df_to_string(leftBox));
2054 POSTGIS_DEBUGF(4, "rightBox is %s", box2df_to_string(rightBox));
2055
2056 /*
2057 * Distribute "common entries", if any.
2058 */
2059 if (commonEntriesCount > 0)
2060 {
2061 /* Calculate minimum number of entries that must be placed in both groups, to reach LIMIT_RATIO. */
2062 int m = ceil(LIMIT_RATIO * (double)nentries);
2063
2064 /* Recursive picksplit called, this is going to be the last split, keep split into 2 parts */
2065 if (nentries > INDEX_TUPLES_PER_PAGE && nentries <= 2 * INDEX_TUPLES_PER_PAGE)
2066 m = Max(m, nentries - INDEX_TUPLES_PER_PAGE);
2067
2068 /*
2069 * Calculate delta between penalties of join "common entries" to
2070 * different groups.
2071 */
2072 for (i = 0; i < (OffsetNumber)commonEntriesCount; i++)
2073 {
2074 box = (BOX2DF *) DatumGetPointer(entryvec->vector[
2075 commonEntries[i].index].key);
2077 commonEntries[i].delta = fabs(box2df_penalty(leftBox, box) - box2df_penalty(rightBox, box));
2078 }
2079
2080 /*
2081 * Sort "common entries" by calculated deltas in order to distribute
2082 * the most ambiguous entries first.
2083 */
2084 qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp);
2085
2086 /*
2087 * Distribute "common entries" between groups.
2088 */
2089 for (i = 0; i < (OffsetNumber)commonEntriesCount; i++)
2090 {
2091 float right_penalty, left_penalty;
2092 bool place_right = true;
2093 box = (BOX2DF *)DatumGetPointer(entryvec->vector[commonEntries[i].index].key);
2094
2095 /* Recheck penalties. After we put undecided tuples to some side they're changed */
2096 left_penalty = box2df_penalty(leftBox, box);
2097 right_penalty = box2df_penalty(rightBox, box);
2098
2099 /* Check if penalty is absolutely telling a tuple should go to some side */
2100 if (right_penalty > 0 && left_penalty == 0)
2101 place_right = false;
2102 else if (right_penalty == 0 && left_penalty > 0)
2103 place_right = true;
2104 /* Check if we have to place this entry in either group to achieve LIMIT_RATIO */
2105 else if (v->spl_nleft + (commonEntriesCount - i) <= m)
2106 place_right = false;
2107 else if (v->spl_nright + (commonEntriesCount - i) <= m)
2108 place_right = true;
2109 /* Otherwise select the group by smaller penalty */
2110 else if (left_penalty < right_penalty)
2111 place_right = false;
2112 else if (right_penalty < left_penalty)
2113 place_right = true;
2114 /* or just put tuple into smaller group */
2115 else if (v->spl_nleft < v->spl_nright)
2116 place_right = false;
2117 else
2118 place_right = true;
2119
2120 if (place_right)
2121 PLACE_RIGHT(box, commonEntries[i].index);
2122 else
2123 PLACE_LEFT(box, commonEntries[i].index);
2124 }
2125 }
2126 v->spl_ldatum = PointerGetDatum(leftBox);
2127 v->spl_rdatum = PointerGetDatum(rightBox);
2128
2129 POSTGIS_DEBUG(4, "[GIST] 'picksplit' completed");
2130
2131 PG_RETURN_POINTER(v);
2132}
static void fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
static char * box2df_to_string(const BOX2DF *a)
#define INDEX_TUPLES_PER_PAGE
static int interval_cmp_upper(const void *i1, const void *i2)
#define LIMIT_RATIO
#define PLACE_RIGHT(box, off)
static void g_box_consider_split(ConsiderSplitContext *context, int dimNum, float rightLower, int minLeftCount, float leftUpper, int maxLeftCount)
static int interval_cmp_lower(const void *i1, const void *i2)
static int common_entry_cmp(const void *i1, const void *i2)
#define PLACE_LEFT(box, off)
static float box2df_penalty(const BOX2DF *b1, const BOX2DF *b2)
static void adjustBox(BOX2DF *b, BOX2DF *addon)

References adjustBox(), ConsiderSplitContext::boundingBox, box2df_penalty(), box2df_to_string(), common_entry_cmp(), CommonEntry::delta, ConsiderSplitContext::dim, ConsiderSplitContext::entriesCount, fallbackSplit(), ConsiderSplitContext::first, g_box_consider_split(), CommonEntry::index, INDEX_TUPLES_PER_PAGE, interval_cmp_lower(), interval_cmp_upper(), ConsiderSplitContext::leftUpper, LIMIT_RATIO, SplitInterval::lower, PLACE_LEFT, PLACE_RIGHT, ConsiderSplitContext::rightLower, and SplitInterval::upper.

Here is the call graph for this function: