PostGIS 3.7.0dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches

◆ gserialized_gist_picksplit_2d()

Datum gserialized_gist_picksplit_2d ( PG_FUNCTION_ARGS  )

Definition at line 1781 of file gserialized_gist_2d.c.

1782{
1783 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
1784 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
1785 OffsetNumber i,
1786 maxoff;
1787 ConsiderSplitContext context;
1788 BOX2DF *leftBox, *rightBox;
1789 int dim,
1790 commonEntriesCount;
1791 SplitInterval *intervalsLower,
1792 *intervalsUpper;
1793 CommonEntry *commonEntries;
1794 int nentries;
1795
1796 POSTGIS_DEBUG(3, "[GIST] 'picksplit' entered");
1797
1798 memset(&context, 0, sizeof(ConsiderSplitContext));
1799
1800 maxoff = entryvec->n - 1;
1801 nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1;
1802
1803 /* Allocate arrays for intervals along axes */
1804 intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1805 intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1806
1807 /*
1808 * Calculate the overall minimum bounding box over all the entries.
1809 */
1810 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1811 {
1812 BOX2DF *box = (BOX2DF *)DatumGetPointer(entryvec->vector[i].key);
1813 if (i == FirstOffsetNumber)
1814 context.boundingBox = *box;
1815 else
1816 adjustBox(&context.boundingBox, box);
1817 }
1818
1819 POSTGIS_DEBUGF(4, "boundingBox is %s", box2df_to_string(
1820 &context.boundingBox));
1821
1822 /*
1823 * Iterate over axes for optimal split searching.
1824 */
1825 context.first = true; /* nothing selected yet */
1826 for (dim = 0; dim < 2; dim++)
1827 {
1828 float leftUpper,
1829 rightLower;
1830 int i1,
1831 i2;
1832
1833 /* Project each entry as an interval on the selected axis. */
1834 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1835 {
1836 BOX2DF *box = (BOX2DF *)DatumGetPointer(entryvec->vector[i].key);
1837 if (dim == 0)
1838 {
1839 intervalsLower[i - FirstOffsetNumber].lower = box->xmin;
1840 intervalsLower[i - FirstOffsetNumber].upper = box->xmax;
1841 }
1842 else
1843 {
1844 intervalsLower[i - FirstOffsetNumber].lower = box->ymin;
1845 intervalsLower[i - FirstOffsetNumber].upper = box->ymax;
1846 }
1847 }
1848
1849 /*
1850 * Make two arrays of intervals: one sorted by lower bound and another
1851 * sorted by upper bound.
1852 */
1853 memcpy(intervalsUpper, intervalsLower,
1854 sizeof(SplitInterval) * nentries);
1855 qsort(intervalsLower, nentries, sizeof(SplitInterval),
1857 qsort(intervalsUpper, nentries, sizeof(SplitInterval),
1859
1860 /*----
1861 * The goal is to form a left and right interval, so that every entry
1862 * interval is contained by either left or right interval (or both).
1863 *
1864 * For example, with the intervals (0,1), (1,3), (2,3), (2,4):
1865 *
1866 * 0 1 2 3 4
1867 * +-+
1868 * +---+
1869 * +-+
1870 * +---+
1871 *
1872 * The left and right intervals are of the form (0,a) and (b,4).
1873 * We first consider splits where b is the lower bound of an entry.
1874 * We iterate through all entries, and for each b, calculate the
1875 * smallest possible a. Then we consider splits where a is the
1876 * upper bound of an entry, and for each a, calculate the greatest
1877 * possible b.
1878 *
1879 * In the above example, the first loop would consider splits:
1880 * b=0: (0,1)-(0,4)
1881 * b=1: (0,1)-(1,4)
1882 * b=2: (0,3)-(2,4)
1883 *
1884 * And the second loop:
1885 * a=1: (0,1)-(1,4)
1886 * a=3: (0,3)-(2,4)
1887 * a=4: (0,4)-(2,4)
1888 */
1889
1890 /*
1891 * Iterate over lower bound of right group, finding smallest possible
1892 * upper bound of left group.
1893 */
1894 i1 = 0;
1895 i2 = 0;
1896 rightLower = intervalsLower[i1].lower;
1897 leftUpper = intervalsUpper[i2].lower;
1898 while (true)
1899 {
1900 /*
1901 * Find next lower bound of right group.
1902 */
1903 while (i1 < nentries && (rightLower == intervalsLower[i1].lower ||
1904 isnan(intervalsLower[i1].lower)))
1905 {
1906 leftUpper = Max(leftUpper, intervalsLower[i1].upper);
1907 i1++;
1908 }
1909 if (i1 >= nentries)
1910 break;
1911 rightLower = intervalsLower[i1].lower;
1912
1913 /*
1914 * Find count of intervals which anyway should be placed to the
1915 * left group.
1916 */
1917 while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
1918 i2++;
1919
1920 /*
1921 * Consider found split.
1922 */
1923 g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2);
1924 }
1925
1926 /*
1927 * Iterate over upper bound of left group finding greatest possible
1928 * lower bound of right group.
1929 */
1930 i1 = nentries - 1;
1931 i2 = nentries - 1;
1932 rightLower = intervalsLower[i1].upper;
1933 leftUpper = intervalsUpper[i2].upper;
1934 while (true)
1935 {
1936 /*
1937 * Find next upper bound of left group.
1938 */
1939 while (i2 >= 0 && (leftUpper == intervalsUpper[i2].upper ||
1940 isnan(intervalsUpper[i2].upper)))
1941 {
1942 rightLower = Min(rightLower, intervalsUpper[i2].lower);
1943 i2--;
1944 }
1945 if (i2 < 0)
1946 break;
1947 leftUpper = intervalsUpper[i2].upper;
1948
1949 /*
1950 * Find count of intervals which anyway should be placed to the
1951 * right group.
1952 */
1953 while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
1954 i1--;
1955
1956 /*
1957 * Consider found split.
1958 */
1959 g_box_consider_split(&context, dim,
1960 rightLower, i1 + 1, leftUpper, i2 + 1);
1961 }
1962 }
1963
1964 /*
1965 * If we failed to find any acceptable splits, use trivial split.
1966 */
1967 if (context.first)
1968 {
1969 POSTGIS_DEBUG(4, "no acceptable splits, trivial split");
1970 fallbackSplit(entryvec, v);
1971 PG_RETURN_POINTER(v);
1972 }
1973
1974 /*
1975 * Ok, we have now selected the split across one axis.
1976 *
1977 * While considering the splits, we already determined that there will be
1978 * enough entries in both groups to reach the desired ratio, but we did
1979 * not memorize which entries go to which group. So determine that now.
1980 */
1981
1982 POSTGIS_DEBUGF(4, "split direction: %d", context.dim);
1983
1984 /* Allocate vectors for results */
1985 v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1986 v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1987 v->spl_nleft = 0;
1988 v->spl_nright = 0;
1989
1990 /* Allocate bounding boxes of left and right groups */
1991 leftBox = palloc0(sizeof(BOX2DF));
1992 rightBox = palloc0(sizeof(BOX2DF));
1993
1994 /*
1995 * Allocate an array for "common entries" - entries which can be placed to
1996 * either group without affecting overlap along selected axis.
1997 */
1998 commonEntriesCount = 0;
1999 commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
2000
2001 /* Helper macros to place an entry in the left or right group */
2002#define PLACE_LEFT(box, off) \
2003 do { \
2004 if (v->spl_nleft > 0) \
2005 adjustBox(leftBox, box); \
2006 else \
2007 *leftBox = *(box); \
2008 v->spl_left[v->spl_nleft++] = off; \
2009 } while(0)
2010
2011#define PLACE_RIGHT(box, off) \
2012 do { \
2013 if (v->spl_nright > 0) \
2014 adjustBox(rightBox, box); \
2015 else \
2016 *rightBox = *(box); \
2017 v->spl_right[v->spl_nright++] = off; \
2018 } while(0)
2019
2020 /*
2021 * Distribute entries which can be distributed unambiguously, and collect
2022 * common entries.
2023 */
2024 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
2025 {
2026 float lower,
2027 upper;
2028
2029 /*
2030 * Get upper and lower bounds along selected axis.
2031 */
2032 BOX2DF *box = (BOX2DF *)DatumGetPointer(entryvec->vector[i].key);
2033 if (context.dim == 0)
2034 {
2035 lower = box->xmin;
2036 upper = box->xmax;
2037 }
2038 else
2039 {
2040 lower = box->ymin;
2041 upper = box->ymax;
2042 }
2043
2044 if (upper <= context.leftUpper || isnan(upper))
2045 {
2046 /* Fits to the left group */
2047 if (lower >= context.rightLower || isnan(lower))
2048 {
2049 /* Fits also to the right group, so "common entry" */
2050 commonEntries[commonEntriesCount++].index = i;
2051 }
2052 else
2053 {
2054 /* Doesn't fit to the right group, so join to the left group */
2055 PLACE_LEFT(box, i);
2056 }
2057 }
2058 else
2059 {
2060 /*
2061 * Each entry should fit on either left or right group. Since this
2062 * entry didn't fit on the left group, it better fit in the right
2063 * group.
2064 */
2065 Assert(lower >= context.rightLower);
2066
2067 /* Doesn't fit to the left group, so join to the right group */
2068 PLACE_RIGHT(box, i);
2069 }
2070 }
2071
2072 POSTGIS_DEBUGF(4, "leftBox is %s", box2df_to_string(leftBox));
2073 POSTGIS_DEBUGF(4, "rightBox is %s", box2df_to_string(rightBox));
2074
2075 /*
2076 * Distribute "common entries", if any.
2077 */
2078 if (commonEntriesCount > 0)
2079 {
2080 /* Calculate minimum number of entries that must be placed in both groups, to reach LIMIT_RATIO. */
2081 int m = ceil(LIMIT_RATIO * (double)nentries);
2082
2083 /* Recursive picksplit called, this is going to be the last split, keep split into 2 parts */
2084 if (nentries > INDEX_TUPLES_PER_PAGE && nentries <= 2 * INDEX_TUPLES_PER_PAGE)
2085 m = Max(m, nentries - INDEX_TUPLES_PER_PAGE);
2086
2087 Assert(m >= 1);
2088
2089 for (i = 0; i < (OffsetNumber)commonEntriesCount; i++)
2090 {
2091 BOX2DF *box = (BOX2DF *)DatumGetPointer(entryvec->vector[commonEntries[i].index].key);
2092
2093 if (v->spl_nright > 0 && v->spl_nleft > 0)
2094 {
2095 /* Calculate delta between penalties of join "common entries" to different groups. */
2096 commonEntries[i].delta =
2097 fabsf(box2df_penalty(leftBox, box) - box2df_penalty(rightBox, box));
2098 }
2099 else
2100 {
2101 /* One group became degenerate during split - distribute boxes by their size */
2102 commonEntries[i].delta = -box2df_penalty_single(box);
2103 }
2104 }
2105
2106 /* Sort "common entries" to distribute the most ambiguous entries first. */
2107 qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp);
2108
2109 /* Distribute "common entries" between groups. */
2110 for (i = 0; i < (OffsetNumber)commonEntriesCount; i++)
2111 {
2112 bool place_right = true;
2113 BOX2DF *box = (BOX2DF *)DatumGetPointer(entryvec->vector[commonEntries[i].index].key);
2114
2115 /* Recheck penalties. After we put undecided tuples to some side they're changed */
2116 float left_penalty = box2df_penalty(leftBox, box);
2117 float right_penalty = box2df_penalty(rightBox, box);
2118
2119 /* Guarantee at least one tuple on each side to make Postgres happy */
2120 if (v->spl_nright == 0 && v->spl_nleft > 0)
2121 place_right = true;
2122 else if (v->spl_nleft == 0 && v->spl_nright > 0)
2123 place_right = false;
2124 /* Check if we have to place this entry in either group to achieve LIMIT_RATIO */
2125 else if (v->spl_nleft + (commonEntriesCount - i) <= m)
2126 place_right = false;
2127 else if (v->spl_nright + (commonEntriesCount - i) <= m)
2128 place_right = true;
2129 /* Otherwise select the group by smaller penalty */
2130 else if (left_penalty < right_penalty)
2131 place_right = false;
2132 else if (right_penalty < left_penalty)
2133 place_right = true;
2134 /* or just put tuple into smaller group */
2135 else if (v->spl_nleft < v->spl_nright)
2136 place_right = false;
2137 else
2138 place_right = true;
2139
2140 if (place_right)
2141 PLACE_RIGHT(box, commonEntries[i].index);
2142 else
2143 PLACE_LEFT(box, commonEntries[i].index);
2144 }
2145 }
2146 v->spl_ldatum = PointerGetDatum(leftBox);
2147 v->spl_rdatum = PointerGetDatum(rightBox);
2148
2149 POSTGIS_DEBUG(4, "[GIST] 'picksplit' completed");
2150
2151 PG_RETURN_POINTER(v);
2152}
static void fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
static char * box2df_to_string(const BOX2DF *a)
#define INDEX_TUPLES_PER_PAGE
static int interval_cmp_upper(const void *i1, const void *i2)
#define LIMIT_RATIO
#define PLACE_RIGHT(box, off)
static float box2df_penalty_single(const BOX2DF *box)
static void g_box_consider_split(ConsiderSplitContext *context, int dimNum, float rightLower, int minLeftCount, float leftUpper, int maxLeftCount)
static int interval_cmp_lower(const void *i1, const void *i2)
static int common_entry_cmp(const void *i1, const void *i2)
#define PLACE_LEFT(box, off)
static float box2df_penalty(const BOX2DF *b1, const BOX2DF *b2)
static void adjustBox(BOX2DF *b, BOX2DF *addon)

References adjustBox(), ConsiderSplitContext::boundingBox, box2df_penalty(), box2df_penalty_single(), box2df_to_string(), common_entry_cmp(), CommonEntry::delta, ConsiderSplitContext::dim, ConsiderSplitContext::entriesCount, fallbackSplit(), ConsiderSplitContext::first, g_box_consider_split(), CommonEntry::index, INDEX_TUPLES_PER_PAGE, interval_cmp_lower(), interval_cmp_upper(), ConsiderSplitContext::leftUpper, LIMIT_RATIO, SplitInterval::lower, PLACE_LEFT, PLACE_RIGHT, ConsiderSplitContext::rightLower, and SplitInterval::upper.

Here is the call graph for this function: