PostGIS  3.7.0dev-r@@SVN_REVISION@@

◆ gserialized_gist_picksplit_2d()

Datum gserialized_gist_picksplit_2d ( PG_FUNCTION_ARGS  )

Definition at line 1781 of file gserialized_gist_2d.c.

1782 {
1783  GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
1784  GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
1785  OffsetNumber i,
1786  maxoff;
1787  ConsiderSplitContext context;
1788  BOX2DF *leftBox, *rightBox;
1789  int dim,
1790  commonEntriesCount;
1791  SplitInterval *intervalsLower,
1792  *intervalsUpper;
1793  CommonEntry *commonEntries;
1794  int nentries;
1795 
1796  POSTGIS_DEBUG(3, "[GIST] 'picksplit' entered");
1797 
1798  memset(&context, 0, sizeof(ConsiderSplitContext));
1799 
1800  maxoff = entryvec->n - 1;
1801  nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1;
1802 
1803  /* Allocate arrays for intervals along axes */
1804  intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1805  intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1806 
1807  /*
1808  * Calculate the overall minimum bounding box over all the entries.
1809  */
1810  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1811  {
1812  BOX2DF *box = (BOX2DF *)DatumGetPointer(entryvec->vector[i].key);
1813  if (i == FirstOffsetNumber)
1814  context.boundingBox = *box;
1815  else
1816  adjustBox(&context.boundingBox, box);
1817  }
1818 
1819  POSTGIS_DEBUGF(4, "boundingBox is %s", box2df_to_string(
1820  &context.boundingBox));
1821 
1822  /*
1823  * Iterate over axes for optimal split searching.
1824  */
1825  context.first = true; /* nothing selected yet */
1826  for (dim = 0; dim < 2; dim++)
1827  {
1828  float leftUpper,
1829  rightLower;
1830  int i1,
1831  i2;
1832 
1833  /* Project each entry as an interval on the selected axis. */
1834  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1835  {
1836  BOX2DF *box = (BOX2DF *)DatumGetPointer(entryvec->vector[i].key);
1837  if (dim == 0)
1838  {
1839  intervalsLower[i - FirstOffsetNumber].lower = box->xmin;
1840  intervalsLower[i - FirstOffsetNumber].upper = box->xmax;
1841  }
1842  else
1843  {
1844  intervalsLower[i - FirstOffsetNumber].lower = box->ymin;
1845  intervalsLower[i - FirstOffsetNumber].upper = box->ymax;
1846  }
1847  }
1848 
1849  /*
1850  * Make two arrays of intervals: one sorted by lower bound and another
1851  * sorted by upper bound.
1852  */
1853  memcpy(intervalsUpper, intervalsLower,
1854  sizeof(SplitInterval) * nentries);
1855  qsort(intervalsLower, nentries, sizeof(SplitInterval),
1857  qsort(intervalsUpper, nentries, sizeof(SplitInterval),
1859 
1860  /*----
1861  * The goal is to form a left and right interval, so that every entry
1862  * interval is contained by either left or right interval (or both).
1863  *
1864  * For example, with the intervals (0,1), (1,3), (2,3), (2,4):
1865  *
1866  * 0 1 2 3 4
1867  * +-+
1868  * +---+
1869  * +-+
1870  * +---+
1871  *
1872  * The left and right intervals are of the form (0,a) and (b,4).
1873  * We first consider splits where b is the lower bound of an entry.
1874  * We iterate through all entries, and for each b, calculate the
1875  * smallest possible a. Then we consider splits where a is the
1876  * upper bound of an entry, and for each a, calculate the greatest
1877  * possible b.
1878  *
1879  * In the above example, the first loop would consider splits:
1880  * b=0: (0,1)-(0,4)
1881  * b=1: (0,1)-(1,4)
1882  * b=2: (0,3)-(2,4)
1883  *
1884  * And the second loop:
1885  * a=1: (0,1)-(1,4)
1886  * a=3: (0,3)-(2,4)
1887  * a=4: (0,4)-(2,4)
1888  */
1889 
1890  /*
1891  * Iterate over lower bound of right group, finding smallest possible
1892  * upper bound of left group.
1893  */
1894  i1 = 0;
1895  i2 = 0;
1896  rightLower = intervalsLower[i1].lower;
1897  leftUpper = intervalsUpper[i2].lower;
1898  while (true)
1899  {
1900  /*
1901  * Find next lower bound of right group.
1902  */
1903  while (i1 < nentries && (rightLower == intervalsLower[i1].lower ||
1904  isnan(intervalsLower[i1].lower)))
1905  {
1906  leftUpper = Max(leftUpper, intervalsLower[i1].upper);
1907  i1++;
1908  }
1909  if (i1 >= nentries)
1910  break;
1911  rightLower = intervalsLower[i1].lower;
1912 
1913  /*
1914  * Find count of intervals which anyway should be placed to the
1915  * left group.
1916  */
1917  while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
1918  i2++;
1919 
1920  /*
1921  * Consider found split.
1922  */
1923  g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2);
1924  }
1925 
1926  /*
1927  * Iterate over upper bound of left group finding greatest possible
1928  * lower bound of right group.
1929  */
1930  i1 = nentries - 1;
1931  i2 = nentries - 1;
1932  rightLower = intervalsLower[i1].upper;
1933  leftUpper = intervalsUpper[i2].upper;
1934  while (true)
1935  {
1936  /*
1937  * Find next upper bound of left group.
1938  */
1939  while (i2 >= 0 && (leftUpper == intervalsUpper[i2].upper ||
1940  isnan(intervalsUpper[i2].upper)))
1941  {
1942  rightLower = Min(rightLower, intervalsUpper[i2].lower);
1943  i2--;
1944  }
1945  if (i2 < 0)
1946  break;
1947  leftUpper = intervalsUpper[i2].upper;
1948 
1949  /*
1950  * Find count of intervals which anyway should be placed to the
1951  * right group.
1952  */
1953  while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
1954  i1--;
1955 
1956  /*
1957  * Consider found split.
1958  */
1959  g_box_consider_split(&context, dim,
1960  rightLower, i1 + 1, leftUpper, i2 + 1);
1961  }
1962  }
1963 
1964  /*
1965  * If we failed to find any acceptable splits, use trivial split.
1966  */
1967  if (context.first)
1968  {
1969  POSTGIS_DEBUG(4, "no acceptable splits, trivial split");
1970  fallbackSplit(entryvec, v);
1971  PG_RETURN_POINTER(v);
1972  }
1973 
1974  /*
1975  * Ok, we have now selected the split across one axis.
1976  *
1977  * While considering the splits, we already determined that there will be
1978  * enough entries in both groups to reach the desired ratio, but we did
1979  * not memorize which entries go to which group. So determine that now.
1980  */
1981 
1982  POSTGIS_DEBUGF(4, "split direction: %d", context.dim);
1983 
1984  /* Allocate vectors for results */
1985  v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1986  v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1987  v->spl_nleft = 0;
1988  v->spl_nright = 0;
1989 
1990  /* Allocate bounding boxes of left and right groups */
1991  leftBox = palloc0(sizeof(BOX2DF));
1992  rightBox = palloc0(sizeof(BOX2DF));
1993 
1994  /*
1995  * Allocate an array for "common entries" - entries which can be placed to
1996  * either group without affecting overlap along selected axis.
1997  */
1998  commonEntriesCount = 0;
1999  commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
2000 
2001  /* Helper macros to place an entry in the left or right group */
2002 #define PLACE_LEFT(box, off) \
2003  do { \
2004  if (v->spl_nleft > 0) \
2005  adjustBox(leftBox, box); \
2006  else \
2007  *leftBox = *(box); \
2008  v->spl_left[v->spl_nleft++] = off; \
2009  } while(0)
2010 
2011 #define PLACE_RIGHT(box, off) \
2012  do { \
2013  if (v->spl_nright > 0) \
2014  adjustBox(rightBox, box); \
2015  else \
2016  *rightBox = *(box); \
2017  v->spl_right[v->spl_nright++] = off; \
2018  } while(0)
2019 
2020  /*
2021  * Distribute entries which can be distributed unambiguously, and collect
2022  * common entries.
2023  */
2024  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
2025  {
2026  float lower,
2027  upper;
2028 
2029  /*
2030  * Get upper and lower bounds along selected axis.
2031  */
2032  BOX2DF *box = (BOX2DF *)DatumGetPointer(entryvec->vector[i].key);
2033  if (context.dim == 0)
2034  {
2035  lower = box->xmin;
2036  upper = box->xmax;
2037  }
2038  else
2039  {
2040  lower = box->ymin;
2041  upper = box->ymax;
2042  }
2043 
2044  if (upper <= context.leftUpper || isnan(upper))
2045  {
2046  /* Fits to the left group */
2047  if (lower >= context.rightLower || isnan(lower))
2048  {
2049  /* Fits also to the right group, so "common entry" */
2050  commonEntries[commonEntriesCount++].index = i;
2051  }
2052  else
2053  {
2054  /* Doesn't fit to the right group, so join to the left group */
2055  PLACE_LEFT(box, i);
2056  }
2057  }
2058  else
2059  {
2060  /*
2061  * Each entry should fit on either left or right group. Since this
2062  * entry didn't fit on the left group, it better fit in the right
2063  * group.
2064  */
2065  Assert(lower >= context.rightLower);
2066 
2067  /* Doesn't fit to the left group, so join to the right group */
2068  PLACE_RIGHT(box, i);
2069  }
2070  }
2071 
2072  POSTGIS_DEBUGF(4, "leftBox is %s", box2df_to_string(leftBox));
2073  POSTGIS_DEBUGF(4, "rightBox is %s", box2df_to_string(rightBox));
2074 
2075  /*
2076  * Distribute "common entries", if any.
2077  */
2078  if (commonEntriesCount > 0)
2079  {
2080  /* Calculate minimum number of entries that must be placed in both groups, to reach LIMIT_RATIO. */
2081  int m = ceil(LIMIT_RATIO * (double)nentries);
2082 
2083  /* Recursive picksplit called, this is going to be the last split, keep split into 2 parts */
2084  if (nentries > INDEX_TUPLES_PER_PAGE && nentries <= 2 * INDEX_TUPLES_PER_PAGE)
2085  m = Max(m, nentries - INDEX_TUPLES_PER_PAGE);
2086 
2087  Assert(m >= 1);
2088 
2089  for (i = 0; i < (OffsetNumber)commonEntriesCount; i++)
2090  {
2091  BOX2DF *box = (BOX2DF *)DatumGetPointer(entryvec->vector[commonEntries[i].index].key);
2092 
2093  if (v->spl_nright > 0 && v->spl_nleft > 0)
2094  {
2095  /* Calculate delta between penalties of join "common entries" to different groups. */
2096  commonEntries[i].delta =
2097  fabsf(box2df_penalty(leftBox, box) - box2df_penalty(rightBox, box));
2098  }
2099  else
2100  {
2101  /* One group became degenerate during split - distribute boxes by their size */
2102  commonEntries[i].delta = -box2df_penalty_single(box);
2103  }
2104  }
2105 
2106  /* Sort "common entries" to distribute the most ambiguous entries first. */
2107  qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp);
2108 
2109  /* Distribute "common entries" between groups. */
2110  for (i = 0; i < (OffsetNumber)commonEntriesCount; i++)
2111  {
2112  bool place_right = true;
2113  BOX2DF *box = (BOX2DF *)DatumGetPointer(entryvec->vector[commonEntries[i].index].key);
2114 
2115  /* Recheck penalties. After we put undecided tuples to some side they're changed */
2116  float left_penalty = box2df_penalty(leftBox, box);
2117  float right_penalty = box2df_penalty(rightBox, box);
2118 
2119  /* Guarantee at least one tuple on each side to make Postgres happy */
2120  if (v->spl_nright == 0 && v->spl_nleft > 0)
2121  place_right = true;
2122  else if (v->spl_nleft == 0 && v->spl_nright > 0)
2123  place_right = false;
2124  /* Check if we have to place this entry in either group to achieve LIMIT_RATIO */
2125  else if (v->spl_nleft + (commonEntriesCount - i) <= m)
2126  place_right = false;
2127  else if (v->spl_nright + (commonEntriesCount - i) <= m)
2128  place_right = true;
2129  /* Otherwise select the group by smaller penalty */
2130  else if (left_penalty < right_penalty)
2131  place_right = false;
2132  else if (right_penalty < left_penalty)
2133  place_right = true;
2134  /* or just put tuple into smaller group */
2135  else if (v->spl_nleft < v->spl_nright)
2136  place_right = false;
2137  else
2138  place_right = true;
2139 
2140  if (place_right)
2141  PLACE_RIGHT(box, commonEntries[i].index);
2142  else
2143  PLACE_LEFT(box, commonEntries[i].index);
2144  }
2145  }
2146  v->spl_ldatum = PointerGetDatum(leftBox);
2147  v->spl_rdatum = PointerGetDatum(rightBox);
2148 
2149  POSTGIS_DEBUG(4, "[GIST] 'picksplit' completed");
2150 
2151  PG_RETURN_POINTER(v);
2152 }
static char * box2df_to_string(const BOX2DF *a)
static void fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
#define INDEX_TUPLES_PER_PAGE
static int interval_cmp_upper(const void *i1, const void *i2)
#define LIMIT_RATIO
#define PLACE_RIGHT(box, off)
static float box2df_penalty_single(const BOX2DF *box)
static void g_box_consider_split(ConsiderSplitContext *context, int dimNum, float rightLower, int minLeftCount, float leftUpper, int maxLeftCount)
static int interval_cmp_lower(const void *i1, const void *i2)
static int common_entry_cmp(const void *i1, const void *i2)
#define PLACE_LEFT(box, off)
static float box2df_penalty(const BOX2DF *b1, const BOX2DF *b2)
static void adjustBox(BOX2DF *b, BOX2DF *addon)

References adjustBox(), ConsiderSplitContext::boundingBox, box2df_penalty(), box2df_penalty_single(), box2df_to_string(), common_entry_cmp(), CommonEntry::delta, ConsiderSplitContext::dim, ConsiderSplitContext::entriesCount, fallbackSplit(), ConsiderSplitContext::first, g_box_consider_split(), CommonEntry::index, INDEX_TUPLES_PER_PAGE, interval_cmp_lower(), interval_cmp_upper(), ConsiderSplitContext::leftUpper, LIMIT_RATIO, SplitInterval::lower, PLACE_LEFT, PLACE_RIGHT, ConsiderSplitContext::rightLower, and SplitInterval::upper.

Here is the call graph for this function: