PostGIS  3.4.0dev-r@@SVN_REVISION@@

◆ gserialized_gist_picksplit_2d()

Datum gserialized_gist_picksplit_2d ( PG_FUNCTION_ARGS  )

PG!6 Abs was removed in favor of standard c lib

Definition at line 1741 of file gserialized_gist_2d.c.

1742 {
1743  GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
1744  GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
1745  OffsetNumber i,
1746  maxoff;
1747  ConsiderSplitContext context;
1748  BOX2DF *box,
1749  *leftBox,
1750  *rightBox;
1751  int dim,
1752  commonEntriesCount;
1753  SplitInterval *intervalsLower,
1754  *intervalsUpper;
1755  CommonEntry *commonEntries;
1756  int nentries;
1757 
1758  POSTGIS_DEBUG(3, "[GIST] 'picksplit' entered");
1759 
1760  memset(&context, 0, sizeof(ConsiderSplitContext));
1761 
1762  maxoff = entryvec->n - 1;
1763  nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1;
1764 
1765  /* Allocate arrays for intervals along axes */
1766  intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1767  intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1768 
1769  /*
1770  * Calculate the overall minimum bounding box over all the entries.
1771  */
1772  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1773  {
1774  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1775  if (i == FirstOffsetNumber)
1776  context.boundingBox = *box;
1777  else
1778  adjustBox(&context.boundingBox, box);
1779  }
1780 
1781  POSTGIS_DEBUGF(4, "boundingBox is %s", box2df_to_string(
1782  &context.boundingBox));
1783 
1784  /*
1785  * Iterate over axes for optimal split searching.
1786  */
1787  context.first = true; /* nothing selected yet */
1788  for (dim = 0; dim < 2; dim++)
1789  {
1790  float leftUpper,
1791  rightLower;
1792  int i1,
1793  i2;
1794 
1795  /* Project each entry as an interval on the selected axis. */
1796  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1797  {
1798  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1799  if (dim == 0)
1800  {
1801  intervalsLower[i - FirstOffsetNumber].lower = box->xmin;
1802  intervalsLower[i - FirstOffsetNumber].upper = box->xmax;
1803  }
1804  else
1805  {
1806  intervalsLower[i - FirstOffsetNumber].lower = box->ymin;
1807  intervalsLower[i - FirstOffsetNumber].upper = box->ymax;
1808  }
1809  }
1810 
1811  /*
1812  * Make two arrays of intervals: one sorted by lower bound and another
1813  * sorted by upper bound.
1814  */
1815  memcpy(intervalsUpper, intervalsLower,
1816  sizeof(SplitInterval) * nentries);
1817  qsort(intervalsLower, nentries, sizeof(SplitInterval),
1819  qsort(intervalsUpper, nentries, sizeof(SplitInterval),
1821 
1822  /*----
1823  * The goal is to form a left and right interval, so that every entry
1824  * interval is contained by either left or right interval (or both).
1825  *
1826  * For example, with the intervals (0,1), (1,3), (2,3), (2,4):
1827  *
1828  * 0 1 2 3 4
1829  * +-+
1830  * +---+
1831  * +-+
1832  * +---+
1833  *
1834  * The left and right intervals are of the form (0,a) and (b,4).
1835  * We first consider splits where b is the lower bound of an entry.
1836  * We iterate through all entries, and for each b, calculate the
1837  * smallest possible a. Then we consider splits where a is the
1838  * upper bound of an entry, and for each a, calculate the greatest
1839  * possible b.
1840  *
1841  * In the above example, the first loop would consider splits:
1842  * b=0: (0,1)-(0,4)
1843  * b=1: (0,1)-(1,4)
1844  * b=2: (0,3)-(2,4)
1845  *
1846  * And the second loop:
1847  * a=1: (0,1)-(1,4)
1848  * a=3: (0,3)-(2,4)
1849  * a=4: (0,4)-(2,4)
1850  */
1851 
1852  /*
1853  * Iterate over lower bound of right group, finding smallest possible
1854  * upper bound of left group.
1855  */
1856  i1 = 0;
1857  i2 = 0;
1858  rightLower = intervalsLower[i1].lower;
1859  leftUpper = intervalsUpper[i2].lower;
1860  while (true)
1861  {
1862  /*
1863  * Find next lower bound of right group.
1864  */
1865  while (i1 < nentries && (rightLower == intervalsLower[i1].lower ||
1866  isnan(intervalsLower[i1].lower)))
1867  {
1868  leftUpper = Max(leftUpper, intervalsLower[i1].upper);
1869  i1++;
1870  }
1871  if (i1 >= nentries)
1872  break;
1873  rightLower = intervalsLower[i1].lower;
1874 
1875  /*
1876  * Find count of intervals which anyway should be placed to the
1877  * left group.
1878  */
1879  while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
1880  i2++;
1881 
1882  /*
1883  * Consider found split.
1884  */
1885  g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2);
1886  }
1887 
1888  /*
1889  * Iterate over upper bound of left group finding greatest possible
1890  * lower bound of right group.
1891  */
1892  i1 = nentries - 1;
1893  i2 = nentries - 1;
1894  rightLower = intervalsLower[i1].upper;
1895  leftUpper = intervalsUpper[i2].upper;
1896  while (true)
1897  {
1898  /*
1899  * Find next upper bound of left group.
1900  */
1901  while (i2 >= 0 && (leftUpper == intervalsUpper[i2].upper ||
1902  isnan(intervalsUpper[i2].upper)))
1903  {
1904  rightLower = Min(rightLower, intervalsUpper[i2].lower);
1905  i2--;
1906  }
1907  if (i2 < 0)
1908  break;
1909  leftUpper = intervalsUpper[i2].upper;
1910 
1911  /*
1912  * Find count of intervals which anyway should be placed to the
1913  * right group.
1914  */
1915  while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
1916  i1--;
1917 
1918  /*
1919  * Consider found split.
1920  */
1921  g_box_consider_split(&context, dim,
1922  rightLower, i1 + 1, leftUpper, i2 + 1);
1923  }
1924  }
1925 
1926  /*
1927  * If we failed to find any acceptable splits, use trivial split.
1928  */
1929  if (context.first)
1930  {
1931  POSTGIS_DEBUG(4, "no acceptable splits, trivial split");
1932  fallbackSplit(entryvec, v);
1933  PG_RETURN_POINTER(v);
1934  }
1935 
1936  /*
1937  * Ok, we have now selected the split across one axis.
1938  *
1939  * While considering the splits, we already determined that there will be
1940  * enough entries in both groups to reach the desired ratio, but we did
1941  * not memorize which entries go to which group. So determine that now.
1942  */
1943 
1944  POSTGIS_DEBUGF(4, "split direction: %d", context.dim);
1945 
1946  /* Allocate vectors for results */
1947  v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1948  v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1949  v->spl_nleft = 0;
1950  v->spl_nright = 0;
1951 
1952  /* Allocate bounding boxes of left and right groups */
1953  leftBox = palloc0(sizeof(BOX2DF));
1954  rightBox = palloc0(sizeof(BOX2DF));
1955 
1956  /*
1957  * Allocate an array for "common entries" - entries which can be placed to
1958  * either group without affecting overlap along selected axis.
1959  */
1960  commonEntriesCount = 0;
1961  commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
1962 
1963  /* Helper macros to place an entry in the left or right group */
1964 #define PLACE_LEFT(box, off) \
1965  do { \
1966  if (v->spl_nleft > 0) \
1967  adjustBox(leftBox, box); \
1968  else \
1969  *leftBox = *(box); \
1970  v->spl_left[v->spl_nleft++] = off; \
1971  } while(0)
1972 
1973 #define PLACE_RIGHT(box, off) \
1974  do { \
1975  if (v->spl_nright > 0) \
1976  adjustBox(rightBox, box); \
1977  else \
1978  *rightBox = *(box); \
1979  v->spl_right[v->spl_nright++] = off; \
1980  } while(0)
1981 
1982  /*
1983  * Distribute entries which can be distributed unambiguously, and collect
1984  * common entries.
1985  */
1986  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1987  {
1988  float lower,
1989  upper;
1990 
1991  /*
1992  * Get upper and lower bounds along selected axis.
1993  */
1994  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1995  if (context.dim == 0)
1996  {
1997  lower = box->xmin;
1998  upper = box->xmax;
1999  }
2000  else
2001  {
2002  lower = box->ymin;
2003  upper = box->ymax;
2004  }
2005 
2006  if (upper <= context.leftUpper || isnan(upper))
2007  {
2008  /* Fits to the left group */
2009  if (lower >= context.rightLower || isnan(lower))
2010  {
2011  /* Fits also to the right group, so "common entry" */
2012  commonEntries[commonEntriesCount++].index = i;
2013  }
2014  else
2015  {
2016  /* Doesn't fit to the right group, so join to the left group */
2017  PLACE_LEFT(box, i);
2018  }
2019  }
2020  else
2021  {
2022  /*
2023  * Each entry should fit on either left or right group. Since this
2024  * entry didn't fit on the left group, it better fit in the right
2025  * group.
2026  */
2027  Assert(lower >= context.rightLower);
2028 
2029  /* Doesn't fit to the left group, so join to the right group */
2030  PLACE_RIGHT(box, i);
2031  }
2032  }
2033 
2034  POSTGIS_DEBUGF(4, "leftBox is %s", box2df_to_string(leftBox));
2035  POSTGIS_DEBUGF(4, "rightBox is %s", box2df_to_string(rightBox));
2036 
2037  /*
2038  * Distribute "common entries", if any.
2039  */
2040  if (commonEntriesCount > 0)
2041  {
2042  /* Calculate minimum number of entries that must be placed in both groups, to reach LIMIT_RATIO. */
2043  int m = ceil(LIMIT_RATIO * (double)nentries);
2044 
2045  /* Recursive picksplit called, this is going to be the last split, keep split into 2 parts */
2046  if (nentries > INDEX_TUPLES_PER_PAGE && nentries <= 2 * INDEX_TUPLES_PER_PAGE)
2047  m = Max(m, nentries - INDEX_TUPLES_PER_PAGE);
2048 
2049  /*
2050  * Calculate delta between penalties of join "common entries" to
2051  * different groups.
2052  */
2053  for (i = 0; i < commonEntriesCount; i++)
2054  {
2055  box = (BOX2DF *) DatumGetPointer(entryvec->vector[
2056  commonEntries[i].index].key);
2058  commonEntries[i].delta = fabs(box2df_penalty(leftBox, box) - box2df_penalty(rightBox, box));
2059  }
2060 
2061  /*
2062  * Sort "common entries" by calculated deltas in order to distribute
2063  * the most ambiguous entries first.
2064  */
2065  qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp);
2066 
2067  /*
2068  * Distribute "common entries" between groups.
2069  */
2070  for (i = 0; i < commonEntriesCount; i++)
2071  {
2072  float right_penalty, left_penalty;
2073  bool place_right = true;
2074  box = (BOX2DF *)DatumGetPointer(entryvec->vector[commonEntries[i].index].key);
2075 
2076  /* Recheck penalties. After we put undecided tuples to some side they're changed */
2077  left_penalty = box2df_penalty(leftBox, box);
2078  right_penalty = box2df_penalty(rightBox, box);
2079 
2080  /* Check if penalty is absolutely telling a tuple should go to some side */
2081  if (right_penalty > 0 && left_penalty == 0)
2082  place_right = false;
2083  else if (right_penalty == 0 && left_penalty > 0)
2084  place_right = true;
2085  /* Check if we have to place this entry in either group to achieve LIMIT_RATIO */
2086  else if (v->spl_nleft + (commonEntriesCount - i) <= m)
2087  place_right = false;
2088  else if (v->spl_nright + (commonEntriesCount - i) <= m)
2089  place_right = true;
2090  /* Otherwise select the group by smaller penalty */
2091  else if (left_penalty < right_penalty)
2092  place_right = false;
2093  else if (right_penalty < left_penalty)
2094  place_right = true;
2095  /* or just put tuple into smaller group */
2096  else if (v->spl_nleft < v->spl_nright)
2097  place_right = false;
2098  else
2099  place_right = true;
2100 
2101  if (place_right)
2102  PLACE_RIGHT(box, commonEntries[i].index);
2103  else
2104  PLACE_LEFT(box, commonEntries[i].index);
2105  }
2106  }
2107  v->spl_ldatum = PointerGetDatum(leftBox);
2108  v->spl_rdatum = PointerGetDatum(rightBox);
2109 
2110  POSTGIS_DEBUG(4, "[GIST] 'picksplit' completed");
2111 
2112  PG_RETURN_POINTER(v);
2113 }
static char * box2df_to_string(const BOX2DF *a)
static void fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
#define INDEX_TUPLES_PER_PAGE
static int interval_cmp_upper(const void *i1, const void *i2)
#define LIMIT_RATIO
#define PLACE_RIGHT(box, off)
static void g_box_consider_split(ConsiderSplitContext *context, int dimNum, float rightLower, int minLeftCount, float leftUpper, int maxLeftCount)
static int interval_cmp_lower(const void *i1, const void *i2)
static int common_entry_cmp(const void *i1, const void *i2)
#define PLACE_LEFT(box, off)
static float box2df_penalty(const BOX2DF *b1, const BOX2DF *b2)
static void adjustBox(BOX2DF *b, BOX2DF *addon)

References adjustBox(), ConsiderSplitContext::boundingBox, box2df_penalty(), box2df_to_string(), common_entry_cmp(), CommonEntry::delta, ConsiderSplitContext::dim, ConsiderSplitContext::entriesCount, fallbackSplit(), ConsiderSplitContext::first, g_box_consider_split(), CommonEntry::index, INDEX_TUPLES_PER_PAGE, interval_cmp_lower(), interval_cmp_upper(), ConsiderSplitContext::leftUpper, LIMIT_RATIO, SplitInterval::lower, PLACE_LEFT, PLACE_RIGHT, ConsiderSplitContext::rightLower, and SplitInterval::upper.

Here is the call graph for this function: