PostGIS  3.7.0dev-r@@SVN_REVISION@@

◆ gserialized_gist_picksplit_2d()

Datum gserialized_gist_picksplit_2d ( PG_FUNCTION_ARGS  )

PG!6 Abs was removed in favor of standard c lib

Definition at line 1760 of file gserialized_gist_2d.c.

1761 {
1762  GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
1763  GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
1764  OffsetNumber i,
1765  maxoff;
1766  ConsiderSplitContext context;
1767  BOX2DF *box,
1768  *leftBox,
1769  *rightBox;
1770  int dim,
1771  commonEntriesCount;
1772  SplitInterval *intervalsLower,
1773  *intervalsUpper;
1774  CommonEntry *commonEntries;
1775  int nentries;
1776 
1777  POSTGIS_DEBUG(3, "[GIST] 'picksplit' entered");
1778 
1779  memset(&context, 0, sizeof(ConsiderSplitContext));
1780 
1781  maxoff = entryvec->n - 1;
1782  nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1;
1783 
1784  /* Allocate arrays for intervals along axes */
1785  intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1786  intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1787 
1788  /*
1789  * Calculate the overall minimum bounding box over all the entries.
1790  */
1791  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1792  {
1793  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1794  if (i == FirstOffsetNumber)
1795  context.boundingBox = *box;
1796  else
1797  adjustBox(&context.boundingBox, box);
1798  }
1799 
1800  POSTGIS_DEBUGF(4, "boundingBox is %s", box2df_to_string(
1801  &context.boundingBox));
1802 
1803  /*
1804  * Iterate over axes for optimal split searching.
1805  */
1806  context.first = true; /* nothing selected yet */
1807  for (dim = 0; dim < 2; dim++)
1808  {
1809  float leftUpper,
1810  rightLower;
1811  int i1,
1812  i2;
1813 
1814  /* Project each entry as an interval on the selected axis. */
1815  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1816  {
1817  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1818  if (dim == 0)
1819  {
1820  intervalsLower[i - FirstOffsetNumber].lower = box->xmin;
1821  intervalsLower[i - FirstOffsetNumber].upper = box->xmax;
1822  }
1823  else
1824  {
1825  intervalsLower[i - FirstOffsetNumber].lower = box->ymin;
1826  intervalsLower[i - FirstOffsetNumber].upper = box->ymax;
1827  }
1828  }
1829 
1830  /*
1831  * Make two arrays of intervals: one sorted by lower bound and another
1832  * sorted by upper bound.
1833  */
1834  memcpy(intervalsUpper, intervalsLower,
1835  sizeof(SplitInterval) * nentries);
1836  qsort(intervalsLower, nentries, sizeof(SplitInterval),
1838  qsort(intervalsUpper, nentries, sizeof(SplitInterval),
1840 
1841  /*----
1842  * The goal is to form a left and right interval, so that every entry
1843  * interval is contained by either left or right interval (or both).
1844  *
1845  * For example, with the intervals (0,1), (1,3), (2,3), (2,4):
1846  *
1847  * 0 1 2 3 4
1848  * +-+
1849  * +---+
1850  * +-+
1851  * +---+
1852  *
1853  * The left and right intervals are of the form (0,a) and (b,4).
1854  * We first consider splits where b is the lower bound of an entry.
1855  * We iterate through all entries, and for each b, calculate the
1856  * smallest possible a. Then we consider splits where a is the
1857  * upper bound of an entry, and for each a, calculate the greatest
1858  * possible b.
1859  *
1860  * In the above example, the first loop would consider splits:
1861  * b=0: (0,1)-(0,4)
1862  * b=1: (0,1)-(1,4)
1863  * b=2: (0,3)-(2,4)
1864  *
1865  * And the second loop:
1866  * a=1: (0,1)-(1,4)
1867  * a=3: (0,3)-(2,4)
1868  * a=4: (0,4)-(2,4)
1869  */
1870 
1871  /*
1872  * Iterate over lower bound of right group, finding smallest possible
1873  * upper bound of left group.
1874  */
1875  i1 = 0;
1876  i2 = 0;
1877  rightLower = intervalsLower[i1].lower;
1878  leftUpper = intervalsUpper[i2].lower;
1879  while (true)
1880  {
1881  /*
1882  * Find next lower bound of right group.
1883  */
1884  while (i1 < nentries && (rightLower == intervalsLower[i1].lower ||
1885  isnan(intervalsLower[i1].lower)))
1886  {
1887  leftUpper = Max(leftUpper, intervalsLower[i1].upper);
1888  i1++;
1889  }
1890  if (i1 >= nentries)
1891  break;
1892  rightLower = intervalsLower[i1].lower;
1893 
1894  /*
1895  * Find count of intervals which anyway should be placed to the
1896  * left group.
1897  */
1898  while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
1899  i2++;
1900 
1901  /*
1902  * Consider found split.
1903  */
1904  g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2);
1905  }
1906 
1907  /*
1908  * Iterate over upper bound of left group finding greatest possible
1909  * lower bound of right group.
1910  */
1911  i1 = nentries - 1;
1912  i2 = nentries - 1;
1913  rightLower = intervalsLower[i1].upper;
1914  leftUpper = intervalsUpper[i2].upper;
1915  while (true)
1916  {
1917  /*
1918  * Find next upper bound of left group.
1919  */
1920  while (i2 >= 0 && (leftUpper == intervalsUpper[i2].upper ||
1921  isnan(intervalsUpper[i2].upper)))
1922  {
1923  rightLower = Min(rightLower, intervalsUpper[i2].lower);
1924  i2--;
1925  }
1926  if (i2 < 0)
1927  break;
1928  leftUpper = intervalsUpper[i2].upper;
1929 
1930  /*
1931  * Find count of intervals which anyway should be placed to the
1932  * right group.
1933  */
1934  while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
1935  i1--;
1936 
1937  /*
1938  * Consider found split.
1939  */
1940  g_box_consider_split(&context, dim,
1941  rightLower, i1 + 1, leftUpper, i2 + 1);
1942  }
1943  }
1944 
1945  /*
1946  * If we failed to find any acceptable splits, use trivial split.
1947  */
1948  if (context.first)
1949  {
1950  POSTGIS_DEBUG(4, "no acceptable splits, trivial split");
1951  fallbackSplit(entryvec, v);
1952  PG_RETURN_POINTER(v);
1953  }
1954 
1955  /*
1956  * Ok, we have now selected the split across one axis.
1957  *
1958  * While considering the splits, we already determined that there will be
1959  * enough entries in both groups to reach the desired ratio, but we did
1960  * not memorize which entries go to which group. So determine that now.
1961  */
1962 
1963  POSTGIS_DEBUGF(4, "split direction: %d", context.dim);
1964 
1965  /* Allocate vectors for results */
1966  v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1967  v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1968  v->spl_nleft = 0;
1969  v->spl_nright = 0;
1970 
1971  /* Allocate bounding boxes of left and right groups */
1972  leftBox = palloc0(sizeof(BOX2DF));
1973  rightBox = palloc0(sizeof(BOX2DF));
1974 
1975  /*
1976  * Allocate an array for "common entries" - entries which can be placed to
1977  * either group without affecting overlap along selected axis.
1978  */
1979  commonEntriesCount = 0;
1980  commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
1981 
1982  /* Helper macros to place an entry in the left or right group */
1983 #define PLACE_LEFT(box, off) \
1984  do { \
1985  if (v->spl_nleft > 0) \
1986  adjustBox(leftBox, box); \
1987  else \
1988  *leftBox = *(box); \
1989  v->spl_left[v->spl_nleft++] = off; \
1990  } while(0)
1991 
1992 #define PLACE_RIGHT(box, off) \
1993  do { \
1994  if (v->spl_nright > 0) \
1995  adjustBox(rightBox, box); \
1996  else \
1997  *rightBox = *(box); \
1998  v->spl_right[v->spl_nright++] = off; \
1999  } while(0)
2000 
2001  /*
2002  * Distribute entries which can be distributed unambiguously, and collect
2003  * common entries.
2004  */
2005  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
2006  {
2007  float lower,
2008  upper;
2009 
2010  /*
2011  * Get upper and lower bounds along selected axis.
2012  */
2013  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
2014  if (context.dim == 0)
2015  {
2016  lower = box->xmin;
2017  upper = box->xmax;
2018  }
2019  else
2020  {
2021  lower = box->ymin;
2022  upper = box->ymax;
2023  }
2024 
2025  if (upper <= context.leftUpper || isnan(upper))
2026  {
2027  /* Fits to the left group */
2028  if (lower >= context.rightLower || isnan(lower))
2029  {
2030  /* Fits also to the right group, so "common entry" */
2031  commonEntries[commonEntriesCount++].index = i;
2032  }
2033  else
2034  {
2035  /* Doesn't fit to the right group, so join to the left group */
2036  PLACE_LEFT(box, i);
2037  }
2038  }
2039  else
2040  {
2041  /*
2042  * Each entry should fit on either left or right group. Since this
2043  * entry didn't fit on the left group, it better fit in the right
2044  * group.
2045  */
2046  Assert(lower >= context.rightLower);
2047 
2048  /* Doesn't fit to the left group, so join to the right group */
2049  PLACE_RIGHT(box, i);
2050  }
2051  }
2052 
2053  POSTGIS_DEBUGF(4, "leftBox is %s", box2df_to_string(leftBox));
2054  POSTGIS_DEBUGF(4, "rightBox is %s", box2df_to_string(rightBox));
2055 
2056  /*
2057  * Distribute "common entries", if any.
2058  */
2059  if (commonEntriesCount > 0)
2060  {
2061  /* Calculate minimum number of entries that must be placed in both groups, to reach LIMIT_RATIO. */
2062  int m = ceil(LIMIT_RATIO * (double)nentries);
2063 
2064  /* Recursive picksplit called, this is going to be the last split, keep split into 2 parts */
2065  if (nentries > INDEX_TUPLES_PER_PAGE && nentries <= 2 * INDEX_TUPLES_PER_PAGE)
2066  m = Max(m, nentries - INDEX_TUPLES_PER_PAGE);
2067 
2068  /*
2069  * Calculate delta between penalties of join "common entries" to
2070  * different groups.
2071  */
2072  for (i = 0; i < (OffsetNumber)commonEntriesCount; i++)
2073  {
2074  box = (BOX2DF *) DatumGetPointer(entryvec->vector[
2075  commonEntries[i].index].key);
2077  commonEntries[i].delta = fabs(box2df_penalty(leftBox, box) - box2df_penalty(rightBox, box));
2078  }
2079 
2080  /*
2081  * Sort "common entries" by calculated deltas in order to distribute
2082  * the most ambiguous entries first.
2083  */
2084  qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp);
2085 
2086  /*
2087  * Distribute "common entries" between groups.
2088  */
2089  for (i = 0; i < (OffsetNumber)commonEntriesCount; i++)
2090  {
2091  float right_penalty, left_penalty;
2092  bool place_right = true;
2093  box = (BOX2DF *)DatumGetPointer(entryvec->vector[commonEntries[i].index].key);
2094 
2095  /* Recheck penalties. After we put undecided tuples to some side they're changed */
2096  left_penalty = box2df_penalty(leftBox, box);
2097  right_penalty = box2df_penalty(rightBox, box);
2098 
2099  /* Check if penalty is absolutely telling a tuple should go to some side */
2100  if (right_penalty > 0 && left_penalty == 0)
2101  place_right = false;
2102  else if (right_penalty == 0 && left_penalty > 0)
2103  place_right = true;
2104  /* Check if we have to place this entry in either group to achieve LIMIT_RATIO */
2105  else if (v->spl_nleft + (commonEntriesCount - i) <= m)
2106  place_right = false;
2107  else if (v->spl_nright + (commonEntriesCount - i) <= m)
2108  place_right = true;
2109  /* Otherwise select the group by smaller penalty */
2110  else if (left_penalty < right_penalty)
2111  place_right = false;
2112  else if (right_penalty < left_penalty)
2113  place_right = true;
2114  /* or just put tuple into smaller group */
2115  else if (v->spl_nleft < v->spl_nright)
2116  place_right = false;
2117  else
2118  place_right = true;
2119 
2120  if (place_right)
2121  PLACE_RIGHT(box, commonEntries[i].index);
2122  else
2123  PLACE_LEFT(box, commonEntries[i].index);
2124  }
2125  }
2126  v->spl_ldatum = PointerGetDatum(leftBox);
2127  v->spl_rdatum = PointerGetDatum(rightBox);
2128 
2129  POSTGIS_DEBUG(4, "[GIST] 'picksplit' completed");
2130 
2131  PG_RETURN_POINTER(v);
2132 }
static char * box2df_to_string(const BOX2DF *a)
static void fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
#define INDEX_TUPLES_PER_PAGE
static int interval_cmp_upper(const void *i1, const void *i2)
#define LIMIT_RATIO
#define PLACE_RIGHT(box, off)
static void g_box_consider_split(ConsiderSplitContext *context, int dimNum, float rightLower, int minLeftCount, float leftUpper, int maxLeftCount)
static int interval_cmp_lower(const void *i1, const void *i2)
static int common_entry_cmp(const void *i1, const void *i2)
#define PLACE_LEFT(box, off)
static float box2df_penalty(const BOX2DF *b1, const BOX2DF *b2)
static void adjustBox(BOX2DF *b, BOX2DF *addon)

References adjustBox(), ConsiderSplitContext::boundingBox, box2df_penalty(), box2df_to_string(), common_entry_cmp(), CommonEntry::delta, ConsiderSplitContext::dim, ConsiderSplitContext::entriesCount, fallbackSplit(), ConsiderSplitContext::first, g_box_consider_split(), CommonEntry::index, INDEX_TUPLES_PER_PAGE, interval_cmp_lower(), interval_cmp_upper(), ConsiderSplitContext::leftUpper, LIMIT_RATIO, SplitInterval::lower, PLACE_LEFT, PLACE_RIGHT, ConsiderSplitContext::rightLower, and SplitInterval::upper.

Here is the call graph for this function: