PostGIS  2.3.8dev-r@@SVN_REVISION@@

◆ gserialized_gist_picksplit_2d()

Datum gserialized_gist_picksplit_2d ( PG_FUNCTION_ARGS  )

Definition at line 1705 of file gserialized_gist_2d.c.

References adjustBox(), ConsiderSplitContext::boundingBox, box2df_in(), box2df_size(), box2df_to_string(), box_penalty(), common_entry_cmp(), rtgdalraster::cur, CommonEntry::delta, ConsiderSplitContext::dim, ConsiderSplitContext::entriesCount, fallbackSplit(), FALSE, ConsiderSplitContext::first, g_box_consider_split(), CommonEntry::index, interval_cmp_lower(), interval_cmp_upper(), ConsiderSplitContext::leftUpper, LIMIT_RATIO, SplitInterval::lower, PG_FUNCTION_INFO_V1(), PLACE_LEFT, PLACE_RIGHT, ConsiderSplitContext::rightLower, and SplitInterval::upper.

Referenced by common_entry_cmp().

1706 {
1707  GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
1708  GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
1709  OffsetNumber i,
1710  maxoff;
1711  ConsiderSplitContext context;
1712  BOX2DF *box,
1713  *leftBox,
1714  *rightBox;
1715  int dim,
1716  commonEntriesCount;
1717  SplitInterval *intervalsLower,
1718  *intervalsUpper;
1719  CommonEntry *commonEntries;
1720  int nentries;
1721 
1722  POSTGIS_DEBUG(3, "[GIST] 'picksplit' entered");
1723 
1724  memset(&context, 0, sizeof(ConsiderSplitContext));
1725 
1726  maxoff = entryvec->n - 1;
1727  nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1;
1728 
1729  /* Allocate arrays for intervals along axes */
1730  intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1731  intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1732 
1733  /*
1734  * Calculate the overall minimum bounding box over all the entries.
1735  */
1736  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1737  {
1738  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1739  if (i == FirstOffsetNumber)
1740  context.boundingBox = *box;
1741  else
1742  adjustBox(&context.boundingBox, box);
1743  }
1744 
1745  POSTGIS_DEBUGF(4, "boundingBox is %s", box2df_to_string(
1746  &context.boundingBox));
1747 
1748  /*
1749  * Iterate over axes for optimal split searching.
1750  */
1751  context.first = true; /* nothing selected yet */
1752  for (dim = 0; dim < 2; dim++)
1753  {
1754  float leftUpper,
1755  rightLower;
1756  int i1,
1757  i2;
1758 
1759  /* Project each entry as an interval on the selected axis. */
1760  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1761  {
1762  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1763  if (dim == 0)
1764  {
1765  intervalsLower[i - FirstOffsetNumber].lower = box->xmin;
1766  intervalsLower[i - FirstOffsetNumber].upper = box->xmax;
1767  }
1768  else
1769  {
1770  intervalsLower[i - FirstOffsetNumber].lower = box->ymin;
1771  intervalsLower[i - FirstOffsetNumber].upper = box->ymax;
1772  }
1773  }
1774 
1775  /*
1776  * Make two arrays of intervals: one sorted by lower bound and another
1777  * sorted by upper bound.
1778  */
1779  memcpy(intervalsUpper, intervalsLower,
1780  sizeof(SplitInterval) * nentries);
1781  qsort(intervalsLower, nentries, sizeof(SplitInterval),
1783  qsort(intervalsUpper, nentries, sizeof(SplitInterval),
1785 
1786  /*----
1787  * The goal is to form a left and right interval, so that every entry
1788  * interval is contained by either left or right interval (or both).
1789  *
1790  * For example, with the intervals (0,1), (1,3), (2,3), (2,4):
1791  *
1792  * 0 1 2 3 4
1793  * +-+
1794  * +---+
1795  * +-+
1796  * +---+
1797  *
1798  * The left and right intervals are of the form (0,a) and (b,4).
1799  * We first consider splits where b is the lower bound of an entry.
1800  * We iterate through all entries, and for each b, calculate the
1801  * smallest possible a. Then we consider splits where a is the
1802  * uppper bound of an entry, and for each a, calculate the greatest
1803  * possible b.
1804  *
1805  * In the above example, the first loop would consider splits:
1806  * b=0: (0,1)-(0,4)
1807  * b=1: (0,1)-(1,4)
1808  * b=2: (0,3)-(2,4)
1809  *
1810  * And the second loop:
1811  * a=1: (0,1)-(1,4)
1812  * a=3: (0,3)-(2,4)
1813  * a=4: (0,4)-(2,4)
1814  */
1815 
1816  /*
1817  * Iterate over lower bound of right group, finding smallest possible
1818  * upper bound of left group.
1819  */
1820  i1 = 0;
1821  i2 = 0;
1822  rightLower = intervalsLower[i1].lower;
1823  leftUpper = intervalsUpper[i2].lower;
1824  while (true)
1825  {
1826  /*
1827  * Find next lower bound of right group.
1828  */
1829  while (i1 < nentries && (rightLower == intervalsLower[i1].lower ||
1830  isnan(intervalsLower[i1].lower)))
1831  {
1832  leftUpper = Max(leftUpper, intervalsLower[i1].upper);
1833  i1++;
1834  }
1835  if (i1 >= nentries)
1836  break;
1837  rightLower = intervalsLower[i1].lower;
1838 
1839  /*
1840  * Find count of intervals which anyway should be placed to the
1841  * left group.
1842  */
1843  while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
1844  i2++;
1845 
1846  /*
1847  * Consider found split.
1848  */
1849  g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2);
1850  }
1851 
1852  /*
1853  * Iterate over upper bound of left group finding greates possible
1854  * lower bound of right group.
1855  */
1856  i1 = nentries - 1;
1857  i2 = nentries - 1;
1858  rightLower = intervalsLower[i1].upper;
1859  leftUpper = intervalsUpper[i2].upper;
1860  while (true)
1861  {
1862  /*
1863  * Find next upper bound of left group.
1864  */
1865  while (i2 >= 0 && (leftUpper == intervalsUpper[i2].upper ||
1866  isnan(intervalsUpper[i2].upper)))
1867  {
1868  rightLower = Min(rightLower, intervalsUpper[i2].lower);
1869  i2--;
1870  }
1871  if (i2 < 0)
1872  break;
1873  leftUpper = intervalsUpper[i2].upper;
1874 
1875  /*
1876  * Find count of intervals which anyway should be placed to the
1877  * right group.
1878  */
1879  while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
1880  i1--;
1881 
1882  /*
1883  * Consider found split.
1884  */
1885  g_box_consider_split(&context, dim,
1886  rightLower, i1 + 1, leftUpper, i2 + 1);
1887  }
1888  }
1889 
1890  /*
1891  * If we failed to find any acceptable splits, use trivial split.
1892  */
1893  if (context.first)
1894  {
1895  POSTGIS_DEBUG(4, "no acceptable splits, trivial split");
1896  fallbackSplit(entryvec, v);
1897  PG_RETURN_POINTER(v);
1898  }
1899 
1900  /*
1901  * Ok, we have now selected the split across one axis.
1902  *
1903  * While considering the splits, we already determined that there will be
1904  * enough entries in both groups to reach the desired ratio, but we did
1905  * not memorize which entries go to which group. So determine that now.
1906  */
1907 
1908  POSTGIS_DEBUGF(4, "split direction: %d", context.dim);
1909 
1910  /* Allocate vectors for results */
1911  v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1912  v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1913  v->spl_nleft = 0;
1914  v->spl_nright = 0;
1915 
1916  /* Allocate bounding boxes of left and right groups */
1917  leftBox = palloc0(sizeof(BOX2DF));
1918  rightBox = palloc0(sizeof(BOX2DF));
1919 
1920  /*
1921  * Allocate an array for "common entries" - entries which can be placed to
1922  * either group without affecting overlap along selected axis.
1923  */
1924  commonEntriesCount = 0;
1925  commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
1926 
1927  /* Helper macros to place an entry in the left or right group */
1928 #define PLACE_LEFT(box, off) \
1929  do { \
1930  if (v->spl_nleft > 0) \
1931  adjustBox(leftBox, box); \
1932  else \
1933  *leftBox = *(box); \
1934  v->spl_left[v->spl_nleft++] = off; \
1935  } while(0)
1936 
1937 #define PLACE_RIGHT(box, off) \
1938  do { \
1939  if (v->spl_nright > 0) \
1940  adjustBox(rightBox, box); \
1941  else \
1942  *rightBox = *(box); \
1943  v->spl_right[v->spl_nright++] = off; \
1944  } while(0)
1945 
1946  /*
1947  * Distribute entries which can be distributed unambiguously, and collect
1948  * common entries.
1949  */
1950  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1951  {
1952  float lower,
1953  upper;
1954 
1955  /*
1956  * Get upper and lower bounds along selected axis.
1957  */
1958  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1959  if (context.dim == 0)
1960  {
1961  lower = box->xmin;
1962  upper = box->xmax;
1963  }
1964  else
1965  {
1966  lower = box->ymin;
1967  upper = box->ymax;
1968  }
1969 
1970  if (upper <= context.leftUpper || isnan(upper))
1971  {
1972  /* Fits to the left group */
1973  if (lower >= context.rightLower || isnan(lower))
1974  {
1975  /* Fits also to the right group, so "common entry" */
1976  commonEntries[commonEntriesCount++].index = i;
1977  }
1978  else
1979  {
1980  /* Doesn't fit to the right group, so join to the left group */
1981  PLACE_LEFT(box, i);
1982  }
1983  }
1984  else
1985  {
1986  /*
1987  * Each entry should fit on either left or right group. Since this
1988  * entry didn't fit on the left group, it better fit in the right
1989  * group.
1990  */
1991  Assert(lower >= context.rightLower);
1992 
1993  /* Doesn't fit to the left group, so join to the right group */
1994  PLACE_RIGHT(box, i);
1995  }
1996  }
1997 
1998  POSTGIS_DEBUGF(4, "leftBox is %s", box2df_to_string(leftBox));
1999  POSTGIS_DEBUGF(4, "rightBox is %s", box2df_to_string(rightBox));
2000 
2001  /*
2002  * Distribute "common entries", if any.
2003  */
2004  if (commonEntriesCount > 0)
2005  {
2006  /*
2007  * Calculate minimum number of entries that must be placed in both
2008  * groups, to reach LIMIT_RATIO.
2009  */
2010  int m = ceil(LIMIT_RATIO * (double) nentries);
2011 
2012  /*
2013  * Calculate delta between penalties of join "common entries" to
2014  * different groups.
2015  */
2016  for (i = 0; i < commonEntriesCount; i++)
2017  {
2018  box = (BOX2DF *) DatumGetPointer(entryvec->vector[
2019  commonEntries[i].index].key);
2020  commonEntries[i].delta = Abs(box_penalty(leftBox, box) -
2021  box_penalty(rightBox, box));
2022  }
2023 
2024  /*
2025  * Sort "common entries" by calculated deltas in order to distribute
2026  * the most ambiguous entries first.
2027  */
2028  qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp);
2029 
2030  /*
2031  * Distribute "common entries" between groups.
2032  */
2033  for (i = 0; i < commonEntriesCount; i++)
2034  {
2035  box = (BOX2DF *) DatumGetPointer(entryvec->vector[
2036  commonEntries[i].index].key);
2037 
2038  /*
2039  * Check if we have to place this entry in either group to achieve
2040  * LIMIT_RATIO.
2041  */
2042  if (v->spl_nleft + (commonEntriesCount - i) <= m)
2043  PLACE_LEFT(box, commonEntries[i].index);
2044  else if (v->spl_nright + (commonEntriesCount - i) <= m)
2045  PLACE_RIGHT(box, commonEntries[i].index);
2046  else
2047  {
2048  /* Otherwise select the group by minimal penalty */
2049  if (box_penalty(leftBox, box) < box_penalty(rightBox, box))
2050  PLACE_LEFT(box, commonEntries[i].index);
2051  else
2052  PLACE_RIGHT(box, commonEntries[i].index);
2053  }
2054  }
2055  }
2056  v->spl_ldatum = PointerGetDatum(leftBox);
2057  v->spl_rdatum = PointerGetDatum(rightBox);
2058 
2059  POSTGIS_DEBUG(4, "[GIST] 'picksplit' completed");
2060 
2061  PG_RETURN_POINTER(v);
2062 }
#define LIMIT_RATIO
static void fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
static void g_box_consider_split(ConsiderSplitContext *context, int dimNum, float rightLower, int minLeftCount, float leftUpper, int maxLeftCount)
static int interval_cmp_lower(const void *i1, const void *i2)
static void adjustBox(BOX2DF *b, BOX2DF *addon)
static char * box2df_to_string(const BOX2DF *a)
static int interval_cmp_upper(const void *i1, const void *i2)
static int common_entry_cmp(const void *i1, const void *i2)
#define PLACE_RIGHT(box, off)
#define PLACE_LEFT(box, off)
static float box_penalty(BOX2DF *original, BOX2DF *new)
Here is the call graph for this function:
Here is the caller graph for this function: