PostGIS  2.4.9dev-r@@SVN_REVISION@@

◆ gserialized_gist_picksplit_2d()

Datum gserialized_gist_picksplit_2d ( PG_FUNCTION_ARGS  )

Definition at line 1798 of file gserialized_gist_2d.c.

References adjustBox(), ConsiderSplitContext::boundingBox, box2df_in(), box2df_size(), box2df_to_string(), box_penalty(), common_entry_cmp(), rtgdalraster::cur, CommonEntry::delta, ConsiderSplitContext::dim, ConsiderSplitContext::entriesCount, fallbackSplit(), FALSE, ConsiderSplitContext::first, g_box_consider_split(), CommonEntry::index, interval_cmp_lower(), interval_cmp_upper(), ConsiderSplitContext::leftUpper, LIMIT_RATIO, SplitInterval::lower, PG_FUNCTION_INFO_V1(), PLACE_LEFT, PLACE_RIGHT, ConsiderSplitContext::rightLower, and SplitInterval::upper.

Referenced by common_entry_cmp().

1799 {
1800  GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
1801  GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
1802  OffsetNumber i,
1803  maxoff;
1804  ConsiderSplitContext context;
1805  BOX2DF *box,
1806  *leftBox,
1807  *rightBox;
1808  int dim,
1809  commonEntriesCount;
1810  SplitInterval *intervalsLower,
1811  *intervalsUpper;
1812  CommonEntry *commonEntries;
1813  int nentries;
1814 
1815  POSTGIS_DEBUG(3, "[GIST] 'picksplit' entered");
1816 
1817  memset(&context, 0, sizeof(ConsiderSplitContext));
1818 
1819  maxoff = entryvec->n - 1;
1820  nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1;
1821 
1822  /* Allocate arrays for intervals along axes */
1823  intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1824  intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1825 
1826  /*
1827  * Calculate the overall minimum bounding box over all the entries.
1828  */
1829  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1830  {
1831  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1832  if (i == FirstOffsetNumber)
1833  context.boundingBox = *box;
1834  else
1835  adjustBox(&context.boundingBox, box);
1836  }
1837 
1838  POSTGIS_DEBUGF(4, "boundingBox is %s", box2df_to_string(
1839  &context.boundingBox));
1840 
1841  /*
1842  * Iterate over axes for optimal split searching.
1843  */
1844  context.first = true; /* nothing selected yet */
1845  for (dim = 0; dim < 2; dim++)
1846  {
1847  float leftUpper,
1848  rightLower;
1849  int i1,
1850  i2;
1851 
1852  /* Project each entry as an interval on the selected axis. */
1853  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1854  {
1855  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1856  if (dim == 0)
1857  {
1858  intervalsLower[i - FirstOffsetNumber].lower = box->xmin;
1859  intervalsLower[i - FirstOffsetNumber].upper = box->xmax;
1860  }
1861  else
1862  {
1863  intervalsLower[i - FirstOffsetNumber].lower = box->ymin;
1864  intervalsLower[i - FirstOffsetNumber].upper = box->ymax;
1865  }
1866  }
1867 
1868  /*
1869  * Make two arrays of intervals: one sorted by lower bound and another
1870  * sorted by upper bound.
1871  */
1872  memcpy(intervalsUpper, intervalsLower,
1873  sizeof(SplitInterval) * nentries);
1874  qsort(intervalsLower, nentries, sizeof(SplitInterval),
1876  qsort(intervalsUpper, nentries, sizeof(SplitInterval),
1878 
1879  /*----
1880  * The goal is to form a left and right interval, so that every entry
1881  * interval is contained by either left or right interval (or both).
1882  *
1883  * For example, with the intervals (0,1), (1,3), (2,3), (2,4):
1884  *
1885  * 0 1 2 3 4
1886  * +-+
1887  * +---+
1888  * +-+
1889  * +---+
1890  *
1891  * The left and right intervals are of the form (0,a) and (b,4).
1892  * We first consider splits where b is the lower bound of an entry.
1893  * We iterate through all entries, and for each b, calculate the
1894  * smallest possible a. Then we consider splits where a is the
1895  * uppper bound of an entry, and for each a, calculate the greatest
1896  * possible b.
1897  *
1898  * In the above example, the first loop would consider splits:
1899  * b=0: (0,1)-(0,4)
1900  * b=1: (0,1)-(1,4)
1901  * b=2: (0,3)-(2,4)
1902  *
1903  * And the second loop:
1904  * a=1: (0,1)-(1,4)
1905  * a=3: (0,3)-(2,4)
1906  * a=4: (0,4)-(2,4)
1907  */
1908 
1909  /*
1910  * Iterate over lower bound of right group, finding smallest possible
1911  * upper bound of left group.
1912  */
1913  i1 = 0;
1914  i2 = 0;
1915  rightLower = intervalsLower[i1].lower;
1916  leftUpper = intervalsUpper[i2].lower;
1917  while (true)
1918  {
1919  /*
1920  * Find next lower bound of right group.
1921  */
1922  while (i1 < nentries && (rightLower == intervalsLower[i1].lower ||
1923  isnan(intervalsLower[i1].lower)))
1924  {
1925  leftUpper = Max(leftUpper, intervalsLower[i1].upper);
1926  i1++;
1927  }
1928  if (i1 >= nentries)
1929  break;
1930  rightLower = intervalsLower[i1].lower;
1931 
1932  /*
1933  * Find count of intervals which anyway should be placed to the
1934  * left group.
1935  */
1936  while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
1937  i2++;
1938 
1939  /*
1940  * Consider found split.
1941  */
1942  g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2);
1943  }
1944 
1945  /*
1946  * Iterate over upper bound of left group finding greates possible
1947  * lower bound of right group.
1948  */
1949  i1 = nentries - 1;
1950  i2 = nentries - 1;
1951  rightLower = intervalsLower[i1].upper;
1952  leftUpper = intervalsUpper[i2].upper;
1953  while (true)
1954  {
1955  /*
1956  * Find next upper bound of left group.
1957  */
1958  while (i2 >= 0 && (leftUpper == intervalsUpper[i2].upper ||
1959  isnan(intervalsUpper[i2].upper)))
1960  {
1961  rightLower = Min(rightLower, intervalsUpper[i2].lower);
1962  i2--;
1963  }
1964  if (i2 < 0)
1965  break;
1966  leftUpper = intervalsUpper[i2].upper;
1967 
1968  /*
1969  * Find count of intervals which anyway should be placed to the
1970  * right group.
1971  */
1972  while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
1973  i1--;
1974 
1975  /*
1976  * Consider found split.
1977  */
1978  g_box_consider_split(&context, dim,
1979  rightLower, i1 + 1, leftUpper, i2 + 1);
1980  }
1981  }
1982 
1983  /*
1984  * If we failed to find any acceptable splits, use trivial split.
1985  */
1986  if (context.first)
1987  {
1988  POSTGIS_DEBUG(4, "no acceptable splits, trivial split");
1989  fallbackSplit(entryvec, v);
1990  PG_RETURN_POINTER(v);
1991  }
1992 
1993  /*
1994  * Ok, we have now selected the split across one axis.
1995  *
1996  * While considering the splits, we already determined that there will be
1997  * enough entries in both groups to reach the desired ratio, but we did
1998  * not memorize which entries go to which group. So determine that now.
1999  */
2000 
2001  POSTGIS_DEBUGF(4, "split direction: %d", context.dim);
2002 
2003  /* Allocate vectors for results */
2004  v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
2005  v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
2006  v->spl_nleft = 0;
2007  v->spl_nright = 0;
2008 
2009  /* Allocate bounding boxes of left and right groups */
2010  leftBox = palloc0(sizeof(BOX2DF));
2011  rightBox = palloc0(sizeof(BOX2DF));
2012 
2013  /*
2014  * Allocate an array for "common entries" - entries which can be placed to
2015  * either group without affecting overlap along selected axis.
2016  */
2017  commonEntriesCount = 0;
2018  commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
2019 
2020  /* Helper macros to place an entry in the left or right group */
2021 #define PLACE_LEFT(box, off) \
2022  do { \
2023  if (v->spl_nleft > 0) \
2024  adjustBox(leftBox, box); \
2025  else \
2026  *leftBox = *(box); \
2027  v->spl_left[v->spl_nleft++] = off; \
2028  } while(0)
2029 
2030 #define PLACE_RIGHT(box, off) \
2031  do { \
2032  if (v->spl_nright > 0) \
2033  adjustBox(rightBox, box); \
2034  else \
2035  *rightBox = *(box); \
2036  v->spl_right[v->spl_nright++] = off; \
2037  } while(0)
2038 
2039  /*
2040  * Distribute entries which can be distributed unambiguously, and collect
2041  * common entries.
2042  */
2043  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
2044  {
2045  float lower,
2046  upper;
2047 
2048  /*
2049  * Get upper and lower bounds along selected axis.
2050  */
2051  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
2052  if (context.dim == 0)
2053  {
2054  lower = box->xmin;
2055  upper = box->xmax;
2056  }
2057  else
2058  {
2059  lower = box->ymin;
2060  upper = box->ymax;
2061  }
2062 
2063  if (upper <= context.leftUpper || isnan(upper))
2064  {
2065  /* Fits to the left group */
2066  if (lower >= context.rightLower || isnan(lower))
2067  {
2068  /* Fits also to the right group, so "common entry" */
2069  commonEntries[commonEntriesCount++].index = i;
2070  }
2071  else
2072  {
2073  /* Doesn't fit to the right group, so join to the left group */
2074  PLACE_LEFT(box, i);
2075  }
2076  }
2077  else
2078  {
2079  /*
2080  * Each entry should fit on either left or right group. Since this
2081  * entry didn't fit on the left group, it better fit in the right
2082  * group.
2083  */
2084  Assert(lower >= context.rightLower);
2085 
2086  /* Doesn't fit to the left group, so join to the right group */
2087  PLACE_RIGHT(box, i);
2088  }
2089  }
2090 
2091  POSTGIS_DEBUGF(4, "leftBox is %s", box2df_to_string(leftBox));
2092  POSTGIS_DEBUGF(4, "rightBox is %s", box2df_to_string(rightBox));
2093 
2094  /*
2095  * Distribute "common entries", if any.
2096  */
2097  if (commonEntriesCount > 0)
2098  {
2099  /*
2100  * Calculate minimum number of entries that must be placed in both
2101  * groups, to reach LIMIT_RATIO.
2102  */
2103  int m = ceil(LIMIT_RATIO * (double) nentries);
2104 
2105  /*
2106  * Calculate delta between penalties of join "common entries" to
2107  * different groups.
2108  */
2109  for (i = 0; i < commonEntriesCount; i++)
2110  {
2111  box = (BOX2DF *) DatumGetPointer(entryvec->vector[
2112  commonEntries[i].index].key);
2113  commonEntries[i].delta = Abs(box_penalty(leftBox, box) -
2114  box_penalty(rightBox, box));
2115  }
2116 
2117  /*
2118  * Sort "common entries" by calculated deltas in order to distribute
2119  * the most ambiguous entries first.
2120  */
2121  qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp);
2122 
2123  /*
2124  * Distribute "common entries" between groups.
2125  */
2126  for (i = 0; i < commonEntriesCount; i++)
2127  {
2128  box = (BOX2DF *) DatumGetPointer(entryvec->vector[
2129  commonEntries[i].index].key);
2130 
2131  /*
2132  * Check if we have to place this entry in either group to achieve
2133  * LIMIT_RATIO.
2134  */
2135  if (v->spl_nleft + (commonEntriesCount - i) <= m)
2136  PLACE_LEFT(box, commonEntries[i].index);
2137  else if (v->spl_nright + (commonEntriesCount - i) <= m)
2138  PLACE_RIGHT(box, commonEntries[i].index);
2139  else
2140  {
2141  /* Otherwise select the group by minimal penalty */
2142  if (box_penalty(leftBox, box) < box_penalty(rightBox, box))
2143  PLACE_LEFT(box, commonEntries[i].index);
2144  else
2145  PLACE_RIGHT(box, commonEntries[i].index);
2146  }
2147  }
2148  }
2149  v->spl_ldatum = PointerGetDatum(leftBox);
2150  v->spl_rdatum = PointerGetDatum(rightBox);
2151 
2152  POSTGIS_DEBUG(4, "[GIST] 'picksplit' completed");
2153 
2154  PG_RETURN_POINTER(v);
2155 }
#define LIMIT_RATIO
static void fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
static void g_box_consider_split(ConsiderSplitContext *context, int dimNum, float rightLower, int minLeftCount, float leftUpper, int maxLeftCount)
static int interval_cmp_lower(const void *i1, const void *i2)
static void adjustBox(BOX2DF *b, BOX2DF *addon)
static char * box2df_to_string(const BOX2DF *a)
static int interval_cmp_upper(const void *i1, const void *i2)
static int common_entry_cmp(const void *i1, const void *i2)
#define PLACE_RIGHT(box, off)
#define PLACE_LEFT(box, off)
static float box_penalty(BOX2DF *original, BOX2DF *new)
Here is the call graph for this function:
Here is the caller graph for this function: