PostGIS  2.5.1dev-r@@SVN_REVISION@@

◆ gserialized_gist_picksplit_2d()

Datum gserialized_gist_picksplit_2d ( PG_FUNCTION_ARGS  )

Definition at line 1853 of file gserialized_gist_2d.c.

References adjustBox(), ConsiderSplitContext::boundingBox, box2df_in(), box2df_size(), box2df_to_string(), box_penalty(), common_entry_cmp(), rtgdalraster::cur, CommonEntry::delta, ConsiderSplitContext::dim, ConsiderSplitContext::entriesCount, fallbackSplit(), ConsiderSplitContext::first, g_box_consider_split(), CommonEntry::index, interval_cmp_lower(), interval_cmp_upper(), ConsiderSplitContext::leftUpper, LIMIT_RATIO, SplitInterval::lower, PG_FUNCTION_INFO_V1(), PLACE_LEFT, PLACE_RIGHT, ConsiderSplitContext::rightLower, and SplitInterval::upper.

Referenced by common_entry_cmp().

1854 {
1855  GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
1856  GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
1857  OffsetNumber i,
1858  maxoff;
1859  ConsiderSplitContext context;
1860  BOX2DF *box,
1861  *leftBox,
1862  *rightBox;
1863  int dim,
1864  commonEntriesCount;
1865  SplitInterval *intervalsLower,
1866  *intervalsUpper;
1867  CommonEntry *commonEntries;
1868  int nentries;
1869 
1870  POSTGIS_DEBUG(3, "[GIST] 'picksplit' entered");
1871 
1872  memset(&context, 0, sizeof(ConsiderSplitContext));
1873 
1874  maxoff = entryvec->n - 1;
1875  nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1;
1876 
1877  /* Allocate arrays for intervals along axes */
1878  intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1879  intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1880 
1881  /*
1882  * Calculate the overall minimum bounding box over all the entries.
1883  */
1884  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1885  {
1886  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1887  if (i == FirstOffsetNumber)
1888  context.boundingBox = *box;
1889  else
1890  adjustBox(&context.boundingBox, box);
1891  }
1892 
1893  POSTGIS_DEBUGF(4, "boundingBox is %s", box2df_to_string(
1894  &context.boundingBox));
1895 
1896  /*
1897  * Iterate over axes for optimal split searching.
1898  */
1899  context.first = true; /* nothing selected yet */
1900  for (dim = 0; dim < 2; dim++)
1901  {
1902  float leftUpper,
1903  rightLower;
1904  int i1,
1905  i2;
1906 
1907  /* Project each entry as an interval on the selected axis. */
1908  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1909  {
1910  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1911  if (dim == 0)
1912  {
1913  intervalsLower[i - FirstOffsetNumber].lower = box->xmin;
1914  intervalsLower[i - FirstOffsetNumber].upper = box->xmax;
1915  }
1916  else
1917  {
1918  intervalsLower[i - FirstOffsetNumber].lower = box->ymin;
1919  intervalsLower[i - FirstOffsetNumber].upper = box->ymax;
1920  }
1921  }
1922 
1923  /*
1924  * Make two arrays of intervals: one sorted by lower bound and another
1925  * sorted by upper bound.
1926  */
1927  memcpy(intervalsUpper, intervalsLower,
1928  sizeof(SplitInterval) * nentries);
1929  qsort(intervalsLower, nentries, sizeof(SplitInterval),
1931  qsort(intervalsUpper, nentries, sizeof(SplitInterval),
1933 
1934  /*----
1935  * The goal is to form a left and right interval, so that every entry
1936  * interval is contained by either left or right interval (or both).
1937  *
1938  * For example, with the intervals (0,1), (1,3), (2,3), (2,4):
1939  *
1940  * 0 1 2 3 4
1941  * +-+
1942  * +---+
1943  * +-+
1944  * +---+
1945  *
1946  * The left and right intervals are of the form (0,a) and (b,4).
1947  * We first consider splits where b is the lower bound of an entry.
1948  * We iterate through all entries, and for each b, calculate the
1949  * smallest possible a. Then we consider splits where a is the
1950  * upper bound of an entry, and for each a, calculate the greatest
1951  * possible b.
1952  *
1953  * In the above example, the first loop would consider splits:
1954  * b=0: (0,1)-(0,4)
1955  * b=1: (0,1)-(1,4)
1956  * b=2: (0,3)-(2,4)
1957  *
1958  * And the second loop:
1959  * a=1: (0,1)-(1,4)
1960  * a=3: (0,3)-(2,4)
1961  * a=4: (0,4)-(2,4)
1962  */
1963 
1964  /*
1965  * Iterate over lower bound of right group, finding smallest possible
1966  * upper bound of left group.
1967  */
1968  i1 = 0;
1969  i2 = 0;
1970  rightLower = intervalsLower[i1].lower;
1971  leftUpper = intervalsUpper[i2].lower;
1972  while (true)
1973  {
1974  /*
1975  * Find next lower bound of right group.
1976  */
1977  while (i1 < nentries && (rightLower == intervalsLower[i1].lower ||
1978  isnan(intervalsLower[i1].lower)))
1979  {
1980  leftUpper = Max(leftUpper, intervalsLower[i1].upper);
1981  i1++;
1982  }
1983  if (i1 >= nentries)
1984  break;
1985  rightLower = intervalsLower[i1].lower;
1986 
1987  /*
1988  * Find count of intervals which anyway should be placed to the
1989  * left group.
1990  */
1991  while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
1992  i2++;
1993 
1994  /*
1995  * Consider found split.
1996  */
1997  g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2);
1998  }
1999 
2000  /*
2001  * Iterate over upper bound of left group finding greatest possible
2002  * lower bound of right group.
2003  */
2004  i1 = nentries - 1;
2005  i2 = nentries - 1;
2006  rightLower = intervalsLower[i1].upper;
2007  leftUpper = intervalsUpper[i2].upper;
2008  while (true)
2009  {
2010  /*
2011  * Find next upper bound of left group.
2012  */
2013  while (i2 >= 0 && (leftUpper == intervalsUpper[i2].upper ||
2014  isnan(intervalsUpper[i2].upper)))
2015  {
2016  rightLower = Min(rightLower, intervalsUpper[i2].lower);
2017  i2--;
2018  }
2019  if (i2 < 0)
2020  break;
2021  leftUpper = intervalsUpper[i2].upper;
2022 
2023  /*
2024  * Find count of intervals which anyway should be placed to the
2025  * right group.
2026  */
2027  while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
2028  i1--;
2029 
2030  /*
2031  * Consider found split.
2032  */
2033  g_box_consider_split(&context, dim,
2034  rightLower, i1 + 1, leftUpper, i2 + 1);
2035  }
2036  }
2037 
2038  /*
2039  * If we failed to find any acceptable splits, use trivial split.
2040  */
2041  if (context.first)
2042  {
2043  POSTGIS_DEBUG(4, "no acceptable splits, trivial split");
2044  fallbackSplit(entryvec, v);
2045  PG_RETURN_POINTER(v);
2046  }
2047 
2048  /*
2049  * Ok, we have now selected the split across one axis.
2050  *
2051  * While considering the splits, we already determined that there will be
2052  * enough entries in both groups to reach the desired ratio, but we did
2053  * not memorize which entries go to which group. So determine that now.
2054  */
2055 
2056  POSTGIS_DEBUGF(4, "split direction: %d", context.dim);
2057 
2058  /* Allocate vectors for results */
2059  v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
2060  v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
2061  v->spl_nleft = 0;
2062  v->spl_nright = 0;
2063 
2064  /* Allocate bounding boxes of left and right groups */
2065  leftBox = palloc0(sizeof(BOX2DF));
2066  rightBox = palloc0(sizeof(BOX2DF));
2067 
2068  /*
2069  * Allocate an array for "common entries" - entries which can be placed to
2070  * either group without affecting overlap along selected axis.
2071  */
2072  commonEntriesCount = 0;
2073  commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
2074 
2075  /* Helper macros to place an entry in the left or right group */
2076 #define PLACE_LEFT(box, off) \
2077  do { \
2078  if (v->spl_nleft > 0) \
2079  adjustBox(leftBox, box); \
2080  else \
2081  *leftBox = *(box); \
2082  v->spl_left[v->spl_nleft++] = off; \
2083  } while(0)
2084 
2085 #define PLACE_RIGHT(box, off) \
2086  do { \
2087  if (v->spl_nright > 0) \
2088  adjustBox(rightBox, box); \
2089  else \
2090  *rightBox = *(box); \
2091  v->spl_right[v->spl_nright++] = off; \
2092  } while(0)
2093 
2094  /*
2095  * Distribute entries which can be distributed unambiguously, and collect
2096  * common entries.
2097  */
2098  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
2099  {
2100  float lower,
2101  upper;
2102 
2103  /*
2104  * Get upper and lower bounds along selected axis.
2105  */
2106  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
2107  if (context.dim == 0)
2108  {
2109  lower = box->xmin;
2110  upper = box->xmax;
2111  }
2112  else
2113  {
2114  lower = box->ymin;
2115  upper = box->ymax;
2116  }
2117 
2118  if (upper <= context.leftUpper || isnan(upper))
2119  {
2120  /* Fits to the left group */
2121  if (lower >= context.rightLower || isnan(lower))
2122  {
2123  /* Fits also to the right group, so "common entry" */
2124  commonEntries[commonEntriesCount++].index = i;
2125  }
2126  else
2127  {
2128  /* Doesn't fit to the right group, so join to the left group */
2129  PLACE_LEFT(box, i);
2130  }
2131  }
2132  else
2133  {
2134  /*
2135  * Each entry should fit on either left or right group. Since this
2136  * entry didn't fit on the left group, it better fit in the right
2137  * group.
2138  */
2139  Assert(lower >= context.rightLower);
2140 
2141  /* Doesn't fit to the left group, so join to the right group */
2142  PLACE_RIGHT(box, i);
2143  }
2144  }
2145 
2146  POSTGIS_DEBUGF(4, "leftBox is %s", box2df_to_string(leftBox));
2147  POSTGIS_DEBUGF(4, "rightBox is %s", box2df_to_string(rightBox));
2148 
2149  /*
2150  * Distribute "common entries", if any.
2151  */
2152  if (commonEntriesCount > 0)
2153  {
2154  /*
2155  * Calculate minimum number of entries that must be placed in both
2156  * groups, to reach LIMIT_RATIO.
2157  */
2158  int m = ceil(LIMIT_RATIO * (double) nentries);
2159 
2160  /*
2161  * Calculate delta between penalties of join "common entries" to
2162  * different groups.
2163  */
2164  for (i = 0; i < commonEntriesCount; i++)
2165  {
2166  box = (BOX2DF *) DatumGetPointer(entryvec->vector[
2167  commonEntries[i].index].key);
2168  commonEntries[i].delta = Abs(box_penalty(leftBox, box) -
2169  box_penalty(rightBox, box));
2170  }
2171 
2172  /*
2173  * Sort "common entries" by calculated deltas in order to distribute
2174  * the most ambiguous entries first.
2175  */
2176  qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp);
2177 
2178  /*
2179  * Distribute "common entries" between groups.
2180  */
2181  for (i = 0; i < commonEntriesCount; i++)
2182  {
2183  box = (BOX2DF *) DatumGetPointer(entryvec->vector[
2184  commonEntries[i].index].key);
2185 
2186  /*
2187  * Check if we have to place this entry in either group to achieve
2188  * LIMIT_RATIO.
2189  */
2190  if (v->spl_nleft + (commonEntriesCount - i) <= m)
2191  PLACE_LEFT(box, commonEntries[i].index);
2192  else if (v->spl_nright + (commonEntriesCount - i) <= m)
2193  PLACE_RIGHT(box, commonEntries[i].index);
2194  else
2195  {
2196  /* Otherwise select the group by minimal penalty */
2197  if (box_penalty(leftBox, box) < box_penalty(rightBox, box))
2198  PLACE_LEFT(box, commonEntries[i].index);
2199  else
2200  PLACE_RIGHT(box, commonEntries[i].index);
2201  }
2202  }
2203  }
2204  v->spl_ldatum = PointerGetDatum(leftBox);
2205  v->spl_rdatum = PointerGetDatum(rightBox);
2206 
2207  POSTGIS_DEBUG(4, "[GIST] 'picksplit' completed");
2208 
2209  PG_RETURN_POINTER(v);
2210 }
#define LIMIT_RATIO
static void fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
static void g_box_consider_split(ConsiderSplitContext *context, int dimNum, float rightLower, int minLeftCount, float leftUpper, int maxLeftCount)
static int interval_cmp_lower(const void *i1, const void *i2)
static void adjustBox(BOX2DF *b, BOX2DF *addon)
static char * box2df_to_string(const BOX2DF *a)
static int interval_cmp_upper(const void *i1, const void *i2)
static int common_entry_cmp(const void *i1, const void *i2)
#define PLACE_RIGHT(box, off)
#define PLACE_LEFT(box, off)
static float box_penalty(BOX2DF *original, BOX2DF *new)
Here is the call graph for this function:
Here is the caller graph for this function: