PostGIS  2.2.7dev-r@@SVN_REVISION@@
Datum gserialized_gist_picksplit_2d ( PG_FUNCTION_ARGS  )

Definition at line 1600 of file gserialized_gist_2d.c.

References adjustBox(), ConsiderSplitContext::boundingBox, box2df_to_string(), box_penalty(), common_entry_cmp(), CommonEntry::delta, ConsiderSplitContext::dim, ConsiderSplitContext::entriesCount, fallbackSplit(), ConsiderSplitContext::first, g_box_consider_split(), CommonEntry::index, interval_cmp_lower(), interval_cmp_upper(), ConsiderSplitContext::leftUpper, LIMIT_RATIO, SplitInterval::lower, PLACE_LEFT, PLACE_RIGHT, ConsiderSplitContext::rightLower, and SplitInterval::upper.

1601 {
1602  GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
1603  GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
1604  OffsetNumber i,
1605  maxoff;
1606  ConsiderSplitContext context;
1607  BOX2DF *box,
1608  *leftBox,
1609  *rightBox;
1610  int dim,
1611  commonEntriesCount;
1612  SplitInterval *intervalsLower,
1613  *intervalsUpper;
1614  CommonEntry *commonEntries;
1615  int nentries;
1616 
1617  POSTGIS_DEBUG(3, "[GIST] 'picksplit' entered");
1618 
1619  memset(&context, 0, sizeof(ConsiderSplitContext));
1620 
1621  maxoff = entryvec->n - 1;
1622  nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1;
1623 
1624  /* Allocate arrays for intervals along axes */
1625  intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1626  intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1627 
1628  /*
1629  * Calculate the overall minimum bounding box over all the entries.
1630  */
1631  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1632  {
1633  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1634  if (i == FirstOffsetNumber)
1635  context.boundingBox = *box;
1636  else
1637  adjustBox(&context.boundingBox, box);
1638  }
1639 
1640  POSTGIS_DEBUGF(4, "boundingBox is %s", box2df_to_string(
1641  &context.boundingBox));
1642 
1643  /*
1644  * Iterate over axes for optimal split searching.
1645  */
1646  context.first = true; /* nothing selected yet */
1647  for (dim = 0; dim < 2; dim++)
1648  {
1649  float leftUpper,
1650  rightLower;
1651  int i1,
1652  i2;
1653 
1654  /* Project each entry as an interval on the selected axis. */
1655  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1656  {
1657  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1658  if (dim == 0)
1659  {
1660  intervalsLower[i - FirstOffsetNumber].lower = box->xmin;
1661  intervalsLower[i - FirstOffsetNumber].upper = box->xmax;
1662  }
1663  else
1664  {
1665  intervalsLower[i - FirstOffsetNumber].lower = box->ymin;
1666  intervalsLower[i - FirstOffsetNumber].upper = box->ymax;
1667  }
1668  }
1669 
1670  /*
1671  * Make two arrays of intervals: one sorted by lower bound and another
1672  * sorted by upper bound.
1673  */
1674  memcpy(intervalsUpper, intervalsLower,
1675  sizeof(SplitInterval) * nentries);
1676  qsort(intervalsLower, nentries, sizeof(SplitInterval),
1678  qsort(intervalsUpper, nentries, sizeof(SplitInterval),
1680 
1681  /*----
1682  * The goal is to form a left and right interval, so that every entry
1683  * interval is contained by either left or right interval (or both).
1684  *
1685  * For example, with the intervals (0,1), (1,3), (2,3), (2,4):
1686  *
1687  * 0 1 2 3 4
1688  * +-+
1689  * +---+
1690  * +-+
1691  * +---+
1692  *
1693  * The left and right intervals are of the form (0,a) and (b,4).
1694  * We first consider splits where b is the lower bound of an entry.
1695  * We iterate through all entries, and for each b, calculate the
1696  * smallest possible a. Then we consider splits where a is the
1697  * uppper bound of an entry, and for each a, calculate the greatest
1698  * possible b.
1699  *
1700  * In the above example, the first loop would consider splits:
1701  * b=0: (0,1)-(0,4)
1702  * b=1: (0,1)-(1,4)
1703  * b=2: (0,3)-(2,4)
1704  *
1705  * And the second loop:
1706  * a=1: (0,1)-(1,4)
1707  * a=3: (0,3)-(2,4)
1708  * a=4: (0,4)-(2,4)
1709  */
1710 
1711  /*
1712  * Iterate over lower bound of right group, finding smallest possible
1713  * upper bound of left group.
1714  */
1715  i1 = 0;
1716  i2 = 0;
1717  rightLower = intervalsLower[i1].lower;
1718  leftUpper = intervalsUpper[i2].lower;
1719  while (true)
1720  {
1721  /*
1722  * Find next lower bound of right group.
1723  */
1724  while (i1 < nentries && (rightLower == intervalsLower[i1].lower ||
1725  isnan(intervalsLower[i1].lower)))
1726  {
1727  leftUpper = Max(leftUpper, intervalsLower[i1].upper);
1728  i1++;
1729  }
1730  if (i1 >= nentries)
1731  break;
1732  rightLower = intervalsLower[i1].lower;
1733 
1734  /*
1735  * Find count of intervals which anyway should be placed to the
1736  * left group.
1737  */
1738  while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
1739  i2++;
1740 
1741  /*
1742  * Consider found split.
1743  */
1744  g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2);
1745  }
1746 
1747  /*
1748  * Iterate over upper bound of left group finding greates possible
1749  * lower bound of right group.
1750  */
1751  i1 = nentries - 1;
1752  i2 = nentries - 1;
1753  rightLower = intervalsLower[i1].upper;
1754  leftUpper = intervalsUpper[i2].upper;
1755  while (true)
1756  {
1757  /*
1758  * Find next upper bound of left group.
1759  */
1760  while (i2 >= 0 && (leftUpper == intervalsUpper[i2].upper ||
1761  isnan(intervalsUpper[i2].upper)))
1762  {
1763  rightLower = Min(rightLower, intervalsUpper[i2].lower);
1764  i2--;
1765  }
1766  if (i2 < 0)
1767  break;
1768  leftUpper = intervalsUpper[i2].upper;
1769 
1770  /*
1771  * Find count of intervals which anyway should be placed to the
1772  * right group.
1773  */
1774  while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
1775  i1--;
1776 
1777  /*
1778  * Consider found split.
1779  */
1780  g_box_consider_split(&context, dim,
1781  rightLower, i1 + 1, leftUpper, i2 + 1);
1782  }
1783  }
1784 
1785  /*
1786  * If we failed to find any acceptable splits, use trivial split.
1787  */
1788  if (context.first)
1789  {
1790  POSTGIS_DEBUG(4, "no acceptable splits, trivial split");
1791  fallbackSplit(entryvec, v);
1792  PG_RETURN_POINTER(v);
1793  }
1794 
1795  /*
1796  * Ok, we have now selected the split across one axis.
1797  *
1798  * While considering the splits, we already determined that there will be
1799  * enough entries in both groups to reach the desired ratio, but we did
1800  * not memorize which entries go to which group. So determine that now.
1801  */
1802 
1803  POSTGIS_DEBUGF(4, "split direction: %d", context.dim);
1804 
1805  /* Allocate vectors for results */
1806  v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1807  v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1808  v->spl_nleft = 0;
1809  v->spl_nright = 0;
1810 
1811  /* Allocate bounding boxes of left and right groups */
1812  leftBox = palloc0(sizeof(BOX2DF));
1813  rightBox = palloc0(sizeof(BOX2DF));
1814 
1815  /*
1816  * Allocate an array for "common entries" - entries which can be placed to
1817  * either group without affecting overlap along selected axis.
1818  */
1819  commonEntriesCount = 0;
1820  commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
1821 
1822  /* Helper macros to place an entry in the left or right group */
1823 #define PLACE_LEFT(box, off) \
1824  do { \
1825  if (v->spl_nleft > 0) \
1826  adjustBox(leftBox, box); \
1827  else \
1828  *leftBox = *(box); \
1829  v->spl_left[v->spl_nleft++] = off; \
1830  } while(0)
1831 
1832 #define PLACE_RIGHT(box, off) \
1833  do { \
1834  if (v->spl_nright > 0) \
1835  adjustBox(rightBox, box); \
1836  else \
1837  *rightBox = *(box); \
1838  v->spl_right[v->spl_nright++] = off; \
1839  } while(0)
1840 
1841  /*
1842  * Distribute entries which can be distributed unambiguously, and collect
1843  * common entries.
1844  */
1845  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1846  {
1847  float lower,
1848  upper;
1849 
1850  /*
1851  * Get upper and lower bounds along selected axis.
1852  */
1853  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1854  if (context.dim == 0)
1855  {
1856  lower = box->xmin;
1857  upper = box->xmax;
1858  }
1859  else
1860  {
1861  lower = box->ymin;
1862  upper = box->ymax;
1863  }
1864 
1865  if (upper <= context.leftUpper || isnan(upper))
1866  {
1867  /* Fits to the left group */
1868  if (lower >= context.rightLower || isnan(lower))
1869  {
1870  /* Fits also to the right group, so "common entry" */
1871  commonEntries[commonEntriesCount++].index = i;
1872  }
1873  else
1874  {
1875  /* Doesn't fit to the right group, so join to the left group */
1876  PLACE_LEFT(box, i);
1877  }
1878  }
1879  else
1880  {
1881  /*
1882  * Each entry should fit on either left or right group. Since this
1883  * entry didn't fit on the left group, it better fit in the right
1884  * group.
1885  */
1886  Assert(lower >= context.rightLower);
1887 
1888  /* Doesn't fit to the left group, so join to the right group */
1889  PLACE_RIGHT(box, i);
1890  }
1891  }
1892 
1893  POSTGIS_DEBUGF(4, "leftBox is %s", box2df_to_string(leftBox));
1894  POSTGIS_DEBUGF(4, "rightBox is %s", box2df_to_string(rightBox));
1895 
1896  /*
1897  * Distribute "common entries", if any.
1898  */
1899  if (commonEntriesCount > 0)
1900  {
1901  /*
1902  * Calculate minimum number of entries that must be placed in both
1903  * groups, to reach LIMIT_RATIO.
1904  */
1905  int m = ceil(LIMIT_RATIO * (double) nentries);
1906 
1907  /*
1908  * Calculate delta between penalties of join "common entries" to
1909  * different groups.
1910  */
1911  for (i = 0; i < commonEntriesCount; i++)
1912  {
1913  box = (BOX2DF *) DatumGetPointer(entryvec->vector[
1914  commonEntries[i].index].key);
1915  commonEntries[i].delta = Abs(box_penalty(leftBox, box) -
1916  box_penalty(rightBox, box));
1917  }
1918 
1919  /*
1920  * Sort "common entries" by calculated deltas in order to distribute
1921  * the most ambiguous entries first.
1922  */
1923  qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp);
1924 
1925  /*
1926  * Distribute "common entries" between groups.
1927  */
1928  for (i = 0; i < commonEntriesCount; i++)
1929  {
1930  box = (BOX2DF *) DatumGetPointer(entryvec->vector[
1931  commonEntries[i].index].key);
1932 
1933  /*
1934  * Check if we have to place this entry in either group to achieve
1935  * LIMIT_RATIO.
1936  */
1937  if (v->spl_nleft + (commonEntriesCount - i) <= m)
1938  PLACE_LEFT(box, commonEntries[i].index);
1939  else if (v->spl_nright + (commonEntriesCount - i) <= m)
1940  PLACE_RIGHT(box, commonEntries[i].index);
1941  else
1942  {
1943  /* Otherwise select the group by minimal penalty */
1944  if (box_penalty(leftBox, box) < box_penalty(rightBox, box))
1945  PLACE_LEFT(box, commonEntries[i].index);
1946  else
1947  PLACE_RIGHT(box, commonEntries[i].index);
1948  }
1949  }
1950  }
1951  v->spl_ldatum = PointerGetDatum(leftBox);
1952  v->spl_rdatum = PointerGetDatum(rightBox);
1953 
1954  POSTGIS_DEBUG(4, "[GIST] 'picksplit' completed");
1955 
1956  PG_RETURN_POINTER(v);
1957 }
#define LIMIT_RATIO
static void fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
static void g_box_consider_split(ConsiderSplitContext *context, int dimNum, float rightLower, int minLeftCount, float leftUpper, int maxLeftCount)
static int interval_cmp_lower(const void *i1, const void *i2)
static void adjustBox(BOX2DF *b, BOX2DF *addon)
static char * box2df_to_string(const BOX2DF *a)
static int interval_cmp_upper(const void *i1, const void *i2)
static int common_entry_cmp(const void *i1, const void *i2)
#define PLACE_RIGHT(box, off)
#define PLACE_LEFT(box, off)
static float box_penalty(BOX2DF *original, BOX2DF *new)

Here is the call graph for this function: