PostGIS  2.1.10dev-r@@SVN_REVISION@@
Datum gserialized_gist_picksplit_2d ( PG_FUNCTION_ARGS  )

Definition at line 1574 of file gserialized_gist_2d.c.

References adjustBox(), ConsiderSplitContext::boundingBox, box_penalty(), common_entry_cmp(), CommonEntry::delta, ConsiderSplitContext::dim, ConsiderSplitContext::entriesCount, fallbackSplit(), ConsiderSplitContext::first, g_box_consider_split(), CommonEntry::index, interval_cmp_lower(), interval_cmp_upper(), ConsiderSplitContext::leftUpper, LIMIT_RATIO, SplitInterval::lower, PLACE_LEFT, PLACE_RIGHT, ConsiderSplitContext::rightLower, and SplitInterval::upper.

1575 {
1576  GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
1577  GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
1578  OffsetNumber i,
1579  maxoff;
1580  ConsiderSplitContext context;
1581  BOX2DF *box,
1582  *leftBox,
1583  *rightBox;
1584  int dim,
1585  commonEntriesCount;
1586  SplitInterval *intervalsLower,
1587  *intervalsUpper;
1588  CommonEntry *commonEntries;
1589  int nentries;
1590 
1591  POSTGIS_DEBUG(3, "[GIST] 'picksplit' entered");
1592 
1593  memset(&context, 0, sizeof(ConsiderSplitContext));
1594 
1595  maxoff = entryvec->n - 1;
1596  nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1;
1597 
1598  /* Allocate arrays for intervals along axes */
1599  intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1600  intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
1601 
1602  /*
1603  * Calculate the overall minimum bounding box over all the entries.
1604  */
1605  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1606  {
1607  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1608  if (i == FirstOffsetNumber)
1609  context.boundingBox = *box;
1610  else
1611  adjustBox(&context.boundingBox, box);
1612  }
1613 
1614  POSTGIS_DEBUGF(4, "boundingBox is %s", box2df_to_string(
1615  &context.boundingBox));
1616 
1617  /*
1618  * Iterate over axes for optimal split searching.
1619  */
1620  context.first = true; /* nothing selected yet */
1621  for (dim = 0; dim < 2; dim++)
1622  {
1623  float leftUpper,
1624  rightLower;
1625  int i1,
1626  i2;
1627 
1628  /* Project each entry as an interval on the selected axis. */
1629  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1630  {
1631  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1632  if (dim == 0)
1633  {
1634  intervalsLower[i - FirstOffsetNumber].lower = box->xmin;
1635  intervalsLower[i - FirstOffsetNumber].upper = box->xmax;
1636  }
1637  else
1638  {
1639  intervalsLower[i - FirstOffsetNumber].lower = box->ymin;
1640  intervalsLower[i - FirstOffsetNumber].upper = box->ymax;
1641  }
1642  }
1643 
1644  /*
1645  * Make two arrays of intervals: one sorted by lower bound and another
1646  * sorted by upper bound.
1647  */
1648  memcpy(intervalsUpper, intervalsLower,
1649  sizeof(SplitInterval) * nentries);
1650  qsort(intervalsLower, nentries, sizeof(SplitInterval),
1652  qsort(intervalsUpper, nentries, sizeof(SplitInterval),
1654 
1655  /*----
1656  * The goal is to form a left and right interval, so that every entry
1657  * interval is contained by either left or right interval (or both).
1658  *
1659  * For example, with the intervals (0,1), (1,3), (2,3), (2,4):
1660  *
1661  * 0 1 2 3 4
1662  * +-+
1663  * +---+
1664  * +-+
1665  * +---+
1666  *
1667  * The left and right intervals are of the form (0,a) and (b,4).
1668  * We first consider splits where b is the lower bound of an entry.
1669  * We iterate through all entries, and for each b, calculate the
1670  * smallest possible a. Then we consider splits where a is the
1671  * uppper bound of an entry, and for each a, calculate the greatest
1672  * possible b.
1673  *
1674  * In the above example, the first loop would consider splits:
1675  * b=0: (0,1)-(0,4)
1676  * b=1: (0,1)-(1,4)
1677  * b=2: (0,3)-(2,4)
1678  *
1679  * And the second loop:
1680  * a=1: (0,1)-(1,4)
1681  * a=3: (0,3)-(2,4)
1682  * a=4: (0,4)-(2,4)
1683  */
1684 
1685  /*
1686  * Iterate over lower bound of right group, finding smallest possible
1687  * upper bound of left group.
1688  */
1689  i1 = 0;
1690  i2 = 0;
1691  rightLower = intervalsLower[i1].lower;
1692  leftUpper = intervalsUpper[i2].lower;
1693  while (true)
1694  {
1695  /*
1696  * Find next lower bound of right group.
1697  */
1698  while (i1 < nentries && (rightLower == intervalsLower[i1].lower ||
1699  isnan(intervalsLower[i1].lower)))
1700  {
1701  leftUpper = Max(leftUpper, intervalsLower[i1].upper);
1702  i1++;
1703  }
1704  if (i1 >= nentries)
1705  break;
1706  rightLower = intervalsLower[i1].lower;
1707 
1708  /*
1709  * Find count of intervals which anyway should be placed to the
1710  * left group.
1711  */
1712  while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
1713  i2++;
1714 
1715  /*
1716  * Consider found split.
1717  */
1718  g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2);
1719  }
1720 
1721  /*
1722  * Iterate over upper bound of left group finding greates possible
1723  * lower bound of right group.
1724  */
1725  i1 = nentries - 1;
1726  i2 = nentries - 1;
1727  rightLower = intervalsLower[i1].upper;
1728  leftUpper = intervalsUpper[i2].upper;
1729  while (true)
1730  {
1731  /*
1732  * Find next upper bound of left group.
1733  */
1734  while (i2 >= 0 && (leftUpper == intervalsUpper[i2].upper ||
1735  isnan(intervalsUpper[i2].upper)))
1736  {
1737  rightLower = Min(rightLower, intervalsUpper[i2].lower);
1738  i2--;
1739  }
1740  if (i2 < 0)
1741  break;
1742  leftUpper = intervalsUpper[i2].upper;
1743 
1744  /*
1745  * Find count of intervals which anyway should be placed to the
1746  * right group.
1747  */
1748  while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
1749  i1--;
1750 
1751  /*
1752  * Consider found split.
1753  */
1754  g_box_consider_split(&context, dim,
1755  rightLower, i1 + 1, leftUpper, i2 + 1);
1756  }
1757  }
1758 
1759  /*
1760  * If we failed to find any acceptable splits, use trivial split.
1761  */
1762  if (context.first)
1763  {
1764  POSTGIS_DEBUG(4, "no acceptable splits, trivial split");
1765  fallbackSplit(entryvec, v);
1766  PG_RETURN_POINTER(v);
1767  }
1768 
1769  /*
1770  * Ok, we have now selected the split across one axis.
1771  *
1772  * While considering the splits, we already determined that there will be
1773  * enough entries in both groups to reach the desired ratio, but we did
1774  * not memorize which entries go to which group. So determine that now.
1775  */
1776 
1777  POSTGIS_DEBUGF(4, "split direction: %d", context.dim);
1778 
1779  /* Allocate vectors for results */
1780  v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1781  v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1782  v->spl_nleft = 0;
1783  v->spl_nright = 0;
1784 
1785  /* Allocate bounding boxes of left and right groups */
1786  leftBox = palloc0(sizeof(BOX2DF));
1787  rightBox = palloc0(sizeof(BOX2DF));
1788 
1789  /*
1790  * Allocate an array for "common entries" - entries which can be placed to
1791  * either group without affecting overlap along selected axis.
1792  */
1793  commonEntriesCount = 0;
1794  commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
1795 
1796  /* Helper macros to place an entry in the left or right group */
1797 #define PLACE_LEFT(box, off) \
1798  do { \
1799  if (v->spl_nleft > 0) \
1800  adjustBox(leftBox, box); \
1801  else \
1802  *leftBox = *(box); \
1803  v->spl_left[v->spl_nleft++] = off; \
1804  } while(0)
1805 
1806 #define PLACE_RIGHT(box, off) \
1807  do { \
1808  if (v->spl_nright > 0) \
1809  adjustBox(rightBox, box); \
1810  else \
1811  *rightBox = *(box); \
1812  v->spl_right[v->spl_nright++] = off; \
1813  } while(0)
1814 
1815  /*
1816  * Distribute entries which can be distributed unambiguously, and collect
1817  * common entries.
1818  */
1819  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1820  {
1821  float lower,
1822  upper;
1823 
1824  /*
1825  * Get upper and lower bounds along selected axis.
1826  */
1827  box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
1828  if (context.dim == 0)
1829  {
1830  lower = box->xmin;
1831  upper = box->xmax;
1832  }
1833  else
1834  {
1835  lower = box->ymin;
1836  upper = box->ymax;
1837  }
1838 
1839  if (upper <= context.leftUpper || isnan(upper))
1840  {
1841  /* Fits to the left group */
1842  if (lower >= context.rightLower || isnan(lower))
1843  {
1844  /* Fits also to the right group, so "common entry" */
1845  commonEntries[commonEntriesCount++].index = i;
1846  }
1847  else
1848  {
1849  /* Doesn't fit to the right group, so join to the left group */
1850  PLACE_LEFT(box, i);
1851  }
1852  }
1853  else
1854  {
1855  /*
1856  * Each entry should fit on either left or right group. Since this
1857  * entry didn't fit on the left group, it better fit in the right
1858  * group.
1859  */
1860  Assert(lower >= context.rightLower);
1861 
1862  /* Doesn't fit to the left group, so join to the right group */
1863  PLACE_RIGHT(box, i);
1864  }
1865  }
1866 
1867  POSTGIS_DEBUGF(4, "leftBox is %s", box2df_to_string(leftBox));
1868  POSTGIS_DEBUGF(4, "rightBox is %s", box2df_to_string(rightBox));
1869 
1870  /*
1871  * Distribute "common entries", if any.
1872  */
1873  if (commonEntriesCount > 0)
1874  {
1875  /*
1876  * Calculate minimum number of entries that must be placed in both
1877  * groups, to reach LIMIT_RATIO.
1878  */
1879  int m = ceil(LIMIT_RATIO * (double) nentries);
1880 
1881  /*
1882  * Calculate delta between penalties of join "common entries" to
1883  * different groups.
1884  */
1885  for (i = 0; i < commonEntriesCount; i++)
1886  {
1887  box = (BOX2DF *) DatumGetPointer(entryvec->vector[
1888  commonEntries[i].index].key);
1889  commonEntries[i].delta = Abs(box_penalty(leftBox, box) -
1890  box_penalty(rightBox, box));
1891  }
1892 
1893  /*
1894  * Sort "common entries" by calculated deltas in order to distribute
1895  * the most ambiguous entries first.
1896  */
1897  qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp);
1898 
1899  /*
1900  * Distribute "common entries" between groups.
1901  */
1902  for (i = 0; i < commonEntriesCount; i++)
1903  {
1904  box = (BOX2DF *) DatumGetPointer(entryvec->vector[
1905  commonEntries[i].index].key);
1906 
1907  /*
1908  * Check if we have to place this entry in either group to achieve
1909  * LIMIT_RATIO.
1910  */
1911  if (v->spl_nleft + (commonEntriesCount - i) <= m)
1912  PLACE_LEFT(box, commonEntries[i].index);
1913  else if (v->spl_nright + (commonEntriesCount - i) <= m)
1914  PLACE_RIGHT(box, commonEntries[i].index);
1915  else
1916  {
1917  /* Otherwise select the group by minimal penalty */
1918  if (box_penalty(leftBox, box) < box_penalty(rightBox, box))
1919  PLACE_LEFT(box, commonEntries[i].index);
1920  else
1921  PLACE_RIGHT(box, commonEntries[i].index);
1922  }
1923  }
1924  }
1925  v->spl_ldatum = PointerGetDatum(leftBox);
1926  v->spl_rdatum = PointerGetDatum(rightBox);
1927 
1928  POSTGIS_DEBUG(4, "[GIST] 'picksplit' completed");
1929 
1930  PG_RETURN_POINTER(v);
1931 }
#define LIMIT_RATIO
static void fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
static void g_box_consider_split(ConsiderSplitContext *context, int dimNum, float rightLower, int minLeftCount, float leftUpper, int maxLeftCount)
static int interval_cmp_lower(const void *i1, const void *i2)
static void adjustBox(BOX2DF *b, BOX2DF *addon)
static int interval_cmp_upper(const void *i1, const void *i2)
static int common_entry_cmp(const void *i1, const void *i2)
#define PLACE_RIGHT(box, off)
#define PLACE_LEFT(box, off)
static float box_penalty(BOX2DF *original, BOX2DF *new)

Here is the call graph for this function: