PostGIS  3.4.0dev-r@@SVN_REVISION@@

◆ gserialized_gist_picksplit()

Datum gserialized_gist_picksplit ( PG_FUNCTION_ARGS  )

Definition at line 1582 of file gserialized_gist_nd.c.

1583 {
1584 
1585  GistEntryVector *entryvec = (GistEntryVector *)PG_GETARG_POINTER(0);
1586 
1587  GIST_SPLITVEC *v = (GIST_SPLITVEC *)PG_GETARG_POINTER(1);
1588  OffsetNumber i;
1589  /* One union box for each half of the space. */
1590  GIDX **box_union;
1591  /* One offset number list for each half of the space. */
1592  OffsetNumber **list;
1593  /* One position index for each half of the space. */
1594  int *pos;
1595  GIDX *box_pageunion;
1596  GIDX *box_current;
1597  int direction = -1;
1598  bool all_entries_equal = true;
1599  OffsetNumber max_offset;
1600  int nbytes, ndims_pageunion, d;
1601  int posmin = entryvec->n;
1602 
1603  POSTGIS_DEBUG(4, "[GIST] 'picksplit' function called");
1604 
1605  /*
1606  ** First calculate the bounding box and maximum number of dimensions in this page.
1607  */
1608 
1609  max_offset = entryvec->n - 1;
1610  box_current = (GIDX *)PG_DETOAST_DATUM(entryvec->vector[FirstOffsetNumber].key);
1611  box_pageunion = gidx_copy(box_current);
1612 
1613  /* Calculate the containing box (box_pageunion) for the whole page we are going to split. */
1614  for (i = OffsetNumberNext(FirstOffsetNumber); i <= max_offset; i = OffsetNumberNext(i))
1615  {
1616  box_current = (GIDX *)PG_DETOAST_DATUM(entryvec->vector[i].key);
1617 
1618  if (all_entries_equal && !gidx_equals(box_pageunion, box_current))
1619  all_entries_equal = false;
1620 
1621  gidx_merge(&box_pageunion, box_current);
1622  }
1623 
1624  POSTGIS_DEBUGF(3, "[GIST] box_pageunion: %s", gidx_to_string(box_pageunion));
1625 
1626  /* Every box in the page is the same! So, we split and just put half the boxes in each child. */
1627  if (all_entries_equal)
1628  {
1629  POSTGIS_DEBUG(4, "[GIST] picksplit finds all entries equal!");
1631  PG_RETURN_POINTER(v);
1632  }
1633 
1634  /* Initialize memory structures. */
1635  nbytes = (max_offset + 2) * sizeof(OffsetNumber);
1636  ndims_pageunion = GIDX_NDIMS(box_pageunion);
1637  POSTGIS_DEBUGF(4, "[GIST] ndims_pageunion == %d", ndims_pageunion);
1638  pos = palloc(2 * ndims_pageunion * sizeof(int));
1639  list = palloc(2 * ndims_pageunion * sizeof(OffsetNumber *));
1640  box_union = palloc(2 * ndims_pageunion * sizeof(GIDX *));
1641  for (d = 0; d < ndims_pageunion; d++)
1642  {
1643  list[BELOW(d)] = (OffsetNumber *)palloc(nbytes);
1644  list[ABOVE(d)] = (OffsetNumber *)palloc(nbytes);
1645  box_union[BELOW(d)] = gidx_new(ndims_pageunion);
1646  box_union[ABOVE(d)] = gidx_new(ndims_pageunion);
1647  pos[BELOW(d)] = 0;
1648  pos[ABOVE(d)] = 0;
1649  }
1650 
1651  /*
1652  ** Assign each entry in the node to the volume partitions it belongs to,
1653  ** such as "above the x/y plane, left of the y/z plane, below the x/z plane".
1654  ** Each entry thereby ends up in three of the six partitions.
1655  */
1656  POSTGIS_DEBUG(4, "[GIST] 'picksplit' calculating best split axis");
1657  for (i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i))
1658  {
1659  box_current = (GIDX *)PG_DETOAST_DATUM(entryvec->vector[i].key);
1660 
1661  for (d = 0; d < ndims_pageunion; d++)
1662  {
1663  if (GIDX_GET_MIN(box_current, d) - GIDX_GET_MIN(box_pageunion, d) <
1664  GIDX_GET_MAX(box_pageunion, d) - GIDX_GET_MAX(box_current, d))
1666  list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1667  else
1669  list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1670  }
1671  }
1672 
1673  /*
1674  ** "Bad disposition", too many entries fell into one octant of the space, so no matter which
1675  ** plane we choose to split on, we're going to end up with a mostly full node. Where the
1676  ** data is pretty homogeneous (lots of duplicates) entries that are equidistant from the
1677  ** sides of the page union box can occasionally all end up in one place, leading
1678  ** to this condition.
1679  */
1680  if (gserialized_gist_picksplit_badratios(pos, ndims_pageunion))
1681  {
1682  /*
1683  ** Instead we split on center points and see if we do better.
1684  ** First calculate the average center point for each axis.
1685  */
1686  double *avgCenter = palloc(ndims_pageunion * sizeof(double));
1687 
1688  for (d = 0; d < ndims_pageunion; d++)
1689  avgCenter[d] = 0.0;
1690 
1691  POSTGIS_DEBUG(4, "[GIST] picksplit can't find good split axis, trying center point method");
1692 
1693  for (i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i))
1694  {
1695  box_current = (GIDX *)PG_DETOAST_DATUM(entryvec->vector[i].key);
1696  for (d = 0; d < ndims_pageunion; d++)
1697  avgCenter[d] += (GIDX_GET_MAX(box_current, d) + GIDX_GET_MIN(box_current, d)) / 2.0;
1698  }
1699  for (d = 0; d < ndims_pageunion; d++)
1700  {
1701  avgCenter[d] /= max_offset;
1702  pos[BELOW(d)] = pos[ABOVE(d)] = 0; /* Re-initialize our counters. */
1703  POSTGIS_DEBUGF(4, "[GIST] picksplit average center point[%d] = %.12g", d, avgCenter[d]);
1704  }
1705 
1706  /* For each of our entries... */
1707  for (i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i))
1708  {
1709  double center;
1710  box_current = (GIDX *)PG_DETOAST_DATUM(entryvec->vector[i].key);
1711 
1712  for (d = 0; d < ndims_pageunion; d++)
1713  {
1714  center = (GIDX_GET_MIN(box_current, d) + GIDX_GET_MAX(box_current, d)) / 2.0;
1715  if (center < avgCenter[d])
1717  list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1718  else if (FPeq(center, avgCenter[d]))
1719  if (pos[BELOW(d)] > pos[ABOVE(d)])
1721  &(box_union[ABOVE(d)]),
1722  box_current,
1723  &(pos[ABOVE(d)]),
1724  i);
1725  else
1727  &(box_union[BELOW(d)]),
1728  box_current,
1729  &(pos[BELOW(d)]),
1730  i);
1731  else
1733  list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1734  }
1735  }
1736 
1737  /* Do we have a good disposition now? If not, screw it, just cut the node in half. */
1738  if (gserialized_gist_picksplit_badratios(pos, ndims_pageunion))
1739  {
1740  POSTGIS_DEBUG(4,
1741  "[GIST] picksplit still cannot find a good split! just cutting the node in half");
1743  PG_RETURN_POINTER(v);
1744  }
1745  }
1746 
1747  /*
1748  ** Now, what splitting plane gives us the most even ratio of
1749  ** entries in our child pages? Since each split region has been apportioned entries
1750  ** against the same number of total entries, the axis that has the smallest maximum
1751  ** number of entries in its regions is the most evenly distributed.
1752  ** TODO: what if the distributions are equal in two or more axes?
1753  */
1754  for (d = 0; d < ndims_pageunion; d++)
1755  {
1756  int posd = Max(pos[ABOVE(d)], pos[BELOW(d)]);
1757  if (posd < posmin)
1758  {
1759  direction = d;
1760  posmin = posd;
1761  }
1762  }
1763  if (direction == -1 || posmin == entryvec->n)
1764  elog(ERROR, "Error in building split, unable to determine split direction.");
1765 
1766  POSTGIS_DEBUGF(3, "[GIST] 'picksplit' splitting on axis %d", direction);
1767 
1769  list[BELOW(direction)],
1770  pos[BELOW(direction)],
1771  &(box_union[BELOW(direction)]),
1772  list[ABOVE(direction)],
1773  pos[ABOVE(direction)],
1774  &(box_union[ABOVE(direction)]));
1775 
1776  POSTGIS_DEBUGF(4, "[GIST] spl_ldatum: %s", gidx_to_string((GIDX *)v->spl_ldatum));
1777  POSTGIS_DEBUGF(4, "[GIST] spl_rdatum: %s", gidx_to_string((GIDX *)v->spl_rdatum));
1778 
1779  POSTGIS_DEBUGF(
1780  4,
1781  "[GIST] axis %d: parent range (%.12g, %.12g) left range (%.12g, %.12g), right range (%.12g, %.12g)",
1782  direction,
1783  GIDX_GET_MIN(box_pageunion, direction),
1784  GIDX_GET_MAX(box_pageunion, direction),
1785  GIDX_GET_MIN((GIDX *)v->spl_ldatum, direction),
1786  GIDX_GET_MAX((GIDX *)v->spl_ldatum, direction),
1787  GIDX_GET_MIN((GIDX *)v->spl_rdatum, direction),
1788  GIDX_GET_MAX((GIDX *)v->spl_rdatum, direction));
1789 
1790  PG_RETURN_POINTER(v);
1791 }
static void gserialized_gist_picksplit_constructsplit(GIST_SPLITVEC *v, OffsetNumber *list1, int nlist1, GIDX **union1, OffsetNumber *list2, int nlist2, GIDX **union2)
static bool gserialized_gist_picksplit_badratios(int *pos, int dims)
static void gserialized_gist_picksplit_addlist(OffsetNumber *list, GIDX **box_union, GIDX *box_current, int *pos, int num)
GIDX * gidx_copy(GIDX *b)
#define ABOVE(d)
#define BELOW(d)
bool gidx_equals(GIDX *a, GIDX *b)
static void gserialized_gist_picksplit_fallback(GistEntryVector *entryvec, GIST_SPLITVEC *v)
void gidx_merge(GIDX **b_union, GIDX *b_new)

References ABOVE, BELOW, gidx_copy(), gidx_equals(), gidx_merge(), gserialized_gist_picksplit_addlist(), gserialized_gist_picksplit_badratios(), gserialized_gist_picksplit_constructsplit(), and gserialized_gist_picksplit_fallback().

Here is the call graph for this function: