PostGIS  2.2.8dev-r@@SVN_REVISION@@

◆ gserialized_gist_picksplit()

Datum gserialized_gist_picksplit ( PG_FUNCTION_ARGS  )

Definition at line 1493 of file gserialized_gist_nd.c.

References ABOVE, BELOW, FPeq, gidx_copy(), gidx_equals(), gidx_in(), gidx_merge(), gserialized_gist_picksplit_addlist(), gserialized_gist_picksplit_badratios(), gserialized_gist_picksplit_constructsplit(), gserialized_gist_picksplit_fallback(), PG_FUNCTION_INFO_V1(), and TRUE.

1494 {
1495 
1496  GistEntryVector *entryvec = (GistEntryVector*) PG_GETARG_POINTER(0);
1497 
1498  GIST_SPLITVEC *v = (GIST_SPLITVEC*) PG_GETARG_POINTER(1);
1499  OffsetNumber i;
1500  /* One union box for each half of the space. */
1501  GIDX **box_union;
1502  /* One offset number list for each half of the space. */
1503  OffsetNumber **list;
1504  /* One position index for each half of the space. */
1505  int *pos;
1506  GIDX *box_pageunion;
1507  GIDX *box_current;
1508  int direction = -1;
1509  bool all_entries_equal = true;
1510  OffsetNumber max_offset;
1511  int nbytes, ndims_pageunion, d;
1512  int posmin = entryvec->n;
1513 
1514  POSTGIS_DEBUG(4, "[GIST] 'picksplit' function called");
1515 
1516  /*
1517  ** First calculate the bounding box and maximum number of dimensions in this page.
1518  */
1519 
1520  max_offset = entryvec->n - 1;
1521  box_current = (GIDX*) DatumGetPointer(entryvec->vector[FirstOffsetNumber].key);
1522  box_pageunion = gidx_copy(box_current);
1523 
1524  /* Calculate the containing box (box_pageunion) for the whole page we are going to split. */
1525  for ( i = OffsetNumberNext(FirstOffsetNumber); i <= max_offset; i = OffsetNumberNext(i) )
1526  {
1527  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1528 
1529  if ( all_entries_equal == true && ! gidx_equals (box_pageunion, box_current) )
1530  all_entries_equal = false;
1531 
1532  gidx_merge( &box_pageunion, box_current );
1533  }
1534 
1535  POSTGIS_DEBUGF(3, "[GIST] box_pageunion: %s", gidx_to_string(box_pageunion));
1536 
1537  /* Every box in the page is the same! So, we split and just put half the boxes in each child. */
1538  if ( all_entries_equal )
1539  {
1540  POSTGIS_DEBUG(4, "[GIST] picksplit finds all entries equal!");
1541  gserialized_gist_picksplit_fallback(entryvec, v);
1542  PG_RETURN_POINTER(v);
1543  }
1544 
1545  /* Initialize memory structures. */
1546  nbytes = (max_offset + 2) * sizeof(OffsetNumber);
1547  ndims_pageunion = GIDX_NDIMS(box_pageunion);
1548  POSTGIS_DEBUGF(4, "[GIST] ndims_pageunion == %d", ndims_pageunion);
1549  pos = palloc(2*ndims_pageunion * sizeof(int));
1550  list = palloc(2*ndims_pageunion * sizeof(OffsetNumber*));
1551  box_union = palloc(2*ndims_pageunion * sizeof(GIDX*));
1552  for ( d = 0; d < ndims_pageunion; d++ )
1553  {
1554  list[BELOW(d)] = (OffsetNumber*) palloc(nbytes);
1555  list[ABOVE(d)] = (OffsetNumber*) palloc(nbytes);
1556  box_union[BELOW(d)] = gidx_new(ndims_pageunion);
1557  box_union[ABOVE(d)] = gidx_new(ndims_pageunion);
1558  pos[BELOW(d)] = 0;
1559  pos[ABOVE(d)] = 0;
1560  }
1561 
1562  /*
1563  ** Assign each entry in the node to the volume partitions it belongs to,
1564  ** such as "above the x/y plane, left of the y/z plane, below the x/z plane".
1565  ** Each entry thereby ends up in three of the six partitions.
1566  */
1567  POSTGIS_DEBUG(4, "[GIST] 'picksplit' calculating best split axis");
1568  for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) )
1569  {
1570  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1571 
1572  for ( d = 0; d < ndims_pageunion; d++ )
1573  {
1574  if ( GIDX_GET_MIN(box_current,d)-GIDX_GET_MIN(box_pageunion,d) < GIDX_GET_MAX(box_pageunion,d)-GIDX_GET_MAX(box_current,d) )
1575  {
1576  gserialized_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1577  }
1578  else
1579  {
1580  gserialized_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1581  }
1582 
1583  }
1584 
1585  }
1586 
1587  /*
1588  ** "Bad disposition", too many entries fell into one octant of the space, so no matter which
1589  ** plane we choose to split on, we're going to end up with a mostly full node. Where the
1590  ** data is pretty homogeneous (lots of duplicates) entries that are equidistant from the
1591  ** sides of the page union box can occasionally all end up in one place, leading
1592  ** to this condition.
1593  */
1594  if ( gserialized_gist_picksplit_badratios(pos,ndims_pageunion) == TRUE )
1595  {
1596  /*
1597  ** Instead we split on center points and see if we do better.
1598  ** First calculate the average center point for each axis.
1599  */
1600  double *avgCenter = palloc(ndims_pageunion * sizeof(double));
1601 
1602  for ( d = 0; d < ndims_pageunion; d++ )
1603  {
1604  avgCenter[d] = 0.0;
1605  }
1606 
1607  POSTGIS_DEBUG(4, "[GIST] picksplit can't find good split axis, trying center point method");
1608 
1609  for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) )
1610  {
1611  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1612  for ( d = 0; d < ndims_pageunion; d++ )
1613  {
1614  avgCenter[d] += (GIDX_GET_MAX(box_current,d) + GIDX_GET_MIN(box_current,d)) / 2.0;
1615  }
1616  }
1617  for ( d = 0; d < ndims_pageunion; d++ )
1618  {
1619  avgCenter[d] /= max_offset;
1620  pos[BELOW(d)] = pos[ABOVE(d)] = 0; /* Re-initialize our counters. */
1621  POSTGIS_DEBUGF(4, "[GIST] picksplit average center point[%d] = %.12g", d, avgCenter[d]);
1622  }
1623 
1624  /* For each of our entries... */
1625  for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) )
1626  {
1627  double center;
1628  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1629 
1630  for ( d = 0; d < ndims_pageunion; d++ )
1631  {
1632  center = (GIDX_GET_MIN(box_current,d)+GIDX_GET_MAX(box_current,d))/2.0;
1633  if ( center < avgCenter[d] )
1634  gserialized_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1635  else if ( FPeq(center, avgCenter[d]) )
1636  if ( pos[BELOW(d)] > pos[ABOVE(d)] )
1637  gserialized_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1638  else
1639  gserialized_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1640  else
1641  gserialized_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1642  }
1643 
1644  }
1645 
1646  /* Do we have a good disposition now? If not, screw it, just cut the node in half. */
1647  if ( gserialized_gist_picksplit_badratios(pos,ndims_pageunion) == TRUE )
1648  {
1649  POSTGIS_DEBUG(4, "[GIST] picksplit still cannot find a good split! just cutting the node in half");
1650  gserialized_gist_picksplit_fallback(entryvec, v);
1651  PG_RETURN_POINTER(v);
1652  }
1653 
1654  }
1655 
1656  /*
1657  ** Now, what splitting plane gives us the most even ratio of
1658  ** entries in our child pages? Since each split region has been apportioned entries
1659  ** against the same number of total entries, the axis that has the smallest maximum
1660  ** number of entries in its regions is the most evenly distributed.
1661  ** TODO: what if the distributions are equal in two or more axes?
1662  */
1663  for ( d = 0; d < ndims_pageunion; d++ )
1664  {
1665  int posd = Max(pos[ABOVE(d)],pos[BELOW(d)]);
1666  if ( posd < posmin )
1667  {
1668  direction = d;
1669  posmin = posd;
1670  }
1671  }
1672  if ( direction == -1 || posmin == entryvec->n )
1673  {
1674  /* ERROR OUT HERE */
1675  elog(ERROR, "Error in building split, unable to determine split direction.");
1676  }
1677 
1678  POSTGIS_DEBUGF(3, "[GIST] 'picksplit' splitting on axis %d", direction);
1679 
1680  gserialized_gist_picksplit_constructsplit(v, list[BELOW(direction)],
1681  pos[BELOW(direction)],
1682  &(box_union[BELOW(direction)]),
1683  list[ABOVE(direction)],
1684  pos[ABOVE(direction)],
1685  &(box_union[ABOVE(direction)]) );
1686 
1687  POSTGIS_DEBUGF(4, "[GIST] spl_ldatum: %s", gidx_to_string((GIDX*)v->spl_ldatum));
1688  POSTGIS_DEBUGF(4, "[GIST] spl_rdatum: %s", gidx_to_string((GIDX*)v->spl_rdatum));
1689 
1690  POSTGIS_DEBUGF(4, "[GIST] axis %d: parent range (%.12g, %.12g) left range (%.12g, %.12g), right range (%.12g, %.12g)",
1691  direction,
1692  GIDX_GET_MIN(box_pageunion, direction), GIDX_GET_MAX(box_pageunion, direction),
1693  GIDX_GET_MIN((GIDX*)v->spl_ldatum, direction), GIDX_GET_MAX((GIDX*)v->spl_ldatum, direction),
1694  GIDX_GET_MIN((GIDX*)v->spl_rdatum, direction), GIDX_GET_MAX((GIDX*)v->spl_rdatum, direction) );
1695 
1696  PG_RETURN_POINTER(v);
1697 
1698 }
static void gidx_merge(GIDX **b_union, GIDX *b_new)
static GIDX * gidx_copy(GIDX *b)
static void gserialized_gist_picksplit_constructsplit(GIST_SPLITVEC *v, OffsetNumber *list1, int nlist1, GIDX **union1, OffsetNumber *list2, int nlist2, GIDX **union2)
static bool gidx_equals(GIDX *a, GIDX *b)
#define BELOW(d)
static bool gserialized_gist_picksplit_badratios(int *pos, int dims)
static void gserialized_gist_picksplit_fallback(GistEntryVector *entryvec, GIST_SPLITVEC *v)
static void gserialized_gist_picksplit_addlist(OffsetNumber *list, GIDX **box_union, GIDX *box_current, int *pos, int num)
#define FPeq(A, B)
Definition: box2d.c:11
#define ABOVE(d)
#define TRUE
Definition: dbfopen.c:169
Here is the call graph for this function: