PostGIS  2.5.0beta2dev-r@@SVN_REVISION@@

◆ gserialized_gist_picksplit()

Datum gserialized_gist_picksplit ( PG_FUNCTION_ARGS  )

Definition at line 1792 of file gserialized_gist_nd.c.

References ABOVE, BELOW, gidx_copy(), gidx_equals(), gidx_in(), gidx_merge(), gserialized_gist_picksplit_addlist(), gserialized_gist_picksplit_badratios(), gserialized_gist_picksplit_constructsplit(), gserialized_gist_picksplit_fallback(), and PG_FUNCTION_INFO_V1().

1793 {
1794 
1795  GistEntryVector *entryvec = (GistEntryVector*) PG_GETARG_POINTER(0);
1796 
1797  GIST_SPLITVEC *v = (GIST_SPLITVEC*) PG_GETARG_POINTER(1);
1798  OffsetNumber i;
1799  /* One union box for each half of the space. */
1800  GIDX **box_union;
1801  /* One offset number list for each half of the space. */
1802  OffsetNumber **list;
1803  /* One position index for each half of the space. */
1804  int *pos;
1805  GIDX *box_pageunion;
1806  GIDX *box_current;
1807  int direction = -1;
1808  bool all_entries_equal = true;
1809  OffsetNumber max_offset;
1810  int nbytes, ndims_pageunion, d;
1811  int posmin = entryvec->n;
1812 
1813  POSTGIS_DEBUG(4, "[GIST] 'picksplit' function called");
1814 
1815  /*
1816  ** First calculate the bounding box and maximum number of dimensions in this page.
1817  */
1818 
1819  max_offset = entryvec->n - 1;
1820  box_current = (GIDX*) DatumGetPointer(entryvec->vector[FirstOffsetNumber].key);
1821  box_pageunion = gidx_copy(box_current);
1822 
1823  /* Calculate the containing box (box_pageunion) for the whole page we are going to split. */
1824  for ( i = OffsetNumberNext(FirstOffsetNumber); i <= max_offset; i = OffsetNumberNext(i) )
1825  {
1826  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1827 
1828  if ( all_entries_equal == true && ! gidx_equals (box_pageunion, box_current) )
1829  all_entries_equal = false;
1830 
1831  gidx_merge( &box_pageunion, box_current );
1832  }
1833 
1834  POSTGIS_DEBUGF(3, "[GIST] box_pageunion: %s", gidx_to_string(box_pageunion));
1835 
1836  /* Every box in the page is the same! So, we split and just put half the boxes in each child. */
1837  if ( all_entries_equal )
1838  {
1839  POSTGIS_DEBUG(4, "[GIST] picksplit finds all entries equal!");
1841  PG_RETURN_POINTER(v);
1842  }
1843 
1844  /* Initialize memory structures. */
1845  nbytes = (max_offset + 2) * sizeof(OffsetNumber);
1846  ndims_pageunion = GIDX_NDIMS(box_pageunion);
1847  POSTGIS_DEBUGF(4, "[GIST] ndims_pageunion == %d", ndims_pageunion);
1848  pos = palloc(2*ndims_pageunion * sizeof(int));
1849  list = palloc(2*ndims_pageunion * sizeof(OffsetNumber*));
1850  box_union = palloc(2*ndims_pageunion * sizeof(GIDX*));
1851  for ( d = 0; d < ndims_pageunion; d++ )
1852  {
1853  list[BELOW(d)] = (OffsetNumber*) palloc(nbytes);
1854  list[ABOVE(d)] = (OffsetNumber*) palloc(nbytes);
1855  box_union[BELOW(d)] = gidx_new(ndims_pageunion);
1856  box_union[ABOVE(d)] = gidx_new(ndims_pageunion);
1857  pos[BELOW(d)] = 0;
1858  pos[ABOVE(d)] = 0;
1859  }
1860 
1861  /*
1862  ** Assign each entry in the node to the volume partitions it belongs to,
1863  ** such as "above the x/y plane, left of the y/z plane, below the x/z plane".
1864  ** Each entry thereby ends up in three of the six partitions.
1865  */
1866  POSTGIS_DEBUG(4, "[GIST] 'picksplit' calculating best split axis");
1867  for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) )
1868  {
1869  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1870 
1871  for ( d = 0; d < ndims_pageunion; d++ )
1872  {
1873  if ( GIDX_GET_MIN(box_current,d)-GIDX_GET_MIN(box_pageunion,d) < GIDX_GET_MAX(box_pageunion,d)-GIDX_GET_MAX(box_current,d) )
1874  {
1875  gserialized_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1876  }
1877  else
1878  {
1879  gserialized_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1880  }
1881 
1882  }
1883 
1884  }
1885 
1886  /*
1887  ** "Bad disposition", too many entries fell into one octant of the space, so no matter which
1888  ** plane we choose to split on, we're going to end up with a mostly full node. Where the
1889  ** data is pretty homogeneous (lots of duplicates) entries that are equidistant from the
1890  ** sides of the page union box can occasionally all end up in one place, leading
1891  ** to this condition.
1892  */
1893  if ( gserialized_gist_picksplit_badratios(pos,ndims_pageunion) == true )
1894  {
1895  /*
1896  ** Instead we split on center points and see if we do better.
1897  ** First calculate the average center point for each axis.
1898  */
1899  double *avgCenter = palloc(ndims_pageunion * sizeof(double));
1900 
1901  for ( d = 0; d < ndims_pageunion; d++ )
1902  {
1903  avgCenter[d] = 0.0;
1904  }
1905 
1906  POSTGIS_DEBUG(4, "[GIST] picksplit can't find good split axis, trying center point method");
1907 
1908  for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) )
1909  {
1910  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1911  for ( d = 0; d < ndims_pageunion; d++ )
1912  {
1913  avgCenter[d] += (GIDX_GET_MAX(box_current,d) + GIDX_GET_MIN(box_current,d)) / 2.0;
1914  }
1915  }
1916  for ( d = 0; d < ndims_pageunion; d++ )
1917  {
1918  avgCenter[d] /= max_offset;
1919  pos[BELOW(d)] = pos[ABOVE(d)] = 0; /* Re-initialize our counters. */
1920  POSTGIS_DEBUGF(4, "[GIST] picksplit average center point[%d] = %.12g", d, avgCenter[d]);
1921  }
1922 
1923  /* For each of our entries... */
1924  for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) )
1925  {
1926  double center;
1927  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1928 
1929  for ( d = 0; d < ndims_pageunion; d++ )
1930  {
1931  center = (GIDX_GET_MIN(box_current,d)+GIDX_GET_MAX(box_current,d))/2.0;
1932  if ( center < avgCenter[d] )
1933  gserialized_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1934  else if ( FPeq(center, avgCenter[d]) )
1935  if ( pos[BELOW(d)] > pos[ABOVE(d)] )
1936  gserialized_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1937  else
1938  gserialized_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1939  else
1940  gserialized_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1941  }
1942 
1943  }
1944 
1945  /* Do we have a good disposition now? If not, screw it, just cut the node in half. */
1946  if ( gserialized_gist_picksplit_badratios(pos,ndims_pageunion) == true )
1947  {
1948  POSTGIS_DEBUG(4, "[GIST] picksplit still cannot find a good split! just cutting the node in half");
1950  PG_RETURN_POINTER(v);
1951  }
1952 
1953  }
1954 
1955  /*
1956  ** Now, what splitting plane gives us the most even ratio of
1957  ** entries in our child pages? Since each split region has been apportioned entries
1958  ** against the same number of total entries, the axis that has the smallest maximum
1959  ** number of entries in its regions is the most evenly distributed.
1960  ** TODO: what if the distributions are equal in two or more axes?
1961  */
1962  for ( d = 0; d < ndims_pageunion; d++ )
1963  {
1964  int posd = Max(pos[ABOVE(d)],pos[BELOW(d)]);
1965  if ( posd < posmin )
1966  {
1967  direction = d;
1968  posmin = posd;
1969  }
1970  }
1971  if ( direction == -1 || posmin == entryvec->n )
1972  {
1973  /* ERROR OUT HERE */
1974  elog(ERROR, "Error in building split, unable to determine split direction.");
1975  }
1976 
1977  POSTGIS_DEBUGF(3, "[GIST] 'picksplit' splitting on axis %d", direction);
1978 
1980  pos[BELOW(direction)],
1981  &(box_union[BELOW(direction)]),
1982  list[ABOVE(direction)],
1983  pos[ABOVE(direction)],
1984  &(box_union[ABOVE(direction)]) );
1985 
1986  POSTGIS_DEBUGF(4, "[GIST] spl_ldatum: %s", gidx_to_string((GIDX*)v->spl_ldatum));
1987  POSTGIS_DEBUGF(4, "[GIST] spl_rdatum: %s", gidx_to_string((GIDX*)v->spl_rdatum));
1988 
1989  POSTGIS_DEBUGF(4, "[GIST] axis %d: parent range (%.12g, %.12g) left range (%.12g, %.12g), right range (%.12g, %.12g)",
1990  direction,
1991  GIDX_GET_MIN(box_pageunion, direction), GIDX_GET_MAX(box_pageunion, direction),
1992  GIDX_GET_MIN((GIDX*)v->spl_ldatum, direction), GIDX_GET_MAX((GIDX*)v->spl_ldatum, direction),
1993  GIDX_GET_MIN((GIDX*)v->spl_rdatum, direction), GIDX_GET_MAX((GIDX*)v->spl_rdatum, direction) );
1994 
1995  PG_RETURN_POINTER(v);
1996 
1997 }
void gidx_merge(GIDX **b_union, GIDX *b_new)
static void gserialized_gist_picksplit_constructsplit(GIST_SPLITVEC *v, OffsetNumber *list1, int nlist1, GIDX **union1, OffsetNumber *list2, int nlist2, GIDX **union2)
static bool gidx_equals(GIDX *a, GIDX *b)
#define BELOW(d)
static bool gserialized_gist_picksplit_badratios(int *pos, int dims)
static void gserialized_gist_picksplit_fallback(GistEntryVector *entryvec, GIST_SPLITVEC *v)
static void gserialized_gist_picksplit_addlist(OffsetNumber *list, GIDX **box_union, GIDX *box_current, int *pos, int num)
GIDX * gidx_copy(GIDX *b)
#define ABOVE(d)
Here is the call graph for this function: