PostGIS  2.4.9dev-r@@SVN_REVISION@@

◆ gserialized_gist_picksplit()

Datum gserialized_gist_picksplit ( PG_FUNCTION_ARGS  )

Definition at line 1795 of file gserialized_gist_nd.c.

References ABOVE, BELOW, FPeq, gidx_copy(), gidx_equals(), gidx_in(), gidx_merge(), gserialized_gist_picksplit_addlist(), gserialized_gist_picksplit_badratios(), gserialized_gist_picksplit_constructsplit(), gserialized_gist_picksplit_fallback(), PG_FUNCTION_INFO_V1(), and TRUE.

1796 {
1797 
1798  GistEntryVector *entryvec = (GistEntryVector*) PG_GETARG_POINTER(0);
1799 
1800  GIST_SPLITVEC *v = (GIST_SPLITVEC*) PG_GETARG_POINTER(1);
1801  OffsetNumber i;
1802  /* One union box for each half of the space. */
1803  GIDX **box_union;
1804  /* One offset number list for each half of the space. */
1805  OffsetNumber **list;
1806  /* One position index for each half of the space. */
1807  int *pos;
1808  GIDX *box_pageunion;
1809  GIDX *box_current;
1810  int direction = -1;
1811  bool all_entries_equal = true;
1812  OffsetNumber max_offset;
1813  int nbytes, ndims_pageunion, d;
1814  int posmin = entryvec->n;
1815 
1816  POSTGIS_DEBUG(4, "[GIST] 'picksplit' function called");
1817 
1818  /*
1819  ** First calculate the bounding box and maximum number of dimensions in this page.
1820  */
1821 
1822  max_offset = entryvec->n - 1;
1823  box_current = (GIDX*) DatumGetPointer(entryvec->vector[FirstOffsetNumber].key);
1824  box_pageunion = gidx_copy(box_current);
1825 
1826  /* Calculate the containing box (box_pageunion) for the whole page we are going to split. */
1827  for ( i = OffsetNumberNext(FirstOffsetNumber); i <= max_offset; i = OffsetNumberNext(i) )
1828  {
1829  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1830 
1831  if ( all_entries_equal == true && ! gidx_equals (box_pageunion, box_current) )
1832  all_entries_equal = false;
1833 
1834  gidx_merge( &box_pageunion, box_current );
1835  }
1836 
1837  POSTGIS_DEBUGF(3, "[GIST] box_pageunion: %s", gidx_to_string(box_pageunion));
1838 
1839  /* Every box in the page is the same! So, we split and just put half the boxes in each child. */
1840  if ( all_entries_equal )
1841  {
1842  POSTGIS_DEBUG(4, "[GIST] picksplit finds all entries equal!");
1844  PG_RETURN_POINTER(v);
1845  }
1846 
1847  /* Initialize memory structures. */
1848  nbytes = (max_offset + 2) * sizeof(OffsetNumber);
1849  ndims_pageunion = GIDX_NDIMS(box_pageunion);
1850  POSTGIS_DEBUGF(4, "[GIST] ndims_pageunion == %d", ndims_pageunion);
1851  pos = palloc(2*ndims_pageunion * sizeof(int));
1852  list = palloc(2*ndims_pageunion * sizeof(OffsetNumber*));
1853  box_union = palloc(2*ndims_pageunion * sizeof(GIDX*));
1854  for ( d = 0; d < ndims_pageunion; d++ )
1855  {
1856  list[BELOW(d)] = (OffsetNumber*) palloc(nbytes);
1857  list[ABOVE(d)] = (OffsetNumber*) palloc(nbytes);
1858  box_union[BELOW(d)] = gidx_new(ndims_pageunion);
1859  box_union[ABOVE(d)] = gidx_new(ndims_pageunion);
1860  pos[BELOW(d)] = 0;
1861  pos[ABOVE(d)] = 0;
1862  }
1863 
1864  /*
1865  ** Assign each entry in the node to the volume partitions it belongs to,
1866  ** such as "above the x/y plane, left of the y/z plane, below the x/z plane".
1867  ** Each entry thereby ends up in three of the six partitions.
1868  */
1869  POSTGIS_DEBUG(4, "[GIST] 'picksplit' calculating best split axis");
1870  for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) )
1871  {
1872  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1873 
1874  for ( d = 0; d < ndims_pageunion; d++ )
1875  {
1876  if ( GIDX_GET_MIN(box_current,d)-GIDX_GET_MIN(box_pageunion,d) < GIDX_GET_MAX(box_pageunion,d)-GIDX_GET_MAX(box_current,d) )
1877  {
1878  gserialized_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1879  }
1880  else
1881  {
1882  gserialized_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1883  }
1884 
1885  }
1886 
1887  }
1888 
1889  /*
1890  ** "Bad disposition", too many entries fell into one octant of the space, so no matter which
1891  ** plane we choose to split on, we're going to end up with a mostly full node. Where the
1892  ** data is pretty homogeneous (lots of duplicates) entries that are equidistant from the
1893  ** sides of the page union box can occasionally all end up in one place, leading
1894  ** to this condition.
1895  */
1896  if ( gserialized_gist_picksplit_badratios(pos,ndims_pageunion) == TRUE )
1897  {
1898  /*
1899  ** Instead we split on center points and see if we do better.
1900  ** First calculate the average center point for each axis.
1901  */
1902  double *avgCenter = palloc(ndims_pageunion * sizeof(double));
1903 
1904  for ( d = 0; d < ndims_pageunion; d++ )
1905  {
1906  avgCenter[d] = 0.0;
1907  }
1908 
1909  POSTGIS_DEBUG(4, "[GIST] picksplit can't find good split axis, trying center point method");
1910 
1911  for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) )
1912  {
1913  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1914  for ( d = 0; d < ndims_pageunion; d++ )
1915  {
1916  avgCenter[d] += (GIDX_GET_MAX(box_current,d) + GIDX_GET_MIN(box_current,d)) / 2.0;
1917  }
1918  }
1919  for ( d = 0; d < ndims_pageunion; d++ )
1920  {
1921  avgCenter[d] /= max_offset;
1922  pos[BELOW(d)] = pos[ABOVE(d)] = 0; /* Re-initialize our counters. */
1923  POSTGIS_DEBUGF(4, "[GIST] picksplit average center point[%d] = %.12g", d, avgCenter[d]);
1924  }
1925 
1926  /* For each of our entries... */
1927  for ( i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i) )
1928  {
1929  double center;
1930  box_current = (GIDX*) DatumGetPointer(entryvec->vector[i].key);
1931 
1932  for ( d = 0; d < ndims_pageunion; d++ )
1933  {
1934  center = (GIDX_GET_MIN(box_current,d)+GIDX_GET_MAX(box_current,d))/2.0;
1935  if ( center < avgCenter[d] )
1936  gserialized_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1937  else if ( FPeq(center, avgCenter[d]) )
1938  if ( pos[BELOW(d)] > pos[ABOVE(d)] )
1939  gserialized_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1940  else
1941  gserialized_gist_picksplit_addlist(list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1942  else
1943  gserialized_gist_picksplit_addlist(list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1944  }
1945 
1946  }
1947 
1948  /* Do we have a good disposition now? If not, screw it, just cut the node in half. */
1949  if ( gserialized_gist_picksplit_badratios(pos,ndims_pageunion) == TRUE )
1950  {
1951  POSTGIS_DEBUG(4, "[GIST] picksplit still cannot find a good split! just cutting the node in half");
1953  PG_RETURN_POINTER(v);
1954  }
1955 
1956  }
1957 
1958  /*
1959  ** Now, what splitting plane gives us the most even ratio of
1960  ** entries in our child pages? Since each split region has been apportioned entries
1961  ** against the same number of total entries, the axis that has the smallest maximum
1962  ** number of entries in its regions is the most evenly distributed.
1963  ** TODO: what if the distributions are equal in two or more axes?
1964  */
1965  for ( d = 0; d < ndims_pageunion; d++ )
1966  {
1967  int posd = Max(pos[ABOVE(d)],pos[BELOW(d)]);
1968  if ( posd < posmin )
1969  {
1970  direction = d;
1971  posmin = posd;
1972  }
1973  }
1974  if ( direction == -1 || posmin == entryvec->n )
1975  {
1976  /* ERROR OUT HERE */
1977  elog(ERROR, "Error in building split, unable to determine split direction.");
1978  }
1979 
1980  POSTGIS_DEBUGF(3, "[GIST] 'picksplit' splitting on axis %d", direction);
1981 
1983  pos[BELOW(direction)],
1984  &(box_union[BELOW(direction)]),
1985  list[ABOVE(direction)],
1986  pos[ABOVE(direction)],
1987  &(box_union[ABOVE(direction)]) );
1988 
1989  POSTGIS_DEBUGF(4, "[GIST] spl_ldatum: %s", gidx_to_string((GIDX*)v->spl_ldatum));
1990  POSTGIS_DEBUGF(4, "[GIST] spl_rdatum: %s", gidx_to_string((GIDX*)v->spl_rdatum));
1991 
1992  POSTGIS_DEBUGF(4, "[GIST] axis %d: parent range (%.12g, %.12g) left range (%.12g, %.12g), right range (%.12g, %.12g)",
1993  direction,
1994  GIDX_GET_MIN(box_pageunion, direction), GIDX_GET_MAX(box_pageunion, direction),
1995  GIDX_GET_MIN((GIDX*)v->spl_ldatum, direction), GIDX_GET_MAX((GIDX*)v->spl_ldatum, direction),
1996  GIDX_GET_MIN((GIDX*)v->spl_rdatum, direction), GIDX_GET_MAX((GIDX*)v->spl_rdatum, direction) );
1997 
1998  PG_RETURN_POINTER(v);
1999 
2000 }
static void gidx_merge(GIDX **b_union, GIDX *b_new)
static GIDX * gidx_copy(GIDX *b)
static void gserialized_gist_picksplit_constructsplit(GIST_SPLITVEC *v, OffsetNumber *list1, int nlist1, GIDX **union1, OffsetNumber *list2, int nlist2, GIDX **union2)
static bool gidx_equals(GIDX *a, GIDX *b)
#define BELOW(d)
static bool gserialized_gist_picksplit_badratios(int *pos, int dims)
static void gserialized_gist_picksplit_fallback(GistEntryVector *entryvec, GIST_SPLITVEC *v)
static void gserialized_gist_picksplit_addlist(OffsetNumber *list, GIDX **box_union, GIDX *box_current, int *pos, int num)
#define FPeq(A, B)
Definition: box2d.c:37
#define ABOVE(d)
#define TRUE
Definition: dbfopen.c:169
Here is the call graph for this function: