PostGIS  2.4.9dev-r@@SVN_REVISION@@
gserialized_estimate.c File Reference
#include "postgres.h"
#include "executor/spi.h"
#include "fmgr.h"
#include "commands/vacuum.h"
#include "nodes/relation.h"
#include "parser/parsetree.h"
#include "utils/array.h"
#include "utils/lsyscache.h"
#include "utils/builtins.h"
#include "utils/syscache.h"
#include "utils/rel.h"
#include "utils/selfuncs.h"
#include "../postgis_config.h"
#include "access/htup_details.h"
#include "stringbuffer.h"
#include "liblwgeom.h"
#include "lwgeom_pg.h"
#include "gserialized_gist.h"
#include <math.h>
#include <float.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <ctype.h>
Include dependency graph for gserialized_estimate.c:

Go to the source code of this file.

Data Structures

struct  ND_BOX_T
 N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all calculations at every step. More...
 
struct  ND_IBOX_T
 N-dimensional box index type. More...
 
struct  ND_STATS_T
 N-dimensional statistics structure. More...
 

Macros

#define STATISTIC_KIND_ND   102
 Assign a number to the n-dimensional statistics kind. More...
 
#define STATISTIC_KIND_2D   103
 
#define STATISTIC_SLOT_ND   0
 
#define STATISTIC_SLOT_2D   1
 
#define SDFACTOR   3.25
 
#define ND_DIMS   4
 The maximum number of dimensions our code can handle. More...
 
#define MIN_DIMENSION_WIDTH   0.000000001
 Minimum width of a dimension that we'll bother trying to compute statistics on. More...
 
#define MAX_DIMENSION_WIDTH   1.0E+20
 Maximum width of a dimension that we'll bother trying to compute statistics on. More...
 
#define DEFAULT_ND_SEL   0.0001
 Default geometry selectivity factor. More...
 
#define DEFAULT_ND_JOINSEL   0.001
 
#define FALLBACK_ND_SEL   0.2
 More modest fallback selectivity factor. More...
 
#define FALLBACK_ND_JOINSEL   0.3
 
#define NUM_BINS   50
 

Typedefs

typedef struct ND_BOX_T ND_BOX
 N-dimensional box type for calculations, to avoid doing explicit axis conversions from GBOX in all calculations at every step. More...
 
typedef struct ND_IBOX_T ND_IBOX
 N-dimensional box index type. More...
 
typedef struct ND_STATS_T ND_STATS
 N-dimensional statistics structure. More...
 

Functions

Datum gserialized_gist_joinsel (PG_FUNCTION_ARGS)
 
Datum gserialized_gist_joinsel_2d (PG_FUNCTION_ARGS)
 
Datum gserialized_gist_joinsel_nd (PG_FUNCTION_ARGS)
 
Datum gserialized_gist_sel (PG_FUNCTION_ARGS)
 
Datum gserialized_gist_sel_2d (PG_FUNCTION_ARGS)
 
Datum gserialized_gist_sel_nd (PG_FUNCTION_ARGS)
 
Datum gserialized_analyze_nd (PG_FUNCTION_ARGS)
 
Datum gserialized_estimated_extent (PG_FUNCTION_ARGS)
 
Datum _postgis_gserialized_sel (PG_FUNCTION_ARGS)
 
Datum _postgis_gserialized_joinsel (PG_FUNCTION_ARGS)
 
Datum _postgis_gserialized_stats (PG_FUNCTION_ARGS)
 
Datum geometry_estimated_extent (PG_FUNCTION_ARGS)
 
static int gbox_ndims (const GBOX *gbox)
 Given that geodetic boxes are X/Y/Z regardless of the underlying geometry dimensionality and other boxes are guided by HAS_Z/HAS_M in their dimensionality, we have a little utility function to make it easy. More...
 
static int text_p_get_mode (const text *txt)
 Utility function to see if the first letter of the mode argument is 'N'. More...
 
static int cmp_int (const void *a, const void *b)
 Integer comparison function for qsort. More...
 
static int range_quintile (int *vals, int nvals)
 The difference between the fourth and first quintile values, the "inter-quintile range". More...
 
static double total_double (const double *vals, int nvals)
 Given double array, return sum of values. More...
 
static int nd_stats_value_index (const ND_STATS *stats, int *indexes)
 Given a position in the n-d histogram (i,j,k) return the position in the 1-d values array. More...
 
static char * nd_box_to_json (const ND_BOX *nd_box, int ndims)
 Convert an ND_BOX to a JSON string for printing. More...
 
static char * nd_stats_to_json (const ND_STATS *nd_stats)
 Convert an ND_STATS to a JSON representation for external use. More...
 
static int nd_box_merge (const ND_BOX *source, ND_BOX *target)
 Expand the bounds of target to include source. More...
 
static int nd_box_init (ND_BOX *a)
 Zero out an ND_BOX. More...
 
static int nd_box_init_bounds (ND_BOX *a)
 Prepare an ND_BOX for bounds calculation: set the maxes to the smallest thing possible and the mins to the largest. More...
 
static void nd_box_from_gbox (const GBOX *gbox, ND_BOX *nd_box)
 Set the values of an ND_BOX from a GBOX. More...
 
static int nd_box_intersects (const ND_BOX *a, const ND_BOX *b, int ndims)
 Return TRUE if ND_BOX a overlaps b, false otherwise. More...
 
static int nd_box_contains (const ND_BOX *a, const ND_BOX *b, int ndims)
 Return TRUE if ND_BOX a contains b, false otherwise. More...
 
static int nd_box_expand (ND_BOX *nd_box, double expansion_factor)
 Expand an ND_BOX ever so slightly. More...
 
static int nd_box_overlap (const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
 What stats cells overlap with this ND_BOX? Put the lowest cell addresses in ND_IBOX->min and the highest in ND_IBOX->max. More...
 
static double nd_box_ratio (const ND_BOX *b1, const ND_BOX *b2, int ndims)
 Returns the proportion of b2 that is covered by b1. More...
 
static int nd_box_array_distribution (const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
 Calculate how much a set of boxes is homogeneously distributed or concentrated within one dimension, returning the range_quintile of the overlap counts per cell in a uniform partition of the extent of the dimension. More...
 
static int nd_increment (ND_IBOX *ibox, int ndims, int *counter)
 Given an n-d index array (counter), and a domain to increment it in (ibox) increment it by one, unless it's already at the max of the domain, in which case return false. More...
 
static ND_STATS * pg_nd_stats_from_tuple (HeapTuple stats_tuple, int mode)
 
static ND_STATS * pg_get_nd_stats (const Oid table_oid, AttrNumber att_num, int mode, bool only_parent)
 Pull the stats object from the PgSQL system catalogs. More...
 
static ND_STATS * pg_get_nd_stats_by_name (const Oid table_oid, const text *att_text, int mode, bool only_parent)
 Pull the stats object from the PgSQL system catalogs. More...
 
static float8 estimate_join_selectivity (const ND_STATS *s1, const ND_STATS *s2)
 Given two statistics histograms, what is the selectivity of a join driven by the && or &&& operator? More...
 
 PG_FUNCTION_INFO_V1 (gserialized_gist_joinsel_nd)
 For (geometry &&& geometry) and (geography && geography) we call into the N-D mode. More...
 
 PG_FUNCTION_INFO_V1 (gserialized_gist_joinsel_2d)
 For (geometry && geometry) we call into the 2-D mode. More...
 
 PG_FUNCTION_INFO_V1 (gserialized_gist_joinsel)
 Join selectivity of the && operator. More...
 
static void compute_gserialized_stats_mode (VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows, int mode)
 The gserialized_analyze_nd sets this function as a callback on the stats object when called by the ANALYZE command. More...
 
static void compute_gserialized_stats (VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int sample_rows, double total_rows)
 In order to do useful selectivity calculations in both 2-D and N-D modes, we actually have to generate two stats objects, one for 2-D and one for N-D. More...
 
 PG_FUNCTION_INFO_V1 (gserialized_analyze_nd)
 This function will be called when the ANALYZE command is run on a column of the "geometry" or "geography" type. More...
 
static float8 estimate_selectivity (const GBOX *box, const ND_STATS *nd_stats, int mode)
 This function returns an estimate of the selectivity of a search GBOX by looking at data in the ND_STATS structure. More...
 
 PG_FUNCTION_INFO_V1 (_postgis_gserialized_stats)
 Utility function to print the statistics information for a given table/column in JSON. More...
 
 PG_FUNCTION_INFO_V1 (_postgis_gserialized_sel)
 Utility function to read the calculated selectivity for a given search box and table/column. More...
 
 PG_FUNCTION_INFO_V1 (_postgis_gserialized_joinsel)
 Utility function to read the calculated join selectivity for a pair of tables. More...
 
 PG_FUNCTION_INFO_V1 (gserialized_gist_sel_2d)
 For (geometry && geometry) we call into the 2-D mode. More...
 
 PG_FUNCTION_INFO_V1 (gserialized_gist_sel_nd)
 For (geometry &&& geometry) and (geography && geography) we call into the N-D mode. More...
 
 PG_FUNCTION_INFO_V1 (gserialized_gist_sel)
 This function should return an estimation of the number of rows returned by a query involving an overlap check (it's the restrict function for the && operator). More...
 
 PG_FUNCTION_INFO_V1 (gserialized_estimated_extent)
 Return the estimated extent of the table looking at gathered statistics (or NULL if no statistics have been gathered). More...
 
 PG_FUNCTION_INFO_V1 (geometry_estimated_extent)
 Return the estimated extent of the table looking at gathered statistics (or NULL if no statistics have been gathered). More...