PostGIS 3.7.0dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches
gserialized_estimate_support.h
Go to the documentation of this file.
1/**********************************************************************
2 *
3 * PostGIS - Spatial Types for PostgreSQL
4 * http://postgis.net
5 *
6 * This file is part of PostGIS
7 *
8 * PostGIS is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * PostGIS is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
20 *
21 **********************************************************************
22 *
23 * Internal helpers shared between the gserialized selectivity
24 * implementation and the unit tests.
25 *
26 * Keeping the routines header-only ensures the planner code and the
27 * harness evaluate the exact same floating-point flows without the
28 * cross-object plumbing that previously complicated maintenance.
29 * Nothing here is installed; the header is meant for
30 * gserialized_estimate.c and for the dedicated CUnit suite only.
31 *
32 **********************************************************************
33 *
34 * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
35 * Copyright 2025 (C) Darafei Praliaskouski <me@komzpa.net>
36 *
37 **********************************************************************/
38
39#ifndef POSTGIS_GSERIALIZED_ESTIMATE_SUPPORT_H
40#define POSTGIS_GSERIALIZED_ESTIMATE_SUPPORT_H
41
42#include "postgres.h"
43
44#include <limits.h>
45#include <math.h>
46
47/* The maximum number of dimensions our statistics code supports. */
48#define ND_DIMS 4
49
50/* Lightweight n-dimensional box representation for selectivity math. */
51typedef struct ND_BOX_T {
52 float4 min[ND_DIMS];
53 float4 max[ND_DIMS];
55
56/* Integer counterpart used for histogram cell iteration. */
57typedef struct ND_IBOX_T {
61
62/* On-disk representation of the histogram emitted by ANALYZE. */
75
76/*
77 * Return the flattened index for the histogram coordinate expressed by
78 * 'indexes'. A negative result signals that one of the axes fell outside
79 * the histogram definition.
80 */
81static inline int
82nd_stats_value_index(const ND_STATS *stats, const int *indexes)
83{
84 int d;
85 int accum = 1;
86 int vdx = 0;
87
88 for (d = 0; d < (int)(stats->ndims); d++)
89 {
90 int size = (int)(stats->size[d]);
91 if (indexes[d] < 0 || indexes[d] >= size)
92 return -1;
93 vdx += indexes[d] * accum;
94 accum *= size;
95 }
96 return vdx;
97}
98
99/*
100 * Derive the histogram grid budget requested by PostgreSQL's ANALYZE machinery.
101 * The planner caps the cell count via three heuristics that take the requested
102 * attstattarget, the histogram dimensionality, and the underlying row count
103 * into account. Double precision arithmetic keeps the intermediate products in
104 * range so the cap behaves consistently across build architectures.
105 */
106static inline int
107histogram_cell_budget(double total_rows, int ndims, int attstattarget)
108{
109 double budget;
110 double dims_cap;
111 double rows_cap;
112 double attstat;
113 double dims;
114
115 if (ndims <= 0)
116 return 0;
117
118 if (attstattarget <= 0)
119 attstattarget = 1;
120
121 /* Requested resolution coming from PostgreSQL's ANALYZE knob. */
122 attstat = (double)attstattarget;
123 dims = (double)ndims;
124 budget = pow(attstat, dims);
125
126 /* Hard ceiling that keeps the statistics collector responsive. */
127 dims_cap = (double)ndims * 100000.0;
128 if (budget > dims_cap)
129 budget = dims_cap;
130
131 /* Small relations do not need a histogram that dwarfs the sample. */
132 if (total_rows <= 0.0)
133 return 0;
134
135 rows_cap = 10.0 * (double)ndims * total_rows;
136 if (rows_cap < 0.0)
137 rows_cap = 0.0;
138
139 /* Keep intermediate computations in double precision before clamping. */
140 if (rows_cap > (double)INT_MAX)
141 rows_cap = (double)INT_MAX;
142
143 if (budget > rows_cap)
144 budget = rows_cap;
145
146 if (budget >= (double)INT_MAX)
147 return INT_MAX;
148 if (budget <= 0.0)
149 return 0;
150
151 return (int)budget;
152}
153
154/*
155 * Allocate histogram buckets along a single axis in proportion to the observed
156 * density variation. The caller passes in the global histogram target along
157 * with the number of axes that exhibited variation in the sampled data and the
158 * relative contribution of the current axis (edge_ratio). Earlier versions
159 * evaluated the pow() call directly in the caller, which exposed the planner to
160 * NaN propagation on some amd64 builds when the ratio was denormal or negative
161 * (see #5959). Keeping the calculation in one place allows us to clamp the
162 * inputs and provide a predictable fallback for problematic floating point
163 * combinations.
164 */
165static inline int
166histogram_axis_cells(int histo_cells_target, int histo_ndims, double edge_ratio)
167{
168 double scaled;
169 double axis_cells;
170
171 if (histo_cells_target <= 0 || histo_ndims <= 0)
172 return 1;
173
174 if (!(edge_ratio > 0.0) || !isfinite(edge_ratio))
175 return 1;
176
177 scaled = (double)histo_cells_target * (double)histo_ndims * edge_ratio;
178 if (!(scaled > 0.0) || !isfinite(scaled))
179 return 1;
180
181 axis_cells = pow(scaled, 1.0 / (double)histo_ndims);
182 if (!(axis_cells > 0.0) || !isfinite(axis_cells))
183 return 1;
184
185 if (axis_cells >= (double)INT_MAX)
186 return INT_MAX;
187
188 if (axis_cells <= 1.0)
189 return 1;
190
191 return (int)axis_cells;
192}
193
194/*
195 * Compute the portion of 'target' covered by 'cover'. The caller supplies the
196 * dimensionality because ND_BOX always carries four slots. Degenerate volumes
197 * fold to zero, allowing the callers to detect slabs that ANALYZE sometimes
198 * emits for skewed datasets.
199 */
200static inline double
201nd_box_ratio(const ND_BOX *cover, const ND_BOX *target, int ndims)
202{
203 int d;
204 bool fully_covered = true;
205 double ivol = 1.0;
206 double refvol = 1.0;
207
208 for (d = 0; d < ndims; d++)
209 {
210 if (cover->max[d] <= target->min[d] || cover->min[d] >= target->max[d])
211 return 0.0; /* Disjoint */
212
213 if (cover->min[d] > target->min[d] || cover->max[d] < target->max[d])
214 fully_covered = false;
215 }
216
217 if (fully_covered)
218 return 1.0;
219
220 for (d = 0; d < ndims; d++)
221 {
222 double width = target->max[d] - target->min[d];
223 double imin = Max(cover->min[d], target->min[d]);
224 double imax = Min(cover->max[d], target->max[d]);
225 double iwidth = Max(0.0, imax - imin);
226
227 refvol *= width;
228 ivol *= iwidth;
229 }
230
231 if (refvol == 0.0)
232 return refvol;
233
234 return ivol / refvol;
235}
236
237#endif /* POSTGIS_GSERIALIZED_ESTIMATE_SUPPORT_H */
struct ND_STATS_T ND_STATS
static int histogram_axis_cells(int histo_cells_target, int histo_ndims, double edge_ratio)
static double nd_box_ratio(const ND_BOX *cover, const ND_BOX *target, int ndims)
static int histogram_cell_budget(double total_rows, int ndims, int attstattarget)
struct ND_IBOX_T ND_IBOX
static int nd_stats_value_index(const ND_STATS *stats, const int *indexes)
struct ND_BOX_T ND_BOX