PostGIS 3.7.0dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches
lwgeom_in_marc21.c
Go to the documentation of this file.
1/**********************************************************************
2 *
3 * PostGIS - Spatial Types for PostgreSQL
4 * http://postgis.net
5 *
6 * PostGIS is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * PostGIS is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
18 *
19 **********************************************************************/
20
#include "postgres.h"
#include "utils/builtins.h"

#include <libxml/tree.h>
#include <libxml/parser.h>
#include <ctype.h>
#include <math.h>
#include <string.h>

#include "../postgis_config.h"
#include "lwgeom_pg.h"
#include "liblwgeom.h"
32
33static LWGEOM* parse_marc21(xmlNodePtr xnode);
34
35/**********************************************************************
36 * Ability to parse geographic data contained in MARC21/XML records
37 * to return an LWGEOM or an error message. It returns NULL if the
38 * MARC21/XML record is valid but does not contain any geographic
39 * data (datafield:034).
40 *
41 * MARC21/XML version supported: 1.1
42 * MARC21/XML Cartographic Mathematical Data Definition:
43 * https://www.loc.gov/marc/bibliographic/bd034.html
44 *
45 * Copyright (C) 2021 University of Münster (WWU), Germany
46 * Written by Jim Jones <jim.jones@uni-muenster.de>
47 *
48 **********************************************************************/
49
51Datum ST_GeomFromMARC21(PG_FUNCTION_ARGS) {
52 GSERIALIZED *geom;
53 LWGEOM *lwgeom;
54 xmlDocPtr xmldoc;
55 text *xml_input;
56 int xml_size;
57 char *xml;
58 xmlNodePtr xmlroot = NULL;
59
60 if (PG_ARGISNULL(0)) PG_RETURN_NULL();
61
62 xml_input = PG_GETARG_TEXT_P(0);
63 xml = text_to_cstring(xml_input);
64 xml_size = VARSIZE_ANY_EXHDR(xml_input);
65
66 xmlInitParser();
67 xmldoc = xmlReadMemory(xml, xml_size, NULL, NULL, 0);
68
69 if (!xmldoc || (xmlroot = xmlDocGetRootElement(xmldoc)) == NULL) {
70 xmlFreeDoc(xmldoc);
71 xmlCleanupParser();
72 lwpgerror("invalid MARC21/XML document.");
73 }
74
75 lwgeom = parse_marc21(xmlroot);
76
77 xmlFreeDoc(xmldoc);
78 xmlCleanupParser();
79
80 if (lwgeom == NULL) {
81
82 //lwgeom_free(lwgeom);
83 PG_RETURN_NULL();
84
85 }
86
87 geom = geometry_serialize(lwgeom);
88
89 lwgeom_free(lwgeom);
90
91 PG_RETURN_POINTER(geom);
92}
93
94static inline bool
95is_xml_element(xmlNodePtr xn, const char *xml_name)
96{
97 const char *colon_pos;
98 const char *node_name;
99
100 /* Not an element node, can't do anything */
101 if (!xn || xn->type != XML_ELEMENT_NODE)
102 return false;
103
104 /* If there's a colon in the element name, */
105 /* move past it before checking for equality with */
106 /* the element name we are looking for */
107 node_name = (const char*)xn->name;
108 colon_pos = strchr(node_name, ':');
109 if (colon_pos)
110 node_name = colon_pos + 1;
111
112 return strcmp(node_name, xml_name) == 0;
113}
114
115static int is_literal_valid(const char *literal) {
116
117 int num_dec_sep;
118 int coord_start;
119 int literal_length;
120
121 if(literal == NULL) return LW_FALSE;
122
123 literal_length = strlen(literal);
124
125 POSTGIS_DEBUGF(2, "is_literal_valid called (%s)", literal);
126
127 if (literal_length < 3) return LW_FALSE;
128
129 coord_start = 0;
130 num_dec_sep = 0;
131
141 if (literal[0] == 'N' || literal[0] == 'E' || literal[0] == 'S' || literal[0] == 'W' || literal[0] == '+' || literal[0] == '-') {
142
143 if (literal_length < 4) {
144 POSTGIS_DEBUGF(3, " invalid literal length (%d): \"%s\"", literal_length, literal);
145 return LW_FALSE;
146 }
147
148 coord_start = 1;
149 }
150
151 for (int j = coord_start; j < literal_length; j++) {
152
153 if (!isdigit(literal[j])) {
154
155
156 if (j < 3) {
157
162 POSTGIS_DEBUGF(3," invalid character '%c' at the degrees section: \"%s\"", literal[j], literal);
163 return LW_FALSE;
164
165 }
166
171 if (literal[j] == '.' || literal[j] == ',') {
172
173 num_dec_sep++;
174
175 if (num_dec_sep > 1) return LW_FALSE;
176
177 } else {
178 POSTGIS_DEBUGF(3, " invalid character '%c' in %d: \"%s\"", literal[j], j, literal);
179 return LW_FALSE;
180
181 }
182
183 }
184
185 }
186
187 POSTGIS_DEBUGF(2, "=> is_literal_valid returns LW_TRUE for \"%s\"", literal);
188 return LW_TRUE;
189
190}
191
/**
 * Converts a MARC21 034 coordinate literal into decimal degrees.
 *
 * Supported layouts (h is an optional one-character prefix):
 *  - hddd, hdddmm, hdddmmss : integer degrees, minutes, seconds
 *  - hddd.dddddd            : decimal degrees
 *  - hdddmm.mmmm            : degrees and decimal minutes
 *  - hdddmmss.sss           : degrees, minutes and decimal seconds
 *
 * A first character that is not a digit is treated as the prefix; the
 * result is negated when that prefix is 'S', 'W' or '-'.
 *
 * The literal is expected to have passed is_literal_valid(). A decimal
 * separator found at any other position yields 0.0 (before the sign is
 * applied), without reading outside of the string.
 *
 * @param literal NUL-terminated coordinate; a ',' decimal separator is
 *                replaced in place by '.'
 * @return the coordinate in decimal degrees
 */
static double parse_geo_literal(char *literal)
{
	enum
	{
		NUMDIGITS_DEGREES = 3,
		NUMDIGITS_MINUTES = 2,
		NUMDIGITS_SECONDS = 2
	};

	/* Fixed-size scratch buffers: the sections have a known width */
	char dgr[NUMDIGITS_DEGREES + 1];
	char min[NUMDIGITS_MINUTES + 1];
	char sec[NUMDIGITS_SECONDS + 1];
	char *dec_sep;
	size_t literal_length;

	char start_character = literal[0];
	size_t start_literal = 0;
	double result = 0.0;

	POSTGIS_DEBUGF(2, "parse_geo_literal called (%s)", literal);
	POSTGIS_DEBUGF(2, " start character: %c", start_character);

	literal_length = strlen(literal);

	/* Cast: passing a negative char to isdigit() is undefined */
	if (!isdigit((unsigned char) start_character)) start_literal = 1;

	POSTGIS_DEBUGF(2, " start_literal=%d", (int) start_literal);

	/* Nothing after the prefix (or an empty string): there is no
	 * coordinate to read, and indexing past the prefix would leave the
	 * string. Validated literals never get here. */
	if (literal_length <= start_literal) return 0.0;

	snprintf(dgr, sizeof(dgr), "%s", &literal[start_literal]);

	/* From the docs: "In MARC21/XML coordinates, the decimal sign may be
	 * either a period or a comma." A comma is turned into a period so
	 * that atof can deal with it. */
	dec_sep = strchr(literal, ',');
	if (dec_sep)
	{
		*dec_sep = '.';
		POSTGIS_DEBUGF(2, " decimal separator changed to '.': %s",literal);
	}
	dec_sep = strchr(literal, '.');

	if (dec_sep == NULL)
	{
		POSTGIS_DEBUG(2, " lat/lon integer coordinates detected");
		POSTGIS_DEBUGF(2, " parsed degrees (lon/lat): %s", dgr);

		/* literal contains at least degrees.
		 * minutes and seconds are optional */
		result = atof(dgr);

		/* checks if the literal contains minutes */
		if (literal_length > (start_literal + NUMDIGITS_DEGREES))
		{
			snprintf(min, sizeof(min), "%s", &literal[start_literal + NUMDIGITS_DEGREES]);
			POSTGIS_DEBUGF(2, " parsed minutes (lon/lat): %s", min);
			result = result + atof(min) / 60;

			/* checks if the literal contains seconds */
			if (literal_length > (start_literal + NUMDIGITS_DEGREES + NUMDIGITS_MINUTES))
			{
				snprintf(sec, sizeof(sec), "%s", &literal[start_literal + NUMDIGITS_DEGREES + NUMDIGITS_MINUTES]);
				POSTGIS_DEBUGF(2, " parsed seconds (lon/lat): %s", sec);
				result = result + atof(sec) / 3600;
			}
		}
	}
	else
	{
		/* The layout is derived from where the separator sits. Comparing
		 * its index, instead of probing literal[] at fixed offsets, keeps
		 * short literals from being read past their terminator. */
		size_t sep_index = (size_t) (dec_sep - literal);

		POSTGIS_DEBUG(2, " decimal coordinates detected");

		if (sep_index == start_literal + NUMDIGITS_DEGREES)
		{
			/* decimal degrees: everything after the prefix is the value */
			const char *dec = &literal[start_literal];

			result = atof(dec);
			POSTGIS_DEBUGF(2, " parsed decimal degrees: %s", dec);
		}
		else if (sep_index == start_literal + NUMDIGITS_DEGREES + NUMDIGITS_MINUTES)
		{
			/* decimal minutes: the rest of the literal after the degrees */
			const char *dec_min = &literal[start_literal + NUMDIGITS_DEGREES];

			POSTGIS_DEBUGF(2, " parsed degrees: %s", dgr);
			POSTGIS_DEBUGF(2, " parsed decimal minutes: %s", dec_min);

			result = atof(dgr) + (atof(dec_min) / 60);
		}
		else if (sep_index == start_literal + NUMDIGITS_DEGREES + NUMDIGITS_MINUTES + NUMDIGITS_SECONDS)
		{
			/* decimal seconds: the rest of the literal after the minutes */
			const char *dec_sec = &literal[start_literal + NUMDIGITS_DEGREES + NUMDIGITS_MINUTES];

			snprintf(min, sizeof(min), "%s", &literal[start_literal + NUMDIGITS_DEGREES]);

			result = atof(dgr) + (atof(min) / 60) + (atof(dec_sec) / 3600);

			POSTGIS_DEBUGF(2, " parsed degrees: %s", dgr);
			POSTGIS_DEBUGF(2, " parsed minutes: %s", min);
			POSTGIS_DEBUGF(2, " parsed decimal seconds: %s", dec_sec);
		}
	}

	/* Southern and western hemispheres are negative */
	if (start_character == 'S' || start_character == 'W' || start_character == '-')
	{
		POSTGIS_DEBUGF(2, " switching sign due to start character: '%c'", start_character);
		result = -result;
	}

	return result;
}
382
383static LWGEOM*
384parse_marc21(xmlNodePtr xnode) {
385
386 int ngeoms;
387 int i;
388 xmlNodePtr datafield;
389 xmlNodePtr subfield;
390 LWGEOM *result;
391 LWGEOM **lwgeoms = (LWGEOM**) lwalloc(sizeof(LWGEOM*));
392 uint8_t geometry_type;
393 uint8_t result_type;
394 char *code;
395 char *literal;
396
397 POSTGIS_DEBUGF(2, "parse_marc21 called: root '<%s>'", xnode->name);
398
404 if (!is_xml_element(xnode, "record"))
405 lwpgerror("invalid MARC21/XML document. Root element <record> expected but <%s> found.",xnode->name);
406
407 result_type = 0;
408 ngeoms = 0;
409
410 for (datafield = xnode->children; datafield != NULL; datafield = datafield->next) {
411
412 char *lw = NULL;
413 char *le = NULL;
414 char *ln = NULL;
415 char *ls = NULL;
416
417 if (datafield->type != XML_ELEMENT_NODE) continue;
418
419 if (!is_xml_element(datafield, "datafield") || xmlStrcmp(xmlGetProp(datafield, (xmlChar*) "tag"),(xmlChar*) "034") != 0) continue;
420
421 POSTGIS_DEBUG(3, " datafield found");
422
423 for (subfield = datafield->children; subfield != NULL; subfield = subfield->next) {
424
425 if (subfield->type != XML_ELEMENT_NODE) continue;
426 if (!is_xml_element(subfield, "subfield"))
427 continue;
428
429 code = (char*) xmlGetProp(subfield, (xmlChar*) "code");
430
431 if ((strcmp(code, "d") != 0 &&
432 strcmp(code, "e") != 0 &&
433 strcmp(code, "f") != 0 &&
434 strcmp(code, "g")) != 0)
435 continue;
436
437 literal = (char*) xmlNodeGetContent(subfield);
438
439 POSTGIS_DEBUGF(3, " subfield code '%s': %s", code, literal);
440
441 if (is_literal_valid(literal) == LW_TRUE) {
442
443 if (strcmp(code, "d") == 0) lw = literal;
444 else if (strcmp(code, "e") == 0) le = literal;
445 else if (strcmp(code, "f") == 0) ln = literal;
446 else if (strcmp(code, "g") == 0) ls = literal;
447
448 } else {
449
450 lwpgerror("parse error - invalid literal at 034$%s: \"%s\"", code, literal);
451
452 }
453
454 }
455
456 xmlFreeNode(subfield);
457
458 if (lw && le && ln && ls) {
459
460 double w = parse_geo_literal(lw);
461 double e = parse_geo_literal(le);
462 double n = parse_geo_literal(ln);
463 double s = parse_geo_literal(ls);
464 geometry_type = 0;
465
466 if (ngeoms > 0) lwgeoms = (LWGEOM**)
467 lwrealloc(lwgeoms, sizeof(LWGEOM*) * (ngeoms + 1));
468
469 if (fabs(w - e) < 0.0000001f && fabs(n - s) < 0.0000001f) {
470
477 lwgeoms[ngeoms] = (LWGEOM*) lwpoint_make2d(SRID_UNKNOWN, w, s);
478 geometry_type = MULTIPOINTTYPE;
479
480 } else {
481
482 lwgeoms[ngeoms] = (LWGEOM*) lwpoly_construct_envelope(SRID_UNKNOWN, w, n, e, s);
483 geometry_type = MULTIPOLYGONTYPE;
484
485 }
486
487 if (ngeoms && result_type != geometry_type) {
488 result_type = COLLECTIONTYPE;
489 } else {
490 result_type = geometry_type;
491 }
492
493 ngeoms++;
494
495 } else {
496
497 if (lw || le || ln || ls) {
498
499 lwpgerror("parse error - the Coded Cartographic Mathematical Data (datafield:034) in the given MARC21/XML is incomplete. Coordinates for subfields \"$d\",\"$e\",\"$f\" and \"$g\" are expected.");
500 }
501
502 }
503
504 }
505
506 POSTGIS_DEBUG(5, " xmlFreeNode(datafield)");
507 xmlFreeNode(datafield);
508
509 if (ngeoms == 1) {
510
511 POSTGIS_DEBUGF(2, "=> parse_marc21 returns single geometry: %s",lwtype_name(lwgeom_get_type(lwgeoms[0])));
512 lwgeom_force_clockwise(lwgeoms[0]);
513 return lwgeoms[0];
514
515 } else if (ngeoms > 1) {
516
518
519 for (i = 0; i < ngeoms; i++) {
520
521 POSTGIS_DEBUGF(3, " adding geometry to result set: %s",lwtype_name(lwgeom_get_type(lwgeoms[i])));
522 lwgeom_force_clockwise(lwgeoms[i]);
524
525 }
526
527 POSTGIS_DEBUGF(2, "=> parse_marc21 returns a collection: %s", lwtype_name(lwgeom_get_type(result)));
528 return result;
529
530 }
531
535 POSTGIS_DEBUG(2, "=> parse_marc21 returns NULL");
536 return NULL;
537
538}
char * s
Definition cu_in_wkt.c:23
static char * w
Definition cu_out_twkb.c:25
char result[OUT_DOUBLE_BUFFER_SIZE]
Definition cu_print.c:267
const char * lwtype_name(uint8_t type)
Return the type name string associated with a type number (e.g.
Definition lwutil.c:216
#define LW_FALSE
Definition liblwgeom.h:94
#define COLLECTIONTYPE
Definition liblwgeom.h:108
void * lwrealloc(void *mem, size_t size)
Definition lwutil.c:242
void lwgeom_free(LWGEOM *geom)
Definition lwgeom.c:1246
#define MULTIPOINTTYPE
Definition liblwgeom.h:105
LWPOLY * lwpoly_construct_envelope(int32_t srid, double x1, double y1, double x2, double y2)
Definition lwpoly.c:98
void * lwalloc(size_t size)
Definition lwutil.c:227
#define MULTIPOLYGONTYPE
Definition liblwgeom.h:107
LWPOINT * lwpoint_make2d(int32_t srid, double x, double y)
Definition lwpoint.c:163
void lwgeom_force_clockwise(LWGEOM *lwgeom)
Definition lwgeom.c:73
LWCOLLECTION * lwcollection_construct_empty(uint8_t type, int32_t srid, char hasz, char hasm)
#define LW_TRUE
Return types for functions with status returns.
Definition liblwgeom.h:93
LWCOLLECTION * lwcollection_add_lwgeom(LWCOLLECTION *col, const LWGEOM *geom)
Appends geom to the collection managed by col.
#define SRID_UNKNOWN
Unknown SRID value.
Definition liblwgeom.h:215
This library is the generic geometry handling section of PostGIS.
static bool is_xml_element(xmlNodePtr xn, const char *xml_name)
PG_FUNCTION_INFO_V1(ST_GeomFromMARC21)
static LWGEOM * parse_marc21(xmlNodePtr xnode)
Datum ST_GeomFromMARC21(PG_FUNCTION_ARGS)
static double parse_geo_literal(char *literal)
static int is_literal_valid(const char *literal)
static uint32_t lwgeom_get_type(const LWGEOM *geom)
Return LWTYPE number.
Definition lwinline.h:141