PostGIS 3.7.0dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches
shpcommon.c
Go to the documentation of this file.
1/**********************************************************************
2 *
3 * PostGIS - Spatial Types for PostgreSQL
4 * http://postgis.net
5 *
6 * Copyright (C) 2014 Sandro Santilli <strk@kbt.io>
7 * Copyright (C) 2010 Mark Cave-Ayland <mark.cave-ayland@siriusit.co.uk>
8 *
9 * This is free software; you can redistribute and/or modify it under
10 * the terms of the GNU General Public Licence. See the COPYING file.
11 *
12 **********************************************************************/
13
14/* This file contains functions that are shared between the loader and dumper */
15
16#ifndef _GNU_SOURCE
17# define _GNU_SOURCE
18#endif
19
20#include <stdio.h>
21#include <string.h>
22#include <stdlib.h>
23
24#include "shpcommon.h"
25
/*
 * One row of the DBF code page lookup table.
 */
typedef struct
{
	int ldid;          /* DBF language driver ID; matched by codepage2encoding for "LDID/<n>" input */
	int cpg;           /* numeric code page; matched by codepage2encoding for bare-number input */
	const char *desc;  /* human-readable description (not read by the functions in this file) */
	const char *iconv; /* encoding name handed back by codepage2encoding */
	const char *pg;    /* name compared against the encoding in encoding2codepage; "" when none is listed */
} code_page_entry;

/* http://www.autopark.ru/ASBProgrammerGuide/DBFSTRUC.HTM */
/* http://resources.arcgis.com/fr/content/kbase?fa=articleShow&d=21106 */

static code_page_entry code_pages[] = {
	{0x01, 437, "U.S. MS-DOS", "CP437",""},
	{0x02, 850, "International MS-DOS", "CP850",""},
	{0x03, 1252, "Window ANSI", "WINDOWS-1252","WIN1252"},
	{0x08, 865, "Danish OEM", "CP865",""},
	{0x09, 437, "Dutch OEM", "CP437",""},
	{0x0A, 850, "Dutch OEM*", "CP850",""},
	{0x0B, 437, "Finnish OEM", "CP437",""},
	{0x0D, 437, "French OEM", "CP437",""},
	{0x0E, 850, "French OEM*", "CP850",""},
	{0x0F, 437, "German OEM", "CP437",""},
	{0x10, 850, "German OEM*", "CP850",""},
	{0x11, 437, "Italian OEM", "CP437",""},
	{0x12, 850, "Italian OEM*", "CP850",""},
	{0x13, 932, "Japanese Shift-JIS", "CP932","SJIS"},
	{0x14, 850, "Spanish OEM*", "CP850",""},
	{0x15, 437, "Swedish OEM", "CP437",""},
	{0x16, 850, "Swedish OEM*", "CP850",""},
	{0x17, 865, "Norwegian OEM", "CP865",""},
	/* Code page 437 must map to CP437, like every other 437 row */
	{0x18, 437, "Spanish OEM", "CP437",""},
	{0x19, 437, "English OEM (Britain)", "CP437",""},
	{0x1A, 850, "English OEM (Britain)*", "CP850",""},
	{0x1B, 437, "English OEM (U.S.)", "CP437",""},
	{0x1C, 863, "French OEM (Canada)", "CP863",""},
	{0x1D, 850, "French OEM*", "CP850",""},
	{0x1F, 852, "Czech OEM", "CP852",""},
	{0x22, 852, "Hungarian OEM", "CP852",""},
	{0x23, 852, "Polish OEM", "CP852",""},
	{0x24, 860, "Portuguese OEM", "CP860",""},
	{0x25, 850, "Portuguese OEM*", "CP850",""},
	{0x26, 866, "Russian OEM", "WINDOWS-866","WIN866"},
	{0x37, 850, "English OEM (U.S.)*", "CP850",""},
	{0x40, 852, "Romanian OEM", "CP852",""},
	{0x4D, 936, "Chinese GBK (PRC)", "CP936",""},
	{0x4E, 949, "Korean (ANSI/OEM)", "CP949",""},
	{0x4F, 950, "Chinese Big 5 (Taiwan)", "CP950","BIG5"},
	/* NOTE(review): "WIN874" sits in the iconv column here while row 0x7C
	 * uses "WINDOWS-874" for the same code page - confirm which is intended */
	{0x50, 874, "Thai (ANSI/OEM)", "WIN874",""},
	{0x57, 1252, "ANSI", "WINDOWS-1252",""},
	{0x58, 1252, "Western European ANSI", "WINDOWS-1252",""},
	{0x59, 1252, "Spanish ANSI", "WINDOWS-1252",""},
	{0x64, 852, "Eastern European MS-DOS", "CP852",""},
	{0x65, 866, "Russian MS-DOS", "CP866",""},
	{0x66, 865, "Nordic MS-DOS", "CP865",""},
	/* NOTE(review): empty iconv name, so codepage2encoding hands back ""
	 * for this row - confirm whether that is deliberate */
	{0x67, 861, "Icelandic MS-DOS", "",""},
	{0x6A, 737, "Greek MS-DOS (437G)", "CP737",""},
	{0x6B, 857, "Turkish MS-DOS", "CP857",""},
	{0x6C, 863, "French-Canadian MS-DOS", "CP863",""},
	{0x78, 950, "Taiwan Big 5", "CP950",""},
	{0x79, 949, "Hangul (Wansung)", "CP949",""},
	{0x7A, 936, "PRC GBK", "CP936","GBK"},
	{0x7B, 932, "Japanese Shift-JIS", "CP932",""},
	{0x7C, 874, "Thai Windows/MS-DOS", "WINDOWS-874","WIN874"},
	{0x86, 737, "Greek OEM", "CP737",""},
	{0x87, 852, "Slovenian OEM", "CP852",""},
	{0x88, 857, "Turkish OEM", "CP857",""},
	{0xC8, 1250, "Eastern European Windows", "WINDOWS-1250","WIN1250"},
	{0xC9, 1251, "Russian Windows", "WINDOWS-1251","WIN1251"},
	{0xCA, 1254, "Turkish Windows", "WINDOWS-1254","WIN1254"},
	{0xCB, 1253, "Greek Windows", "WINDOWS-1253","WIN1253"},
	{0xCC, 1257, "Baltic Window", "WINDOWS-1257","WIN1257"},
	{0xFF, 65001, "UTF-8", "UTF-8","UTF8"}
};

/* Number of rows in code_pages; derived from the table so the two cannot drift apart */
static int num_code_pages = (int)(sizeof(code_pages) / sizeof(code_pages[0]));
102
103
104
105
106
/**
 * Escape strings that are to be used as part of a PostgreSQL connection string.
 *
 * Every apostrophe and backslash is prefixed with a backslash:
 *   '  ->  \'
 *   \  ->  \\
 *
 * @param str NUL-terminated input string
 * @return str itself when it contains nothing to escape; otherwise a newly
 *         allocated escaped copy which the caller must free(). NULL when
 *         str is NULL or the allocation fails.
 */
char *
escape_connection_string(char *str)
{
	const char *src;
	char *result;
	char *dst;
	size_t toescape = 0;

	if (!str)
		return NULL;

	/* Count how many characters we need to escape so we know the output size */
	for (src = str; *src; src++)
	{
		if (*src == '\'' || *src == '\\')
			toescape++;
	}

	/* If we don't have to escape anything, simply return the input pointer */
	if (toescape == 0)
		return str;

	/* input length + one extra backslash per escaped character + terminator */
	result = malloc((size_t)(src - str) + toescape + 1);
	if (!result)
		return NULL;

	dst = result;
	for (src = str; *src; src++)
	{
		if (*src == '\'' || *src == '\\')
			*dst++ = '\\';

		*dst++ = *src;
	}
	*dst = '\0';

	return result;
}
161
162void
164{
165 map->size = 0;
166 map->pgfieldnames = NULL;
167 map->dbffieldnames = NULL;
168}
169
170void
172{
173 int i;
174 if (map != NULL){
175 if (map->size)
176 {
177 for (i = 0; i < map->size; i++)
178 {
179 if (map->pgfieldnames[i]) free(map->pgfieldnames[i]);
180 if (map->dbffieldnames[i]) free(map->dbffieldnames[i]);
181 }
182 free(map->pgfieldnames);
183 free(map->dbffieldnames);
184 }
185 }
186}
187
188const char *
189colmap_dbf_by_pg(colmap *map, const char *pgname)
190{
191 int i;
192 for (i=0; i<map->size; i++)
193 {
194 if (!strcasecmp(map->pgfieldnames[i], pgname))
195 {
196 return map->dbffieldnames[i];
197 }
198 }
199 return NULL;
200}
201
202const char *
203colmap_pg_by_dbf(colmap *map, const char *dbfname)
204{
205 int i;
206 for (i=0; i<map->size; i++)
207 {
208 if (!strcasecmp(map->dbffieldnames[i], dbfname))
209 {
210 return map->pgfieldnames[i];
211 }
212 }
213 return NULL;
214}
215
216int
217colmap_read(const char *filename, colmap *map, char *errbuf, size_t errbuflen)
218{
219 FILE *fptr;
220 char linebuffer[1024];
221 char *tmpstr;
222 int curmapsize, fieldnamesize;
223
224 /* Read column map file and load the colmap_dbffieldnames
225 * and colmap_pgfieldnames arrays */
226 fptr = fopen(filename, "r");
227 if (!fptr)
228 {
229 /* Return an error */
230 snprintf(errbuf, errbuflen, _("ERROR: Unable to open column map file %s"),
231 filename);
232 return 0;
233 }
234
235 /* First count how many columns we have... */
236 while (fgets(linebuffer, 1024, fptr) != NULL) ++map->size;
237
238 /* Now we know the final size, allocate the arrays and load the data */
239 fseek(fptr, 0, SEEK_SET);
240 map->pgfieldnames = (char **)malloc(sizeof(char *) * map->size);
241 map->dbffieldnames = (char **)malloc(sizeof(char *) * map->size);
242
243 /* Read in a line at a time... */
244 curmapsize = 0;
245 while (fgets(linebuffer, 1024, fptr) != NULL)
246 {
247 /* Split into two separate strings: pgfieldname and dbffieldname */
248 /* First locate end of first column (pgfieldname) */
249 fieldnamesize = strcspn(linebuffer, "\t\n ");
250 tmpstr = linebuffer;
251
252 /* Allocate memory and copy the string ensuring it is terminated */
253 map->pgfieldnames[curmapsize] = malloc(fieldnamesize + 1);
254 strncpy(map->pgfieldnames[curmapsize], tmpstr, fieldnamesize);
255 map->pgfieldnames[curmapsize][fieldnamesize] = '\0';
256
257 /* Now swallow up any whitespace */
258 tmpstr = linebuffer + fieldnamesize;
259 tmpstr += strspn(tmpstr, "\t\n ");
260
261 /* Finally locate end of second column (dbffieldname) */
262 fieldnamesize = strcspn(tmpstr, "\t\n ");
263
264 /* Allocate memory and copy the string ensuring it is terminated */
265 map->dbffieldnames[curmapsize] = malloc(fieldnamesize + 1);
266 strncpy(map->dbffieldnames[curmapsize], tmpstr, fieldnamesize);
267 map->dbffieldnames[curmapsize][fieldnamesize] = '\0';
268
269 /* Error out if the dbffieldname is > 10 chars */
270 if (strlen(map->dbffieldnames[curmapsize]) > 10)
271 {{
272 int ret = snprintf(errbuf, errbuflen,
273 _("ERROR: column map file specifies a DBF field name \"%s\" which is longer than 10 characters"),
274 map->dbffieldnames[curmapsize]);
275 if ( ret < 0 ) {
276 /* output error - TODO: report via perror? */
277 fclose(fptr);
278 return 0;
279 }
280 if ( errbuflen <= (size_t)ret ) {
281 /* output truncated */
282 errbuf[errbuflen-1] = '\0';
283 }
284 }}
285
286 ++curmapsize;
287 }
288
289 fclose(fptr);
290
291 /* Done; return success */
292 return 1;
293}
294
295/*
296* Code page info will come out of dbfopen as either a bare codepage number
297* (e.g. 1256) or as "LDID/1234" from the DBF hreader. We want to look up
298* the equivalent iconv encoding string so we can use iconv to transcode
299* the data into UTF8
300*/
301char *
302codepage2encoding(const char *cpg)
303{
304 int cpglen;
305 int is_ldid = 0;
306 int num, i;
307
308 /* Do nothing on nothing. */
309 if ( ! cpg ) return NULL;
310
311 /* Is this an LDID string? */
312 /* If so, note it and move past the "LDID/" tag */
313 cpglen = strlen(cpg);
314 if ( strstr(cpg, "LDID/") )
315 {
316 if ( cpglen > 5 )
317 {
318 cpg += 5;
319 is_ldid = 1;
320 }
321 else
322 {
323 return NULL;
324 }
325 }
326
327 /* Read the number */
328 num = atoi(cpg);
329
330 /* Can we find this number in our lookup table? */
331 for ( i = is_ldid ; i < num_code_pages; i++ )
332 {
333 if ( is_ldid )
334 {
335 if ( code_pages[i].ldid == num )
336 return strdup(code_pages[i].iconv);
337 }
338 else
339 {
340 if ( code_pages[i].cpg == num )
341 return strdup(code_pages[i].iconv);
342 }
343 }
344
345 /* Didn't find a matching entry */
346 return NULL;
347
348}
349
350/*
351* In the case where data is coming out of the database in some weird encoding
352* we want to look up the appropriate code page entry to feed to DBFCreateEx
353*
354* Return null on error (cannot allocate memory)
355*/
356char *
357encoding2codepage(const char *encoding)
358{
359 int i;
360 for ( i = 0; i < num_code_pages; i++ )
361 {
362 if ( strcasecmp(encoding, code_pages[i].pg) == 0 )
363 {
364 if ( code_pages[i].ldid == 0xFF )
365 {
366 return strdup("UTF-8");
367 }
368 else
369 {
370 char *codepage = NULL;
371 int ret = asprintf(&codepage, "LDID/%d", code_pages[i].ldid);
372 if ( ret == -1 ) return NULL; /* return null on error */
373 return codepage;
374 }
375 }
376 }
377
378 /* OK, we give up, pretend it's UTF8 */
379 return strdup("UTF-8");
380}
char result[OUT_DOUBLE_BUFFER_SIZE]
Definition cu_print.c:267
#define str(s)
void * malloc(YYSIZE_T)
void free(void *)
static int num_code_pages
Definition shpcommon.c:35
char * encoding2codepage(const char *encoding)
Definition shpcommon.c:357
static code_page_entry code_pages[]
Definition shpcommon.c:40
int colmap_read(const char *filename, colmap *map, char *errbuf, size_t errbuflen)
Read the content of filename into a symbol map.
Definition shpcommon.c:217
void colmap_init(colmap *map)
Definition shpcommon.c:163
const char * colmap_dbf_by_pg(colmap *map, const char *pgname)
Definition shpcommon.c:189
char * codepage2encoding(const char *cpg)
Definition shpcommon.c:302
void colmap_clean(colmap *map)
Definition shpcommon.c:171
const char * colmap_pg_by_dbf(colmap *map, const char *dbfname)
Definition shpcommon.c:203
char * escape_connection_string(char *str)
Escape strings that are to be used as part of a PostgreSQL connection string.
Definition shpcommon.c:113
#define _(String)
Definition shpcommon.h:24
char * pg
Definition shpcommon.c:32
char * desc
Definition shpcommon.c:30
int ldid
Definition shpcommon.c:28
char * iconv
Definition shpcommon.c:31
int cpg
Definition shpcommon.c:29
Definition shpcommon.c:27
char ** pgfieldnames
Definition shpcommon.h:55
int size
Definition shpcommon.h:61
char ** dbffieldnames
Definition shpcommon.h:58