PostGIS  3.4.0dev-r@@SVN_REVISION@@
shpcommon.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * Copyright (C) 2014 Sandro Santilli <strk@kbt.io>
7  * Copyright (C) 2010 Mark Cave-Ayland <mark.cave-ayland@siriusit.co.uk>
8  *
9  * This is free software; you can redistribute and/or modify it under
10  * the terms of the GNU General Public Licence. See the COPYING file.
11  *
12  **********************************************************************/
13 
14 /* This file contains functions that are shared between the loader and dumper */
15 
16 #ifndef _GNU_SOURCE
17 # define _GNU_SOURCE
18 #endif
19 
20 #include <stdio.h>
21 #include <string.h>
22 #include <stdlib.h>
23 
24 #include "shpcommon.h"
25 
/* One row of the DBF language-driver / code-page lookup table. */
typedef struct
{
	int ldid;          /* number following "LDID/" in a DBF code page string */
	int cpg;           /* bare numeric code page */
	const char *desc;  /* human-readable description */
	const char *iconv; /* encoding name returned by codepage2encoding() */
	const char *pg;    /* encoding name matched by encoding2codepage(); "" if none */
} code_page_entry;

/* http://www.autopark.ru/ASBProgrammerGuide/DBFSTRUC.HTM */
/* http://resources.arcgis.com/fr/content/kbase?fa=articleShow&d=21106 */

static const code_page_entry code_pages[] = {
	{0x01, 437, "U.S. MS-DOS", "CP437",""},
	{0x02, 850, "International MS-DOS", "CP850",""},
	{0x03, 1252, "Window ANSI", "WINDOWS-1252","WIN1252"},
	{0x08, 865, "Danish OEM", "CP865",""},
	{0x09, 437, "Dutch OEM", "CP437",""},
	{0x0A, 850, "Dutch OEM*", "CP850",""},
	{0x0B, 437, "Finnish OEM", "CP437",""},
	{0x0D, 437, "French OEM", "CP437",""},
	{0x0E, 850, "French OEM*", "CP850",""},
	{0x0F, 437, "German OEM", "CP437",""},
	{0x10, 850, "German OEM*", "CP850",""},
	{0x11, 437, "Italian OEM", "CP437",""},
	{0x12, 850, "Italian OEM*", "CP850",""},
	{0x13, 932, "Japanese Shift-JIS", "CP932","SJIS"},
	{0x14, 850, "Spanish OEM*", "CP850",""},
	{0x15, 437, "Swedish OEM", "CP437",""},
	{0x16, 850, "Swedish OEM*", "CP850",""},
	{0x17, 865, "Norwegian OEM", "CP865",""},
	/* Code page 437, so the iconv name is CP437 like every other 437 row. */
	{0x18, 437, "Spanish OEM", "CP437",""},
	{0x19, 437, "English OEM (Britain)", "CP437",""},
	{0x1A, 850, "English OEM (Britain)*", "CP850",""},
	{0x1B, 437, "English OEM (U.S.)", "CP437",""},
	{0x1C, 863, "French OEM (Canada)", "CP863",""},
	{0x1D, 850, "French OEM*", "CP850",""},
	{0x1F, 852, "Czech OEM", "CP852",""},
	{0x22, 852, "Hungarian OEM", "CP852",""},
	{0x23, 852, "Polish OEM", "CP852",""},
	{0x24, 860, "Portuguese OEM", "CP860",""},
	{0x25, 850, "Portuguese OEM*", "CP850",""},
	{0x26, 866, "Russian OEM", "WINDOWS-866","WIN866"},
	{0x37, 850, "English OEM (U.S.)*", "CP850",""},
	{0x40, 852, "Romanian OEM", "CP852",""},
	{0x4D, 936, "Chinese GBK (PRC)", "CP936",""},
	{0x4E, 949, "Korean (ANSI/OEM)", "CP949",""},
	{0x4F, 950, "Chinese Big 5 (Taiwan)", "CP950","BIG5"},
	/* NOTE(review): "WIN874" sits in the iconv column here, while row 0x7C
	 * uses "WINDOWS-874" for the same code page - confirm which is intended. */
	{0x50, 874, "Thai (ANSI/OEM)", "WIN874",""},
	{0x57, 1252, "ANSI", "WINDOWS-1252",""},
	{0x58, 1252, "Western European ANSI", "WINDOWS-1252",""},
	{0x59, 1252, "Spanish ANSI", "WINDOWS-1252",""},
	{0x64, 852, "Eastern European MS-DOS", "CP852",""},
	{0x65, 866, "Russian MS-DOS", "CP866",""},
	{0x66, 865, "Nordic MS-DOS", "CP865",""},
	{0x67, 861, "Icelandic MS-DOS", "",""},
	{0x6A, 737, "Greek MS-DOS (437G)", "CP737",""},
	{0x6B, 857, "Turkish MS-DOS", "CP857",""},
	{0x6C, 863, "French-Canadian MS-DOS", "CP863",""},
	{0x78, 950, "Taiwan Big 5", "CP950",""},
	{0x79, 949, "Hangul (Wansung)", "CP949",""},
	{0x7A, 936, "PRC GBK", "CP936","GBK"},
	{0x7B, 932, "Japanese Shift-JIS", "CP932",""},
	{0x7C, 874, "Thai Windows/MS-DOS", "WINDOWS-874","WIN874"},
	{0x86, 737, "Greek OEM", "CP737",""},
	{0x87, 852, "Slovenian OEM", "CP852",""},
	{0x88, 857, "Turkish OEM", "CP857",""},
	{0xC8, 1250, "Eastern European Windows", "WINDOWS-1250","WIN1250"},
	{0xC9, 1251, "Russian Windows", "WINDOWS-1251","WIN1251"},
	{0xCA, 1254, "Turkish Windows", "WINDOWS-1254","WIN1254"},
	{0xCB, 1253, "Greek Windows", "WINDOWS-1253","WIN1253"},
	{0xCC, 1257, "Baltic Window", "WINDOWS-1257","WIN1257"},
	{0xFF, 65001, "UTF-8", "UTF-8","UTF8"}
};

/* Row count is derived from the table so the two can never drift apart. */
static const int num_code_pages = (int)(sizeof(code_pages) / sizeof(code_pages[0]));
102 
103 
104 
105 
106 
/**
 * Escape a string that is to be used as part of a PostgreSQL connection
 * string. Apostrophes and backslashes each get a leading backslash:
 *
 *   '  ->  \'
 *   \  ->  \\
 *
 * @param str NUL-terminated input string.
 * @return str itself when it contains nothing to escape (no allocation is
 *         made, so the caller must not free it separately); otherwise a
 *         newly allocated escaped copy that the caller must free().
 *         NULL when str is NULL or the allocation fails.
 */
char *
escape_connection_string(char *str)
{
	const char *src;
	char *result;
	char *dst;
	size_t toescape = 0;
	size_t len;

	if (str == NULL)
		return NULL;

	/* First pass: count the characters that need a backslash so the
	 * output buffer can be sized exactly. */
	for (src = str; *src; src++)
	{
		if (*src == '\'' || *src == '\\')
			toescape++;
	}

	/* Nothing to escape: simply return the input pointer. */
	if (toescape == 0)
		return str;

	/* src now points at the terminator, so src - str is the input length. */
	len = (size_t)(src - str);
	result = malloc(len + toescape + 1);
	if (result == NULL)
		return NULL;

	/* Second pass: copy, inserting a backslash before each special char. */
	dst = result;
	for (src = str; *src; src++)
	{
		if (*src == '\'' || *src == '\\')
			*dst++ = '\\';

		*dst++ = *src;
	}
	*dst = '\0';

	return result;
}
161 
162 void
164 {
165  map->size = 0;
166  map->pgfieldnames = NULL;
167  map->dbffieldnames = NULL;
168 }
169 
170 void
172 {
173  int i;
174  if (map != NULL){
175  if (map->size)
176  {
177  for (i = 0; i < map->size; i++)
178  {
179  if (map->pgfieldnames[i]) free(map->pgfieldnames[i]);
180  if (map->dbffieldnames[i]) free(map->dbffieldnames[i]);
181  }
182  free(map->pgfieldnames);
183  free(map->dbffieldnames);
184  }
185  }
186 }
187 
188 const char *
189 colmap_dbf_by_pg(colmap *map, const char *pgname)
190 {
191  int i;
192  for (i=0; i<map->size; i++)
193  {
194  if (!strcasecmp(map->pgfieldnames[i], pgname))
195  {
196  return map->dbffieldnames[i];
197  }
198  }
199  return NULL;
200 }
201 
202 const char *
203 colmap_pg_by_dbf(colmap *map, const char *dbfname)
204 {
205  int i;
206  for (i=0; i<map->size; i++)
207  {
208  if (!strcasecmp(map->dbffieldnames[i], dbfname))
209  {
210  return map->pgfieldnames[i];
211  }
212  }
213  return NULL;
214 }
215 
216 int
217 colmap_read(const char *filename, colmap *map, char *errbuf, size_t errbuflen)
218 {
219  FILE *fptr;
220  char linebuffer[1024];
221  char *tmpstr;
222  int curmapsize, fieldnamesize;
223 
224  /* Read column map file and load the colmap_dbffieldnames
225  * and colmap_pgfieldnames arrays */
226  fptr = fopen(filename, "r");
227  if (!fptr)
228  {
229  /* Return an error */
230  snprintf(errbuf, errbuflen, _("ERROR: Unable to open column map file %s"),
231  filename);
232  return 0;
233  }
234 
235  /* First count how many columns we have... */
236  while (fgets(linebuffer, 1024, fptr) != NULL) ++map->size;
237 
238  /* Now we know the final size, allocate the arrays and load the data */
239  fseek(fptr, 0, SEEK_SET);
240  map->pgfieldnames = (char **)malloc(sizeof(char *) * map->size);
241  map->dbffieldnames = (char **)malloc(sizeof(char *) * map->size);
242 
243  /* Read in a line at a time... */
244  curmapsize = 0;
245  while (fgets(linebuffer, 1024, fptr) != NULL)
246  {
247  /* Split into two separate strings: pgfieldname and dbffieldname */
248  /* First locate end of first column (pgfieldname) */
249  fieldnamesize = strcspn(linebuffer, "\t\n ");
250  tmpstr = linebuffer;
251 
252  /* Allocate memory and copy the string ensuring it is terminated */
253  map->pgfieldnames[curmapsize] = malloc(fieldnamesize + 1);
254  strncpy(map->pgfieldnames[curmapsize], tmpstr, fieldnamesize);
255  map->pgfieldnames[curmapsize][fieldnamesize] = '\0';
256 
257  /* Now swallow up any whitespace */
258  tmpstr = linebuffer + fieldnamesize;
259  tmpstr += strspn(tmpstr, "\t\n ");
260 
261  /* Finally locate end of second column (dbffieldname) */
262  fieldnamesize = strcspn(tmpstr, "\t\n ");
263 
264  /* Allocate memory and copy the string ensuring it is terminated */
265  map->dbffieldnames[curmapsize] = malloc(fieldnamesize + 1);
266  strncpy(map->dbffieldnames[curmapsize], tmpstr, fieldnamesize);
267  map->dbffieldnames[curmapsize][fieldnamesize] = '\0';
268 
269  /* Error out if the dbffieldname is > 10 chars */
270  if (strlen(map->dbffieldnames[curmapsize]) > 10)
271  {
272  snprintf(errbuf, errbuflen, _("ERROR: column map file specifies a DBF field name \"%s\" which is longer than 10 characters"), map->dbffieldnames[curmapsize]);
273  return 0;
274  }
275 
276  ++curmapsize;
277  }
278 
279  fclose(fptr);
280 
281  /* Done; return success */
282  return 1;
283 }
284 
285 /*
286 * Code page info will come out of dbfopen as either a bare codepage number
287 * (e.g. 1256) or as "LDID/1234" from the DBF hreader. We want to look up
288 * the equivalent iconv encoding string so we can use iconv to transcode
289 * the data into UTF8
290 */
291 char *
292 codepage2encoding(const char *cpg)
293 {
294  int cpglen;
295  int is_ldid = 0;
296  int num, i;
297 
298  /* Do nothing on nothing. */
299  if ( ! cpg ) return NULL;
300 
301  /* Is this an LDID string? */
302  /* If so, note it and move past the "LDID/" tag */
303  cpglen = strlen(cpg);
304  if ( strstr(cpg, "LDID/") )
305  {
306  if ( cpglen > 5 )
307  {
308  cpg += 5;
309  is_ldid = 1;
310  }
311  else
312  {
313  return NULL;
314  }
315  }
316 
317  /* Read the number */
318  num = atoi(cpg);
319 
320  /* Can we find this number in our lookup table? */
321  for ( i = is_ldid ; i < num_code_pages; i++ )
322  {
323  if ( is_ldid )
324  {
325  if ( code_pages[i].ldid == num )
326  return strdup(code_pages[i].iconv);
327  }
328  else
329  {
330  if ( code_pages[i].cpg == num )
331  return strdup(code_pages[i].iconv);
332  }
333  }
334 
335  /* Didn't find a matching entry */
336  return NULL;
337 
338 }
339 
340 /*
341 * In the case where data is coming out of the database in some wierd encoding
342 * we want to look up the appropriate code page entry to feed to DBFCreateEx
343 *
344 * Return null on error (cannot allocate memory)
345 */
346 char *
347 encoding2codepage(const char *encoding)
348 {
349  int i;
350  for ( i = 0; i < num_code_pages; i++ )
351  {
352  if ( strcasecmp(encoding, code_pages[i].pg) == 0 )
353  {
354  if ( code_pages[i].ldid == 0xFF )
355  {
356  return strdup("UTF-8");
357  }
358  else
359  {
360  char *codepage = NULL;
361  int ret = asprintf(&codepage, "LDID/%d", code_pages[i].ldid);
362  if ( ret == -1 ) return NULL; /* return null on error */
363  return codepage;
364  }
365  }
366  }
367 
368  /* OK, we give up, pretend it's UTF8 */
369  return strdup("UTF-8");
370 }
char result[OUT_DOUBLE_BUFFER_SIZE]
Definition: cu_print.c:262
#define str(s)
void * malloc(YYSIZE_T)
void free(void *)
char * encoding2codepage(const char *encoding)
Definition: shpcommon.c:347
char * escape_connection_string(char *str)
Escape strings that are to be used as part of a PostgreSQL connection string.
Definition: shpcommon.c:113
static int num_code_pages
Definition: shpcommon.c:35
static code_page_entry code_pages[]
Definition: shpcommon.c:40
int colmap_read(const char *filename, colmap *map, char *errbuf, size_t errbuflen)
Read the content of filename into a symbol map.
Definition: shpcommon.c:217
void colmap_init(colmap *map)
Definition: shpcommon.c:163
const char * colmap_dbf_by_pg(colmap *map, const char *pgname)
Definition: shpcommon.c:189
const char * colmap_pg_by_dbf(colmap *map, const char *dbfname)
Definition: shpcommon.c:203
void colmap_clean(colmap *map)
Definition: shpcommon.c:171
char * codepage2encoding(const char *cpg)
Definition: shpcommon.c:292
#define _(String)
Definition: shpcommon.h:24
char * pg
Definition: shpcommon.c:32
char * desc
Definition: shpcommon.c:30
int ldid
Definition: shpcommon.c:28
char * iconv
Definition: shpcommon.c:31
int cpg
Definition: shpcommon.c:29
Definition: shpcommon.c:27
char ** pgfieldnames
Definition: shpcommon.h:55
int size
Definition: shpcommon.h:61
char ** dbffieldnames
Definition: shpcommon.h:58