PostGIS  2.5.7dev-r@@SVN_REVISION@@
shpcommon.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * Copyright (C) 2014 Sandro Santilli <strk@kbt.io>
7  * Copyright (C) 2010 Mark Cave-Ayland <mark.cave-ayland@siriusit.co.uk>
8  *
9  * This is free software; you can redistribute and/or modify it under
10  * the terms of the GNU General Public Licence. See the COPYING file.
11  *
12  **********************************************************************/
13 
14 /* This file contains functions that are shared between the loader and dumper */
15 
16 #include <stdio.h>
17 #include <string.h>
18 #include <stdlib.h>
19 
20 #include "shpcommon.h"
21 
/**
 * One row of the DBF code page lookup table.
 */
typedef struct
{
	int ldid;          /* number compared against the value after "LDID/" */
	int cpg;           /* number compared against a bare code page value */
	const char *desc;  /* human readable description */
	const char *iconv; /* encoding name returned by codepage2encoding() */
	const char *pg;    /* encoding name matched by encoding2codepage(); "" when none is listed */
} code_page_entry;

/* http://www.autopark.ru/ASBProgrammerGuide/DBFSTRUC.HTM */
/* http://resources.arcgis.com/fr/content/kbase?fa=articleShow&d=21106 */

/*
 * Lookup table, searched linearly from the top; the first matching row wins.
 *
 * NOTE(review): a few iconv names look unusual ("WINDOWS-866" for 0x26,
 * "WIN874" for 0x50, and the empty name for 0x67) -- confirm them against
 * the iconv implementation in use before relying on those rows.
 */
static const code_page_entry code_pages[] = {
	{0x01, 437, "U.S. MS-DOS", "CP437",""},
	{0x02, 850, "International MS-DOS", "CP850",""},
	{0x03, 1252, "Window ANSI", "WINDOWS-1252","WIN1252"},
	{0x08, 865, "Danish OEM", "CP865",""},
	{0x09, 437, "Dutch OEM", "CP437",""},
	{0x0A, 850, "Dutch OEM*", "CP850",""},
	{0x0B, 437, "Finnish OEM", "CP437",""},
	{0x0D, 437, "French OEM", "CP437",""},
	{0x0E, 850, "French OEM*", "CP850",""},
	{0x0F, 437, "German OEM", "CP437",""},
	{0x10, 850, "German OEM*", "CP850",""},
	{0x11, 437, "Italian OEM", "CP437",""},
	{0x12, 850, "Italian OEM*", "CP850",""},
	{0x13, 932, "Japanese Shift-JIS", "CP932","SJIS"},
	{0x14, 850, "Spanish OEM*", "CP850",""},
	{0x15, 437, "Swedish OEM", "CP437",""},
	{0x16, 850, "Swedish OEM*", "CP850",""},
	{0x17, 865, "Norwegian OEM", "CP865",""},
	/* Code page 437: the iconv name must agree with the cpg column */
	{0x18, 437, "Spanish OEM", "CP437",""},
	{0x19, 437, "English OEM (Britain)", "CP437",""},
	{0x1A, 850, "English OEM (Britain)*", "CP850",""},
	{0x1B, 437, "English OEM (U.S.)", "CP437",""},
	{0x1C, 863, "French OEM (Canada)", "CP863",""},
	{0x1D, 850, "French OEM*", "CP850",""},
	{0x1F, 852, "Czech OEM", "CP852",""},
	{0x22, 852, "Hungarian OEM", "CP852",""},
	{0x23, 852, "Polish OEM", "CP852",""},
	{0x24, 860, "Portuguese OEM", "CP860",""},
	{0x25, 850, "Portuguese OEM*", "CP850",""},
	{0x26, 866, "Russian OEM", "WINDOWS-866","WIN866"},
	{0x37, 850, "English OEM (U.S.)*", "CP850",""},
	{0x40, 852, "Romanian OEM", "CP852",""},
	{0x4D, 936, "Chinese GBK (PRC)", "CP936",""},
	{0x4E, 949, "Korean (ANSI/OEM)", "CP949",""},
	{0x4F, 950, "Chinese Big 5 (Taiwan)", "CP950","BIG5"},
	{0x50, 874, "Thai (ANSI/OEM)", "WIN874",""},
	{0x57, 1252, "ANSI", "WINDOWS-1252",""},
	{0x58, 1252, "Western European ANSI", "WINDOWS-1252",""},
	{0x59, 1252, "Spanish ANSI", "WINDOWS-1252",""},
	{0x64, 852, "Eastern European MS-DOS", "CP852",""},
	{0x65, 866, "Russian MS-DOS", "CP866",""},
	{0x66, 865, "Nordic MS-DOS", "CP865",""},
	{0x67, 861, "Icelandic MS-DOS", "",""},
	{0x6A, 737, "Greek MS-DOS (437G)", "CP737",""},
	{0x6B, 857, "Turkish MS-DOS", "CP857",""},
	{0x6C, 863, "French-Canadian MS-DOS", "CP863",""},
	{0x78, 950, "Taiwan Big 5", "CP950",""},
	{0x79, 949, "Hangul (Wansung)", "CP949",""},
	{0x7A, 936, "PRC GBK", "CP936","GBK"},
	{0x7B, 932, "Japanese Shift-JIS", "CP932",""},
	{0x7C, 874, "Thai Windows/MS-DOS", "WINDOWS-874","WIN874"},
	{0x86, 737, "Greek OEM", "CP737",""},
	{0x87, 852, "Slovenian OEM", "CP852",""},
	{0x88, 857, "Turkish OEM", "CP857",""},
	{0xC8, 1250, "Eastern European Windows", "WINDOWS-1250","WIN1250"},
	{0xC9, 1251, "Russian Windows", "WINDOWS-1251","WIN1251"},
	{0xCA, 1254, "Turkish Windows", "WINDOWS-1254","WIN1254"},
	{0xCB, 1253, "Greek Windows", "WINDOWS-1253","WIN1253"},
	{0xCC, 1257, "Baltic Window", "WINDOWS-1257","WIN1257"},
	{0xFF, 65001, "UTF-8", "UTF-8","UTF8"}
};

/* Number of rows in code_pages, derived from the table so the two cannot drift apart */
static const int num_code_pages = (int)(sizeof(code_pages) / sizeof(code_pages[0]));
98 
99 
100 
101 
102 
/**
 * Escape strings that are to be used as part of a PostgreSQL connection string.
 *
 * Apostrophes and backslashes are prefixed with a backslash:
 *   ' -> \'
 *   \ -> \\
 *
 * @param str NUL-terminated input string; may be NULL.
 * @return str itself when nothing needs escaping (no copy is made), otherwise
 *         a newly allocated escaped copy that the caller releases with free().
 *         NULL when str is NULL or the allocation fails.
 */
char *
escape_connection_string(char *str)
{
	const char *src;
	char *result;
	char *dst;
	size_t toescape = 0;

	if (str == NULL)
		return NULL;

	/* First pass: count the characters that need a backslash in front of them */
	for (src = str; *src; src++)
	{
		if (*src == '\'' || *src == '\\')
			toescape++;
	}

	/* If we don't have to escape anything, simply return the input pointer */
	if (toescape == 0)
		return str;

	/* src now points at the terminator, so (src - str) is the input length */
	result = malloc((size_t)(src - str) + toescape + 1);
	if (result == NULL)
		return NULL;

	/* Second pass: copy, inserting the escapes */
	dst = result;
	for (src = str; *src; src++)
	{
		if (*src == '\'' || *src == '\\')
			*dst++ = '\\';

		*dst++ = *src;
	}
	*dst = '\0';

	return result;
}
157 
158 void
160 {
161  map->size = 0;
162  map->pgfieldnames = NULL;
163  map->dbffieldnames = NULL;
164 }
165 
166 void
168 {
169  int i;
170  if (map != NULL){
171  if (map->size)
172  {
173  for (i = 0; i < map->size; i++)
174  {
175  if (map->pgfieldnames[i]) free(map->pgfieldnames[i]);
176  if (map->dbffieldnames[i]) free(map->dbffieldnames[i]);
177  }
178  free(map->pgfieldnames);
179  free(map->dbffieldnames);
180  }
181  }
182 }
183 
184 const char *
185 colmap_dbf_by_pg(colmap *map, const char *pgname)
186 {
187  int i;
188  for (i=0; i<map->size; i++)
189  {
190  if (!strcasecmp(map->pgfieldnames[i], pgname))
191  {
192  return map->dbffieldnames[i];
193  }
194  }
195  return NULL;
196 }
197 
198 const char *
199 colmap_pg_by_dbf(colmap *map, const char *dbfname)
200 {
201  int i;
202  for (i=0; i<map->size; i++)
203  {
204  if (!strcasecmp(map->dbffieldnames[i], dbfname))
205  {
206  return map->pgfieldnames[i];
207  }
208  }
209  return NULL;
210 }
211 
212 int
213 colmap_read(const char *filename, colmap *map, char *errbuf, size_t errbuflen)
214 {
215  FILE *fptr;
216  char linebuffer[1024];
217  char *tmpstr;
218  int curmapsize, fieldnamesize;
219 
220  /* Read column map file and load the colmap_dbffieldnames
221  * and colmap_pgfieldnames arrays */
222  fptr = fopen(filename, "r");
223  if (!fptr)
224  {
225  /* Return an error */
226  snprintf(errbuf, errbuflen, _("ERROR: Unable to open column map file %s"),
227  filename);
228  return 0;
229  }
230 
231  /* First count how many columns we have... */
232  while (fgets(linebuffer, 1024, fptr) != NULL) ++map->size;
233 
234  /* Now we know the final size, allocate the arrays and load the data */
235  fseek(fptr, 0, SEEK_SET);
236  map->pgfieldnames = (char **)malloc(sizeof(char *) * map->size);
237  map->dbffieldnames = (char **)malloc(sizeof(char *) * map->size);
238 
239  /* Read in a line at a time... */
240  curmapsize = 0;
241  while (fgets(linebuffer, 1024, fptr) != NULL)
242  {
243  /* Split into two separate strings: pgfieldname and dbffieldname */
244  /* First locate end of first column (pgfieldname) */
245  fieldnamesize = strcspn(linebuffer, "\t\n ");
246  tmpstr = linebuffer;
247 
248  /* Allocate memory and copy the string ensuring it is terminated */
249  map->pgfieldnames[curmapsize] = malloc(fieldnamesize + 1);
250  strncpy(map->pgfieldnames[curmapsize], tmpstr, fieldnamesize);
251  map->pgfieldnames[curmapsize][fieldnamesize] = '\0';
252 
253  /* Now swallow up any whitespace */
254  tmpstr = linebuffer + fieldnamesize;
255  tmpstr += strspn(tmpstr, "\t\n ");
256 
257  /* Finally locate end of second column (dbffieldname) */
258  fieldnamesize = strcspn(tmpstr, "\t\n ");
259 
260  /* Allocate memory and copy the string ensuring it is terminated */
261  map->dbffieldnames[curmapsize] = malloc(fieldnamesize + 1);
262  strncpy(map->dbffieldnames[curmapsize], tmpstr, fieldnamesize);
263  map->dbffieldnames[curmapsize][fieldnamesize] = '\0';
264 
265  /* Error out if the dbffieldname is > 10 chars */
266  if (strlen(map->dbffieldnames[curmapsize]) > 10)
267  {
268  snprintf(errbuf, errbuflen, _("ERROR: column map file specifies a DBF field name \"%s\" which is longer than 10 characters"), map->dbffieldnames[curmapsize]);
269  return 0;
270  }
271 
272  ++curmapsize;
273  }
274 
275  fclose(fptr);
276 
277  /* Done; return success */
278  return 1;
279 }
280 
281 /*
282 * Code page info will come out of dbfopen as either a bare codepage number
283 * (e.g. 1256) or as "LDID/1234" from the DBF hreader. We want to look up
284 * the equivalent iconv encoding string so we can use iconv to transcode
285 * the data into UTF8
286 */
287 char *
288 codepage2encoding(const char *cpg)
289 {
290  int cpglen;
291  int is_ldid = 0;
292  int num, i;
293 
294  /* Do nothing on nothing. */
295  if ( ! cpg ) return NULL;
296 
297  /* Is this an LDID string? */
298  /* If so, note it and move past the "LDID/" tag */
299  cpglen = strlen(cpg);
300  if ( strstr(cpg, "LDID/") )
301  {
302  if ( cpglen > 5 )
303  {
304  cpg += 5;
305  is_ldid = 1;
306  }
307  else
308  {
309  return NULL;
310  }
311  }
312 
313  /* Read the number */
314  num = atoi(cpg);
315 
316  /* Can we find this number in our lookup table? */
317  for ( i = is_ldid ; i < num_code_pages; i++ )
318  {
319  if ( is_ldid )
320  {
321  if ( code_pages[i].ldid == num )
322  return strdup(code_pages[i].iconv);
323  }
324  else
325  {
326  if ( code_pages[i].cpg == num )
327  return strdup(code_pages[i].iconv);
328  }
329  }
330 
331  /* Didn't find a matching entry */
332  return NULL;
333 
334 }
335 
336 /*
337 * In the case where data is coming out of the database in some wierd encoding
338 * we want to look up the appropriate code page entry to feed to DBFCreateEx
339 *
340 * Return null on error (cannot allocate memory)
341 */
342 char *
343 encoding2codepage(const char *encoding)
344 {
345  int i;
346  for ( i = 0; i < num_code_pages; i++ )
347  {
348  if ( strcasecmp(encoding, code_pages[i].pg) == 0 )
349  {
350  if ( code_pages[i].ldid == 0xFF )
351  {
352  return strdup("UTF-8");
353  }
354  else
355  {
356  char *codepage = NULL;
357  int ret = asprintf(&codepage, "LDID/%d", code_pages[i].ldid);
358  if ( ret == -1 ) return NULL; /* return null on error */
359  return codepage;
360  }
361  }
362  }
363 
364  /* OK, we give up, pretend it's UTF8 */
365  return strdup("UTF-8");
366 }
void * malloc(YYSIZE_T)
void free(void *)
char * encoding2codepage(const char *encoding)
Definition: shpcommon.c:343
char * escape_connection_string(char *str)
Escape strings that are to be used as part of a PostgreSQL connection string.
Definition: shpcommon.c:109
static int num_code_pages
Definition: shpcommon.c:31
static code_page_entry code_pages[]
Definition: shpcommon.c:36
int colmap_read(const char *filename, colmap *map, char *errbuf, size_t errbuflen)
Read the content of filename into a symbol map.
Definition: shpcommon.c:213
void colmap_init(colmap *map)
Definition: shpcommon.c:159
const char * colmap_dbf_by_pg(colmap *map, const char *pgname)
Definition: shpcommon.c:185
const char * colmap_pg_by_dbf(colmap *map, const char *dbfname)
Definition: shpcommon.c:199
void colmap_clean(colmap *map)
Definition: shpcommon.c:167
char * codepage2encoding(const char *cpg)
Definition: shpcommon.c:288
#define _(String)
Definition: shpcommon.h:24
char * pg
Definition: shpcommon.c:28
char * desc
Definition: shpcommon.c:26
int ldid
Definition: shpcommon.c:24
char * iconv
Definition: shpcommon.c:27
int cpg
Definition: shpcommon.c:25
Definition: shpcommon.c:23
char ** pgfieldnames
Definition: shpcommon.h:55
int size
Definition: shpcommon.h:61
char ** dbffieldnames
Definition: shpcommon.h:58