PostGIS  2.3.8dev-r@@SVN_REVISION@@
shpcommon.c
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * Copyright (C) 2014 Sandro Santilli <strk@kbt.io>
7  * Copyright (C) 2010 Mark Cave-Ayland <mark.cave-ayland@siriusit.co.uk>
8  *
9  * This is free software; you can redistribute and/or modify it under
10  * the terms of the GNU General Public Licence. See the COPYING file.
11  *
12  **********************************************************************/
13 
14 /* This file contains functions that are shared between the loader and dumper */
15 
16 #include <stdio.h>
17 #include <string.h>
18 #include <stdlib.h>
19 
20 #include "shpcommon.h"
21 
/**
 * One row of the code page lookup table.
 */
typedef struct
{
	int ldid;          /* LDID number, as found in "LDID/<n>" strings from the DBF header */
	int cpg;           /* bare numeric code page */
	const char *desc;  /* human readable description */
	const char *iconv; /* encoding name handed to iconv, "" when none is listed */
	const char *pg;    /* name matched against the database encoding, "" when none is listed */
} code_page_entry;

/* http://www.autopark.ru/ASBProgrammerGuide/DBFSTRUC.HTM */
/* http://resources.arcgis.com/fr/content/kbase?fa=articleShow&d=21106 */

/*
 * Lookup table shared by codepage2encoding() and encoding2codepage().
 * Lookups return the FIRST matching row, so when several rows share a
 * code page number the earliest one decides the iconv name.
 */
static const code_page_entry code_pages[] =
{
	{0x01, 437, "U.S. MS-DOS", "CP437",""},
	{0x02, 850, "International MS-DOS", "CP850",""},
	{0x03, 1252, "Window ANSI", "WINDOWS-1252","WIN1252"},
	{0x08, 865, "Danish OEM", "CP865",""},
	{0x09, 437, "Dutch OEM", "CP437",""},
	{0x0A, 850, "Dutch OEM*", "CP850",""},
	{0x0B, 437, "Finnish OEM", "CP437",""},
	{0x0D, 437, "French OEM", "CP437",""},
	{0x0E, 850, "French OEM*", "CP850",""},
	{0x0F, 437, "German OEM", "CP437",""},
	{0x10, 850, "German OEM*", "CP850",""},
	{0x11, 437, "Italian OEM", "CP437",""},
	{0x12, 850, "Italian OEM*", "CP850",""},
	{0x13, 932, "Japanese Shift-JIS", "CP932","SJIS"},
	{0x14, 850, "Spanish OEM*", "CP850",""},
	{0x15, 437, "Swedish OEM", "CP437",""},
	{0x16, 850, "Swedish OEM*", "CP850",""},
	{0x17, 865, "Norwegian OEM", "CP865",""},
	/* code page 437, so the iconv name has to be CP437 (not CP865) */
	{0x18, 437, "Spanish OEM", "CP437",""},
	{0x19, 437, "English OEM (Britain)", "CP437",""},
	{0x1A, 850, "English OEM (Britain)*", "CP850",""},
	{0x1B, 437, "English OEM (U.S.)", "CP437",""},
	{0x1C, 863, "French OEM (Canada)", "CP863",""},
	{0x1D, 850, "French OEM*", "CP850",""},
	{0x1F, 852, "Czech OEM", "CP852",""},
	{0x22, 852, "Hungarian OEM", "CP852",""},
	{0x23, 852, "Polish OEM", "CP852",""},
	{0x24, 860, "Portuguese OEM", "CP860",""},
	{0x25, 850, "Portuguese OEM*", "CP850",""},
	/* same iconv name as the other 866 row (0x65); this is the first 866 match */
	{0x26, 866, "Russian OEM", "CP866","WIN866"},
	{0x37, 850, "English OEM (U.S.)*", "CP850",""},
	{0x40, 852, "Romanian OEM", "CP852",""},
	{0x4D, 936, "Chinese GBK (PRC)", "CP936",""},
	{0x4E, 949, "Korean (ANSI/OEM)", "CP949",""},
	{0x4F, 950, "Chinese Big 5 (Taiwan)", "CP950","BIG5"},
	/* same iconv name as the other 874 row (0x7C); this is the first 874 match */
	{0x50, 874, "Thai (ANSI/OEM)", "WINDOWS-874",""},
	{0x57, 1252, "ANSI", "WINDOWS-1252",""},
	{0x58, 1252, "Western European ANSI", "WINDOWS-1252",""},
	{0x59, 1252, "Spanish ANSI", "WINDOWS-1252",""},
	{0x64, 852, "Eastern European MS-DOS", "CP852",""},
	{0x65, 866, "Russian MS-DOS", "CP866",""},
	{0x66, 865, "Nordic MS-DOS", "CP865",""},
	/* NOTE(review): no iconv name listed for 861 - confirm whether "CP861" should be used */
	{0x67, 861, "Icelandic MS-DOS", "",""},
	{0x6A, 737, "Greek MS-DOS (437G)", "CP737",""},
	{0x6B, 857, "Turkish MS-DOS", "CP857",""},
	{0x6C, 863, "French-Canadian MS-DOS", "CP863",""},
	{0x78, 950, "Taiwan Big 5", "CP950",""},
	{0x79, 949, "Hangul (Wansung)", "CP949",""},
	{0x7A, 936, "PRC GBK", "CP936","GBK"},
	{0x7B, 932, "Japanese Shift-JIS", "CP932",""},
	{0x7C, 874, "Thai Windows/MS-DOS", "WINDOWS-874","WIN874"},
	{0x86, 737, "Greek OEM", "CP737",""},
	{0x87, 852, "Slovenian OEM", "CP852",""},
	{0x88, 857, "Turkish OEM", "CP857",""},
	{0xC8, 1250, "Eastern European Windows", "WINDOWS-1250","WIN1250"},
	{0xC9, 1251, "Russian Windows", "WINDOWS-1251","WIN1251"},
	{0xCA, 1254, "Turkish Windows", "WINDOWS-1254","WIN1254"},
	{0xCB, 1253, "Greek Windows", "WINDOWS-1253","WIN1253"},
	{0xCC, 1257, "Baltic Window", "WINDOWS-1257","WIN1257"},
	{0xFF, 65001, "UTF-8", "UTF-8","UTF8"}
};

/* Number of rows in code_pages, derived from the table so it cannot drift */
static const int num_code_pages = (int)(sizeof(code_pages) / sizeof(code_pages[0]));
98 
99 
100 
101 
102 
/**
 * Escape a string that is to be used as part of a PostgreSQL connection
 * string: every apostrophe and every backslash is prefixed with a backslash.
 *
 * @param str NUL-terminated input; may be NULL.
 * @return str itself when it contains nothing to escape (or is NULL);
 *         otherwise a newly allocated escaped copy, which the caller must
 *         free. NULL is also returned when the copy cannot be allocated.
 */
char *
escape_connection_string(char *str)
{
	const char *src;
	char *result;
	char *dst;
	size_t toescape = 0;

	if (!str)
	{
		return NULL;
	}

	/* Count the characters that need a backslash so the result can be sized */
	for (src = str; *src; src++)
	{
		if (*src == '\'' || *src == '\\')
		{
			toescape++;
		}
	}

	/* Nothing to escape: hand back the input pointer unchanged */
	if (toescape == 0)
	{
		return str;
	}

	/* src now points at the terminator, so (src - str) is the input length */
	result = malloc((size_t)(src - str) + toescape + 1);
	if (!result)
	{
		return NULL;
	}

	dst = result;
	for (src = str; *src; src++)
	{
		if (*src == '\'' || *src == '\\')
		{
			*dst++ = '\\';
		}
		*dst++ = *src;
	}
	*dst = '\0';

	return result;
}
157 
158 void
160 {
161  map->size = 0;
162  map->pgfieldnames = NULL;
163  map->dbffieldnames = NULL;
164 }
165 
166 void
168 {
169  int i;
170  if (map->size)
171  {
172  for (i = 0; i < map->size; i++)
173  {
174  if (map->pgfieldnames[i]) free(map->pgfieldnames[i]);
175  if (map->dbffieldnames[i]) free(map->dbffieldnames[i]);
176  }
177  free(map->pgfieldnames);
178  free(map->dbffieldnames);
179  }
180 }
181 
182 const char *
183 colmap_dbf_by_pg(colmap *map, const char *pgname)
184 {
185  int i;
186  for (i=0; i<map->size; ++i)
187  {
188  if (!strcasecmp(map->pgfieldnames[i], pgname))
189  {
190  return map->dbffieldnames[i];
191  }
192  }
193  return NULL;
194 }
195 
196 const char *
197 colmap_pg_by_dbf(colmap *map, const char *dbfname)
198 {
199  int i;
200  for (i=0; i<map->size; ++i)
201  {
202  if (!strcasecmp(map->dbffieldnames[i], dbfname))
203  {
204  return map->pgfieldnames[i];
205  }
206  }
207  return NULL;
208 }
209 
210 int
211 colmap_read(const char *filename, colmap *map, char *errbuf, size_t errbuflen)
212 {
213  FILE *fptr;
214  char linebuffer[1024];
215  char *tmpstr;
216  int curmapsize, fieldnamesize;
217 
218  /* Read column map file and load the colmap_dbffieldnames
219  * and colmap_pgfieldnames arrays */
220  fptr = fopen(filename, "r");
221  if (!fptr)
222  {
223  /* Return an error */
224  snprintf(errbuf, errbuflen, _("ERROR: Unable to open column map file %s"),
225  filename);
226  return 0;
227  }
228 
229  /* First count how many columns we have... */
230  while (fgets(linebuffer, 1024, fptr) != NULL) ++map->size;
231 
232  /* Now we know the final size, allocate the arrays and load the data */
233  fseek(fptr, 0, SEEK_SET);
234  map->pgfieldnames = (char **)malloc(sizeof(char *) * map->size);
235  map->dbffieldnames = (char **)malloc(sizeof(char *) * map->size);
236 
237  /* Read in a line at a time... */
238  curmapsize = 0;
239  while (fgets(linebuffer, 1024, fptr) != NULL)
240  {
241  /* Split into two separate strings: pgfieldname and dbffieldname */
242  /* First locate end of first column (pgfieldname) */
243  fieldnamesize = strcspn(linebuffer, "\t\n ");
244  tmpstr = linebuffer;
245 
246  /* Allocate memory and copy the string ensuring it is terminated */
247  map->pgfieldnames[curmapsize] = malloc(fieldnamesize + 1);
248  strncpy(map->pgfieldnames[curmapsize], tmpstr, fieldnamesize);
249  map->pgfieldnames[curmapsize][fieldnamesize] = '\0';
250 
251  /* Now swallow up any whitespace */
252  tmpstr = linebuffer + fieldnamesize;
253  tmpstr += strspn(tmpstr, "\t\n ");
254 
255  /* Finally locate end of second column (dbffieldname) */
256  fieldnamesize = strcspn(tmpstr, "\t\n ");
257 
258  /* Allocate memory and copy the string ensuring it is terminated */
259  map->dbffieldnames[curmapsize] = malloc(fieldnamesize + 1);
260  strncpy(map->dbffieldnames[curmapsize], tmpstr, fieldnamesize);
261  map->dbffieldnames[curmapsize][fieldnamesize] = '\0';
262 
263  /* Error out if the dbffieldname is > 10 chars */
264  if (strlen(map->dbffieldnames[curmapsize]) > 10)
265  {
266  snprintf(errbuf, errbuflen, _("ERROR: column map file specifies a DBF field name \"%s\" which is longer than 10 characters"), map->dbffieldnames[curmapsize]);
267  return 0;
268  }
269 
270  ++curmapsize;
271  }
272 
273  fclose(fptr);
274 
275  /* Done; return success */
276  return 1;
277 }
278 
279 /*
280 * Code page info will come out of dbfopen as either a bare codepage number
281 * (e.g. 1256) or as "LDID/1234" from the DBF hreader. We want to look up
282 * the equivalent iconv encoding string so we can use iconv to transcode
283 * the data into UTF8
284 */
285 char *
286 codepage2encoding(const char *cpg)
287 {
288  int cpglen;
289  int is_ldid = 0;
290  int num, i;
291 
292  /* Do nothing on nothing. */
293  if ( ! cpg ) return NULL;
294 
295  /* Is this an LDID string? */
296  /* If so, note it and move past the "LDID/" tag */
297  cpglen = strlen(cpg);
298  if ( strstr(cpg, "LDID/") )
299  {
300  if ( cpglen > 5 )
301  {
302  cpg += 5;
303  is_ldid = 1;
304  }
305  else
306  {
307  return NULL;
308  }
309  }
310 
311  /* Read the number */
312  num = atoi(cpg);
313 
314  /* Can we find this number in our lookup table? */
315  for ( i = is_ldid ; i < num_code_pages; i++ )
316  {
317  if ( is_ldid )
318  {
319  if ( code_pages[i].ldid == num )
320  return strdup(code_pages[i].iconv);
321  }
322  else
323  {
324  if ( code_pages[i].cpg == num )
325  return strdup(code_pages[i].iconv);
326  }
327  }
328 
329  /* Didn't find a matching entry */
330  return NULL;
331 
332 }
333 
334 /*
335 * In the case where data is coming out of the database in some wierd encoding
336 * we want to look up the appropriate code page entry to feed to DBFCreateEx
337 *
338 * Return null on error (cannot allocate memory)
339 */
340 char *
341 encoding2codepage(const char *encoding)
342 {
343  int i;
344  for ( i = 0; i < num_code_pages; i++ )
345  {
346  if ( strcasecmp(encoding, code_pages[i].pg) == 0 )
347  {
348  if ( code_pages[i].ldid == 0xFF )
349  {
350  return strdup("UTF-8");
351  }
352  else
353  {
354  char *codepage = NULL;
355  int ret = asprintf(&codepage, "LDID/%d", code_pages[i].ldid);
356  if ( ret == -1 ) return NULL; /* return null on error */
357  return codepage;
358  }
359  }
360  }
361 
362  /* OK, we give up, pretend it's UTF8 */
363  return strdup("UTF-8");
364 }
void colmap_clean(colmap *map)
Definition: shpcommon.c:167
int ldid
Definition: shpcommon.c:24
char * iconv
Definition: shpcommon.c:27
int size
Definition: shpcommon.h:61
int colmap_read(const char *filename, colmap *map, char *errbuf, size_t errbuflen)
Read the content of filename into a symbol map.
Definition: shpcommon.c:211
static code_page_entry code_pages[]
Definition: shpcommon.c:36
#define _(String)
Definition: shpcommon.h:24
char * codepage2encoding(const char *cpg)
Definition: shpcommon.c:286
int cpg
Definition: shpcommon.c:25
char * desc
Definition: shpcommon.c:26
void colmap_init(colmap *map)
Definition: shpcommon.c:159
char * pg
Definition: shpcommon.c:28
char ** pgfieldnames
Definition: shpcommon.h:55
Definition: shpcommon.c:22
char * escape_connection_string(char *str)
Escape strings that are to be used as part of a PostgreSQL connection string.
Definition: shpcommon.c:109
static int num_code_pages
Definition: shpcommon.c:31
char * encoding2codepage(const char *encoding)
Definition: shpcommon.c:341
char ** dbffieldnames
Definition: shpcommon.h:58
const char * colmap_pg_by_dbf(colmap *map, const char *dbfname)
Definition: shpcommon.c:197
void free(void *)
void * malloc(YYSIZE_T)
const char * colmap_dbf_by_pg(colmap *map, const char *pgname)
Definition: shpcommon.c:183