1 |
adcroft |
1.1 |
/* |
2 |
|
|
$Id: string.c,v 1.46 2002/08/14 22:08:48 whmoseley Exp $ |
3 |
|
|
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company |
4 |
|
|
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94 |
5 |
|
|
** |
6 |
|
|
** This program and library is free software; you can redistribute it and/or |
7 |
|
|
** modify it under the terms of the GNU (Library) General Public License |
8 |
|
|
** as published by the Free Software Foundation; either version 2 |
9 |
|
|
** of the License, or any later version. |
10 |
|
|
** |
11 |
|
|
** This program is distributed in the hope that it will be useful, |
12 |
|
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
|
|
** GNU (Library) General Public License for more details. |
15 |
|
|
** |
16 |
|
|
** You should have received a copy of the GNU (Library) General Public License |
17 |
|
|
** along with this program; if not, write to the Free Software |
18 |
|
|
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
19 |
|
|
**--------------------------------------------------------- |
20 |
|
|
** ** ** PATCHED 5/13/96, CJC |
21 |
|
|
** Added MatchAndChange for regex in replace rule G.Hill 2/10/98 |
22 |
|
|
** |
23 |
|
|
** change sprintf to snprintf to avoid corruption |
24 |
|
|
** added safestrcpy() macro to avoid corruption from strcpy overflow |
25 |
|
|
** SRE 11/17/99 |
26 |
|
|
** |
27 |
|
|
** fixed cast to int problems pointed out by "gcc -Wall" |
28 |
|
|
** SRE 2/22/00 |
29 |
|
|
** |
30 |
|
|
** 2001-02-xx rasc makeItLow, strtolower optimized/new |
31 |
|
|
** iso handling, minor bugfixes |
32 |
|
|
** |
33 |
|
|
** 2001-02-xx jruiz, rasc: -- IMPORTANT NOTE -- |
34 |
|
|
** due to ISO charsets, arguments to tolower, isspace, strcmp, etc. |
35 |
|
|
** have to be (unsigned char)!! |
36 |
|
|
** otherwise some chars may fail. |
37 |
|
|
** |
38 |
|
|
** 2001-03-08 rasc rewritten and enhanced suffix routines |
39 |
|
|
** 2001-04-10 rasc str_dirname, str_basename, changed char_decode_C_ESC |
40 |
|
|
** |
41 |
|
|
*/ |
42 |
|
|
|
43 |
|
|
#include <ctype.h> |
44 |
|
|
#include "swish.h" |
45 |
|
|
#include "mem.h" |
46 |
|
|
#include "index.h" |
47 |
|
|
#include "swish_qsort.h" |
48 |
|
|
#include "string.h" |
49 |
|
|
#include "error.h" |
50 |
|
|
|
51 |
|
|
|
52 |
|
|
|
53 |
|
|
/* Case-insensitive strstr(). */ |
54 |
|
|
/* Jose Ruiz 02/2001 Faster one */ |
55 |
|
|
/*
** lstrstr - case-insensitive strstr().
**
** s: string to search in
** t: string to search for
**
** Returns a pointer to the first position in s at which t occurs when
** letter case is ignored, or NULL if there is none.  An empty t never
** matches, so NULL is returned for it.
*/
char *lstrstr(char *s, char *t)
{
    int     needle_len = strlen(t);
    int     remaining = strlen(s);
    int     lead = tolower((unsigned char) *t);
    char   *pos;

    /* stop as soon as the rest of s is too short to hold t */
    for (pos = s; remaining != 0 && needle_len <= remaining; pos++, remaining--)
    {
        /* cheap first-character test before comparing the tail */
        if (tolower((int) ((unsigned char) *pos)) != lead)
            continue;

        if (needle_len == 1 || strncasecmp(pos + 1, t + 1, needle_len - 1) == 0)
            return pos;
    }

    return NULL;
}
75 |
|
|
|
76 |
|
|
/* Gets the next word in a line. If the word's in quotes, |
77 |
|
|
** include blank spaces in the word or phrase. |
78 |
|
|
-- 2001-02-11 rasc totally rewritten, respect escapes like \" |
79 |
|
|
-- 2001-11-09 moseley rewritten again - doesn't check for missing end quote |
80 |
|
|
-- Always returns a string, but may be empty. |
81 |
|
|
*/ |
82 |
|
|
|
83 |
|
|
static char *getword(char **in_buf) |
84 |
|
|
{ |
85 |
|
|
unsigned char quotechar; |
86 |
|
|
unsigned char uc; |
87 |
|
|
char *s = *in_buf; |
88 |
|
|
char *start = *in_buf; |
89 |
|
|
char buf[MAXWORDLEN + 1]; |
90 |
|
|
char *cur_char = buf; |
91 |
|
|
int backslash = 0; |
92 |
|
|
|
93 |
|
|
|
94 |
|
|
quotechar = '\0'; |
95 |
|
|
|
96 |
|
|
s = str_skip_ws(s); |
97 |
|
|
|
98 |
|
|
/* anything to read? */ |
99 |
|
|
if (!*s) |
100 |
|
|
{ |
101 |
|
|
*in_buf = s; |
102 |
|
|
return estrdup("\0"); |
103 |
|
|
} |
104 |
|
|
|
105 |
|
|
|
106 |
|
|
if (*s == '\"' || *s == '\'') |
107 |
|
|
quotechar = *s++; |
108 |
|
|
|
109 |
|
|
/* find end of "more words" or word */ |
110 |
|
|
|
111 |
|
|
while (*s) |
112 |
|
|
{ |
113 |
|
|
uc = (unsigned char) *s; |
114 |
|
|
|
115 |
|
|
if (uc == '\\' && !backslash && quotechar) // Mar 17, 2002 - only enable backslash inside of quotes |
116 |
|
|
{ |
117 |
|
|
s++; |
118 |
|
|
backslash++; |
119 |
|
|
continue; |
120 |
|
|
} |
121 |
|
|
|
122 |
|
|
/* Can't see why we would need to escape these, can you? - always fed a single line */ |
123 |
|
|
if (uc == '\n' || uc == '\r') |
124 |
|
|
{ |
125 |
|
|
s++; |
126 |
|
|
break; |
127 |
|
|
} |
128 |
|
|
|
129 |
|
|
|
130 |
|
|
if (!backslash) |
131 |
|
|
{ |
132 |
|
|
/* break on ending quote or unquoted space */ |
133 |
|
|
|
134 |
|
|
if (uc == quotechar || (!quotechar && isspace((int) uc))) |
135 |
|
|
{ |
136 |
|
|
s++; // past quote or space char. |
137 |
|
|
break; |
138 |
|
|
} |
139 |
|
|
|
140 |
|
|
} else |
141 |
|
|
backslash = 0; |
142 |
|
|
|
143 |
|
|
|
144 |
|
|
*cur_char++ = *s++; |
145 |
|
|
|
146 |
|
|
if (cur_char - buf > MAXWORDLEN) |
147 |
|
|
progerr("Parsed word '%s' exceeded max length of %d", start, MAXWORDLEN); |
148 |
|
|
} |
149 |
|
|
|
150 |
|
|
if (backslash) |
151 |
|
|
*cur_char++ = '\\'; |
152 |
|
|
|
153 |
|
|
|
154 |
|
|
*cur_char = '\0'; |
155 |
|
|
|
156 |
|
|
*in_buf = s; |
157 |
|
|
|
158 |
|
|
return estrdup(buf); |
159 |
|
|
|
160 |
|
|
} |
161 |
|
|
|
162 |
|
|
|
163 |
|
|
/* Gets the value of a variable in a line of the configuration file. |
164 |
|
|
** Basically, anything in quotes or an argument to a variable. |
165 |
|
|
*/ |
166 |
|
|
|
167 |
|
|
/*
** getconfvalue - fetch the value that follows a variable name in a
** configuration line.
**
** line: the configuration line
** var:  the variable name; it must be the (case-insensitive) start of line
**
** After the name, white space and double quotes are skipped; the value is
** everything up to the next double quote, '\n', '\r' or end of string.
**
** Returns a buffer obtained from emalloc() holding the value, or NULL when
** var is empty, line does not begin with var, or nothing follows the name.
*/
char *getconfvalue(char *line, char *var)
{
    size_t  varlen = strlen(var);
    size_t  vallen;
    char   *c;
    char   *value;

    /* a prefix comparison is enough - a match elsewhere in the line was rejected anyway */
    if (varlen == 0 || strncasecmp(line, var, varlen) != 0)
        return NULL;

    c = line + varlen;
    while (isspace((int) ((unsigned char) *c)) || *c == '\"')
        c++;
    if (*c == '\0')
        return NULL;

    /* measure first, then allocate exactly what is needed */
    vallen = strcspn(c, "\"\n\r");
    value = (char *) emalloc(vallen + 1);
    memcpy(value, c, vallen);
    value[vallen] = '\0';

    return value;
}
205 |
|
|
|
206 |
|
|
|
207 |
|
|
/* In a string, replaces all occurrences of "oldpiece" with "newpiece". |
208 |
|
|
** This is not really bulletproof yet. |
209 |
|
|
*/ |
210 |
|
|
/* 05/00 Jose Ruiz |
211 |
|
|
** Totally rewritten |
212 |
|
|
*/ |
213 |
|
|
/*
** replace - substitute every occurrence of oldpiece in string by newpiece.
**
** string:   input string; it is released with efree() once the new string
**           has been built
** oldpiece: text to look for (case-sensitive)
** newpiece: text to put in its place
**
** Returns the new string, obtained from emalloc()/erealloc().  When
** oldpiece is empty there is nothing to replace and string itself is
** returned unchanged (an empty pattern matches at every position and used
** to make this function loop forever).
*/
char *replace(char *string, char *oldpiece, char *newpiece)
{
    size_t  oldlen = strlen(oldpiece);
    size_t  newlen = strlen(newpiece);
    size_t  capacity;           /* usable characters in out, NUL excluded */
    size_t  used = 0;           /* characters written to out so far */
    size_t  gap;
    size_t  tail;
    char   *out;
    char   *src;
    char   *hit;

    if (oldlen == 0)
        return string;

    capacity = strlen(string) * 2;
    out = (char *) emalloc(capacity + 1);
    src = string;

    while ((hit = strstr(src, oldpiece)) != NULL)
    {
        gap = (size_t) (hit - src);     /* untouched text before the match */

        if (used + gap + newlen > capacity)
        {
            capacity = used + gap + newlen + 200;
            out = (char *) erealloc(out, capacity + 1);
        }

        memcpy(out + used, src, gap);
        used += gap;
        memcpy(out + used, newpiece, newlen);
        used += newlen;

        src = hit + oldlen;
    }

    /* copy whatever follows the last match, including the NUL */
    tail = strlen(src);
    if (used + tail > capacity)
    {
        capacity = used + tail + 200;
        out = (char *) erealloc(out, capacity + 1);
    }
    memcpy(out + used, src, tail + 1);

    efree(string);
    return out;
}
264 |
|
|
|
265 |
|
|
|
266 |
|
|
|
267 |
|
|
|
268 |
|
|
/*----------------------------------------------------*/ |
269 |
|
|
|
270 |
|
|
|
271 |
|
|
/* |
272 |
|
|
-- Check if a file with a particular suffix should be indexed |
273 |
|
|
-- according to the settings in the configuration file. |
274 |
|
|
-- 2001-03-08 rasc rewritten (optimize and match also |
275 |
|
|
-- e.g. ".htm.de" or ".html.gz") |
276 |
|
|
*/ |
277 |
|
|
|
278 |
|
|
int isoksuffix(char *filename, struct swline *rulelist) |
279 |
|
|
{ |
280 |
|
|
char *s, |
281 |
|
|
*fe; |
282 |
|
|
|
283 |
|
|
|
284 |
|
|
if (!rulelist) |
285 |
|
|
return 1; /* no suffixlist */ |
286 |
|
|
|
287 |
|
|
/* basically do a right to left compare */ |
288 |
|
|
fe = (filename + strlen(filename)); |
289 |
|
|
while (rulelist) |
290 |
|
|
{ |
291 |
|
|
s = fe - strlen(rulelist->line); |
292 |
|
|
if (s >= filename) |
293 |
|
|
{ /* no negative overflow! */ |
294 |
|
|
if (!strcasecmp(rulelist->line, s)) |
295 |
|
|
{ |
296 |
|
|
return 1; |
297 |
|
|
} |
298 |
|
|
} |
299 |
|
|
rulelist = rulelist->next; |
300 |
|
|
} |
301 |
|
|
|
302 |
|
|
return 0; |
303 |
|
|
} |
304 |
|
|
|
305 |
|
|
|
306 |
|
|
|
307 |
|
|
|
308 |
|
|
/* 05/00 Jose Ruiz |
309 |
|
|
** Function to copy strings |
310 |
|
|
** Reallocate memory if needed |
311 |
|
|
** Returns the string copied |
312 |
|
|
** [see also estrredup() and estrdup()] |
313 |
|
|
*/ |
314 |
|
|
/*
** SafeStrCopy - copy orig into dest, getting a bigger buffer when needed.
**
** dest:        current destination buffer; not looked at when
**              *initialsize is 0
** orig:        NUL-terminated string to copy
** initialsize: in/out; string length dest can hold (the buffer is one byte
**              larger for the NUL).  Updated when a new buffer is allocated.
**
** When orig does not fit, or *initialsize is 0, a new buffer with room for
** 200 spare characters is obtained from emalloc(); the old buffer is
** released with efree() unless *initialsize was 0.
**
** Returns the buffer that now holds the copy - callers must use the
** returned pointer, dest may have been freed.
*/
char *SafeStrCopy(char *dest, char *orig, int *initialsize)
{
    int     len = strlen(orig);
    int     oldlen = *initialsize;

    if (len > oldlen || !oldlen)
    {
        *initialsize = len + 200;       /* 200 extra chars!!! */
        if (oldlen)
            efree(dest);
        dest = (char *) emalloc(*initialsize + 1);
    }

    memcpy(dest, orig, len);
    dest[len] = '\0';

    return dest;
}
335 |
|
|
|
336 |
|
|
/* Comparison routine to sort a string - See sortstring */ |
337 |
|
|
/*
** ccomp - comparison callback for sorting the characters of a string.
** s1 and s2 each point at one character; the characters are compared as
** unsigned values.
** Returns the difference between the two: negative, zero or positive.
*/
int ccomp(const void *s1, const void *s2)
{
    const unsigned char lhs = *(const unsigned char *) s1;
    const unsigned char rhs = *(const unsigned char *) s2;

    return (int) lhs - (int) rhs;
}
341 |
|
|
|
342 |
|
|
/* Sort a string removing dups */ |
343 |
|
|
void sortstring(char *s) |
344 |
|
|
{ |
345 |
|
|
int i, |
346 |
|
|
j, |
347 |
|
|
len; |
348 |
|
|
|
349 |
|
|
len = strlen(s); |
350 |
|
|
swish_qsort(s, len, 1, &ccomp); |
351 |
|
|
for (i = 1, j = 1; i < len; i++) |
352 |
|
|
if (s[i] != s[j - 1]) |
353 |
|
|
s[j++] = s[i]; |
354 |
|
|
s[j] = '\0'; |
355 |
|
|
|
356 |
|
|
} |
357 |
|
|
|
358 |
|
|
/* Merges two strings removing dups and ordering results */ |
359 |
|
|
char *mergestrings(char *s1, char *s2) |
360 |
|
|
{ |
361 |
|
|
int i, |
362 |
|
|
j, |
363 |
|
|
ilen1, |
364 |
|
|
ilen2, |
365 |
|
|
ilent; |
366 |
|
|
char *s, |
367 |
|
|
*p; |
368 |
|
|
|
369 |
|
|
ilen1 = strlen(s1); |
370 |
|
|
ilen2 = strlen(s2); |
371 |
|
|
ilent = ilen1 + ilen2; |
372 |
|
|
s = emalloc(ilent + 1); |
373 |
|
|
p = emalloc(ilent + 1); |
374 |
|
|
if (ilen1) |
375 |
|
|
memcpy(s, s1, ilen1); |
376 |
|
|
if (ilen2) |
377 |
|
|
memcpy(s + ilen1, s2, ilen2); |
378 |
|
|
if (ilent) |
379 |
|
|
swish_qsort(s, ilent, 1, &ccomp); |
380 |
|
|
for (i = 1, j = 1, p[0] = s[0]; i < ilent; i++) |
381 |
|
|
if (s[i] != p[j - 1]) |
382 |
|
|
p[j++] = s[i]; |
383 |
|
|
p[j] = '\0'; |
384 |
|
|
efree(s); |
385 |
|
|
return (p); |
386 |
|
|
} |
387 |
|
|
|
388 |
|
|
/*
** makelookuptable - build a character membership table.
**
** s: characters that belong to the set
** l: table of 256 ints, indexed by unsigned character value; every entry
**    is reset to 0, then the entries for the characters of s are set to 1
*/
void makelookuptable(char *s, int *l)
{
    const unsigned char *ch;
    int    *slot;

    for (slot = l; slot < l + 256; slot++)
        *slot = 0;

    for (ch = (const unsigned char *) s; *ch != '\0'; ch++)
        l[*ch] = 1;
}
397 |
|
|
|
398 |
|
|
/*
** makeallstringlookuptables - fill the string lookup tables kept in sw.
** Currently this is only sw->isvowellookuptable, which is set up for the
** upper and lower case vowels a, e, i, o and u.
*/
void makeallstringlookuptables(SWISH * sw)
{
    char   *vowels = "aeiouAEIOU";

    makelookuptable(vowels, sw->isvowellookuptable);
}
402 |
|
|
|
403 |
|
|
/* 06/00 Jose Ruiz- Parses a line into a StringList |
404 |
|
|
** 02/2001 Jose Ruiz - Added extra NULL at the end |
405 |
|
|
*/ |
406 |
|
|
StringList *parse_line(char *line) |
407 |
|
|
{ |
408 |
|
|
StringList *sl; |
409 |
|
|
int cursize, |
410 |
|
|
maxsize; |
411 |
|
|
char *p; |
412 |
|
|
|
413 |
|
|
if (!line) |
414 |
|
|
return (NULL); |
415 |
|
|
|
416 |
|
|
if ((p = strchr(line, '\n'))) |
417 |
|
|
*p = '\0'; |
418 |
|
|
|
419 |
|
|
cursize = 0; |
420 |
|
|
sl = (StringList *) emalloc(sizeof(StringList)); |
421 |
|
|
|
422 |
|
|
sl->word = (char **) emalloc((maxsize = 2) * sizeof(char *)); |
423 |
|
|
|
424 |
|
|
p = line; |
425 |
|
|
|
426 |
|
|
while (&line && (p = getword(&line))) |
427 |
|
|
{ |
428 |
|
|
/* getword returns "" when, not null, so need to free it if we are not using it */ |
429 |
|
|
if ( !*p) { |
430 |
|
|
efree( p ); |
431 |
|
|
break; |
432 |
|
|
} |
433 |
|
|
|
434 |
|
|
if (cursize == maxsize) |
435 |
|
|
sl->word = (char **) erealloc(sl->word, (maxsize *= 2) * sizeof(char *)); |
436 |
|
|
|
437 |
|
|
sl->word[cursize++] = (char *) p; |
438 |
|
|
} |
439 |
|
|
sl->n = cursize; |
440 |
|
|
|
441 |
|
|
/* Add an extra NULL */ |
442 |
|
|
if (cursize == maxsize) |
443 |
|
|
sl->word = (char **) erealloc(sl->word, (maxsize += 1) * sizeof(char *)); |
444 |
|
|
|
445 |
|
|
sl->word[cursize] = NULL; |
446 |
|
|
|
447 |
|
|
return sl; |
448 |
|
|
} |
449 |
|
|
|
450 |
|
|
/* Frees memory used by a StringList |
451 |
|
|
*/ |
452 |
|
|
/*
** freeStringList - release a StringList: every word, the word array and
** the list itself, all through efree().  A NULL list is ignored.
*/
void freeStringList(StringList * sl)
{
    if (!sl)
        return;

    /* release the words from last to first, counting sl->n down to 0 */
    while (sl->n != 0)
    {
        sl->n--;
        efree(sl->word[sl->n]);
    }

    efree(sl->word);
    efree(sl);
}
462 |
|
|
|
463 |
|
|
/* 10/00 Jose Ruiz |
464 |
|
|
** Function to copy len bytes from orig to dest+off_dest |
465 |
|
|
** Reallocate memory if needed |
466 |
|
|
** Returns the pointer to the new area |
467 |
|
|
*/ |
468 |
|
|
/*
** SafeMemCopy - copy len bytes from orig to dest + off_dest, enlarging
** dest when it is too small.
**
** dest:     destination buffer, or NULL if none has been allocated yet
** orig:     bytes to copy
** off_dest: offset in dest at which the copy starts
** sz_dest:  in/out; current size of dest, set to off_dest + len when the
**           buffer has to grow
** len:      number of bytes to copy; nothing is copied when len <= 0
**
** Returns the (possibly reallocated) destination buffer - callers must use
** the returned pointer.
*/
unsigned char *SafeMemCopy(unsigned char *dest, unsigned char *orig, int off_dest, int *sz_dest, int len)
{
    if (len > (*sz_dest - off_dest))
    {
        *sz_dest = len + off_dest;
        if (dest)
            dest = (unsigned char *) erealloc(dest, *sz_dest);
        else
            dest = (unsigned char *) emalloc(*sz_dest);
    }

    /* keeps memcpy away from a NULL dest and from a negative length */
    if (len > 0)
        memcpy(dest + off_dest, orig, len);

    return dest;
}
486 |
|
|
|
487 |
|
|
|
488 |
|
|
/* Routine to check if a string contains only numbers */ |
489 |
|
|
int isnumstring(unsigned char *s) |
490 |
|
|
{ |
491 |
|
|
if (!s || !*s) |
492 |
|
|
return 0; |
493 |
|
|
for (; *s; s++) |
494 |
|
|
if (!isdigit((int) (*s))) |
495 |
|
|
break; |
496 |
|
|
if (*s) |
497 |
|
|
return 0; |
498 |
|
|
return 1; |
499 |
|
|
} |
500 |
|
|
|
501 |
|
|
/*
** remove_newlines - overwrite every '\n' and '\r' in s with a space.
** The string is changed in place and keeps its length; NULL and empty
** strings are ignored.
*/
void remove_newlines(char *s)
{
    char   *p;

    if (s == NULL || *s == '\0')
        return;

    for (p = s; *p != '\0'; p++)
    {
        if (*p == '\n' || *p == '\r')
            *p = ' ';
    }
}
514 |
|
|
|
515 |
|
|
|
516 |
|
|
/*
** remove_tags - strip <...> tags from s in place.
**
** Every '<' is replaced by a single space and everything after it up to
** and including the next '>' is dropped.  A '>' is always dropped, also
** outside of a tag.  NULL and empty strings are ignored.
*/
void remove_tags(char *s)
{
    char   *in;
    char   *out;
    int     inside = 0;         /* non-zero between '<' and '>' */

    if (!s || !*s)
        return;

    out = s;
    for (in = s; *in; in++)
    {
        if (*in == '<')
        {
            inside = 1;
            *out++ = ' ';       /* jmruiz 02/2001 change <tag> by a space */
        }
        else if (*in == '>')
            inside = 0;
        else if (!inside)
            *out++ = *in;
    }
    *out = '\0';
}
546 |
|
|
|
547 |
|
|
/* #### Function to convert binary data of length len to a string */ |
548 |
|
|
/*
** bin2string - turn len bytes of binary data into a NUL-terminated string.
** Returns a buffer of len + 1 bytes from emalloc() holding a copy of the
** data followed by a NUL, or NULL when data is NULL or len is 0.
*/
unsigned char *bin2string(unsigned char *data, int len)
{
    unsigned char *copy;

    if (!data || !len)
        return NULL;

    copy = emalloc(len + 1);
    memcpy(copy, data, len);
    copy[len] = '\0';

    return copy;
}
560 |
|
|
|
561 |
|
|
/* #### */ |
562 |
|
|
|
563 |
|
|
|
564 |
|
|
|
565 |
|
|
|
566 |
|
|
|
567 |
|
|
|
568 |
|
|
/* ------------------------------------------------------------ */ |
569 |
|
|
|
570 |
|
|
|
571 |
|
|
|
572 |
|
|
|
573 |
|
|
/* |
574 |
|
|
-- Skip white spaces... |
575 |
|
|
-- position to non space character |
576 |
|
|
-- return: ptr. to non space char or \0 |
577 |
|
|
-- 2001-01-30 rasc |
578 |
|
|
*/ |
579 |
|
|
|
580 |
|
|
/*
** str_skip_ws - skip leading white space.
** Returns a pointer to the first character of s that is not white space;
** this is the terminating NUL when s holds nothing else.
*/
char *str_skip_ws(char *s)
{
    char   *cursor;

    for (cursor = s; *cursor != '\0'; cursor++)
    {
        if (!isspace((int) (unsigned char) *cursor))
            break;
    }

    return cursor;
}
586 |
|
|
|
587 |
|
|
/************************************* |
588 |
|
|
* Trim trailing white space |
589 |
|
|
* Returns void |
590 |
|
|
**************************************/ |
591 |
|
|
|
592 |
|
|
/*
** str_trim_ws - remove trailing white space from string, in place.
**
** Each trailing white space character is overwritten with a NUL.  The
** character is passed to isspace() as an unsigned char: a plain char is
** negative for ISO-8859 characters above 127 on platforms where char is
** signed, and such a value is not a valid argument for isspace().
*/
void str_trim_ws(char *string)
{
    size_t  i = strlen(string);

    while (i > 0 && isspace((unsigned char) string[i - 1]))
        string[--i] = '\0';
}
599 |
|
|
|
600 |
|
|
|
601 |
|
|
|
602 |
|
|
|
603 |
|
|
|
604 |
|
|
|
605 |
|
|
|
606 |
|
|
/* |
607 |
|
|
-- decode a character escape sequence |
608 |
|
|
-- input: ptr to \... escape sequence (C-escapes) |
609 |
|
|
-- return: character code |
610 |
|
|
se: string ptr to char after control sequence. |
611 |
|
|
(ignore, if NULL ptr) |
612 |
|
|
-- 2001-02-04 rasc |
613 |
|
|
-- 2001-04-10 rasc handle '\\' followed by '\0' (safety!) |
614 |
|
|
*/ |
615 |
|
|
|
616 |
|
|
|
617 |
|
|
/*
** charDecode_C_Escape - decode the character or C escape sequence at s.
**
** s:  text to decode; if it does not start with a backslash its first
**     character is returned as is
** se: if not NULL, receives a pointer to the character that follows the
**     decoded character or escape sequence
**
** Known escapes are \a \b \f \n \r \t \v, \x followed by a hexadecimal
** number and \0 followed by further octal digits; the numbers are read
** with strtoul().  A backslash before any other character gives that
** character.  A backslash at the very end of the string gives the
** backslash itself.
*/
char charDecode_C_Escape(char *s, char **se)
{
    char    decoded;
    char   *last = s;           /* last character that was consumed */
    char   *numend;

    if (*s != '\\')
    {
        decoded = *s;           /* no escape */
    }
    else
    {
        char    code = s[1];

        last = s + 1;

        if (code == 'a')
            decoded = '\a';
        else if (code == 'b')
            decoded = '\b';
        else if (code == 'f')
            decoded = '\f';
        else if (code == 'n')
            decoded = '\n';
        else if (code == 'r')
            decoded = '\r';
        else if (code == 't')
            decoded = '\t';
        else if (code == 'v')
            decoded = '\v';
        else if (code == 'x')
        {
            /* Hex \xff - the number starts after the 'x' */
            decoded = (char) strtoul(s + 2, &numend, 16);
            last = numend - 1;
        }
        else if (code == '0')
        {
            /* Oct \0, \012 - the leading '0' is part of the number */
            decoded = (char) strtoul(s + 1, &numend, 8);
            last = numend - 1;
        }
        else if (code == '\0')
        {
            /* null after the backslash: step back and return the "\" */
            last = s;
            decoded = *s;
        }
        else
            decoded = code;     /* the escaped character itself */
    }

    if (se)
        *se = last + 1;

    return decoded;
}
677 |
|
|
|
678 |
|
|
|
679 |
|
|
|
680 |
|
|
|
681 |
|
|
|
682 |
|
|
/* |
683 |
|
|
-- strtolower (make this string to lowercase) |
684 |
|
|
-- The string itself will be converted |
685 |
|
|
-- Return: ptr to string |
686 |
|
|
-- 2001-02-09 rasc: former makeItLow() been a little optimized |
687 |
|
|
|
688 |
|
|
!!! most tolower() don't map umlauts, etc. |
689 |
|
|
!!! you have to use the right cast! (unsigned char) |
690 |
|
|
!!! or an ISO mapping... |
691 |
|
|
*/ |
692 |
|
|
|
693 |
|
|
/*
** strtolower - convert s to lower case, in place, using tolower() on each
** character taken as an unsigned char.
** Returns s.
*/
char *strtolower(char *s)
{
    unsigned char *p;

    for (p = (unsigned char *) s; *p != '\0'; p++)
        *p = tolower((unsigned char) *p);

    return s;
}
704 |
|
|
|
705 |
|
|
|
706 |
|
|
|
707 |
|
|
|
708 |
|
|
|
709 |
|
|
|
710 |
|
|
/* ---------------------------------------------------------- */ |
711 |
|
|
/* ISO characters conversion/mapping handling */ |
712 |
|
|
|
713 |
|
|
|
714 |
|
|
|
715 |
|
|
/* |
716 |
|
|
-- character map to normalize chars for search and store |
717 |
|
|
-- characters are all mapped to lowercase |
718 |
|
|
-- umlauts and special characters are mapped to ascii7 chars |
719 |
|
|
-- control chars/ special chars are mapped to " " |
720 |
|
|
-- 2001-02-10 rasc |
721 |
|
|
*/ |
722 |
|
|
|
723 |
|
|
|
724 |
|
|
/*
** iso8859_to_ascii7_lower_map - 256-entry table, indexed by character
** code, giving the normalized form of each character: letters become
** their lower-case ASCII base letter, control characters and characters
** without a replacement become a space.
**
** Entry 87 ('W') is 'w'; it used to be 'q', which turned every upper-case
** W into a q.  Entry 167 (section sign) maps to itself and is written as
** a number so that it does not depend on the encoding of this file.
*/
static const unsigned char iso8859_to_ascii7_lower_map[] = {
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',     /*   0 */
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',     /*   8 */
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',     /*  16 */
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',     /*  24 */
    ' ', '!', '"', '#', '$', '%', '&', '\'',    /*  32 */
    '(', ')', '*', '+', ',', '-', '.', '/',     /*  40 */
    '0', '1', '2', '3', '4', '5', '6', '7',     /*  48 */
    '8', '9', ':', ';', '<', '=', '>', '?',     /*  56 */
    '@', 'a', 'b', 'c', 'd', 'e', 'f', 'g',     /*  64 */
    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',     /*  72 */
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',     /*  80 */
    'x', 'y', 'z', '[', '\\', ']', '^', '_',    /*  88 */
    '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',     /*  96 */
    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',     /* 104 */
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',     /* 112 */
    'x', 'y', 'z', '{', '|', '}', '~', ' ',     /* 120 */
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',     /* 128 */
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',     /* 136 */
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',     /* 144 */
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',     /* 152 */
    ' ', '!', 'c', 'l', 'o', 'y', '|', 0xA7,    /* 160 */
    '\"', 'c', ' ', '\"', ' ', '-', 'r', ' ',   /* 168 */
    ' ', ' ', '2', '3', '\'', 'u', ' ', '.',    /* 176 */
    ' ', '1', ' ', '"', ' ', ' ', ' ', '?',     /* 184 */
    'a', 'a', 'a', 'a', 'a', 'a', 'e', 'c',     /* 192 */
    'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',     /* 200 */
    'd', 'n', 'o', 'o', 'o', 'o', 'o', ' ',     /* 208 */
    'o', 'u', 'u', 'u', 'u', 'y', ' ', 's',     /* 216 */
    'a', 'a', 'a', 'a', 'a', 'a', 'e', 'c',     /* 224 */
    'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',     /* 232 */
    'd', 'n', 'o', 'o', 'o', 'o', 'o', ' ',     /* 240 */
    'o', 'u', 'u', 'u', 'u', 'y', ' ', 'y'      /* 248 */
};
758 |
|
|
|
759 |
|
|
|
760 |
|
|
|
761 |
|
|
/* |
762 |
|
|
-- "normalize" ISO character for store and search |
763 |
|
|
-- operations. This means convert it to ascii7 lower case. |
764 |
|
|
-- Return: char |
765 |
|
|
-- 2001-02-11 rasc |
766 |
|
|
*/ |
767 |
|
|
|
768 |
|
|
unsigned char char_ISO_normalize(unsigned char c) |
769 |
|
|
{ |
770 |
|
|
return iso8859_to_ascii7_lower_map[c]; |
771 |
|
|
} |
772 |
|
|
|
773 |
|
|
|
774 |
|
|
|
775 |
|
|
|
776 |
|
|
/* |
777 |
|
|
-- "normalize" ISO character for store and search |
778 |
|
|
-- operations. This means convert it to ascii7 lower case. |
779 |
|
|
-- Return: char |
780 |
|
|
-- 2001-02-11 rasc |
781 |
|
|
*/ |
782 |
|
|
|
783 |
|
|
|
784 |
|
|
char *str_ISO_normalize(char *s) |
785 |
|
|
{ |
786 |
|
|
unsigned char *p; |
787 |
|
|
|
788 |
|
|
p = (unsigned char *) s; |
789 |
|
|
while (*p) |
790 |
|
|
{ |
791 |
|
|
*p = iso8859_to_ascii7_lower_map[*p]; |
792 |
|
|
p++; |
793 |
|
|
} |
794 |
|
|
return s; |
795 |
|
|
} |
796 |
|
|
|
797 |
|
|
|
798 |
|
|
/* 02/2001 Jmruiz - Builds a string from a Stringlist starting at the |
799 |
|
|
n element */ |
800 |
|
|
/*
** StringListToString - join the words of sl, starting with word n, into
** one string with a single space between consecutive words.
**
** sl: the list of words
** n:  index of the first word to use
**
** Returns a buffer from emalloc()/erealloc() holding the joined string;
** it is empty when n is not below sl->n.
*/
unsigned char *StringListToString(StringList * sl, int n)
{
    int     idx;
    int     used = 0;           /* characters written so far */
    int     capacity = 256;     /* characters that fit, NUL excluded */
    int     wlen;
    unsigned char *out;

    out = emalloc(capacity + 1);

    for (idx = n; idx < sl->n; idx++)
    {
        wlen = strlen(sl->word[idx]);

        /* room for the word plus a separating space */
        if (used + wlen + 1 > capacity)
        {
            capacity += wlen + 1;
            out = erealloc(out, capacity + 1);
        }

        if (idx != n)
            out[used++] = ' ';

        memcpy(out + used, sl->word[idx], wlen);
        used += wlen;
    }
    out[used] = '\0';

    return out;
}
826 |
|
|
|
827 |
|
|
|
828 |
|
|
|
829 |
|
|
|
830 |
|
|
/* ---------------------------------------------------------- */ |
831 |
|
|
|
832 |
|
|
|
833 |
|
|
|
834 |
|
|
/* |
835 |
|
|
-- translate chars |
836 |
|
|
-- rewrite string itself via an character translation table |
837 |
|
|
-- translation table is a int[256] |
838 |
|
|
-- return: ptr to string itself |
839 |
|
|
*/ |
840 |
|
|
|
841 |
|
|
/*
** TranslateChars - rewrite s in place through a translation table.
**
** trlookup: table indexed by character code (0..255) giving the
**           replacement for each character
** s:        string to translate
**
** Returns s.
*/
unsigned char *TranslateChars(int trlookup[], unsigned char *s)
{
    size_t  i;

    for (i = 0; s[i] != '\0'; i++)
        s[i] = (unsigned char) trlookup[(int) s[i]];

    return s;
}
853 |
|
|
|
854 |
|
|
|
855 |
|
|
|
856 |
|
|
/* |
857 |
|
|
-- Build a character translation table |
858 |
|
|
-- characters "from" will be converted in "to" |
859 |
|
|
-- result is stored in a lookuptable fixed size |
860 |
|
|
-- does also special translation rules like :ascii7: |
861 |
|
|
-- return: 0/1 param fail/ok |
862 |
|
|
*/ |
863 |
|
|
|
864 |
|
|
/*
** BuildTranslateChars - build a character translation table.
**
** trlookup: table of 256 ints to fill; it always starts out as the
**           identity mapping
** from:     characters to translate, or the special name ":ascii7:" to
**           fill the table from char_ISO_normalize()
** to:       replacement characters, one for each character of from
**
** Returns 1 on success.  Returns 0 when from is NULL, when to is NULL
** (and from is not ":ascii7:"), or when from and to differ in length; in
** the last case the pairs up to the shorter length are already in the
** table.
*/
int BuildTranslateChars(int trlookup[], unsigned char *from, unsigned char *to)
{
    int     code;

    /* default init = 1:1 translation */
    for (code = 0; code < 256; code++)
        trlookup[code] = code;

    if (from == NULL)
        return 0;               /* No param! */

    /* special cases, one param */
    if (strcmp((char *) from, ":ascii7:") == 0)
    {
        for (code = 0; code < 256; code++)
            trlookup[code] = (int) char_ISO_normalize((unsigned char) code);
        return 1;
    }

    if (to == NULL)
        return 0;               /* missing second param */

    /* alter table for "non 1:1" translation... */
    for (; *from != '\0' && *to != '\0'; from++, to++)
        trlookup[(int) *from] = (int) *to;

    /* both strings must be used up together */
    return (*from == '\0' && *to == '\0') ? 1 : 0;
}
894 |
|
|
|
895 |
|
|
|
896 |
|
|
|
897 |
|
|
|
898 |
|
|
/* ---------------------------------------------------------- */ |
899 |
|
|
|
900 |
|
|
|
901 |
|
|
/* |
902 |
|
|
-- cstr_basename |
903 |
|
|
-- return basename of a document path |
904 |
|
|
-- return: (char *) copy of filename |
905 |
|
|
*/ |
906 |
|
|
|
907 |
|
|
/*
** cstr_basename - return a copy (made with estrdup) of the part of path
** that str_basename() selects.
*/
char *cstr_basename(char *path)
{
    char   *base = str_basename(path);

    return (char *) estrdup(base);
}
911 |
|
|
|
912 |
|
|
|
913 |
|
|
/* |
914 |
|
|
-- str_basename |
915 |
|
|
-- return basename of a document path |
916 |
|
|
-- return: (char *) ptr into(!) path string |
917 |
|
|
*/ |
918 |
|
|
|
919 |
|
|
/*
** str_basename - find the file name part of a path.
** Returns a pointer INTO path: the character after the last '/', or path
** itself when it has no '/'.  For a path ending in '/' this is the empty
** string at its end.
*/
char *str_basename(char *path)
{
    char   *slash = strrchr(path, '/');

    if (slash == NULL)
        return path;

    return slash + 1;
}
926 |
|
|
|
927 |
|
|
|
928 |
|
|
/* |
929 |
|
|
-- cstr_dirname (copy) |
930 |
|
|
-- return dirname of a document path |
931 |
|
|
-- return: (char *) ptr on copy(!) of path |
932 |
|
|
*/ |
933 |
|
|
|
934 |
|
|
/*
** cstr_dirname - return the directory part of a path as a new string.
**
** Returns a buffer obtained through estrdup()/emalloc():
**   "."  when path has no '/',
**   "/"  when the only '/' is the first character (e.g. "/file"),
**   otherwise everything before the last '/'.
**
** The "/" case used to be tested only when path had no '/' at all, where
** it could never apply, so "/file" gave an empty string.
** NOTE(review): callers that relied on the empty result for root-level
** paths need checking.
*/
char *cstr_dirname(char *path)
{
    char   *slash = strrchr(path, '/');
    char   *dir;
    size_t  len;

    if (!slash)
        return estrdup(".");

    if (slash == path)
        return estrdup("/");

    len = (size_t) (slash - path);
    dir = emalloc(len + 1);
    memcpy(dir, path, len);
    dir[len] = '\0';

    return dir;
}
956 |
|
|
|
957 |
|
|
|
958 |
|
|
|
959 |
|
|
/* estrdup - like strdup except we call our emalloc routine explicitly |
960 |
|
|
** as it does better memory management and tracking |
961 |
|
|
** Note: emalloc will report error and not return if no memory |
962 |
|
|
*/ |
963 |
|
|
|
964 |
|
|
/*
** estrdup - like strdup(), but the memory comes from emalloc().
** Returns the copy of str, or NULL when str is NULL or emalloc() hands
** back NULL.
*/
char *estrdup(char *str)
{
    char   *copy;

    if (str == NULL)
        return NULL;

    copy = emalloc(strlen(str) + 1);
    if (copy == NULL)
        return NULL;

    return strcpy(copy, str);
}
976 |
|
|
|
977 |
|
|
|
978 |
|
|
/*
** estrndup - copy at most n characters of s into a new string.
**
** s: source; only its first n characters are examined, so it does not
**    have to be NUL-terminated if it holds at least n characters
** n: maximum number of characters to copy
**
** Returns a buffer from emalloc() that is always n + 1 bytes large (as
** before, whatever the length of s) and holds the copied characters
** followed by a NUL.
*/
char *estrndup(char *s, size_t n)
{
    size_t  copylen = 0;
    char   *news;

    /* bounded length: never look beyond s[n - 1] */
    while (copylen < n && s[copylen] != '\0')
        copylen++;

    news = emalloc(n + 1);
    memcpy(news, s, copylen);
    news[copylen] = '\0';

    return news;
}
997 |
|
|
|
998 |
|
|
|
999 |
|
|
/* |
1000 |
|
|
-- estrredup |
1001 |
|
|
-- do free on s1 and make copy of s2 |
1002 |
|
|
-- this is used, when s1 is replaced by s2 |
1003 |
|
|
-- 2001-02-15 rasc |
1004 |
|
|
|
1005 |
|
|
*/ |
1006 |
|
|
|
1007 |
|
|
/*
** estrredup - replace string s1 by a copy of s2.
**
** s1: old string, released with efree(); may be NULL
** s2: string to copy (see estrdup)
**
** The copy is made BEFORE s1 is released so that the call stays valid
** when s2 is s1 or points into it.
**
** Returns the copy of s2.
*/
char *estrredup(char *s1, char *s2)
{
    char   *copy = estrdup(s2);

    if (s1)
        efree(s1);

    return copy;
}