/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/swregex.c
ViewVC logotype

Contents of /mitgcm.org/devel/buildweb/pkg/swish-e/src/swregex.c

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
Error occurred while calculating annotation data.
Importing web-site building process.

1 /*
2 $Id: swregex.c,v 1.1 2002/03/17 03:54:19 whmoseley Exp $
3 **
4 **
5 ** This program and library is free software; you can redistribute it and/or
6 ** modify it under the terms of the GNU (Library) General Public License
7 ** as published by the Free Software Foundation; either version 2
8 ** of the License, or any later version.
9 **
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU (Library) General Public License for more details.
14 **
15 ** You should have received a copy of the GNU (Library) General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 **---------------------------------------------------------
19 **
20 **
21 ** March 16, 2002 - Bill Moseley: moved regex routines out of string.c
22 **
23 ** This is a collection of routines for building and testing regular expressions
24 ** for use with swish-e.
25 **
26 */
27
28 //#include <ctype.h>
29 #include "swish.h"
30 #include "mem.h"
31 //#include "index.h"
32 //#include "swish_qsort.h"
33 #include "string.h"
34 #include "error.h"
35 #include "swregex.h"
36
37 static char *regex_replace( char *str, regex_list *regex, int offset, int *matched );
38
39
40 /*********************************************************************
41 * Adds a list of patterns to a reg_list. Calls progerr on failure.
42 * Call With:
43 * name = Descriptive name for errors - e.g. the name of the directive currently being processed
44 * regex_list = pointer to the list of regular expressions
45 * params = null-terminated list of pointers to strings
46 * regex_pattern = flag to indicate that it's a delimited pattern (instead of just the pattern)
47 *
48 * Returns:
49 * void
50 *
51 * ToDO:
52 * Really should get passed in *SWISH so can set error string and return
53 *
54 * Notes:
55 * An expression can be proceeded by the word "not" to negate the matching of the pattern.
56 *
57 *
58 **********************************************************************/
59 void add_regex_patterns( char *name, regex_list **reg_list, char **params, int regex_pattern )
60 {
61 int negate;
62 char *word;
63 char *pos;
64 char *ptr;
65 int delimiter;
66 int cflags;
67 int global;
68
69
70 while ( *params )
71 {
72 negate = 0;
73 global = 0;
74 cflags = REG_EXTENDED;
75
76
77 if ( (strcasecmp( *params, "not" ) == 0) && *(params+1) )
78 {
79 negate = 1;
80 params++;
81 }
82
83 /* Simple case of a string pattern */
84 if ( !regex_pattern )
85 {
86 add_regular_expression( reg_list, *params, NULL, cflags, global, negate );
87 params++;
88 continue;
89 }
90
91 word = *params;
92 delimiter = (int)*word;
93
94 word++; /* past the first delimiter */
95
96 if ( !(pos = strchr( word, delimiter )))
97 progerr("%s regex: failed to find search pattern delimiter '%c' in pattern '%s'", name, (char)delimiter, *params );
98
99 *pos = '\0';
100
101
102 /* now check for flags */
103 for ( ptr = pos + 1; *ptr; ptr++ )
104 {
105 if ( *ptr == 'i' )
106 cflags |= REG_ICASE;
107 else if ( *ptr == 'm' )
108 cflags |= REG_NEWLINE;
109 else
110 progerr("%s regexp %s: unknown flag '%c'", name, *params, *ptr );
111 }
112
113 add_regular_expression( reg_list, word, NULL, cflags, global, negate );
114
115 *pos = delimiter; /* put it back */
116 params++;
117 }
118 }
119
120 /*********************************************************************
121 * Adds a single regex replacement pattern
122 *
123 * Call With:
124 * name = Descriptive name for errors - e.g. the name of the directive currently being processed
125 * regex_list = pointer to the list of regular expressions
126 * word = delimited regex pattern
127 *
128 * Returns:
129 * void
130 *
131 *
132 *
133 **********************************************************************/
134
135 void add_replace_expression( char *name, regex_list **reg_list, char *expression )
136
137 {
138 char *word = estrdup( expression );
139 char *save = word;
140 int delimiter = (int)*word;
141 char *pos;
142 char *pattern = NULL;
143 char *replace = NULL;
144 int cflags = REG_EXTENDED;
145 int global = 0;
146 char *ptr;
147
148
149 word++; /* past the first delimiter */
150
151 if ( !(pos = strchr( word, delimiter )))
152 progerr("%s regex: failed to find search pattern delimiter '%c' in pattern '%s'", name, (char)delimiter, word );
153
154 *pos = '\0';
155 pattern = estrdup(word);
156
157 word = pos + 1; /* now at replace pattern */
158
159 if ( !(pos = strchr( word, delimiter )))
160 progerr("%s regex: failed to find replace pattern delimiter '%c' in pattern '%s'", name, (char)delimiter, word );
161
162 *pos = '\0';
163 replace = estrdup(word);
164
165
166 /* now check for flags */
167 for ( ptr = pos + 1; *ptr; ptr++ )
168 {
169 if ( *ptr == 'i' )
170 cflags |= REG_ICASE;
171
172 else if ( *ptr == 'm' )
173 cflags |= REG_NEWLINE;
174
175 else if ( *ptr == 'g' )
176 global++;
177 else
178 progerr("%s regexp %s: unknown flag '%c'", name, expression, *ptr );
179 }
180
181 add_regular_expression( reg_list, pattern, replace, cflags, global, 0 );
182
183 efree( pattern );
184 efree( replace );
185 efree( save );
186 }
187
188
189
190 /*********************************************************************
191 * Match regular expressions
192 * Works on a list of expressions, and returns true if *ANY* match
193 *
194 *
195 **********************************************************************/
196 int match_regex_list( char *str, regex_list *regex )
197 {
198 regmatch_t pmatch[1];
199 int matched;
200
201 while ( regex )
202 {
203 matched = regex->negate
204 ? regexec(&regex->re, str, (size_t) 1, pmatch, 0) != 0
205 : regexec(&regex->re, str, (size_t) 1, pmatch, 0) == 0;
206
207 if ( DEBUG_MASK & DEBUG_REGEX )
208 printf("match %s %c~ m[%s] : %s\n", str, (int)(regex->negate ? '!' : '='), regex->pattern, matched ? "matched" : "nope" );
209
210 if ( matched )
211 return 1;
212
213 regex = regex->next;
214 }
215
216 return 0;
217 }
218
219
220 /*********************************************************************
221 * Process all the regular expressions in a regex_list
222 *
223 *
224 **********************************************************************/
225 char *process_regex_list( char *str, regex_list *regex, int *matched )
226 {
227 if ( DEBUG_MASK & DEBUG_REGEX && regex )
228 printf("\nOriginal String: '%s'\n", str );
229
230 while ( regex )
231 {
232 str = regex_replace( str, regex, 0, matched );
233 regex = regex->next;
234
235 if ( DEBUG_MASK & DEBUG_REGEX )
236 printf(" Result String: '%s'\n", str );
237
238 }
239
240 return str;
241 }
242
243 /*********************************************************************
244 * Regular Expression Substitution
245 *
246 * Rewritten 7/31/2001 - general purpose regexp
247 *
248 * Pass in a string and a regex_list pointer
249 *
250 * Returns:
251 * a string. Either the original, or a replacement string
252 * Frees passed in string if return is different.
253 *
254 * Notes:
255 * Clearly, there must be a library to do this already. For /g I'm
256 * recursively calling this.
257 *
258 *
259 **********************************************************************/
260 static char *regex_replace( char *str, regex_list *regex, int offset, int *matched )
261 {
262 regmatch_t pmatch[MAXPAR];
263 char *c;
264 char *newstr;
265 int escape = 0;
266 int pos = 0;
267 int j;
268 int last_offset = 0;
269
270 if ( DEBUG_MASK & DEBUG_REGEX )
271 printf("replace %s =~ m[%s][%s]: %s\n", str + offset, regex->pattern, regex->replace,
272 regexec(&regex->re, str + offset, (size_t) MAXPAR, pmatch, 0) ? "No Match" : "Matched" );
273
274 /* Run regex - return original string if no match (might be nice to print error msg? */
275 if ( regexec(&regex->re, str + offset, (size_t) MAXPAR, pmatch, 0) )
276 return str;
277
278
279 /* Flag that a pattern matched */
280 (*matched)++;
281
282
283 /* allocate a string long enough */
284 newstr = (char *) emalloc( offset + strlen( str ) + regex->replace_length + (regex->replace_count * strlen( str )) + 1 );
285
286 /* Copy everything before string */
287 for ( j=0; j < offset; j++ )
288 newstr[pos++] = str[j];
289
290
291 /* Copy everything before the match */
292 if ( pmatch[0].rm_so > 0 )
293 for ( j = offset; j < pmatch[0].rm_so + offset; j++ )
294 newstr[pos++] = str[j];
295
296
297 /* ugly section */
298 for ( c = regex->replace; *c; c++ )
299 {
300 if ( escape )
301 {
302 newstr[pos++] = *c;
303 last_offset = pos;
304 escape = 0;
305 continue;
306 }
307
308 if ( *c == '\\' && *(c+1) )
309 {
310 escape = 1;
311 continue;
312 }
313
314 if ( '$' == *c && *(c+1) )
315 {
316 char *start = NULL;
317 char *end = NULL;
318
319 c++;
320
321 /* chars before match */
322 if ( '`' == *c )
323 {
324 if ( pmatch[0].rm_so + offset > 0 )
325 {
326 start = str;
327 end = str + pmatch[0].rm_so + offset;
328 }
329 }
330
331 /* chars after match */
332 else if ( '\'' == *c )
333 {
334 start = str + pmatch[0].rm_eo + offset;
335 end = str + strlen( str );
336 }
337
338 else if ( *c >= '0' && *c <= '9' )
339 {
340 int i = (int)( *c ) - (int)'0';
341
342 if ( pmatch[i].rm_so != -1 )
343 {
344 start = str + pmatch[i].rm_so + offset;
345 end = str + pmatch[i].rm_eo + offset;
346 }
347 }
348
349 else /* just copy the pattern */
350 {
351 start = c - 1;
352 end = c + 1;
353 }
354
355 if ( start )
356 for ( ; start < end; start++ )
357 newstr[pos++] = *start;
358 }
359
360 /* not a replace pattern, just copy the char */
361 else
362 newstr[pos++] = *c;
363
364 last_offset = pos;
365 }
366
367 newstr[pos] = '\0';
368
369 /* Append any pattern after the string */
370 strcat( newstr, str+pmatch[0].rm_eo + offset );
371
372
373 efree( str );
374
375
376 /* This allow /g processing to match repeatedly */
377 /* I'm sure there a way to mess this up and end up with a regex loop... */
378
379 if ( regex->global && last_offset < strlen( newstr ) )
380 newstr = regex_replace( newstr, regex, last_offset, matched );
381
382 return newstr;
383 }
384
385 /*********************************************************
386 * Free a regular express list
387 *
388 *********************************************************/
389
390 void free_regex_list( regex_list **reg_list )
391 {
392 regex_list *list = *reg_list;
393 regex_list *next;
394 while ( list )
395 {
396 if ( list->replace )
397 efree( list->replace );
398
399 if ( list->pattern )
400 efree( list->pattern );
401
402 regfree(&list->re);
403
404 next = list->next;
405 efree( list );
406 list = next;
407 }
408 *reg_list = NULL;
409 }
410
411 /****************************************************************************
412 * Create or Add a regular expression to a list
413 * pre-compiles expression to check for errors and for speed
414 *
415 * Pattern and replace string passed in are duplicated
416 *
417 *
418 *****************************************************************************/
419
420 void add_regular_expression( regex_list **reg_list, char *pattern, char *replace, int cflags, int global, int negate )
421 {
422 regex_list *new_node = emalloc( sizeof( regex_list ) );
423 regex_list *last;
424 char *c;
425 int status;
426 int escape = 0;
427
428 if ( (status = regcomp( &new_node->re, pattern, cflags )))
429 progerr("Failed to complie regular expression '%s', pattern. Error: %d", pattern, status );
430
431
432
433 new_node->pattern = pattern ? estrdup(pattern) : estrdup(""); /* only used for -T debugging */
434 new_node->replace = replace ? estrdup(replace) : estrdup("");
435 new_node->negate = negate;
436
437 new_node->global = global; /* repeat flag */
438
439 new_node->replace_length = strlen( new_node->replace );
440
441 new_node->replace_count = 0;
442 for ( c = new_node->replace; *c; c++ )
443 {
444 if ( escape )
445 {
446 escape = 0;
447 continue;
448 }
449
450 if ( *c == '\\' )
451 {
452 escape = 1;
453 continue;
454 }
455
456 if ( *c == '$' && *(c+1) )
457 new_node->replace_count++;
458 }
459
460
461 new_node->next = NULL;
462
463
464 if ( *reg_list == NULL )
465 *reg_list = new_node;
466 else
467 {
468 /* get end of list */
469 for ( last = *reg_list; last->next; last = last->next );
470
471 last->next = new_node;
472 }
473
474 }
475

  ViewVC Help
Powered by ViewVC 1.1.22