/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/swregex.c
ViewVC logotype

Annotation of /mitgcm.org/devel/buildweb/pkg/swish-e/src/swregex.c

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (hide annotations) (download) (vendor branch)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
Importing web-site building process.

1 adcroft 1.1 /*
2     $Id: swregex.c,v 1.1 2002/03/17 03:54:19 whmoseley Exp $
3     **
4     **
5     ** This program and library is free software; you can redistribute it and/or
6     ** modify it under the terms of the GNU (Library) General Public License
7     ** as published by the Free Software Foundation; either version 2
8     ** of the License, or any later version.
9     **
10     ** This program is distributed in the hope that it will be useful,
11     ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12     ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13     ** GNU (Library) General Public License for more details.
14     **
15     ** You should have received a copy of the GNU (Library) General Public License
16     ** along with this program; if not, write to the Free Software
17     ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18     **---------------------------------------------------------
19     **
20     **
21     ** March 16, 2002 - Bill Moseley: moved regex routines out of string.c
22     **
23     ** This is a collection of routines for building and testing regular expressions
24     ** for use with swish-e.
25     **
26     */
27    
28     //#include <ctype.h>
29     #include "swish.h"
30     #include "mem.h"
31     //#include "index.h"
32     //#include "swish_qsort.h"
33     #include "string.h"
34     #include "error.h"
35     #include "swregex.h"
36    
37     static char *regex_replace( char *str, regex_list *regex, int offset, int *matched );
38    
39    
40     /*********************************************************************
41     * Adds a list of patterns to a reg_list. Calls progerr on failure.
42     * Call With:
43     * name = Descriptive name for errors - e.g. the name of the directive currently being processed
44     * regex_list = pointer to the list of regular expressions
45     * params = null-terminated list of pointers to strings
46     * regex_pattern = flag to indicate that it's a delimited pattern (instead of just the pattern)
47     *
48     * Returns:
49     * void
50     *
51     * ToDO:
52     * Really should get passed in *SWISH so can set error string and return
53     *
54     * Notes:
55     * An expression can be proceeded by the word "not" to negate the matching of the pattern.
56     *
57     *
58     **********************************************************************/
59     void add_regex_patterns( char *name, regex_list **reg_list, char **params, int regex_pattern )
60     {
61     int negate;
62     char *word;
63     char *pos;
64     char *ptr;
65     int delimiter;
66     int cflags;
67     int global;
68    
69    
70     while ( *params )
71     {
72     negate = 0;
73     global = 0;
74     cflags = REG_EXTENDED;
75    
76    
77     if ( (strcasecmp( *params, "not" ) == 0) && *(params+1) )
78     {
79     negate = 1;
80     params++;
81     }
82    
83     /* Simple case of a string pattern */
84     if ( !regex_pattern )
85     {
86     add_regular_expression( reg_list, *params, NULL, cflags, global, negate );
87     params++;
88     continue;
89     }
90    
91     word = *params;
92     delimiter = (int)*word;
93    
94     word++; /* past the first delimiter */
95    
96     if ( !(pos = strchr( word, delimiter )))
97     progerr("%s regex: failed to find search pattern delimiter '%c' in pattern '%s'", name, (char)delimiter, *params );
98    
99     *pos = '\0';
100    
101    
102     /* now check for flags */
103     for ( ptr = pos + 1; *ptr; ptr++ )
104     {
105     if ( *ptr == 'i' )
106     cflags |= REG_ICASE;
107     else if ( *ptr == 'm' )
108     cflags |= REG_NEWLINE;
109     else
110     progerr("%s regexp %s: unknown flag '%c'", name, *params, *ptr );
111     }
112    
113     add_regular_expression( reg_list, word, NULL, cflags, global, negate );
114    
115     *pos = delimiter; /* put it back */
116     params++;
117     }
118     }
119    
120     /*********************************************************************
121     * Adds a single regex replacement pattern
122     *
123     * Call With:
124     * name = Descriptive name for errors - e.g. the name of the directive currently being processed
125     * regex_list = pointer to the list of regular expressions
126     * word = delimited regex pattern
127     *
128     * Returns:
129     * void
130     *
131     *
132     *
133     **********************************************************************/
134    
135     void add_replace_expression( char *name, regex_list **reg_list, char *expression )
136    
137     {
138     char *word = estrdup( expression );
139     char *save = word;
140     int delimiter = (int)*word;
141     char *pos;
142     char *pattern = NULL;
143     char *replace = NULL;
144     int cflags = REG_EXTENDED;
145     int global = 0;
146     char *ptr;
147    
148    
149     word++; /* past the first delimiter */
150    
151     if ( !(pos = strchr( word, delimiter )))
152     progerr("%s regex: failed to find search pattern delimiter '%c' in pattern '%s'", name, (char)delimiter, word );
153    
154     *pos = '\0';
155     pattern = estrdup(word);
156    
157     word = pos + 1; /* now at replace pattern */
158    
159     if ( !(pos = strchr( word, delimiter )))
160     progerr("%s regex: failed to find replace pattern delimiter '%c' in pattern '%s'", name, (char)delimiter, word );
161    
162     *pos = '\0';
163     replace = estrdup(word);
164    
165    
166     /* now check for flags */
167     for ( ptr = pos + 1; *ptr; ptr++ )
168     {
169     if ( *ptr == 'i' )
170     cflags |= REG_ICASE;
171    
172     else if ( *ptr == 'm' )
173     cflags |= REG_NEWLINE;
174    
175     else if ( *ptr == 'g' )
176     global++;
177     else
178     progerr("%s regexp %s: unknown flag '%c'", name, expression, *ptr );
179     }
180    
181     add_regular_expression( reg_list, pattern, replace, cflags, global, 0 );
182    
183     efree( pattern );
184     efree( replace );
185     efree( save );
186     }
187    
188    
189    
190     /*********************************************************************
191     * Match regular expressions
192     * Works on a list of expressions, and returns true if *ANY* match
193     *
194     *
195     **********************************************************************/
196     int match_regex_list( char *str, regex_list *regex )
197     {
198     regmatch_t pmatch[1];
199     int matched;
200    
201     while ( regex )
202     {
203     matched = regex->negate
204     ? regexec(&regex->re, str, (size_t) 1, pmatch, 0) != 0
205     : regexec(&regex->re, str, (size_t) 1, pmatch, 0) == 0;
206    
207     if ( DEBUG_MASK & DEBUG_REGEX )
208     printf("match %s %c~ m[%s] : %s\n", str, (int)(regex->negate ? '!' : '='), regex->pattern, matched ? "matched" : "nope" );
209    
210     if ( matched )
211     return 1;
212    
213     regex = regex->next;
214     }
215    
216     return 0;
217     }
218    
219    
220     /*********************************************************************
221     * Process all the regular expressions in a regex_list
222     *
223     *
224     **********************************************************************/
225     char *process_regex_list( char *str, regex_list *regex, int *matched )
226     {
227     if ( DEBUG_MASK & DEBUG_REGEX && regex )
228     printf("\nOriginal String: '%s'\n", str );
229    
230     while ( regex )
231     {
232     str = regex_replace( str, regex, 0, matched );
233     regex = regex->next;
234    
235     if ( DEBUG_MASK & DEBUG_REGEX )
236     printf(" Result String: '%s'\n", str );
237    
238     }
239    
240     return str;
241     }
242    
243     /*********************************************************************
244     * Regular Expression Substitution
245     *
246     * Rewritten 7/31/2001 - general purpose regexp
247     *
248     * Pass in a string and a regex_list pointer
249     *
250     * Returns:
251     * a string. Either the original, or a replacement string
252     * Frees passed in string if return is different.
253     *
254     * Notes:
255     * Clearly, there must be a library to do this already. For /g I'm
256     * recursively calling this.
257     *
258     *
259     **********************************************************************/
260     static char *regex_replace( char *str, regex_list *regex, int offset, int *matched )
261     {
262     regmatch_t pmatch[MAXPAR];
263     char *c;
264     char *newstr;
265     int escape = 0;
266     int pos = 0;
267     int j;
268     int last_offset = 0;
269    
270     if ( DEBUG_MASK & DEBUG_REGEX )
271     printf("replace %s =~ m[%s][%s]: %s\n", str + offset, regex->pattern, regex->replace,
272     regexec(&regex->re, str + offset, (size_t) MAXPAR, pmatch, 0) ? "No Match" : "Matched" );
273    
274     /* Run regex - return original string if no match (might be nice to print error msg? */
275     if ( regexec(&regex->re, str + offset, (size_t) MAXPAR, pmatch, 0) )
276     return str;
277    
278    
279     /* Flag that a pattern matched */
280     (*matched)++;
281    
282    
283     /* allocate a string long enough */
284     newstr = (char *) emalloc( offset + strlen( str ) + regex->replace_length + (regex->replace_count * strlen( str )) + 1 );
285    
286     /* Copy everything before string */
287     for ( j=0; j < offset; j++ )
288     newstr[pos++] = str[j];
289    
290    
291     /* Copy everything before the match */
292     if ( pmatch[0].rm_so > 0 )
293     for ( j = offset; j < pmatch[0].rm_so + offset; j++ )
294     newstr[pos++] = str[j];
295    
296    
297     /* ugly section */
298     for ( c = regex->replace; *c; c++ )
299     {
300     if ( escape )
301     {
302     newstr[pos++] = *c;
303     last_offset = pos;
304     escape = 0;
305     continue;
306     }
307    
308     if ( *c == '\\' && *(c+1) )
309     {
310     escape = 1;
311     continue;
312     }
313    
314     if ( '$' == *c && *(c+1) )
315     {
316     char *start = NULL;
317     char *end = NULL;
318    
319     c++;
320    
321     /* chars before match */
322     if ( '`' == *c )
323     {
324     if ( pmatch[0].rm_so + offset > 0 )
325     {
326     start = str;
327     end = str + pmatch[0].rm_so + offset;
328     }
329     }
330    
331     /* chars after match */
332     else if ( '\'' == *c )
333     {
334     start = str + pmatch[0].rm_eo + offset;
335     end = str + strlen( str );
336     }
337    
338     else if ( *c >= '0' && *c <= '9' )
339     {
340     int i = (int)( *c ) - (int)'0';
341    
342     if ( pmatch[i].rm_so != -1 )
343     {
344     start = str + pmatch[i].rm_so + offset;
345     end = str + pmatch[i].rm_eo + offset;
346     }
347     }
348    
349     else /* just copy the pattern */
350     {
351     start = c - 1;
352     end = c + 1;
353     }
354    
355     if ( start )
356     for ( ; start < end; start++ )
357     newstr[pos++] = *start;
358     }
359    
360     /* not a replace pattern, just copy the char */
361     else
362     newstr[pos++] = *c;
363    
364     last_offset = pos;
365     }
366    
367     newstr[pos] = '\0';
368    
369     /* Append any pattern after the string */
370     strcat( newstr, str+pmatch[0].rm_eo + offset );
371    
372    
373     efree( str );
374    
375    
376     /* This allow /g processing to match repeatedly */
377     /* I'm sure there a way to mess this up and end up with a regex loop... */
378    
379     if ( regex->global && last_offset < strlen( newstr ) )
380     newstr = regex_replace( newstr, regex, last_offset, matched );
381    
382     return newstr;
383     }
384    
385     /*********************************************************
386     * Free a regular express list
387     *
388     *********************************************************/
389    
390     void free_regex_list( regex_list **reg_list )
391     {
392     regex_list *list = *reg_list;
393     regex_list *next;
394     while ( list )
395     {
396     if ( list->replace )
397     efree( list->replace );
398    
399     if ( list->pattern )
400     efree( list->pattern );
401    
402     regfree(&list->re);
403    
404     next = list->next;
405     efree( list );
406     list = next;
407     }
408     *reg_list = NULL;
409     }
410    
411     /****************************************************************************
412     * Create or Add a regular expression to a list
413     * pre-compiles expression to check for errors and for speed
414     *
415     * Pattern and replace string passed in are duplicated
416     *
417     *
418     *****************************************************************************/
419    
420     void add_regular_expression( regex_list **reg_list, char *pattern, char *replace, int cflags, int global, int negate )
421     {
422     regex_list *new_node = emalloc( sizeof( regex_list ) );
423     regex_list *last;
424     char *c;
425     int status;
426     int escape = 0;
427    
428     if ( (status = regcomp( &new_node->re, pattern, cflags )))
429     progerr("Failed to complie regular expression '%s', pattern. Error: %d", pattern, status );
430    
431    
432    
433     new_node->pattern = pattern ? estrdup(pattern) : estrdup(""); /* only used for -T debugging */
434     new_node->replace = replace ? estrdup(replace) : estrdup("");
435     new_node->negate = negate;
436    
437     new_node->global = global; /* repeat flag */
438    
439     new_node->replace_length = strlen( new_node->replace );
440    
441     new_node->replace_count = 0;
442     for ( c = new_node->replace; *c; c++ )
443     {
444     if ( escape )
445     {
446     escape = 0;
447     continue;
448     }
449    
450     if ( *c == '\\' )
451     {
452     escape = 1;
453     continue;
454     }
455    
456     if ( *c == '$' && *(c+1) )
457     new_node->replace_count++;
458     }
459    
460    
461     new_node->next = NULL;
462    
463    
464     if ( *reg_list == NULL )
465     *reg_list = new_node;
466     else
467     {
468     /* get end of list */
469     for ( last = *reg_list; last->next; last = last->next );
470    
471     last->next = new_node;
472     }
473    
474     }
475    

  ViewVC Help
Powered by ViewVC 1.1.22