/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/vms/regex.c
ViewVC logotype

Annotation of /mitgcm.org/devel/buildweb/pkg/swish-e/src/vms/regex.c

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (hide annotations) (download) (vendor branch)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
Importing web-site building process.

1 adcroft 1.1 /* Extended regular expression matching and search library,
2     version 0.12.
3     (Implements POSIX draft P10003.2/D11.2, except for
4     internationalization features.)
5    
6     Copyright (C) 1993 Free Software Foundation, Inc.
7    
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2, or (at your option)
11     any later version.
12    
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16     GNU General Public License for more details.
17    
18     You should have received a copy of the GNU General Public License
19     along with this program; if not, write to the Free Software
20     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
21    
22     /* AIX requires this to be the first thing in the file. */
23     #if defined (_AIX) && !defined (REGEX_MALLOC)
24     #pragma alloca
25     #endif
26    
27     #define _GNU_SOURCE
28    
29     #ifdef _WIN32
30     #define HAVE_STRING_H 1 /* Win32 */
31     #define REGEX_MALLOC 1 /* Win32 */
32     #endif
33    
34    
35     /* We need this for `regex.h', and perhaps for the Emacs include files. */
36     #include <sys/types.h>
37    
38     #ifdef HAVE_CONFIG_H
39     #include "config.h"
40     #endif
41    
42     /* The `emacs' switch turns on certain matching commands
43     that make sense only in Emacs. */
44     #ifdef emacs
45    
46     #include "lisp.h"
47     #include "buffer.h"
48     #include "syntax.h"
49    
50     /* Emacs uses `NULL' as a predicate. */
51     #undef NULL
52    
53     #else /* not emacs */
54    
55     /* We used to test for `BSTRING' here, but only GCC and Emacs define
56     `BSTRING', as far as I know, and neither of them use this code. */
57     #if HAVE_STRING_H || STDC_HEADERS
58     #include <string.h>
59     #ifndef bcmp
60     #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
61     #endif
62     #ifndef bcopy
63     #define bcopy(s, d, n) memcpy ((d), (s), (n))
64     #endif
65     #ifndef bzero
66     #define bzero(s, n) memset ((s), 0, (n))
67     #endif
68     #else
69     #include <strings.h>
70     #endif
71    
72     #ifdef STDC_HEADERS
73     #include <stdlib.h>
74     #else
75     char *malloc ();
76     char *realloc ();
77     #endif
78    
79    
80     /* Define the syntax stuff for \<, \>, etc. */
81    
82     /* This must be nonzero for the wordchar and notwordchar pattern
83     commands in re_match_2. */
84     #ifndef Sword
85     #define Sword 1
86     #endif
87    
88     #ifdef SYNTAX_TABLE
89    
90     extern char *re_syntax_table;
91    
92     #else /* not SYNTAX_TABLE */
93    
94     /* How many characters in the character set. */
95     #define CHAR_SET_SIZE 256
96    
97     static char re_syntax_table[CHAR_SET_SIZE];
98    
99     static void
100     init_syntax_once ()
101     {
102     register int c;
103     static int done = 0;
104    
105     if (done)
106     return;
107    
108     bzero (re_syntax_table, sizeof re_syntax_table);
109    
110     for (c = 'a'; c <= 'z'; c++)
111     re_syntax_table[c] = Sword;
112    
113     for (c = 'A'; c <= 'Z'; c++)
114     re_syntax_table[c] = Sword;
115    
116     for (c = '0'; c <= '9'; c++)
117     re_syntax_table[c] = Sword;
118    
119     re_syntax_table['_'] = Sword;
120    
121     done = 1;
122     }
123    
124     #endif /* not SYNTAX_TABLE */
125    
126     #define SYNTAX(c) re_syntax_table[c]
127    
128     #endif /* not emacs */
129    
130     /* Get the interface, including the syntax bits. */
131     #include "regex.h"
132    
133     /* isalpha etc. are used for the character classes. */
134     #include <ctype.h>
135    
136     #ifndef isascii
137     #define isascii(c) 1
138     #endif
139    
140     #ifdef isblank
141     #define ISBLANK(c) (isascii (c) && isblank (c))
142     #else
143     #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
144     #endif
145     #ifdef isgraph
146     #define ISGRAPH(c) (isascii (c) && isgraph (c))
147     #else
148     #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
149     #endif
150    
151     #define ISPRINT(c) (isascii (c) && isprint (c))
152     #define ISDIGIT(c) (isascii (c) && isdigit (c))
153     #define ISALNUM(c) (isascii (c) && isalnum (c))
154     #define ISALPHA(c) (isascii (c) && isalpha (c))
155     #define ISCNTRL(c) (isascii (c) && iscntrl (c))
156     #define ISLOWER(c) (isascii (c) && islower (c))
157     #define ISPUNCT(c) (isascii (c) && ispunct (c))
158     #define ISSPACE(c) (isascii (c) && isspace (c))
159     #define ISUPPER(c) (isascii (c) && isupper (c))
160     #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
161    
162     #ifndef NULL
163     #define NULL 0
164     #endif
165    
166     /* We remove any previous definition of `SIGN_EXTEND_CHAR',
167     since ours (we hope) works properly with all combinations of
168     machines, compilers, `char' and `unsigned char' argument types.
169     (Per Bothner suggested the basic approach.) */
170     #undef SIGN_EXTEND_CHAR
171     #if __STDC__
172     #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
173     #else /* not __STDC__ */
174     /* As in Harbison and Steele. */
175     #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
176     #endif
177    
178     /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
179     use `alloca' instead of `malloc'. This is because using malloc in
180     re_search* or re_match* could cause memory leaks when C-g is used in
181     Emacs; also, malloc is slower and causes storage fragmentation. On
182     the other hand, malloc is more portable, and easier to debug.
183    
184     Because we sometimes use alloca, some routines have to be macros,
185     not functions -- `alloca'-allocated space disappears at the end of the
186     function it is called in. */
187    
188     #ifdef REGEX_MALLOC
189    
190     #define REGEX_ALLOCATE malloc
191     #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
192    
193     #else /* not REGEX_MALLOC */
194    
195     /* Emacs already defines alloca, sometimes. */
196     #ifndef alloca
197    
198     /* Make alloca work the best possible way. */
199     #ifdef __GNUC__
200     #define alloca __builtin_alloca
201     #else /* not __GNUC__ */
202     #if HAVE_ALLOCA_H
203     #include <alloca.h>
204     #else /* not __GNUC__ or HAVE_ALLOCA_H */
205     #ifndef _AIX /* Already did AIX, up at the top. */
206     char *alloca ();
207     #endif /* not _AIX */
208     #endif /* not HAVE_ALLOCA_H */
209     #endif /* not __GNUC__ */
210    
211     #endif /* not alloca */
212    
213     #define REGEX_ALLOCATE alloca
214    
215     /* Assumes a `char *destination' variable. */
216     #define REGEX_REALLOCATE(source, osize, nsize) \
217     (destination = (char *) alloca (nsize), \
218     bcopy (source, destination, osize), \
219     destination)
220    
221     #endif /* not REGEX_MALLOC */
222    
223    
224     /* True if `size1' is non-NULL and PTR is pointing anywhere inside
225     `string1' or just past its end. This works if PTR is NULL, which is
226     a good thing. */
227     #define FIRST_STRING_P(ptr) \
228     (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
229    
230     /* (Re)Allocate N items of type T using malloc, or fail. */
231     #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
232     #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
233     #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
234    
235     #define BYTEWIDTH 8 /* In bits. */
236    
237     #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
238    
239     #define MAX(a, b) ((a) > (b) ? (a) : (b))
240     #define MIN(a, b) ((a) < (b) ? (a) : (b))
241    
242     typedef char boolean;
243     #define false 0
244     #define true 1
245    
246     /* These are the command codes that appear in compiled regular
247     expressions. Some opcodes are followed by argument bytes. A
248     command code can specify any interpretation whatsoever for its
249     arguments. Zero bytes may appear in the compiled regular expression.
250    
251     The value of `exactn' is needed in search.c (search_buffer) in Emacs.
252     So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
253     `exactn' we use here must also be 1. */
254    
255     typedef enum
256     {
257     no_op = 0,
258    
259     /* Followed by one byte giving n, then by n literal bytes. */
260     exactn = 1,
261    
262     /* Matches any (more or less) character. */
263     anychar,
264    
265     /* Matches any one char belonging to specified set. First
266     following byte is number of bitmap bytes. Then come bytes
267     for a bitmap saying which chars are in. Bits in each byte
268     are ordered low-bit-first. A character is in the set if its
269     bit is 1. A character too large to have a bit in the map is
270     automatically not in the set. */
271     charset,
272    
273     /* Same parameters as charset, but match any character that is
274     not one of those specified. */
275     charset_not,
276    
277     /* Start remembering the text that is matched, for storing in a
278     register. Followed by one byte with the register number, in
279     the range 0 to one less than the pattern buffer's re_nsub
280     field. Then followed by one byte with the number of groups
281     inner to this one. (This last has to be part of the
282     start_memory only because we need it in the on_failure_jump
283     of re_match_2.) */
284     start_memory,
285    
286     /* Stop remembering the text that is matched and store it in a
287     memory register. Followed by one byte with the register
288     number, in the range 0 to one less than `re_nsub' in the
289     pattern buffer, and one byte with the number of inner groups,
290     just like `start_memory'. (We need the number of inner
291     groups here because we don't have any easy way of finding the
292     corresponding start_memory when we're at a stop_memory.) */
293     stop_memory,
294    
295     /* Match a duplicate of something remembered. Followed by one
296     byte containing the register number. */
297     duplicate,
298    
299     /* Fail unless at beginning of line. */
300     begline,
301    
302     /* Fail unless at end of line. */
303     endline,
304    
305     /* Succeeds if at beginning of buffer (if emacs) or at beginning
306     of string to be matched (if not). */
307     begbuf,
308    
309     /* Analogously, for end of buffer/string. */
310     endbuf,
311    
312     /* Followed by two byte relative address to which to jump. */
313     jump,
314    
315     /* Same as jump, but marks the end of an alternative. */
316     jump_past_alt,
317    
318     /* Followed by two-byte relative address of place to resume at
319     in case of failure. */
320     on_failure_jump,
321    
322     /* Like on_failure_jump, but pushes a placeholder instead of the
323     current string position when executed. */
324     on_failure_keep_string_jump,
325    
326     /* Throw away latest failure point and then jump to following
327     two-byte relative address. */
328     pop_failure_jump,
329    
330     /* Change to pop_failure_jump if know won't have to backtrack to
331     match; otherwise change to jump. This is used to jump
332     back to the beginning of a repeat. If what follows this jump
333     clearly won't match what the repeat does, such that we can be
334     sure that there is no use backtracking out of repetitions
335     already matched, then we change it to a pop_failure_jump.
336     Followed by two-byte address. */
337     maybe_pop_jump,
338    
339     /* Jump to following two-byte address, and push a dummy failure
340     point. This failure point will be thrown away if an attempt
341     is made to use it for a failure. A `+' construct makes this
342     before the first repeat. Also used as an intermediary kind
343     of jump when compiling an alternative. */
344     dummy_failure_jump,
345    
346     /* Push a dummy failure point and continue. Used at the end of
347     alternatives. */
348     push_dummy_failure,
349    
350     /* Followed by two-byte relative address and two-byte number n.
351     After matching N times, jump to the address upon failure. */
352     succeed_n,
353    
354     /* Followed by two-byte relative address, and two-byte number n.
355     Jump to the address N times, then fail. */
356     jump_n,
357    
358     /* Set the following two-byte relative address to the
359     subsequent two-byte number. The address *includes* the two
360     bytes of number. */
361     set_number_at,
362    
363     wordchar, /* Matches any word-constituent character. */
364     notwordchar, /* Matches any char that is not a word-constituent. */
365    
366     wordbeg, /* Succeeds if at word beginning. */
367     wordend, /* Succeeds if at word end. */
368    
369     wordbound, /* Succeeds if at a word boundary. */
370     notwordbound /* Succeeds if not at a word boundary. */
371    
372     #ifdef emacs
373     ,before_dot, /* Succeeds if before point. */
374     at_dot, /* Succeeds if at point. */
375     after_dot, /* Succeeds if after point. */
376    
377     /* Matches any character whose syntax is specified. Followed by
378     a byte which contains a syntax code, e.g., Sword. */
379     syntaxspec,
380    
381     /* Matches any character whose syntax is not that specified. */
382     notsyntaxspec
383     #endif /* emacs */
384     } re_opcode_t;
385    
386     /* Common operations on the compiled pattern. */
387    
388     /* Store NUMBER in two contiguous bytes starting at DESTINATION. */
389    
390     #define STORE_NUMBER(destination, number) \
391     do { \
392     (destination)[0] = (number) & 0377; \
393     (destination)[1] = (number) >> 8; \
394     } while (0)
395    
396     /* Same as STORE_NUMBER, except increment DESTINATION to
397     the byte after where the number is stored. Therefore, DESTINATION
398     must be an lvalue. */
399    
400     #define STORE_NUMBER_AND_INCR(destination, number) \
401     do { \
402     STORE_NUMBER (destination, number); \
403     (destination) += 2; \
404     } while (0)
405    
406     /* Put into DESTINATION a number stored in two contiguous bytes starting
407     at SOURCE. */
408    
409     #define EXTRACT_NUMBER(destination, source) \
410     do { \
411     (destination) = *(source) & 0377; \
412     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
413     } while (0)
414    
415     #ifdef DEBUG
416     static void
417     extract_number (dest, source)
418     int *dest;
419     unsigned char *source;
420     {
421     int temp = SIGN_EXTEND_CHAR (*(source + 1));
422     *dest = *source & 0377;
423     *dest += temp << 8;
424     }
425    
426     #ifndef EXTRACT_MACROS /* To debug the macros. */
427     #undef EXTRACT_NUMBER
428     #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
429     #endif /* not EXTRACT_MACROS */
430    
431     #endif /* DEBUG */
432    
433     /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
434     SOURCE must be an lvalue. */
435    
436     #define EXTRACT_NUMBER_AND_INCR(destination, source) \
437     do { \
438     EXTRACT_NUMBER (destination, source); \
439     (source) += 2; \
440     } while (0)
441    
442     #ifdef DEBUG
443     static void
444     extract_number_and_incr (destination, source)
445     int *destination;
446     unsigned char **source;
447     {
448     extract_number (destination, *source);
449     *source += 2;
450     }
451    
452     #ifndef EXTRACT_MACROS
453     #undef EXTRACT_NUMBER_AND_INCR
454     #define EXTRACT_NUMBER_AND_INCR(dest, src) \
455     extract_number_and_incr (&dest, &src)
456     #endif /* not EXTRACT_MACROS */
457    
458     #endif /* DEBUG */
459    
460     /* If DEBUG is defined, Regex prints many voluminous messages about what
461     it is doing (if the variable `debug' is nonzero). If linked with the
462     main program in `iregex.c', you can enter patterns and strings
463     interactively. And if linked with the main program in `main.c' and
464     the other test files, you can run the already-written tests. */
465    
466     #ifdef DEBUG
467    
468     /* We use standard I/O for debugging. */
469     #include <stdio.h>
470    
471     /* It is useful to test things that ``must'' be true when debugging. */
472     #include <assert.h>
473    
474     static int debug = 0;
475    
476     #define DEBUG_STATEMENT(e) e
477     #define DEBUG_PRINT1(x) if (debug) printf (x)
478     #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
479     #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
480     #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
481     #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
482     if (debug) print_partial_compiled_pattern (s, e)
483     #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
484     if (debug) print_double_string (w, s1, sz1, s2, sz2)
485    
486    
487     extern void printchar ();
488    
489     /* Print the fastmap in human-readable form. */
490    
491     void
492     print_fastmap (fastmap)
493     char *fastmap;
494     {
495     unsigned was_a_range = 0;
496     unsigned i = 0;
497    
498     while (i < (1 << BYTEWIDTH))
499     {
500     if (fastmap[i++])
501     {
502     was_a_range = 0;
503     printchar (i - 1);
504     while (i < (1 << BYTEWIDTH) && fastmap[i])
505     {
506     was_a_range = 1;
507     i++;
508     }
509     if (was_a_range)
510     {
511     printf ("-");
512     printchar (i - 1);
513     }
514     }
515     }
516     putchar ('\n');
517     }
518    
519    
520     /* Print a compiled pattern string in human-readable form, starting at
521     the START pointer into it and ending just before the pointer END. */
522    
523     void
524     print_partial_compiled_pattern (start, end)
525     unsigned char *start;
526     unsigned char *end;
527     {
528     int mcnt, mcnt2;
529     unsigned char *p = start;
530     unsigned char *pend = end;
531    
532     if (start == NULL)
533     {
534     printf ("(null)\n");
535     return;
536     }
537    
538     /* Loop over pattern commands. */
539     while (p < pend)
540     {
541     switch ((re_opcode_t) *p++)
542     {
543     case no_op:
544     printf ("/no_op");
545     break;
546    
547     case exactn:
548     mcnt = *p++;
549     printf ("/exactn/%d", mcnt);
550     do
551     {
552     putchar ('/');
553     printchar (*p++);
554     }
555     while (--mcnt);
556     break;
557    
558     case start_memory:
559     mcnt = *p++;
560     printf ("/start_memory/%d/%d", mcnt, *p++);
561     break;
562    
563     case stop_memory:
564     mcnt = *p++;
565     printf ("/stop_memory/%d/%d", mcnt, *p++);
566     break;
567    
568     case duplicate:
569     printf ("/duplicate/%d", *p++);
570     break;
571    
572     case anychar:
573     printf ("/anychar");
574     break;
575    
576     case charset:
577     case charset_not:
578     {
579     register int c;
580    
581     printf ("/charset%s",
582     (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
583    
584     assert (p + *p < pend);
585    
586     for (c = 0; c < *p; c++)
587     {
588     unsigned bit;
589     unsigned char map_byte = p[1 + c];
590    
591     putchar ('/');
592    
593     for (bit = 0; bit < BYTEWIDTH; bit++)
594     if (map_byte & (1 << bit))
595     printchar (c * BYTEWIDTH + bit);
596     }
597     p += 1 + *p;
598     break;
599     }
600    
601     case begline:
602     printf ("/begline");
603     break;
604    
605     case endline:
606     printf ("/endline");
607     break;
608    
609     case on_failure_jump:
610     extract_number_and_incr (&mcnt, &p);
611     printf ("/on_failure_jump/0/%d", mcnt);
612     break;
613    
614     case on_failure_keep_string_jump:
615     extract_number_and_incr (&mcnt, &p);
616     printf ("/on_failure_keep_string_jump/0/%d", mcnt);
617     break;
618    
619     case dummy_failure_jump:
620     extract_number_and_incr (&mcnt, &p);
621     printf ("/dummy_failure_jump/0/%d", mcnt);
622     break;
623    
624     case push_dummy_failure:
625     printf ("/push_dummy_failure");
626     break;
627    
628     case maybe_pop_jump:
629     extract_number_and_incr (&mcnt, &p);
630     printf ("/maybe_pop_jump/0/%d", mcnt);
631     break;
632    
633     case pop_failure_jump:
634     extract_number_and_incr (&mcnt, &p);
635     printf ("/pop_failure_jump/0/%d", mcnt);
636     break;
637    
638     case jump_past_alt:
639     extract_number_and_incr (&mcnt, &p);
640     printf ("/jump_past_alt/0/%d", mcnt);
641     break;
642    
643     case jump:
644     extract_number_and_incr (&mcnt, &p);
645     printf ("/jump/0/%d", mcnt);
646     break;
647    
648     case succeed_n:
649     extract_number_and_incr (&mcnt, &p);
650     extract_number_and_incr (&mcnt2, &p);
651     printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
652     break;
653    
654     case jump_n:
655     extract_number_and_incr (&mcnt, &p);
656     extract_number_and_incr (&mcnt2, &p);
657     printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
658     break;
659    
660     case set_number_at:
661     extract_number_and_incr (&mcnt, &p);
662     extract_number_and_incr (&mcnt2, &p);
663     printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
664     break;
665    
666     case wordbound:
667     printf ("/wordbound");
668     break;
669    
670     case notwordbound:
671     printf ("/notwordbound");
672     break;
673    
674     case wordbeg:
675     printf ("/wordbeg");
676     break;
677    
678     case wordend:
679     printf ("/wordend");
680    
681     #ifdef emacs
682     case before_dot:
683     printf ("/before_dot");
684     break;
685    
686     case at_dot:
687     printf ("/at_dot");
688     break;
689    
690     case after_dot:
691     printf ("/after_dot");
692     break;
693    
694     case syntaxspec:
695     printf ("/syntaxspec");
696     mcnt = *p++;
697     printf ("/%d", mcnt);
698     break;
699    
700     case notsyntaxspec:
701     printf ("/notsyntaxspec");
702     mcnt = *p++;
703     printf ("/%d", mcnt);
704     break;
705     #endif /* emacs */
706    
707     case wordchar:
708     printf ("/wordchar");
709     break;
710    
711     case notwordchar:
712     printf ("/notwordchar");
713     break;
714    
715     case begbuf:
716     printf ("/begbuf");
717     break;
718    
719     case endbuf:
720     printf ("/endbuf");
721     break;
722    
723     default:
724     printf ("?%d", *(p-1));
725     }
726     }
727     printf ("/\n");
728     }
729    
730    
731     void
732     print_compiled_pattern (bufp)
733     struct re_pattern_buffer *bufp;
734     {
735     unsigned char *buffer = bufp->buffer;
736    
737     print_partial_compiled_pattern (buffer, buffer + bufp->used);
738     printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
739    
740     if (bufp->fastmap_accurate && bufp->fastmap)
741     {
742     printf ("fastmap: ");
743     print_fastmap (bufp->fastmap);
744     }
745    
746     printf ("re_nsub: %d\t", bufp->re_nsub);
747     printf ("regs_alloc: %d\t", bufp->regs_allocated);
748     printf ("can_be_null: %d\t", bufp->can_be_null);
749     printf ("newline_anchor: %d\n", bufp->newline_anchor);
750     printf ("no_sub: %d\t", bufp->no_sub);
751     printf ("not_bol: %d\t", bufp->not_bol);
752     printf ("not_eol: %d\t", bufp->not_eol);
753     printf ("syntax: %d\n", bufp->syntax);
754     /* Perhaps we should print the translate table? */
755     }
756    
757    
758     void
759     print_double_string (where, string1, size1, string2, size2)
760     const char *where;
761     const char *string1;
762     const char *string2;
763     int size1;
764     int size2;
765     {
766     unsigned this_char;
767    
768     if (where == NULL)
769     printf ("(null)");
770     else
771     {
772     if (FIRST_STRING_P (where))
773     {
774     for (this_char = where - string1; this_char < size1; this_char++)
775     printchar (string1[this_char]);
776    
777     where = string2;
778     }
779    
780     for (this_char = where - string2; this_char < size2; this_char++)
781     printchar (string2[this_char]);
782     }
783     }
784    
785     #else /* not DEBUG */
786    
787     #undef assert
788     #define assert(e)
789    
790     #define DEBUG_STATEMENT(e)
791     #define DEBUG_PRINT1(x)
792     #define DEBUG_PRINT2(x1, x2)
793     #define DEBUG_PRINT3(x1, x2, x3)
794     #define DEBUG_PRINT4(x1, x2, x3, x4)
795     #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
796     #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
797    
798     #endif /* not DEBUG */
799    
800     /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
801     also be assigned to arbitrarily: each pattern buffer stores its own
802     syntax, so it can be changed between regex compilations. */
803     reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
804    
805    
806     /* Specify the precise syntax of regexps for compilation. This provides
807     for compatibility for various utilities which historically have
808     different, incompatible syntaxes.
809    
810     The argument SYNTAX is a bit mask comprised of the various bits
811     defined in regex.h. We return the old syntax. */
812    
813     reg_syntax_t
814     re_set_syntax (syntax)
815     reg_syntax_t syntax;
816     {
817     reg_syntax_t ret = re_syntax_options;
818    
819     re_syntax_options = syntax;
820     return ret;
821     }
822    
823     /* This table gives an error message for each of the error codes listed
824     in regex.h. Obviously the order here has to be same as there. */
825    
826     static const char *re_error_msg[] =
827     { NULL, /* REG_NOERROR */
828     "No match", /* REG_NOMATCH */
829     "Invalid regular expression", /* REG_BADPAT */
830     "Invalid collation character", /* REG_ECOLLATE */
831     "Invalid character class name", /* REG_ECTYPE */
832     "Trailing backslash", /* REG_EESCAPE */
833     "Invalid back reference", /* REG_ESUBREG */
834     "Unmatched [ or [^", /* REG_EBRACK */
835     "Unmatched ( or \\(", /* REG_EPAREN */
836     "Unmatched \\{", /* REG_EBRACE */
837     "Invalid content of \\{\\}", /* REG_BADBR */
838     "Invalid range end", /* REG_ERANGE */
839     "Memory exhausted", /* REG_ESPACE */
840     "Invalid preceding regular expression", /* REG_BADRPT */
841     "Premature end of regular expression", /* REG_EEND */
842     "Regular expression too big", /* REG_ESIZE */
843     "Unmatched ) or \\)", /* REG_ERPAREN */
844     };
845    
846     /* Subroutine declarations and macros for regex_compile. */
847    
848     static void store_op1 (), store_op2 ();
849     static void insert_op1 (), insert_op2 ();
850     static boolean at_begline_loc_p (), at_endline_loc_p ();
851     static boolean group_in_compile_stack ();
852     static reg_errcode_t compile_range ();
853    
854     /* Fetch the next character in the uncompiled pattern---translating it
855     if necessary. Also cast from a signed character in the constant
856     string passed to us by the user to an unsigned char that we can use
857     as an array index (in, e.g., `translate'). */
858     #define PATFETCH(c) \
859     do {if (p == pend) return REG_EEND; \
860     c = (unsigned char) *p++; \
861     if (translate) c = translate[c]; \
862     } while (0)
863    
864     /* Fetch the next character in the uncompiled pattern, with no
865     translation. */
866     #define PATFETCH_RAW(c) \
867     do {if (p == pend) return REG_EEND; \
868     c = (unsigned char) *p++; \
869     } while (0)
870    
871     /* Go backwards one character in the pattern. */
872     #define PATUNFETCH p--
873    
874    
875     /* If `translate' is non-null, return translate[D], else just D. We
876     cast the subscript to translate because some data is declared as
877     `char *', to avoid warnings when a string constant is passed. But
878     when we use a character as a subscript we must make it unsigned. */
879     #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
880    
881    
882     /* Macros for outputting the compiled pattern into `buffer'. */
883    
884     /* If the buffer isn't allocated when it comes in, use this. */
885     #define INIT_BUF_SIZE 32
886    
887     /* Make sure we have at least N more bytes of space in buffer. */
888     #define GET_BUFFER_SPACE(n) \
889     while (b - bufp->buffer + (n) > bufp->allocated) \
890     EXTEND_BUFFER ()
891    
892     /* Make sure we have one more byte of buffer space and then add C to it. */
893     #define BUF_PUSH(c) \
894     do { \
895     GET_BUFFER_SPACE (1); \
896     *b++ = (unsigned char) (c); \
897     } while (0)
898    
899    
900     /* Ensure we have two more bytes of buffer space and then append C1 and C2. */
901     #define BUF_PUSH_2(c1, c2) \
902     do { \
903     GET_BUFFER_SPACE (2); \
904     *b++ = (unsigned char) (c1); \
905     *b++ = (unsigned char) (c2); \
906     } while (0)
907    
908    
909     /* As with BUF_PUSH_2, except for three bytes. */
910     #define BUF_PUSH_3(c1, c2, c3) \
911     do { \
912     GET_BUFFER_SPACE (3); \
913     *b++ = (unsigned char) (c1); \
914     *b++ = (unsigned char) (c2); \
915     *b++ = (unsigned char) (c3); \
916     } while (0)
917    
918    
919     /* Store a jump with opcode OP at LOC to location TO. We store a
920     relative address offset by the three bytes the jump itself occupies. */
921     #define STORE_JUMP(op, loc, to) \
922     store_op1 (op, loc, (to) - (loc) - 3)
923    
924     /* Likewise, for a two-argument jump. */
925     #define STORE_JUMP2(op, loc, to, arg) \
926     store_op2 (op, loc, (to) - (loc) - 3, arg)
927    
928     /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
929     #define INSERT_JUMP(op, loc, to) \
930     insert_op1 (op, loc, (to) - (loc) - 3, b)
931    
932     /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
933     #define INSERT_JUMP2(op, loc, to, arg) \
934     insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
935    
936    
937     /* This is not an arbitrary limit: the arguments which represent offsets
938     into the pattern are two bytes long. So if 2^16 bytes turns out to
939     be too small, many things would have to change. */
940     #define MAX_BUF_SIZE (1L << 16)
941    
942    
943     /* Extend the buffer by twice its current size via realloc and
944     reset the pointers that pointed into the old block to point to the
945     correct places in the new one. If extending the buffer results in it
946     being larger than MAX_BUF_SIZE, then flag memory exhausted. */
947     #define EXTEND_BUFFER() \
948     do { \
949     unsigned char *old_buffer = bufp->buffer; \
950     if (bufp->allocated == MAX_BUF_SIZE) \
951     return REG_ESIZE; \
952     bufp->allocated <<= 1; \
953     if (bufp->allocated > MAX_BUF_SIZE) \
954     bufp->allocated = MAX_BUF_SIZE; \
955     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
956     if (bufp->buffer == NULL) \
957     return REG_ESPACE; \
958     /* If the buffer moved, move all the pointers into it. */ \
959     if (old_buffer != bufp->buffer) \
960     { \
961     b = (b - old_buffer) + bufp->buffer; \
962     begalt = (begalt - old_buffer) + bufp->buffer; \
963     if (fixup_alt_jump) \
964     fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
965     if (laststart) \
966     laststart = (laststart - old_buffer) + bufp->buffer; \
967     if (pending_exact) \
968     pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
969     } \
970     } while (0)
971    
972    
973     /* Since we have one byte reserved for the register number argument to
974     {start,stop}_memory, the maximum number of groups we can report
975     things about is what fits in that byte. */
976     #define MAX_REGNUM 255
977    
978     /* But patterns can have more than `MAX_REGNUM' registers. We just
979     ignore the excess. */
980     typedef unsigned regnum_t;
981    
982    
983     /* Macros for the compile stack. */
984    
985     /* Since offsets can go either forwards or backwards, this type needs to
986     be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
987     typedef int pattern_offset_t;
988    
989     typedef struct
990     {
991     pattern_offset_t begalt_offset;
992     pattern_offset_t fixup_alt_jump;
993     pattern_offset_t inner_group_offset;
994     pattern_offset_t laststart_offset;
995     regnum_t regnum;
996     } compile_stack_elt_t;
997    
998    
999     typedef struct
1000     {
1001     compile_stack_elt_t *stack;
1002     unsigned size;
1003     unsigned avail; /* Offset of next open position. */
1004     } compile_stack_type;
1005    
1006    
1007     #define INIT_COMPILE_STACK_SIZE 32
1008    
1009     #define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
1010     #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
1011    
1012     /* The next available element. */
1013     #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1014    
1015    
1016     /* Set the bit for character C in a list. */
1017     #define SET_LIST_BIT(c) \
1018     (b[((unsigned char) (c)) / BYTEWIDTH] \
1019     |= 1 << (((unsigned char) c) % BYTEWIDTH))
1020    
1021    
1022     /* Get the next unsigned number in the uncompiled pattern. */
1023     #define GET_UNSIGNED_NUMBER(num) \
1024     { if (p != pend) \
1025     { \
1026     PATFETCH (c); \
1027     while (ISDIGIT (c)) \
1028     { \
1029     if (num < 0) \
1030     num = 0; \
1031     num = num * 10 + c - '0'; \
1032     if (p == pend) \
1033     break; \
1034     PATFETCH (c); \
1035     } \
1036     } \
1037     }
1038    
1039     #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
1040    
1041     #define IS_CHAR_CLASS(string) \
1042     (STREQ (string, "alpha") || STREQ (string, "upper") \
1043     || STREQ (string, "lower") || STREQ (string, "digit") \
1044     || STREQ (string, "alnum") || STREQ (string, "xdigit") \
1045     || STREQ (string, "space") || STREQ (string, "print") \
1046     || STREQ (string, "punct") || STREQ (string, "graph") \
1047     || STREQ (string, "cntrl") || STREQ (string, "blank"))
1048    
1049     /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1050     Returns one of error codes defined in `regex.h', or zero for success.
1051    
1052     Assumes the `allocated' (and perhaps `buffer') and `translate'
1053     fields are set in BUFP on entry.
1054    
1055     If it succeeds, results are put in BUFP (if it returns an error, the
1056     contents of BUFP are undefined):
1057     `buffer' is the compiled pattern;
1058     `syntax' is set to SYNTAX;
1059     `used' is set to the length of the compiled pattern;
1060     `fastmap_accurate' is zero;
1061     `re_nsub' is the number of subexpressions in PATTERN;
1062     `not_bol' and `not_eol' are zero;
1063    
1064     The `fastmap' and `newline_anchor' fields are neither
1065     examined nor set. */
1066    
1067     static reg_errcode_t
1068     regex_compile (pattern, size, syntax, bufp)
1069     const char *pattern;
1070     int size;
1071     reg_syntax_t syntax;
1072     struct re_pattern_buffer *bufp;
1073     {
1074     /* We fetch characters from PATTERN here. Even though PATTERN is
1075     `char *' (i.e., signed), we declare these variables as unsigned, so
1076     they can be reliably used as array indices. */
1077     register unsigned char c, c1;
1078    
1079     /* A random tempory spot in PATTERN. */
1080     const char *p1;
1081    
1082     /* Points to the end of the buffer, where we should append. */
1083     register unsigned char *b;
1084    
1085     /* Keeps track of unclosed groups. */
1086     compile_stack_type compile_stack;
1087    
1088     /* Points to the current (ending) position in the pattern. */
1089     const char *p = pattern;
1090     const char *pend = pattern + size;
1091    
1092     /* How to translate the characters in the pattern. */
1093     char *translate = bufp->translate;
1094    
1095     /* Address of the count-byte of the most recently inserted `exactn'
1096     command. This makes it possible to tell if a new exact-match
1097     character can be added to that command or if the character requires
1098     a new `exactn' command. */
1099     unsigned char *pending_exact = 0;
1100    
1101     /* Address of start of the most recently finished expression.
1102     This tells, e.g., postfix * where to find the start of its
1103     operand. Reset at the beginning of groups and alternatives. */
1104     unsigned char *laststart = 0;
1105    
1106     /* Address of beginning of regexp, or inside of last group. */
1107     unsigned char *begalt;
1108    
1109     /* Place in the uncompiled pattern (i.e., the {) to
1110     which to go back if the interval is invalid. */
1111     const char *beg_interval;
1112    
1113     /* Address of the place where a forward jump should go to the end of
1114     the containing expression. Each alternative of an `or' -- except the
1115     last -- ends with a forward jump of this sort. */
1116     unsigned char *fixup_alt_jump = 0;
1117    
1118     /* Counts open-groups as they are encountered. Remembered for the
1119     matching close-group on the compile stack, so the same register
1120     number is put in the stop_memory as the start_memory. */
1121     regnum_t regnum = 0;
1122    
1123     #ifdef DEBUG
1124     DEBUG_PRINT1 ("\nCompiling pattern: ");
1125     if (debug)
1126     {
1127     unsigned debug_count;
1128    
1129     for (debug_count = 0; debug_count < size; debug_count++)
1130     printchar (pattern[debug_count]);
1131     putchar ('\n');
1132     }
1133     #endif /* DEBUG */
1134    
1135     /* Initialize the compile stack. */
1136     compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1137     if (compile_stack.stack == NULL)
1138     return REG_ESPACE;
1139    
1140     compile_stack.size = INIT_COMPILE_STACK_SIZE;
1141     compile_stack.avail = 0;
1142    
1143     /* Initialize the pattern buffer. */
1144     bufp->syntax = syntax;
1145     bufp->fastmap_accurate = 0;
1146     bufp->not_bol = bufp->not_eol = 0;
1147    
1148     /* Set `used' to zero, so that if we return an error, the pattern
1149     printer (for debugging) will think there's no pattern. We reset it
1150     at the end. */
1151     bufp->used = 0;
1152    
1153     /* Always count groups, whether or not bufp->no_sub is set. */
1154     bufp->re_nsub = 0;
1155    
1156     #if !defined (emacs) && !defined (SYNTAX_TABLE)
1157     /* Initialize the syntax table. */
1158     init_syntax_once ();
1159     #endif
1160    
1161     if (bufp->allocated == 0)
1162     {
1163     if (bufp->buffer)
1164     { /* If zero allocated, but buffer is non-null, try to realloc
1165     enough space. This loses if buffer's address is bogus, but
1166     that is the user's responsibility. */
1167     RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1168     }
1169     else
1170     { /* Caller did not allocate a buffer. Do it for them. */
1171     bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1172     }
1173     if (!bufp->buffer) return REG_ESPACE;
1174    
1175     bufp->allocated = INIT_BUF_SIZE;
1176     }
1177    
1178     begalt = b = bufp->buffer;
1179    
1180     /* Loop through the uncompiled pattern until we're at the end. */
1181     while (p != pend)
1182     {
1183     PATFETCH (c);
1184    
1185     switch (c)
1186     {
1187     case '^':
1188     {
1189     if ( /* If at start of pattern, it's an operator. */
1190     p == pattern + 1
1191     /* If context independent, it's an operator. */
1192     || syntax & RE_CONTEXT_INDEP_ANCHORS
1193     /* Otherwise, depends on what's come before. */
1194     || at_begline_loc_p (pattern, p, syntax))
1195     BUF_PUSH (begline);
1196     else
1197     goto normal_char;
1198     }
1199     break;
1200    
1201    
1202     case '$':
1203     {
1204     if ( /* If at end of pattern, it's an operator. */
1205     p == pend
1206     /* If context independent, it's an operator. */
1207     || syntax & RE_CONTEXT_INDEP_ANCHORS
1208     /* Otherwise, depends on what's next. */
1209     || at_endline_loc_p (p, pend, syntax))
1210     BUF_PUSH (endline);
1211     else
1212     goto normal_char;
1213     }
1214     break;
1215    
1216    
1217     case '+':
1218     case '?':
1219     if ((syntax & RE_BK_PLUS_QM)
1220     || (syntax & RE_LIMITED_OPS))
1221     goto normal_char;
1222     handle_plus:
1223     case '*':
1224     /* If there is no previous pattern... */
1225     if (!laststart)
1226     {
1227     if (syntax & RE_CONTEXT_INVALID_OPS)
1228     return REG_BADRPT;
1229     else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1230     goto normal_char;
1231     }
1232    
1233     {
1234     /* Are we optimizing this jump? */
1235     boolean keep_string_p = false;
1236    
1237     /* 1 means zero (many) matches is allowed. */
1238     char zero_times_ok = 0, many_times_ok = 0;
1239    
1240     /* If there is a sequence of repetition chars, collapse it
1241     down to just one (the right one). We can't combine
1242     interval operators with these because of, e.g., `a{2}*',
1243     which should only match an even number of `a's. */
1244    
1245     for (;;)
1246     {
1247     zero_times_ok |= c != '+';
1248     many_times_ok |= c != '?';
1249    
1250     if (p == pend)
1251     break;
1252    
1253     PATFETCH (c);
1254    
1255     if (c == '*'
1256     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1257     ;
1258    
1259     else if (syntax & RE_BK_PLUS_QM && c == '\\')
1260     {
1261     if (p == pend) return REG_EESCAPE;
1262    
1263     PATFETCH (c1);
1264     if (!(c1 == '+' || c1 == '?'))
1265     {
1266     PATUNFETCH;
1267     PATUNFETCH;
1268     break;
1269     }
1270    
1271     c = c1;
1272     }
1273     else
1274     {
1275     PATUNFETCH;
1276     break;
1277     }
1278    
1279     /* If we get here, we found another repeat character. */
1280     }
1281    
1282     /* Star, etc. applied to an empty pattern is equivalent
1283     to an empty pattern. */
1284     if (!laststart)
1285     break;
1286    
1287     /* Now we know whether or not zero matches is allowed
1288     and also whether or not two or more matches is allowed. */
1289     if (many_times_ok)
1290     { /* More than one repetition is allowed, so put in at the
1291     end a backward relative jump from `b' to before the next
1292     jump we're going to put in below (which jumps from
1293     laststart to after this jump).
1294    
1295     But if we are at the `*' in the exact sequence `.*\n',
1296     insert an unconditional jump backwards to the .,
1297     instead of the beginning of the loop. This way we only
1298     push a failure point once, instead of every time
1299     through the loop. */
1300     assert (p - 1 > pattern);
1301    
1302     /* Allocate the space for the jump. */
1303     GET_BUFFER_SPACE (3);
1304    
1305     /* We know we are not at the first character of the pattern,
1306     because laststart was nonzero. And we've already
1307     incremented `p', by the way, to be the character after
1308     the `*'. Do we have to do something analogous here
1309     for null bytes, because of RE_DOT_NOT_NULL? */
1310     if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1311     && zero_times_ok
1312     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1313     && !(syntax & RE_DOT_NEWLINE))
1314     { /* We have .*\n. */
1315     STORE_JUMP (jump, b, laststart);
1316     keep_string_p = true;
1317     }
1318     else
1319     /* Anything else. */
1320     STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1321    
1322     /* We've added more stuff to the buffer. */
1323     b += 3;
1324     }
1325    
1326     /* On failure, jump from laststart to b + 3, which will be the
1327     end of the buffer after this jump is inserted. */
1328     GET_BUFFER_SPACE (3);
1329     INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1330     : on_failure_jump,
1331     laststart, b + 3);
1332     pending_exact = 0;
1333     b += 3;
1334    
1335     if (!zero_times_ok)
1336     {
1337     /* At least one repetition is required, so insert a
1338     `dummy_failure_jump' before the initial
1339     `on_failure_jump' instruction of the loop. This
1340     effects a skip over that instruction the first time
1341     we hit that loop. */
1342     GET_BUFFER_SPACE (3);
1343     INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1344     b += 3;
1345     }
1346     }
1347     break;
1348    
1349    
1350     case '.':
1351     laststart = b;
1352     BUF_PUSH (anychar);
1353     break;
1354    
1355    
1356     case '[':
1357     {
1358     boolean had_char_class = false;
1359    
1360     if (p == pend) return REG_EBRACK;
1361    
1362     /* Ensure that we have enough space to push a charset: the
1363     opcode, the length count, and the bitset; 34 bytes in all. */
1364     GET_BUFFER_SPACE (34);
1365    
1366     laststart = b;
1367    
1368     /* We test `*p == '^' twice, instead of using an if
1369     statement, so we only need one BUF_PUSH. */
1370     BUF_PUSH (*p == '^' ? charset_not : charset);
1371     if (*p == '^')
1372     p++;
1373    
1374     /* Remember the first position in the bracket expression. */
1375     p1 = p;
1376    
1377     /* Push the number of bytes in the bitmap. */
1378     BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1379    
1380     /* Clear the whole map. */
1381     bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1382    
1383     /* charset_not matches newline according to a syntax bit. */
1384     if ((re_opcode_t) b[-2] == charset_not
1385     && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1386     SET_LIST_BIT ('\n');
1387    
1388     /* Read in characters and ranges, setting map bits. */
1389     for (;;)
1390     {
1391     if (p == pend) return REG_EBRACK;
1392    
1393     PATFETCH (c);
1394    
1395     /* \ might escape characters inside [...] and [^...]. */
1396     if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1397     {
1398     if (p == pend) return REG_EESCAPE;
1399    
1400     PATFETCH (c1);
1401     SET_LIST_BIT (c1);
1402     continue;
1403     }
1404    
1405     /* Could be the end of the bracket expression. If it's
1406     not (i.e., when the bracket expression is `[]' so
1407     far), the ']' character bit gets set way below. */
1408     if (c == ']' && p != p1 + 1)
1409     break;
1410    
1411     /* Look ahead to see if it's a range when the last thing
1412     was a character class. */
1413     if (had_char_class && c == '-' && *p != ']')
1414     return REG_ERANGE;
1415    
1416     /* Look ahead to see if it's a range when the last thing
1417     was a character: if this is a hyphen not at the
1418     beginning or the end of a list, then it's the range
1419     operator. */
1420     if (c == '-'
1421     && !(p - 2 >= pattern && p[-2] == '[')
1422     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1423     && *p != ']')
1424     {
1425     reg_errcode_t ret
1426     = compile_range (&p, pend, translate, syntax, b);
1427     if (ret != REG_NOERROR) return ret;
1428     }
1429    
1430     else if (p[0] == '-' && p[1] != ']')
1431     { /* This handles ranges made up of characters only. */
1432     reg_errcode_t ret;
1433    
1434     /* Move past the `-'. */
1435     PATFETCH (c1);
1436    
1437     ret = compile_range (&p, pend, translate, syntax, b);
1438     if (ret != REG_NOERROR) return ret;
1439     }
1440    
1441     /* See if we're at the beginning of a possible character
1442     class. */
1443    
1444     else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1445     { /* Leave room for the null. */
1446     char str[CHAR_CLASS_MAX_LENGTH + 1];
1447    
1448     PATFETCH (c);
1449     c1 = 0;
1450    
1451     /* If pattern is `[[:'. */
1452     if (p == pend) return REG_EBRACK;
1453    
1454     for (;;)
1455     {
1456     PATFETCH (c);
1457     if (c == ':' || c == ']' || p == pend
1458     || c1 == CHAR_CLASS_MAX_LENGTH)
1459     break;
1460     str[c1++] = c;
1461     }
1462     str[c1] = '\0';
1463    
1464     /* If isn't a word bracketed by `[:' and:`]':
1465     undo the ending character, the letters, and leave
1466     the leading `:' and `[' (but set bits for them). */
1467     if (c == ':' && *p == ']')
1468     {
1469     int ch;
1470     boolean is_alnum = STREQ (str, "alnum");
1471     boolean is_alpha = STREQ (str, "alpha");
1472     boolean is_blank = STREQ (str, "blank");
1473     boolean is_cntrl = STREQ (str, "cntrl");
1474     boolean is_digit = STREQ (str, "digit");
1475     boolean is_graph = STREQ (str, "graph");
1476     boolean is_lower = STREQ (str, "lower");
1477     boolean is_print = STREQ (str, "print");
1478     boolean is_punct = STREQ (str, "punct");
1479     boolean is_space = STREQ (str, "space");
1480     boolean is_upper = STREQ (str, "upper");
1481     boolean is_xdigit = STREQ (str, "xdigit");
1482    
1483     if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1484    
1485     /* Throw away the ] at the end of the character
1486     class. */
1487     PATFETCH (c);
1488    
1489     if (p == pend) return REG_EBRACK;
1490    
1491     for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1492     {
1493     if ( (is_alnum && ISALNUM (ch))
1494     || (is_alpha && ISALPHA (ch))
1495     || (is_blank && ISBLANK (ch))
1496     || (is_cntrl && ISCNTRL (ch))
1497     || (is_digit && ISDIGIT (ch))
1498     || (is_graph && ISGRAPH (ch))
1499     || (is_lower && ISLOWER (ch))
1500     || (is_print && ISPRINT (ch))
1501     || (is_punct && ISPUNCT (ch))
1502     || (is_space && ISSPACE (ch))
1503     || (is_upper && ISUPPER (ch))
1504     || (is_xdigit && ISXDIGIT (ch)))
1505     SET_LIST_BIT (ch);
1506     }
1507     had_char_class = true;
1508     }
1509     else
1510     {
1511     c1++;
1512     while (c1--)
1513     PATUNFETCH;
1514     SET_LIST_BIT ('[');
1515     SET_LIST_BIT (':');
1516     had_char_class = false;
1517     }
1518     }
1519     else
1520     {
1521     had_char_class = false;
1522     SET_LIST_BIT (c);
1523     }
1524     }
1525    
1526     /* Discard any (non)matching list bytes that are all 0 at the
1527     end of the map. Decrease the map-length byte too. */
1528     while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1529     b[-1]--;
1530     b += b[-1];
1531     }
1532     break;
1533    
1534    
1535     case '(':
1536     if (syntax & RE_NO_BK_PARENS)
1537     goto handle_open;
1538     else
1539     goto normal_char;
1540    
1541    
1542     case ')':
1543     if (syntax & RE_NO_BK_PARENS)
1544     goto handle_close;
1545     else
1546     goto normal_char;
1547    
1548    
1549     case '\n':
1550     if (syntax & RE_NEWLINE_ALT)
1551     goto handle_alt;
1552     else
1553     goto normal_char;
1554    
1555    
1556     case '|':
1557     if (syntax & RE_NO_BK_VBAR)
1558     goto handle_alt;
1559     else
1560     goto normal_char;
1561    
1562    
1563     case '{':
1564     if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1565     goto handle_interval;
1566     else
1567     goto normal_char;
1568    
1569    
1570     case '\\':
1571     if (p == pend) return REG_EESCAPE;
1572    
1573     /* Do not translate the character after the \, so that we can
1574     distinguish, e.g., \B from \b, even if we normally would
1575     translate, e.g., B to b. */
1576     PATFETCH_RAW (c);
1577    
1578     switch (c)
1579     {
1580     case '(':
1581     if (syntax & RE_NO_BK_PARENS)
1582     goto normal_backslash;
1583    
1584     handle_open:
1585     bufp->re_nsub++;
1586     regnum++;
1587    
1588     if (COMPILE_STACK_FULL)
1589     {
1590     RETALLOC (compile_stack.stack, compile_stack.size << 1,
1591     compile_stack_elt_t);
1592     if (compile_stack.stack == NULL) return REG_ESPACE;
1593    
1594     compile_stack.size <<= 1;
1595     }
1596    
1597     /* These are the values to restore when we hit end of this
1598     group. They are all relative offsets, so that if the
1599     whole pattern moves because of realloc, they will still
1600     be valid. */
1601     COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1602     COMPILE_STACK_TOP.fixup_alt_jump
1603     = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1604     COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1605     COMPILE_STACK_TOP.regnum = regnum;
1606    
1607     /* We will eventually replace the 0 with the number of
1608     groups inner to this one. But do not push a
1609     start_memory for groups beyond the last one we can
1610     represent in the compiled pattern. */
1611     if (regnum <= MAX_REGNUM)
1612     {
1613     COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1614     BUF_PUSH_3 (start_memory, regnum, 0);
1615     }
1616    
1617     compile_stack.avail++;
1618    
1619     fixup_alt_jump = 0;
1620     laststart = 0;
1621     begalt = b;
1622     /* If we've reached MAX_REGNUM groups, then this open
1623     won't actually generate any code, so we'll have to
1624     clear pending_exact explicitly. */
1625     pending_exact = 0;
1626     break;
1627    
1628    
1629     case ')':
1630     if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1631    
1632     if (COMPILE_STACK_EMPTY)
1633     if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1634     goto normal_backslash;
1635     else
1636     return REG_ERPAREN;
1637    
1638     handle_close:
1639     if (fixup_alt_jump)
1640     { /* Push a dummy failure point at the end of the
1641     alternative for a possible future
1642     `pop_failure_jump' to pop. See comments at
1643     `push_dummy_failure' in `re_match_2'. */
1644     BUF_PUSH (push_dummy_failure);
1645    
1646     /* We allocated space for this jump when we assigned
1647     to `fixup_alt_jump', in the `handle_alt' case below. */
1648     STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1649     }
1650    
1651     /* See similar code for backslashed left paren above. */
1652     if (COMPILE_STACK_EMPTY)
1653     if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1654     goto normal_char;
1655     else
1656     return REG_ERPAREN;
1657    
1658     /* Since we just checked for an empty stack above, this
1659     ``can't happen''. */
1660     assert (compile_stack.avail != 0);
1661     {
1662     /* We don't just want to restore into `regnum', because
1663     later groups should continue to be numbered higher,
1664     as in `(ab)c(de)' -- the second group is #2. */
1665     regnum_t this_group_regnum;
1666    
1667     compile_stack.avail--;
1668     begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1669     fixup_alt_jump
1670     = COMPILE_STACK_TOP.fixup_alt_jump
1671     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1672     : 0;
1673     laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1674     this_group_regnum = COMPILE_STACK_TOP.regnum;
1675     /* If we've reached MAX_REGNUM groups, then this open
1676     won't actually generate any code, so we'll have to
1677     clear pending_exact explicitly. */
1678     pending_exact = 0;
1679    
1680     /* We're at the end of the group, so now we know how many
1681     groups were inside this one. */
1682     if (this_group_regnum <= MAX_REGNUM)
1683     {
1684     unsigned char *inner_group_loc
1685     = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1686    
1687     *inner_group_loc = regnum - this_group_regnum;
1688     BUF_PUSH_3 (stop_memory, this_group_regnum,
1689     regnum - this_group_regnum);
1690     }
1691     }
1692     break;
1693    
1694    
1695     case '|': /* `\|'. */
1696     if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1697     goto normal_backslash;
1698     handle_alt:
1699     if (syntax & RE_LIMITED_OPS)
1700     goto normal_char;
1701    
1702     /* Insert before the previous alternative a jump which
1703     jumps to this alternative if the former fails. */
1704     GET_BUFFER_SPACE (3);
1705     INSERT_JUMP (on_failure_jump, begalt, b + 6);
1706     pending_exact = 0;
1707     b += 3;
1708    
1709     /* The alternative before this one has a jump after it
1710     which gets executed if it gets matched. Adjust that
1711     jump so it will jump to this alternative's analogous
1712     jump (put in below, which in turn will jump to the next
1713     (if any) alternative's such jump, etc.). The last such
1714     jump jumps to the correct final destination. A picture:
1715     _____ _____
1716     | | | |
1717     | v | v
1718     a | b | c
1719    
1720     If we are at `b', then fixup_alt_jump right now points to a
1721     three-byte space after `a'. We'll put in the jump, set
1722     fixup_alt_jump to right after `b', and leave behind three
1723     bytes which we'll fill in when we get to after `c'. */
1724    
1725     if (fixup_alt_jump)
1726     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1727    
1728     /* Mark and leave space for a jump after this alternative,
1729     to be filled in later either by next alternative or
1730     when know we're at the end of a series of alternatives. */
1731     fixup_alt_jump = b;
1732     GET_BUFFER_SPACE (3);
1733     b += 3;
1734    
1735     laststart = 0;
1736     begalt = b;
1737     break;
1738    
1739    
1740     case '{':
1741     /* If \{ is a literal. */
1742     if (!(syntax & RE_INTERVALS)
1743     /* If we're at `\{' and it's not the open-interval
1744     operator. */
1745     || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1746     || (p - 2 == pattern && p == pend))
1747     goto normal_backslash;
1748    
1749     handle_interval:
1750     {
1751     /* If got here, then the syntax allows intervals. */
1752    
1753     /* At least (most) this many matches must be made. */
1754     int lower_bound = -1, upper_bound = -1;
1755    
1756     beg_interval = p - 1;
1757    
1758     if (p == pend)
1759     {
1760     if (syntax & RE_NO_BK_BRACES)
1761     goto unfetch_interval;
1762     else
1763     return REG_EBRACE;
1764     }
1765    
1766     GET_UNSIGNED_NUMBER (lower_bound);
1767    
1768     if (c == ',')
1769     {
1770     GET_UNSIGNED_NUMBER (upper_bound);
1771     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1772     }
1773     else
1774     /* Interval such as `{1}' => match exactly once. */
1775     upper_bound = lower_bound;
1776    
1777     if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1778     || lower_bound > upper_bound)
1779     {
1780     if (syntax & RE_NO_BK_BRACES)
1781     goto unfetch_interval;
1782     else
1783     return REG_BADBR;
1784     }
1785    
1786     if (!(syntax & RE_NO_BK_BRACES))
1787     {
1788     if (c != '\\') return REG_EBRACE;
1789    
1790     PATFETCH (c);
1791     }
1792    
1793     if (c != '}')
1794     {
1795     if (syntax & RE_NO_BK_BRACES)
1796     goto unfetch_interval;
1797     else
1798     return REG_BADBR;
1799     }
1800    
1801     /* We just parsed a valid interval. */
1802    
1803     /* If it's invalid to have no preceding re. */
1804     if (!laststart)
1805     {
1806     if (syntax & RE_CONTEXT_INVALID_OPS)
1807     return REG_BADRPT;
1808     else if (syntax & RE_CONTEXT_INDEP_OPS)
1809     laststart = b;
1810     else
1811     goto unfetch_interval;
1812     }
1813    
1814     /* If the upper bound is zero, don't want to succeed at
1815     all; jump from `laststart' to `b + 3', which will be
1816     the end of the buffer after we insert the jump. */
1817     if (upper_bound == 0)
1818     {
1819     GET_BUFFER_SPACE (3);
1820     INSERT_JUMP (jump, laststart, b + 3);
1821     b += 3;
1822     }
1823    
1824     /* Otherwise, we have a nontrivial interval. When
1825     we're all done, the pattern will look like:
1826     set_number_at <jump count> <upper bound>
1827     set_number_at <succeed_n count> <lower bound>
1828     succeed_n <after jump addr> <succed_n count>
1829     <body of loop>
1830     jump_n <succeed_n addr> <jump count>
1831     (The upper bound and `jump_n' are omitted if
1832     `upper_bound' is 1, though.) */
1833     else
1834     { /* If the upper bound is > 1, we need to insert
1835     more at the end of the loop. */
1836     unsigned nbytes = 10 + (upper_bound > 1) * 10;
1837    
1838     GET_BUFFER_SPACE (nbytes);
1839    
1840     /* Initialize lower bound of the `succeed_n', even
1841     though it will be set during matching by its
1842     attendant `set_number_at' (inserted next),
1843     because `re_compile_fastmap' needs to know.
1844     Jump to the `jump_n' we might insert below. */
1845     INSERT_JUMP2 (succeed_n, laststart,
1846     b + 5 + (upper_bound > 1) * 5,
1847     lower_bound);
1848     b += 5;
1849    
1850     /* Code to initialize the lower bound. Insert
1851     before the `succeed_n'. The `5' is the last two
1852     bytes of this `set_number_at', plus 3 bytes of
1853     the following `succeed_n'. */
1854     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1855     b += 5;
1856    
1857     if (upper_bound > 1)
1858     { /* More than one repetition is allowed, so
1859     append a backward jump to the `succeed_n'
1860     that starts this interval.
1861    
1862     When we've reached this during matching,
1863     we'll have matched the interval once, so
1864     jump back only `upper_bound - 1' times. */
1865     STORE_JUMP2 (jump_n, b, laststart + 5,
1866     upper_bound - 1);
1867     b += 5;
1868    
1869     /* The location we want to set is the second
1870     parameter of the `jump_n'; that is `b-2' as
1871     an absolute address. `laststart' will be
1872     the `set_number_at' we're about to insert;
1873     `laststart+3' the number to set, the source
1874     for the relative address. But we are
1875     inserting into the middle of the pattern --
1876     so everything is getting moved up by 5.
1877     Conclusion: (b - 2) - (laststart + 3) + 5,
1878     i.e., b - laststart.
1879    
1880     We insert this at the beginning of the loop
1881     so that if we fail during matching, we'll
1882     reinitialize the bounds. */
1883     insert_op2 (set_number_at, laststart, b - laststart,
1884     upper_bound - 1, b);
1885     b += 5;
1886     }
1887     }
1888     pending_exact = 0;
1889     beg_interval = NULL;
1890     }
1891     break;
1892    
1893     unfetch_interval:
1894     /* If an invalid interval, match the characters as literals. */
1895     assert (beg_interval);
1896     p = beg_interval;
1897     beg_interval = NULL;
1898    
1899     /* normal_char and normal_backslash need `c'. */
1900     PATFETCH (c);
1901    
1902     if (!(syntax & RE_NO_BK_BRACES))
1903     {
1904     if (p > pattern && p[-1] == '\\')
1905     goto normal_backslash;
1906     }
1907     goto normal_char;
1908    
1909     #ifdef emacs
1910     /* There is no way to specify the before_dot and after_dot
1911     operators. rms says this is ok. --karl */
1912     case '=':
1913     BUF_PUSH (at_dot);
1914     break;
1915    
1916     case 's':
1917     laststart = b;
1918     PATFETCH (c);
1919     BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1920     break;
1921    
1922     case 'S':
1923     laststart = b;
1924     PATFETCH (c);
1925     BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1926     break;
1927     #endif /* emacs */
1928    
1929    
1930     case 'w':
1931     laststart = b;
1932     BUF_PUSH (wordchar);
1933     break;
1934    
1935    
1936     case 'W':
1937     laststart = b;
1938     BUF_PUSH (notwordchar);
1939     break;
1940    
1941    
1942     case '<':
1943     BUF_PUSH (wordbeg);
1944     break;
1945    
1946     case '>':
1947     BUF_PUSH (wordend);
1948     break;
1949    
1950     case 'b':
1951     BUF_PUSH (wordbound);
1952     break;
1953    
1954     case 'B':
1955     BUF_PUSH (notwordbound);
1956     break;
1957    
1958     case '`':
1959     BUF_PUSH (begbuf);
1960     break;
1961    
1962     case '\'':
1963     BUF_PUSH (endbuf);
1964     break;
1965    
1966     case '1': case '2': case '3': case '4': case '5':
1967     case '6': case '7': case '8': case '9':
1968     if (syntax & RE_NO_BK_REFS)
1969     goto normal_char;
1970    
1971     c1 = c - '0';
1972    
1973     if (c1 > regnum)
1974     return REG_ESUBREG;
1975    
1976     /* Can't back reference to a subexpression if inside of it. */
1977     if (group_in_compile_stack (compile_stack, c1))
1978     goto normal_char;
1979    
1980     laststart = b;
1981     BUF_PUSH_2 (duplicate, c1);
1982     break;
1983    
1984    
1985     case '+':
1986     case '?':
1987     if (syntax & RE_BK_PLUS_QM)
1988     goto handle_plus;
1989     else
1990     goto normal_backslash;
1991    
1992     default:
1993     normal_backslash:
1994     /* You might think it would be useful for \ to mean
1995     not to translate; but if we don't translate it
1996     it will never match anything. */
1997     c = TRANSLATE (c);
1998     goto normal_char;
1999     }
2000     break;
2001    
2002    
2003     default:
2004     /* Expects the character in `c'. */
2005     normal_char:
2006     /* If no exactn currently being built. */
2007     if (!pending_exact
2008    
2009     /* If last exactn not at current position. */
2010     || pending_exact + *pending_exact + 1 != b
2011    
2012     /* We have only one byte following the exactn for the count. */
2013     || *pending_exact == (1 << BYTEWIDTH) - 1
2014    
2015     /* If followed by a repetition operator. */
2016     || *p == '*' || *p == '^'
2017     || ((syntax & RE_BK_PLUS_QM)
2018     ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2019     : (*p == '+' || *p == '?'))
2020     || ((syntax & RE_INTERVALS)
2021     && ((syntax & RE_NO_BK_BRACES)
2022     ? *p == '{'
2023     : (p[0] == '\\' && p[1] == '{'))))
2024     {
2025     /* Start building a new exactn. */
2026    
2027     laststart = b;
2028    
2029     BUF_PUSH_2 (exactn, 0);
2030     pending_exact = b - 1;
2031     }
2032    
2033     BUF_PUSH (c);
2034     (*pending_exact)++;
2035     break;
2036     } /* switch (c) */
2037     } /* while p != pend */
2038    
2039    
2040     /* Through the pattern now. */
2041    
2042     if (fixup_alt_jump)
2043     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2044    
2045     if (!COMPILE_STACK_EMPTY)
2046     return REG_EPAREN;
2047    
2048     free (compile_stack.stack);
2049    
2050     /* We have succeeded; set the length of the buffer. */
2051     bufp->used = b - bufp->buffer;
2052    
2053     #ifdef DEBUG
2054     if (debug)
2055     {
2056     DEBUG_PRINT1 ("\nCompiled pattern: ");
2057     print_compiled_pattern (bufp);
2058     }
2059     #endif /* DEBUG */
2060    
2061     return REG_NOERROR;
2062     } /* regex_compile */
2063    
2064     /* Subroutines for `regex_compile'. */
2065    
2066     /* Store OP at LOC followed by two-byte integer parameter ARG. */
2067    
2068     static void
2069     store_op1 (op, loc, arg)
2070     re_opcode_t op;
2071     unsigned char *loc;
2072     int arg;
2073     {
2074     *loc = (unsigned char) op;
2075     STORE_NUMBER (loc + 1, arg);
2076     }
2077    
2078    
2079     /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
2080    
2081     static void
2082     store_op2 (op, loc, arg1, arg2)
2083     re_opcode_t op;
2084     unsigned char *loc;
2085     int arg1, arg2;
2086     {
2087     *loc = (unsigned char) op;
2088     STORE_NUMBER (loc + 1, arg1);
2089     STORE_NUMBER (loc + 3, arg2);
2090     }
2091    
2092    
2093     /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2094     for OP followed by two-byte integer parameter ARG. */
2095    
2096     static void
2097     insert_op1 (op, loc, arg, end)
2098     re_opcode_t op;
2099     unsigned char *loc;
2100     int arg;
2101     unsigned char *end;
2102     {
2103     register unsigned char *pfrom = end;
2104     register unsigned char *pto = end + 3;
2105    
2106     while (pfrom != loc)
2107     *--pto = *--pfrom;
2108    
2109     store_op1 (op, loc, arg);
2110     }
2111    
2112    
2113     /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
2114    
2115     static void
2116     insert_op2 (op, loc, arg1, arg2, end)
2117     re_opcode_t op;
2118     unsigned char *loc;
2119     int arg1, arg2;
2120     unsigned char *end;
2121     {
2122     register unsigned char *pfrom = end;
2123     register unsigned char *pto = end + 5;
2124    
2125     while (pfrom != loc)
2126     *--pto = *--pfrom;
2127    
2128     store_op2 (op, loc, arg1, arg2);
2129     }
2130    
2131    
2132     /* P points to just after a ^ in PATTERN. Return true if that ^ comes
2133     after an alternative or a begin-subexpression. We assume there is at
2134     least one character before the ^. */
2135    
2136     static boolean
2137     at_begline_loc_p (pattern, p, syntax)
2138     const char *pattern, *p;
2139     reg_syntax_t syntax;
2140     {
2141     const char *prev = p - 2;
2142     boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2143    
2144     return
2145     /* After a subexpression? */
2146     (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2147     /* After an alternative? */
2148     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2149     }
2150    
2151    
2152     /* The dual of at_begline_loc_p. This one is for $. We assume there is
2153     at least one character after the $, i.e., `P < PEND'. */
2154    
2155     static boolean
2156     at_endline_loc_p (p, pend, syntax)
2157     const char *p, *pend;
2158     int syntax;
2159     {
2160     const char *next = p;
2161     boolean next_backslash = *next == '\\';
2162     const char *next_next = p + 1 < pend ? p + 1 : NULL;
2163    
2164     return
2165     /* Before a subexpression? */
2166     (syntax & RE_NO_BK_PARENS ? *next == ')'
2167     : next_backslash && next_next && *next_next == ')')
2168     /* Before an alternative? */
2169     || (syntax & RE_NO_BK_VBAR ? *next == '|'
2170     : next_backslash && next_next && *next_next == '|');
2171     }
2172    
2173    
2174     /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2175     false if it's not. */
2176    
2177     static boolean
2178     group_in_compile_stack (compile_stack, regnum)
2179     compile_stack_type compile_stack;
2180     regnum_t regnum;
2181     {
2182     int this_element;
2183    
2184     for (this_element = compile_stack.avail - 1;
2185     this_element >= 0;
2186     this_element--)
2187     if (compile_stack.stack[this_element].regnum == regnum)
2188     return true;
2189    
2190     return false;
2191     }
2192    
2193    
2194     /* Read the ending character of a range (in a bracket expression) from the
2195     uncompiled pattern *P_PTR (which ends at PEND). We assume the
2196     starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
2197     Then we set the translation of all bits between the starting and
2198     ending characters (inclusive) in the compiled pattern B.
2199    
2200     Return an error code.
2201    
2202     We use these short variable names so we can use the same macros as
2203     `regex_compile' itself. */
2204    
2205     static reg_errcode_t
2206     compile_range (p_ptr, pend, translate, syntax, b)
2207     const char **p_ptr, *pend;
2208     char *translate;
2209     reg_syntax_t syntax;
2210     unsigned char *b;
2211     {
2212     unsigned this_char;
2213    
2214     const char *p = *p_ptr;
2215     int range_start, range_end;
2216    
2217     if (p == pend)
2218     return REG_ERANGE;
2219    
2220     /* Even though the pattern is a signed `char *', we need to fetch
2221     with unsigned char *'s; if the high bit of the pattern character
2222     is set, the range endpoints will be negative if we fetch using a
2223     signed char *.
2224    
2225     We also want to fetch the endpoints without translating them; the
2226     appropriate translation is done in the bit-setting loop below. */
2227     range_start = ((unsigned char *) p)[-2];
2228     range_end = ((unsigned char *) p)[0];
2229    
2230     /* Have to increment the pointer into the pattern string, so the
2231     caller isn't still at the ending character. */
2232     (*p_ptr)++;
2233    
2234     /* If the start is after the end, the range is empty. */
2235     if (range_start > range_end)
2236     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2237    
2238     /* Here we see why `this_char' has to be larger than an `unsigned
2239     char' -- the range is inclusive, so if `range_end' == 0xff
2240     (assuming 8-bit characters), we would otherwise go into an infinite
2241     loop, since all characters <= 0xff. */
2242     for (this_char = range_start; this_char <= range_end; this_char++)
2243     {
2244     SET_LIST_BIT (TRANSLATE (this_char));
2245     }
2246    
2247     return REG_NOERROR;
2248     }
2249    
2250     /* Failure stack declarations and macros; both re_compile_fastmap and
2251     re_match_2 use a failure stack. These have to be macros because of
2252     REGEX_ALLOCATE. */
2253    
2254    
2255     /* Number of failure points for which to initially allocate space
2256     when matching. If this number is exceeded, we allocate more
2257     space, so it is not a hard limit. */
2258     #ifndef INIT_FAILURE_ALLOC
2259     #define INIT_FAILURE_ALLOC 5
2260     #endif
2261    
2262     /* Roughly the maximum number of failure points on the stack. Would be
2263     exactly that if always used MAX_FAILURE_SPACE each time we failed.
2264     This is a variable only so users of regex can assign to it; we never
2265     change it ourselves. */
2266     int re_max_failures = 2000;
2267    
2268     typedef const unsigned char *fail_stack_elt_t;
2269    
2270     typedef struct
2271     {
2272     fail_stack_elt_t *stack;
2273     unsigned size;
2274     unsigned avail; /* Offset of next open position. */
2275     } fail_stack_type;
2276    
2277     #define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
2278     #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2279     #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
2280     #define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])
2281    
2282    
2283     /* Initialize `fail_stack'. Do `return -2' if the alloc fails. */
2284    
2285     #define INIT_FAIL_STACK() \
2286     do { \
2287     fail_stack.stack = (fail_stack_elt_t *) \
2288     REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
2289     \
2290     if (fail_stack.stack == NULL) \
2291     return -2; \
2292     \
2293     fail_stack.size = INIT_FAILURE_ALLOC; \
2294     fail_stack.avail = 0; \
2295     } while (0)
2296    
2297    
2298     /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2299    
2300     Return 1 if succeeds, and 0 if either ran out of memory
2301     allocating space for it or it was already too large.
2302    
2303     REGEX_REALLOCATE requires `destination' be declared. */
2304    
2305     #define DOUBLE_FAIL_STACK(fail_stack) \
2306     ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
2307     ? 0 \
2308     : ((fail_stack).stack = (fail_stack_elt_t *) \
2309     REGEX_REALLOCATE ((fail_stack).stack, \
2310     (fail_stack).size * sizeof (fail_stack_elt_t), \
2311     ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
2312     \
2313     (fail_stack).stack == NULL \
2314     ? 0 \
2315     : ((fail_stack).size <<= 1, \
2316     1)))
2317    
2318    
2319     /* Push PATTERN_OP on FAIL_STACK.
2320    
2321     Return 1 if was able to do so and 0 if ran out of memory allocating
2322     space to do so. */
2323     #define PUSH_PATTERN_OP(pattern_op, fail_stack) \
2324     ((FAIL_STACK_FULL () \
2325     && !DOUBLE_FAIL_STACK (fail_stack)) \
2326     ? 0 \
2327     : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
2328     1))
2329    
2330     /* This pushes an item onto the failure stack. Must be a four-byte
2331     value. Assumes the variable `fail_stack'. Probably should only
2332     be called from within `PUSH_FAILURE_POINT'. */
2333     #define PUSH_FAILURE_ITEM(item) \
2334     fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2335    
2336     /* The complement operation. Assumes `fail_stack' is nonempty. */
2337     #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2338    
2339     /* Used to omit pushing failure point id's when we're not debugging. */
2340     #ifdef DEBUG
2341     #define DEBUG_PUSH PUSH_FAILURE_ITEM
2342     #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2343     #else
2344     #define DEBUG_PUSH(item)
2345     #define DEBUG_POP(item_addr)
2346     #endif
2347    
2348    
2349     /* Push the information about the state we will need
2350     if we ever fail back to it.
2351    
2352     Requires variables fail_stack, regstart, regend, reg_info, and
2353     num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
2354     declared.
2355    
2356     Does `return FAILURE_CODE' if runs out of memory. */
2357    
2358     #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
2359     do { \
2360     char *destination; \
2361     /* Must be int, so when we don't save any registers, the arithmetic \
2362     of 0 + -1 isn't done as unsigned. */ \
2363     int this_reg; \
2364     \
2365     DEBUG_STATEMENT (failure_id++); \
2366     DEBUG_STATEMENT (nfailure_points_pushed++); \
2367     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
2368     DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
2369     DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
2370     \
2371     DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
2372     DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
2373     \
2374     /* Ensure we have enough space allocated for what we will push. */ \
2375     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
2376     { \
2377     if (!DOUBLE_FAIL_STACK (fail_stack)) \
2378     return failure_code; \
2379     \
2380     DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
2381     (fail_stack).size); \
2382     DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2383     } \
2384     \
2385     /* Push the info, starting with the registers. */ \
2386     DEBUG_PRINT1 ("\n"); \
2387     \
2388     for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
2389     this_reg++) \
2390     { \
2391     DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
2392     DEBUG_STATEMENT (num_regs_pushed++); \
2393     \
2394     DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
2395     PUSH_FAILURE_ITEM (regstart[this_reg]); \
2396     \
2397     DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
2398     PUSH_FAILURE_ITEM (regend[this_reg]); \
2399     \
2400     DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
2401     DEBUG_PRINT2 (" match_null=%d", \
2402     REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
2403     DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
2404     DEBUG_PRINT2 (" matched_something=%d", \
2405     MATCHED_SOMETHING (reg_info[this_reg])); \
2406     DEBUG_PRINT2 (" ever_matched=%d", \
2407     EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
2408     DEBUG_PRINT1 ("\n"); \
2409     PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
2410     } \
2411     \
2412     DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
2413     PUSH_FAILURE_ITEM (lowest_active_reg); \
2414     \
2415     DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
2416     PUSH_FAILURE_ITEM (highest_active_reg); \
2417     \
2418     DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
2419     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
2420     PUSH_FAILURE_ITEM (pattern_place); \
2421     \
2422     DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
2423     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
2424     size2); \
2425     DEBUG_PRINT1 ("'\n"); \
2426     PUSH_FAILURE_ITEM (string_place); \
2427     \
2428     DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
2429     DEBUG_PUSH (failure_id); \
2430     } while (0)
2431    
2432     /* This is the number of items that are pushed and popped on the stack
2433     for each register. */
2434     #define NUM_REG_ITEMS 3
2435    
2436     /* Individual items aside from the registers. */
2437     #ifdef DEBUG
2438     #define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
2439     #else
2440     #define NUM_NONREG_ITEMS 4
2441     #endif
2442    
2443     /* We push at most this many items on the stack. */
2444     #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2445    
2446     /* We actually push this many items. */
2447     #define NUM_FAILURE_ITEMS \
2448     ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
2449     + NUM_NONREG_ITEMS)
2450    
2451     /* How many items can still be added to the stack without overflowing it. */
2452     #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2453    
2454    
2455     /* Pops what PUSH_FAIL_STACK pushes.
2456    
2457     We restore into the parameters, all of which should be lvalues:
2458     STR -- the saved data position.
2459     PAT -- the saved pattern position.
2460     LOW_REG, HIGH_REG -- the highest and lowest active registers.
2461     REGSTART, REGEND -- arrays of string positions.
2462     REG_INFO -- array of information about each subexpression.
2463    
2464     Also assumes the variables `fail_stack' and (if debugging), `bufp',
2465     `pend', `string1', `size1', `string2', and `size2'. */
2466    
2467     #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2468     { \
2469     DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
2470     int this_reg; \
2471     const unsigned char *string_temp; \
2472     \
2473     assert (!FAIL_STACK_EMPTY ()); \
2474     \
2475     /* Remove failure points and point to how many regs pushed. */ \
2476     DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
2477     DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
2478     DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
2479     \
2480     assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
2481     \
2482     DEBUG_POP (&failure_id); \
2483     DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
2484     \
2485     /* If the saved string location is NULL, it came from an \
2486     on_failure_keep_string_jump opcode, and we want to throw away the \
2487     saved NULL, thus retaining our current position in the string. */ \
2488     string_temp = POP_FAILURE_ITEM (); \
2489     if (string_temp != NULL) \
2490     str = (const char *) string_temp; \
2491     \
2492     DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
2493     DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
2494     DEBUG_PRINT1 ("'\n"); \
2495     \
2496     pat = (unsigned char *) POP_FAILURE_ITEM (); \
2497     DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
2498     DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
2499     \
2500     /* Restore register info. */ \
2501     high_reg = (unsigned) POP_FAILURE_ITEM (); \
2502     DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
2503     \
2504     low_reg = (unsigned) POP_FAILURE_ITEM (); \
2505     DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
2506     \
2507     for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
2508     { \
2509     DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
2510     \
2511     reg_info[this_reg].word = POP_FAILURE_ITEM (); \
2512     DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
2513     \
2514     regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
2515     DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
2516     \
2517     regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
2518     DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
2519     } \
2520     \
2521     DEBUG_STATEMENT (nfailure_points_popped++); \
2522     } /* POP_FAILURE_POINT */
2523    
2524     /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2525     BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
2526     characters can start a string that matches the pattern. This fastmap
2527     is used by re_search to skip quickly over impossible starting points.
2528    
2529     The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2530     area as BUFP->fastmap.
2531    
2532     We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2533     the pattern buffer.
2534    
2535     Returns 0 if we succeed, -2 if an internal error. */
2536    
2537     int
2538     re_compile_fastmap (bufp)
2539     struct re_pattern_buffer *bufp;
2540     {
2541     int j, k;
2542     fail_stack_type fail_stack;
2543     #ifndef REGEX_MALLOC
2544     char *destination;
2545     #endif
2546     /* We don't push any register information onto the failure stack. */
2547     unsigned num_regs = 0;
2548    
2549     register char *fastmap = bufp->fastmap;
2550     unsigned char *pattern = bufp->buffer;
2551     unsigned long size = bufp->used;
2552     const unsigned char *p = pattern;
2553     register unsigned char *pend = pattern + size;
2554    
2555     /* Assume that each path through the pattern can be null until
2556     proven otherwise. We set this false at the bottom of switch
2557     statement, to which we get only if a particular path doesn't
2558     match the empty string. */
2559     boolean path_can_be_null = true;
2560    
2561     /* We aren't doing a `succeed_n' to begin with. */
2562     boolean succeed_n_p = false;
2563    
2564     assert (fastmap != NULL && p != NULL);
2565    
2566     INIT_FAIL_STACK ();
2567     bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
2568     bufp->fastmap_accurate = 1; /* It will be when we're done. */
2569     bufp->can_be_null = 0;
2570    
2571     while (p != pend || !FAIL_STACK_EMPTY ())
2572     {
2573     if (p == pend)
2574     {
2575     bufp->can_be_null |= path_can_be_null;
2576    
2577     /* Reset for next path. */
2578     path_can_be_null = true;
2579    
2580     p = fail_stack.stack[--fail_stack.avail];
2581     }
2582    
2583     /* We should never be about to go beyond the end of the pattern. */
2584     assert (p < pend);
2585    
2586     #ifdef SWITCH_ENUM_BUG
2587     switch ((int) ((re_opcode_t) *p++))
2588     #else
2589     switch ((re_opcode_t) *p++)
2590     #endif
2591     {
2592    
2593     /* I guess the idea here is to simply not bother with a fastmap
2594     if a backreference is used, since it's too hard to figure out
2595     the fastmap for the corresponding group. Setting
2596     `can_be_null' stops `re_search_2' from using the fastmap, so
2597     that is all we do. */
2598     case duplicate:
2599     bufp->can_be_null = 1;
2600     return 0;
2601    
2602    
2603     /* Following are the cases which match a character. These end
2604     with `break'. */
2605    
2606     case exactn:
2607     fastmap[p[1]] = 1;
2608     break;
2609    
2610    
2611     case charset:
2612     for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2613     if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2614     fastmap[j] = 1;
2615     break;
2616    
2617    
2618     case charset_not:
2619     /* Chars beyond end of map must be allowed. */
2620     for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2621     fastmap[j] = 1;
2622    
2623     for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2624     if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2625     fastmap[j] = 1;
2626     break;
2627    
2628    
2629     case wordchar:
2630     for (j = 0; j < (1 << BYTEWIDTH); j++)
2631     if (SYNTAX (j) == Sword)
2632     fastmap[j] = 1;
2633     break;
2634    
2635    
2636     case notwordchar:
2637     for (j = 0; j < (1 << BYTEWIDTH); j++)
2638     if (SYNTAX (j) != Sword)
2639     fastmap[j] = 1;
2640     break;
2641    
2642    
2643     case anychar:
2644     /* `.' matches anything ... */
2645     for (j = 0; j < (1 << BYTEWIDTH); j++)
2646     fastmap[j] = 1;
2647    
2648     /* ... except perhaps newline. */
2649     if (!(bufp->syntax & RE_DOT_NEWLINE))
2650     fastmap['\n'] = 0;
2651    
2652     /* Return if we have already set `can_be_null'; if we have,
2653     then the fastmap is irrelevant. Something's wrong here. */
2654     else if (bufp->can_be_null)
2655     return 0;
2656    
2657     /* Otherwise, have to check alternative paths. */
2658     break;
2659    
2660    
2661     #ifdef emacs
2662     case syntaxspec:
2663     k = *p++;
2664     for (j = 0; j < (1 << BYTEWIDTH); j++)
2665     if (SYNTAX (j) == (enum syntaxcode) k)
2666     fastmap[j] = 1;
2667     break;
2668    
2669    
2670     case notsyntaxspec:
2671     k = *p++;
2672     for (j = 0; j < (1 << BYTEWIDTH); j++)
2673     if (SYNTAX (j) != (enum syntaxcode) k)
2674     fastmap[j] = 1;
2675     break;
2676    
2677    
2678     /* All cases after this match the empty string. These end with
2679     `continue'. */
2680    
2681    
2682     case before_dot:
2683     case at_dot:
2684     case after_dot:
2685     continue;
2686     #endif /* not emacs */
2687    
2688    
2689     case no_op:
2690     case begline:
2691     case endline:
2692     case begbuf:
2693     case endbuf:
2694     case wordbound:
2695     case notwordbound:
2696     case wordbeg:
2697     case wordend:
2698     case push_dummy_failure:
2699     continue;
2700    
2701    
2702     case jump_n:
2703     case pop_failure_jump:
2704     case maybe_pop_jump:
2705     case jump:
2706     case jump_past_alt:
2707     case dummy_failure_jump:
2708     EXTRACT_NUMBER_AND_INCR (j, p);
2709     p += j;
2710     if (j > 0)
2711     continue;
2712    
2713     /* Jump backward implies we just went through the body of a
2714     loop and matched nothing. Opcode jumped to should be
2715     `on_failure_jump' or `succeed_n'. Just treat it like an
2716     ordinary jump. For a * loop, it has pushed its failure
2717     point already; if so, discard that as redundant. */
2718     if ((re_opcode_t) *p != on_failure_jump
2719     && (re_opcode_t) *p != succeed_n)
2720     continue;
2721    
2722     p++;
2723     EXTRACT_NUMBER_AND_INCR (j, p);
2724     p += j;
2725    
2726     /* If what's on the stack is where we are now, pop it. */
2727     if (!FAIL_STACK_EMPTY ()
2728     && fail_stack.stack[fail_stack.avail - 1] == p)
2729     fail_stack.avail--;
2730    
2731     continue;
2732    
2733    
2734     case on_failure_jump:
2735     case on_failure_keep_string_jump:
2736     handle_on_failure_jump:
2737     EXTRACT_NUMBER_AND_INCR (j, p);
2738    
2739     /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2740     end of the pattern. We don't want to push such a point,
2741     since when we restore it above, entering the switch will
2742     increment `p' past the end of the pattern. We don't need
2743     to push such a point since we obviously won't find any more
2744     fastmap entries beyond `pend'. Such a pattern can match
2745     the null string, though. */
2746     if (p + j < pend)
2747     {
2748     if (!PUSH_PATTERN_OP (p + j, fail_stack))
2749     return -2;
2750     }
2751     else
2752     bufp->can_be_null = 1;
2753    
2754     if (succeed_n_p)
2755     {
2756     EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
2757     succeed_n_p = false;
2758     }
2759    
2760     continue;
2761    
2762    
2763     case succeed_n:
2764     /* Get to the number of times to succeed. */
2765     p += 2;
2766    
2767     /* Increment p past the n for when k != 0. */
2768     EXTRACT_NUMBER_AND_INCR (k, p);
2769     if (k == 0)
2770     {
2771     p -= 4;
2772     succeed_n_p = true; /* Spaghetti code alert. */
2773     goto handle_on_failure_jump;
2774     }
2775     continue;
2776    
2777    
2778     case set_number_at:
2779     p += 4;
2780     continue;
2781    
2782    
2783     case start_memory:
2784     case stop_memory:
2785     p += 2;
2786     continue;
2787    
2788    
2789     default:
2790     abort (); /* We have listed all the cases. */
2791     } /* switch *p++ */
2792    
2793     /* Getting here means we have found the possible starting
2794     characters for one path of the pattern -- and that the empty
2795     string does not match. We need not follow this path further.
2796     Instead, look at the next alternative (remembered on the
2797     stack), or quit if no more. The test at the top of the loop
2798     does these things. */
2799     path_can_be_null = false;
2800     p = pend;
2801     } /* while p */
2802    
2803     /* Set `can_be_null' for the last path (also the first path, if the
2804     pattern is empty). */
2805     bufp->can_be_null |= path_can_be_null;
2806     return 0;
2807     } /* re_compile_fastmap */
2808    
2809     /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2810     ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
2811     this memory for recording register information. STARTS and ENDS
2812     must be allocated using the malloc library routine, and must each
2813     be at least NUM_REGS * sizeof (regoff_t) bytes long.
2814    
2815     If NUM_REGS == 0, then subsequent matches should allocate their own
2816     register data.
2817    
2818     Unless this function is called, the first search or match using
2819     PATTERN_BUFFER will allocate its own register data, without
2820     freeing the old data. */
2821    
2822     void
2823     re_set_registers (bufp, regs, num_regs, starts, ends)
2824     struct re_pattern_buffer *bufp;
2825     struct re_registers *regs;
2826     unsigned num_regs;
2827     regoff_t *starts, *ends;
2828     {
2829     if (num_regs)
2830     {
2831     bufp->regs_allocated = REGS_REALLOCATE;
2832     regs->num_regs = num_regs;
2833     regs->start = starts;
2834     regs->end = ends;
2835     }
2836     else
2837     {
2838     bufp->regs_allocated = REGS_UNALLOCATED;
2839     regs->num_regs = 0;
2840     regs->start = regs->end = (regoff_t) 0;
2841     }
2842     }
2843    
2844     /* Searching routines. */
2845    
2846     /* Like re_search_2, below, but only one string is specified, and
2847     doesn't let you say where to stop matching. */
2848    
2849     int
2850     re_search (bufp, string, size, startpos, range, regs)
2851     struct re_pattern_buffer *bufp;
2852     const char *string;
2853     int size, startpos, range;
2854     struct re_registers *regs;
2855     {
2856     return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
2857     regs, size);
2858     }
2859    
2860    
2861     /* Using the compiled pattern in BUFP->buffer, first tries to match the
2862     virtual concatenation of STRING1 and STRING2, starting first at index
2863     STARTPOS, then at STARTPOS + 1, and so on.
2864    
2865     STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2866    
2867     RANGE is how far to scan while trying to match. RANGE = 0 means try
2868     only at STARTPOS; in general, the last start tried is STARTPOS +
2869     RANGE.
2870    
2871     In REGS, return the indices of the virtual concatenation of STRING1
2872     and STRING2 that matched the entire BUFP->buffer and its contained
2873     subexpressions.
2874    
2875     Do not consider matching one past the index STOP in the virtual
2876     concatenation of STRING1 and STRING2.
2877    
2878     We return either the position in the strings at which the match was
2879     found, -1 if no match, or -2 if error (such as failure
2880     stack overflow). */
2881    
2882     int
2883     re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2884     struct re_pattern_buffer *bufp;
2885     const char *string1, *string2;
2886     int size1, size2;
2887     int startpos;
2888     int range;
2889     struct re_registers *regs;
2890     int stop;
2891     {
2892     int val;
2893     register char *fastmap = bufp->fastmap;
2894     register char *translate = bufp->translate;
2895     int total_size = size1 + size2;
2896     int endpos = startpos + range;
2897    
2898     /* Check for out-of-range STARTPOS. */
2899     if (startpos < 0 || startpos > total_size)
2900     return -1;
2901    
2902     /* Fix up RANGE if it might eventually take us outside
2903     the virtual concatenation of STRING1 and STRING2. */
2904     if (endpos < -1)
2905     range = -1 - startpos;
2906     else if (endpos > total_size)
2907     range = total_size - startpos;
2908    
2909     /* If the search isn't to be a backwards one, don't waste time in a
2910     search for a pattern that must be anchored. */
2911     if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2912     {
2913     if (startpos > 0)
2914     return -1;
2915     else
2916     range = 1;
2917     }
2918    
2919     /* Update the fastmap now if not correct already. */
2920     if (fastmap && !bufp->fastmap_accurate)
2921     if (re_compile_fastmap (bufp) == -2)
2922     return -2;
2923    
2924     /* Loop through the string, looking for a place to start matching. */
2925     for (;;)
2926     {
2927     /* If a fastmap is supplied, skip quickly over characters that
2928     cannot be the start of a match. If the pattern can match the
2929     null string, however, we don't need to skip characters; we want
2930     the first null string. */
2931     if (fastmap && startpos < total_size && !bufp->can_be_null)
2932     {
2933     if (range > 0) /* Searching forwards. */
2934     {
2935     register const char *d;
2936     register int lim = 0;
2937     int irange = range;
2938    
2939     if (startpos < size1 && startpos + range >= size1)
2940     lim = range - (size1 - startpos);
2941    
2942     d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2943    
2944     /* Written out as an if-else to avoid testing `translate'
2945     inside the loop. */
2946     if (translate)
2947     while (range > lim
2948     && !fastmap[(unsigned char)
2949     translate[(unsigned char) *d++]])
2950     range--;
2951     else
2952     while (range > lim && !fastmap[(unsigned char) *d++])
2953     range--;
2954    
2955     startpos += irange - range;
2956     }
2957     else /* Searching backwards. */
2958     {
2959     register char c = (size1 == 0 || startpos >= size1
2960     ? string2[startpos - size1]
2961     : string1[startpos]);
2962    
2963     if (!fastmap[(unsigned char) TRANSLATE (c)])
2964     goto advance;
2965     }
2966     }
2967    
2968     /* If can't match the null string, and that's all we have left, fail. */
2969     if (range >= 0 && startpos == total_size && fastmap
2970     && !bufp->can_be_null)
2971     return -1;
2972    
2973     val = re_match_2 (bufp, string1, size1, string2, size2,
2974     startpos, regs, stop);
2975     if (val >= 0)
2976     return startpos;
2977    
2978     if (val == -2)
2979     return -2;
2980    
2981     advance:
2982     if (!range)
2983     break;
2984     else if (range > 0)
2985     {
2986     range--;
2987     startpos++;
2988     }
2989     else
2990     {
2991     range++;
2992     startpos--;
2993     }
2994     }
2995     return -1;
2996     } /* re_search_2 */
2997    
2998     /* Declarations and macros for re_match_2. */
2999    
3000     static int bcmp_translate ();
3001     static boolean alt_match_null_string_p (),
3002     common_op_match_null_string_p (),
3003     group_match_null_string_p ();
3004    
3005     /* Structure for per-register (a.k.a. per-group) information.
3006     This must not be longer than one word, because we push this value
3007     onto the failure stack. Other register information, such as the
3008     starting and ending positions (which are addresses), and the list of
3009     inner groups (which is a bits list) are maintained in separate
3010     variables.
3011    
3012     We are making a (strictly speaking) nonportable assumption here: that
3013     the compiler will pack our bit fields into something that fits into
3014     the type of `word', i.e., is something that fits into one item on the
3015     failure stack. */
3016     typedef union
3017     {
3018     fail_stack_elt_t word;
3019     struct
3020     {
3021     /* This field is one if this group can match the empty string,
3022     zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
3023     #define MATCH_NULL_UNSET_VALUE 3
3024     unsigned match_null_string_p : 2;
3025     unsigned is_active : 1;
3026     unsigned matched_something : 1;
3027     unsigned ever_matched_something : 1;
3028     } bits;
3029     } register_info_type;
3030    
3031     #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
3032     #define IS_ACTIVE(R) ((R).bits.is_active)
3033     #define MATCHED_SOMETHING(R) ((R).bits.matched_something)
3034     #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
3035    
3036    
3037     /* Call this when have matched a real character; it sets `matched' flags
3038     for the subexpressions which we are currently inside. Also records
3039     that those subexprs have matched. */
3040     #define SET_REGS_MATCHED() \
3041     do \
3042     { \
3043     unsigned r; \
3044     for (r = lowest_active_reg; r <= highest_active_reg; r++) \
3045     { \
3046     MATCHED_SOMETHING (reg_info[r]) \
3047     = EVER_MATCHED_SOMETHING (reg_info[r]) \
3048     = 1; \
3049     } \
3050     } \
3051     while (0)
3052    
3053    
3054     /* This converts PTR, a pointer into one of the search strings `string1'
3055     and `string2' into an offset from the beginning of that string. */
3056     #define POINTER_TO_OFFSET(ptr) \
3057     (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3058    
3059     /* Registers are set to a sentinel when they haven't yet matched. */
3060     #define REG_UNSET_VALUE ((char *) -1)
3061     #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3062    
3063    
3064     /* Macros for dealing with the split strings in re_match_2. */
3065    
3066     #define MATCHING_IN_FIRST_STRING (dend == end_match_1)
3067    
3068     /* Call before fetching a character with *d. This switches over to
3069     string2 if necessary. */
3070     #define PREFETCH() \
3071     while (d == dend) \
3072     { \
3073     /* End of string2 => fail. */ \
3074     if (dend == end_match_2) \
3075     goto fail; \
3076     /* End of string1 => advance to string2. */ \
3077     d = string2; \
3078     dend = end_match_2; \
3079     }
3080    
3081    
3082     /* Test if at very beginning or at very end of the virtual concatenation
3083     of `string1' and `string2'. If only one string, it's `string2'. */
3084     #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3085     #define AT_STRINGS_END(d) ((d) == end2)
3086    
3087    
3088     /* Test if D points to a character which is word-constituent. We have
3089     two special cases to check for: if past the end of string1, look at
3090     the first character in string2; and if before the beginning of
3091     string2, look at the last character in string1. */
3092     #define WORDCHAR_P(d) \
3093     (SYNTAX ((d) == end1 ? *string2 \
3094     : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
3095     == Sword)
3096    
3097     /* Test if the character before D and the one at D differ with respect
3098     to being word-constituent. */
3099     #define AT_WORD_BOUNDARY(d) \
3100     (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
3101     || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3102    
3103    
3104     /* Free everything we malloc. */
3105     #ifdef REGEX_MALLOC
3106     #define FREE_VAR(var) if (var) free (var); var = NULL
3107     #define FREE_VARIABLES() \
3108     do { \
3109     FREE_VAR (fail_stack.stack); \
3110     FREE_VAR (regstart); \
3111     FREE_VAR (regend); \
3112     FREE_VAR (old_regstart); \
3113     FREE_VAR (old_regend); \
3114     FREE_VAR (best_regstart); \
3115     FREE_VAR (best_regend); \
3116     FREE_VAR (reg_info); \
3117     FREE_VAR (reg_dummy); \
3118     FREE_VAR (reg_info_dummy); \
3119     } while (0)
3120     #else /* not REGEX_MALLOC */
3121     /* Some MIPS systems (at least) want this to free alloca'd storage. */
3122     #define FREE_VARIABLES() alloca (0)
3123     #endif /* not REGEX_MALLOC */
3124    
3125    
3126     /* These values must meet several constraints. They must not be valid
3127     register values; since we have a limit of 255 registers (because
3128     we use only one byte in the pattern for the register number), we can
3129     use numbers larger than 255. They must differ by 1, because of
3130     NUM_FAILURE_ITEMS above. And the value for the lowest register must
3131     be larger than the value for the highest register, so we do not try
3132     to actually save any registers when none are active. */
3133     #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3134     #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3135    
3136     /* Matching routines. */
3137    
3138     #ifndef emacs /* Emacs never uses this. */
3139     /* re_match is like re_match_2 except it takes only a single string. */
3140    
3141     int
3142     re_match (bufp, string, size, pos, regs)
3143     struct re_pattern_buffer *bufp;
3144     const char *string;
3145     int size, pos;
3146     struct re_registers *regs;
3147     {
3148     return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
3149     }
3150     #endif /* not emacs */
3151    
3152    
3153     /* re_match_2 matches the compiled pattern in BUFP against the
3154     the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3155     and SIZE2, respectively). We start matching at POS, and stop
3156     matching at STOP.
3157    
3158     If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3159     store offsets for the substring each group matched in REGS. See the
3160     documentation for exactly how many groups we fill.
3161    
3162     We return -1 if no match, -2 if an internal error (such as the
3163     failure stack overflowing). Otherwise, we return the length of the
3164     matched substring. */
3165    
3166     int
3167     re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3168     struct re_pattern_buffer *bufp;
3169     const char *string1, *string2;
3170     int size1, size2;
3171     int pos;
3172     struct re_registers *regs;
3173     int stop;
3174     {
3175     /* General temporaries. */
3176     int mcnt;
3177     unsigned char *p1;
3178    
3179     /* Just past the end of the corresponding string. */
3180     const char *end1, *end2;
3181    
3182     /* Pointers into string1 and string2, just past the last characters in
3183     each to consider matching. */
3184     const char *end_match_1, *end_match_2;
3185    
3186     /* Where we are in the data, and the end of the current string. */
3187     const char *d, *dend;
3188    
3189     /* Where we are in the pattern, and the end of the pattern. */
3190     unsigned char *p = bufp->buffer;
3191     register unsigned char *pend = p + bufp->used;
3192    
3193     /* We use this to map every character in the string. */
3194     char *translate = bufp->translate;
3195    
3196     /* Failure point stack. Each place that can handle a failure further
3197     down the line pushes a failure point on this stack. It consists of
3198     restart, regend, and reg_info for all registers corresponding to
3199     the subexpressions we're currently inside, plus the number of such
3200     registers, and, finally, two char *'s. The first char * is where
3201     to resume scanning the pattern; the second one is where to resume
3202     scanning the strings. If the latter is zero, the failure point is
3203     a ``dummy''; if a failure happens and the failure point is a dummy,
3204     it gets discarded and the next next one is tried. */
3205     fail_stack_type fail_stack;
3206     #ifdef DEBUG
3207     static unsigned failure_id = 0;
3208     unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3209     #endif
3210    
3211     /* We fill all the registers internally, independent of what we
3212     return, for use in backreferences. The number here includes
3213     an element for register zero. */
3214     unsigned num_regs = bufp->re_nsub + 1;
3215    
3216     /* The currently active registers. */
3217     unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3218     unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3219    
3220     /* Information on the contents of registers. These are pointers into
3221     the input strings; they record just what was matched (on this
3222     attempt) by a subexpression part of the pattern, that is, the
3223     regnum-th regstart pointer points to where in the pattern we began
3224     matching and the regnum-th regend points to right after where we
3225     stopped matching the regnum-th subexpression. (The zeroth register
3226     keeps track of what the whole pattern matches.) */
3227     const char **regstart, **regend;
3228    
3229     /* If a group that's operated upon by a repetition operator fails to
3230     match anything, then the register for its start will need to be
3231     restored because it will have been set to wherever in the string we
3232     are when we last see its open-group operator. Similarly for a
3233     register's end. */
3234     const char **old_regstart, **old_regend;
3235    
3236     /* The is_active field of reg_info helps us keep track of which (possibly
3237     nested) subexpressions we are currently in. The matched_something
3238     field of reg_info[reg_num] helps us tell whether or not we have
3239     matched any of the pattern so far this time through the reg_num-th
3240     subexpression. These two fields get reset each time through any
3241     loop their register is in. */
3242     register_info_type *reg_info;
3243    
3244     /* The following record the register info as found in the above
3245     variables when we find a match better than any we've seen before.
3246     This happens as we backtrack through the failure points, which in
3247     turn happens only if we have not yet matched the entire string. */
3248     unsigned best_regs_set = false;
3249     const char **best_regstart, **best_regend;
3250    
3251     /* Logically, this is `best_regend[0]'. But we don't want to have to
3252     allocate space for that if we're not allocating space for anything
3253     else (see below). Also, we never need info about register 0 for
3254     any of the other register vectors, and it seems rather a kludge to
3255     treat `best_regend' differently than the rest. So we keep track of
3256     the end of the best match so far in a separate variable. We
3257     initialize this to NULL so that when we backtrack the first time
3258     and need to test it, it's not garbage. */
3259     const char *match_end = NULL;
3260    
3261     /* Used when we pop values we don't care about. */
3262     const char **reg_dummy;
3263     register_info_type *reg_info_dummy;
3264    
3265     #ifdef DEBUG
3266     /* Counts the total number of registers pushed. */
3267     unsigned num_regs_pushed = 0;
3268     #endif
3269    
3270     DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3271    
3272     INIT_FAIL_STACK ();
3273    
3274     /* Do not bother to initialize all the register variables if there are
3275     no groups in the pattern, as it takes a fair amount of time. If
3276     there are groups, we include space for register 0 (the whole
3277     pattern), even though we never use it, since it simplifies the
3278     array indexing. We should fix this. */
3279     if (bufp->re_nsub)
3280     {
3281     regstart = REGEX_TALLOC (num_regs, const char *);
3282     regend = REGEX_TALLOC (num_regs, const char *);
3283     old_regstart = REGEX_TALLOC (num_regs, const char *);
3284     old_regend = REGEX_TALLOC (num_regs, const char *);
3285     best_regstart = REGEX_TALLOC (num_regs, const char *);
3286     best_regend = REGEX_TALLOC (num_regs, const char *);
3287     reg_info = REGEX_TALLOC (num_regs, register_info_type);
3288     reg_dummy = REGEX_TALLOC (num_regs, const char *);
3289     reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3290    
3291     if (!(regstart && regend && old_regstart && old_regend && reg_info
3292     && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3293     {
3294     FREE_VARIABLES ();
3295     return -2;
3296     }
3297     }
3298     #ifdef REGEX_MALLOC
3299     else
3300     {
3301     /* We must initialize all our variables to NULL, so that
3302     `FREE_VARIABLES' doesn't try to free them. */
3303     regstart = regend = old_regstart = old_regend = best_regstart
3304     = best_regend = reg_dummy = NULL;
3305     reg_info = reg_info_dummy = (register_info_type *) NULL;
3306     }
3307     #endif /* REGEX_MALLOC */
3308    
3309     /* The starting position is bogus. */
3310     if (pos < 0 || pos > size1 + size2)
3311     {
3312     FREE_VARIABLES ();
3313     return -1;
3314     }
3315    
3316     /* Initialize subexpression text positions to -1 to mark ones that no
3317     start_memory/stop_memory has been seen for. Also initialize the
3318     register information struct. */
3319     for (mcnt = 1; mcnt < num_regs; mcnt++)
3320     {
3321     regstart[mcnt] = regend[mcnt]
3322     = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3323    
3324     REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3325     IS_ACTIVE (reg_info[mcnt]) = 0;
3326     MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3327     EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3328     }
3329    
3330     /* We move `string1' into `string2' if the latter's empty -- but not if
3331     `string1' is null. */
3332     if (size2 == 0 && string1 != NULL)
3333     {
3334     string2 = string1;
3335     size2 = size1;
3336     string1 = 0;
3337     size1 = 0;
3338     }
3339     end1 = string1 + size1;
3340     end2 = string2 + size2;
3341    
3342     /* Compute where to stop matching, within the two strings. */
3343     if (stop <= size1)
3344     {
3345     end_match_1 = string1 + stop;
3346     end_match_2 = string2;
3347     }
3348     else
3349     {
3350     end_match_1 = end1;
3351     end_match_2 = string2 + stop - size1;
3352     }
3353    
3354     /* `p' scans through the pattern as `d' scans through the data.
3355     `dend' is the end of the input string that `d' points within. `d'
3356     is advanced into the following input string whenever necessary, but
3357     this happens before fetching; therefore, at the beginning of the
3358     loop, `d' can be pointing at the end of a string, but it cannot
3359     equal `string2'. */
3360     if (size1 > 0 && pos <= size1)
3361     {
3362     d = string1 + pos;
3363     dend = end_match_1;
3364     }
3365     else
3366     {
3367     d = string2 + pos - size1;
3368     dend = end_match_2;
3369     }
3370    
3371     DEBUG_PRINT1 ("The compiled pattern is: ");
3372     DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3373     DEBUG_PRINT1 ("The string to match is: `");
3374     DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3375     DEBUG_PRINT1 ("'\n");
3376    
3377     /* This loops over pattern commands. It exits by returning from the
3378     function if the match is complete, or it drops through if the match
3379     fails at this starting point in the input data. */
3380     for (;;)
3381     {
3382     DEBUG_PRINT2 ("\n0x%x: ", p);
3383    
3384     if (p == pend)
3385     { /* End of pattern means we might have succeeded. */
3386     DEBUG_PRINT1 ("end of pattern ... ");
3387    
3388     /* If we haven't matched the entire string, and we want the
3389     longest match, try backtracking. */
3390     if (d != end_match_2)
3391     {
3392     DEBUG_PRINT1 ("backtracking.\n");
3393    
3394     if (!FAIL_STACK_EMPTY ())
3395     { /* More failure points to try. */
3396     boolean same_str_p = (FIRST_STRING_P (match_end)
3397     == MATCHING_IN_FIRST_STRING);
3398    
3399     /* If exceeds best match so far, save it. */
3400     if (!best_regs_set
3401     || (same_str_p && d > match_end)
3402     || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3403     {
3404     best_regs_set = true;
3405     match_end = d;
3406    
3407     DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3408    
3409     for (mcnt = 1; mcnt < num_regs; mcnt++)
3410     {
3411     best_regstart[mcnt] = regstart[mcnt];
3412     best_regend[mcnt] = regend[mcnt];
3413     }
3414     }
3415     goto fail;
3416     }
3417    
3418     /* If no failure points, don't restore garbage. */
3419     else if (best_regs_set)
3420     {
3421     restore_best_regs:
3422     /* Restore best match. It may happen that `dend ==
3423     end_match_1' while the restored d is in string2.
3424     For example, the pattern `x.*y.*z' against the
3425     strings `x-' and `y-z-', if the two strings are
3426     not consecutive in memory. */
3427     DEBUG_PRINT1 ("Restoring best registers.\n");
3428    
3429     d = match_end;
3430     dend = ((d >= string1 && d <= end1)
3431     ? end_match_1 : end_match_2);
3432    
3433     for (mcnt = 1; mcnt < num_regs; mcnt++)
3434     {
3435     regstart[mcnt] = best_regstart[mcnt];
3436     regend[mcnt] = best_regend[mcnt];
3437     }
3438     }
3439     } /* d != end_match_2 */
3440    
3441     DEBUG_PRINT1 ("Accepting match.\n");
3442    
3443     /* If caller wants register contents data back, do it. */
3444     if (regs && !bufp->no_sub)
3445     {
3446     /* Have the register data arrays been allocated? */
3447     if (bufp->regs_allocated == REGS_UNALLOCATED)
3448     { /* No. So allocate them with malloc. We need one
3449     extra element beyond `num_regs' for the `-1' marker
3450     GNU code uses. */
3451     regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3452     regs->start = TALLOC (regs->num_regs, regoff_t);
3453     regs->end = TALLOC (regs->num_regs, regoff_t);
3454     if (regs->start == NULL || regs->end == NULL)
3455     return -2;
3456     bufp->regs_allocated = REGS_REALLOCATE;
3457     }
3458     else if (bufp->regs_allocated == REGS_REALLOCATE)
3459     { /* Yes. If we need more elements than were already
3460     allocated, reallocate them. If we need fewer, just
3461     leave it alone. */
3462     if (regs->num_regs < num_regs + 1)
3463     {
3464     regs->num_regs = num_regs + 1;
3465     RETALLOC (regs->start, regs->num_regs, regoff_t);
3466     RETALLOC (regs->end, regs->num_regs, regoff_t);
3467     if (regs->start == NULL || regs->end == NULL)
3468     return -2;
3469     }
3470     }
3471     else
3472     assert (bufp->regs_allocated == REGS_FIXED);
3473    
3474     /* Convert the pointer data in `regstart' and `regend' to
3475     indices. Register zero has to be set differently,
3476     since we haven't kept track of any info for it. */
3477     if (regs->num_regs > 0)
3478     {
3479     regs->start[0] = pos;
3480     regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3481     : d - string2 + size1);
3482     }
3483    
3484     /* Go through the first `min (num_regs, regs->num_regs)'
3485     registers, since that is all we initialized. */
3486     for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3487     {
3488     if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3489     regs->start[mcnt] = regs->end[mcnt] = -1;
3490     else
3491     {
3492     regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3493     regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3494     }
3495     }
3496    
3497     /* If the regs structure we return has more elements than
3498     were in the pattern, set the extra elements to -1. If
3499     we (re)allocated the registers, this is the case,
3500     because we always allocate enough to have at least one
3501     -1 at the end. */
3502     for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3503     regs->start[mcnt] = regs->end[mcnt] = -1;
3504     } /* regs && !bufp->no_sub */
3505    
3506     FREE_VARIABLES ();
3507     DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3508     nfailure_points_pushed, nfailure_points_popped,
3509     nfailure_points_pushed - nfailure_points_popped);
3510     DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3511    
3512     mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3513     ? string1
3514     : string2 - size1);
3515    
3516     DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3517    
3518     return mcnt;
3519     }
3520    
3521     /* Otherwise match next pattern command. */
3522     #ifdef SWITCH_ENUM_BUG
3523     switch ((int) ((re_opcode_t) *p++))
3524     #else
3525     switch ((re_opcode_t) *p++)
3526     #endif
3527     {
3528     /* Ignore these. Used to ignore the n of succeed_n's which
3529     currently have n == 0. */
3530     case no_op:
3531     DEBUG_PRINT1 ("EXECUTING no_op.\n");
3532     break;
3533    
3534    
3535     /* Match the next n pattern characters exactly. The following
3536     byte in the pattern defines n, and the n bytes after that
3537     are the characters to match. */
3538     case exactn:
3539     mcnt = *p++;
3540     DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3541    
3542     /* This is written out as an if-else so we don't waste time
3543     testing `translate' inside the loop. */
3544     if (translate)
3545     {
3546     do
3547     {
3548     PREFETCH ();
3549     if (translate[(unsigned char) *d++] != (char) *p++)
3550     goto fail;
3551     }
3552     while (--mcnt);
3553     }
3554     else
3555     {
3556     do
3557     {
3558     PREFETCH ();
3559     if (*d++ != (char) *p++) goto fail;
3560     }
3561     while (--mcnt);
3562     }
3563     SET_REGS_MATCHED ();
3564     break;
3565    
3566    
3567     /* Match any character except possibly a newline or a null. */
3568     case anychar:
3569     DEBUG_PRINT1 ("EXECUTING anychar.\n");
3570    
3571     PREFETCH ();
3572    
3573     if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3574     || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3575     goto fail;
3576    
3577     SET_REGS_MATCHED ();
3578     DEBUG_PRINT2 (" Matched `%d'.\n", *d);
3579     d++;
3580     break;
3581    
3582    
3583     case charset:
3584     case charset_not:
3585     {
3586     register unsigned char c;
3587     boolean not = (re_opcode_t) *(p - 1) == charset_not;
3588    
3589     DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3590    
3591     PREFETCH ();
3592     c = TRANSLATE (*d); /* The character to match. */
3593    
3594     /* Cast to `unsigned' instead of `unsigned char' in case the
3595     bit list is a full 32 bytes long. */
3596     if (c < (unsigned) (*p * BYTEWIDTH)
3597     && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3598     not = !not;
3599    
3600     p += 1 + *p;
3601    
3602     if (!not) goto fail;
3603    
3604     SET_REGS_MATCHED ();
3605     d++;
3606     break;
3607     }
3608    
3609    
3610     /* The beginning of a group is represented by start_memory.
3611     The arguments are the register number in the next byte, and the
3612     number of groups inner to this one in the next. The text
3613     matched within the group is recorded (in the internal
3614     registers data structure) under the register number. */
3615     case start_memory:
3616     DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3617    
3618     /* Find out if this group can match the empty string. */
3619     p1 = p; /* To send to group_match_null_string_p. */
3620    
3621     if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3622     REG_MATCH_NULL_STRING_P (reg_info[*p])
3623     = group_match_null_string_p (&p1, pend, reg_info);
3624    
3625     /* Save the position in the string where we were the last time
3626     we were at this open-group operator in case the group is
3627     operated upon by a repetition operator, e.g., with `(a*)*b'
3628     against `ab'; then we want to ignore where we are now in
3629     the string in case this attempt to match fails. */
3630     old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3631     ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3632     : regstart[*p];
3633     DEBUG_PRINT2 (" old_regstart: %d\n",
3634     POINTER_TO_OFFSET (old_regstart[*p]));
3635    
3636     regstart[*p] = d;
3637     DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3638    
3639     IS_ACTIVE (reg_info[*p]) = 1;
3640     MATCHED_SOMETHING (reg_info[*p]) = 0;
3641    
3642     /* This is the new highest active register. */
3643     highest_active_reg = *p;
3644    
3645     /* If nothing was active before, this is the new lowest active
3646     register. */
3647     if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3648     lowest_active_reg = *p;
3649    
3650     /* Move past the register number and inner group count. */
3651     p += 2;
3652     break;
3653    
3654    
3655     /* The stop_memory opcode represents the end of a group. Its
3656     arguments are the same as start_memory's: the register
3657     number, and the number of inner groups. */
3658     case stop_memory:
3659     DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3660    
3661     /* We need to save the string position the last time we were at
3662     this close-group operator in case the group is operated
3663     upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3664     against `aba'; then we want to ignore where we are now in
3665     the string in case this attempt to match fails. */
3666     old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3667     ? REG_UNSET (regend[*p]) ? d : regend[*p]
3668     : regend[*p];
3669     DEBUG_PRINT2 (" old_regend: %d\n",
3670     POINTER_TO_OFFSET (old_regend[*p]));
3671    
3672     regend[*p] = d;
3673     DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3674    
3675     /* This register isn't active anymore. */
3676     IS_ACTIVE (reg_info[*p]) = 0;
3677    
3678     /* If this was the only register active, nothing is active
3679     anymore. */
3680     if (lowest_active_reg == highest_active_reg)
3681     {
3682     lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3683     highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3684     }
3685     else
3686     { /* We must scan for the new highest active register, since
3687     it isn't necessarily one less than now: consider
3688     (a(b)c(d(e)f)g). When group 3 ends, after the f), the
3689     new highest active register is 1. */
3690     unsigned char r = *p - 1;
3691     while (r > 0 && !IS_ACTIVE (reg_info[r]))
3692     r--;
3693    
3694     /* If we end up at register zero, that means that we saved
3695     the registers as the result of an `on_failure_jump', not
3696     a `start_memory', and we jumped to past the innermost
3697     `stop_memory'. For example, in ((.)*) we save
3698     registers 1 and 2 as a result of the *, but when we pop
3699     back to the second ), we are at the stop_memory 1.
3700     Thus, nothing is active. */
3701     if (r == 0)
3702     {
3703     lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3704     highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3705     }
3706     else
3707     highest_active_reg = r;
3708     }
3709    
3710     /* If just failed to match something this time around with a
3711     group that's operated on by a repetition operator, try to
3712     force exit from the ``loop'', and restore the register
3713     information for this group that we had before trying this
3714     last match. */
3715     if ((!MATCHED_SOMETHING (reg_info[*p])
3716     || (re_opcode_t) p[-3] == start_memory)
3717     && (p + 2) < pend)
3718     {
3719     boolean is_a_jump_n = false;
3720    
3721     p1 = p + 2;
3722     mcnt = 0;
3723     switch ((re_opcode_t) *p1++)
3724     {
3725     case jump_n:
3726     is_a_jump_n = true;
3727     case pop_failure_jump:
3728     case maybe_pop_jump:
3729     case jump:
3730     case dummy_failure_jump:
3731     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3732     if (is_a_jump_n)
3733     p1 += 2;
3734     break;
3735    
3736     default:
3737     /* do nothing */ ;
3738     }
3739     p1 += mcnt;
3740    
3741     /* If the next operation is a jump backwards in the pattern
3742     to an on_failure_jump right before the start_memory
3743     corresponding to this stop_memory, exit from the loop
3744     by forcing a failure after pushing on the stack the
3745     on_failure_jump's jump in the pattern, and d. */
3746     if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3747     && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3748     {
3749     /* If this group ever matched anything, then restore
3750     what its registers were before trying this last
3751     failed match, e.g., with `(a*)*b' against `ab' for
3752     regstart[1], and, e.g., with `((a*)*(b*)*)*'
3753     against `aba' for regend[3].
3754    
3755     Also restore the registers for inner groups for,
3756     e.g., `((a*)(b*))*' against `aba' (register 3 would
3757     otherwise get trashed). */
3758    
3759     if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3760     {
3761     unsigned r;
3762    
3763     EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3764    
3765     /* Restore this and inner groups' (if any) registers. */
3766     for (r = *p; r < *p + *(p + 1); r++)
3767     {
3768     regstart[r] = old_regstart[r];
3769    
3770     /* xx why this test? */
3771     if ((int) old_regend[r] >= (int) regstart[r])
3772     regend[r] = old_regend[r];
3773     }
3774     }
3775     p1++;
3776     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3777     PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3778    
3779     goto fail;
3780     }
3781     }
3782    
3783     /* Move past the register number and the inner group count. */
3784     p += 2;
3785     break;
3786    
3787    
3788     /* \<digit> has been turned into a `duplicate' command which is
3789     followed by the numeric value of <digit> as the register number. */
3790     case duplicate:
3791     {
3792     register const char *d2, *dend2;
3793     int regno = *p++; /* Get which register to match against. */
3794     DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3795    
3796     /* Can't back reference a group which we've never matched. */
3797     if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3798     goto fail;
3799    
3800     /* Where in input to try to start matching. */
3801     d2 = regstart[regno];
3802    
3803     /* Where to stop matching; if both the place to start and
3804     the place to stop matching are in the same string, then
3805     set to the place to stop, otherwise, for now have to use
3806     the end of the first string. */
3807    
3808     dend2 = ((FIRST_STRING_P (regstart[regno])
3809     == FIRST_STRING_P (regend[regno]))
3810     ? regend[regno] : end_match_1);
3811     for (;;)
3812     {
3813     /* If necessary, advance to next segment in register
3814     contents. */
3815     while (d2 == dend2)
3816     {
3817     if (dend2 == end_match_2) break;
3818     if (dend2 == regend[regno]) break;
3819    
3820     /* End of string1 => advance to string2. */
3821     d2 = string2;
3822     dend2 = regend[regno];
3823     }
3824     /* At end of register contents => success */
3825     if (d2 == dend2) break;
3826    
3827     /* If necessary, advance to next segment in data. */
3828     PREFETCH ();
3829    
3830     /* How many characters left in this segment to match. */
3831     mcnt = dend - d;
3832    
3833     /* Want how many consecutive characters we can match in
3834     one shot, so, if necessary, adjust the count. */
3835     if (mcnt > dend2 - d2)
3836     mcnt = dend2 - d2;
3837    
3838     /* Compare that many; failure if mismatch, else move
3839     past them. */
3840     if (translate
3841     ? bcmp_translate (d, d2, mcnt, translate)
3842     : bcmp (d, d2, mcnt))
3843     goto fail;
3844     d += mcnt, d2 += mcnt;
3845     }
3846     }
3847     break;
3848    
3849    
3850     /* begline matches the empty string at the beginning of the string
3851     (unless `not_bol' is set in `bufp'), and, if
3852     `newline_anchor' is set, after newlines. */
3853     case begline:
3854     DEBUG_PRINT1 ("EXECUTING begline.\n");
3855    
3856     if (AT_STRINGS_BEG (d))
3857     {
3858     if (!bufp->not_bol) break;
3859     }
3860     else if (d[-1] == '\n' && bufp->newline_anchor)
3861     {
3862     break;
3863     }
3864     /* In all other cases, we fail. */
3865     goto fail;
3866    
3867    
3868     /* endline is the dual of begline. */
3869     case endline:
3870     DEBUG_PRINT1 ("EXECUTING endline.\n");
3871    
3872     if (AT_STRINGS_END (d))
3873     {
3874     if (!bufp->not_eol) break;
3875     }
3876    
3877     /* We have to ``prefetch'' the next character. */
3878     else if ((d == end1 ? *string2 : *d) == '\n'
3879     && bufp->newline_anchor)
3880     {
3881     break;
3882     }
3883     goto fail;
3884    
3885    
3886     /* Match at the very beginning of the data. */
3887     case begbuf:
3888     DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3889     if (AT_STRINGS_BEG (d))
3890     break;
3891     goto fail;
3892    
3893    
3894     /* Match at the very end of the data. */
3895     case endbuf:
3896     DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3897     if (AT_STRINGS_END (d))
3898     break;
3899     goto fail;
3900    
3901    
3902     /* on_failure_keep_string_jump is used to optimize `.*\n'. It
3903     pushes NULL as the value for the string on the stack. Then
3904     `pop_failure_point' will keep the current value for the
3905     string, instead of restoring it. To see why, consider
3906     matching `foo\nbar' against `.*\n'. The .* matches the foo;
3907     then the . fails against the \n. But the next thing we want
3908     to do is match the \n against the \n; if we restored the
3909     string value, we would be back at the foo.
3910    
3911     Because this is used only in specific cases, we don't need to
3912     check all the things that `on_failure_jump' does, to make
3913     sure the right things get saved on the stack. Hence we don't
3914     share its code. The only reason to push anything on the
3915     stack at all is that otherwise we would have to change
3916     `anychar's code to do something besides goto fail in this
3917     case; that seems worse than this. */
3918     case on_failure_keep_string_jump:
3919     DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3920    
3921     EXTRACT_NUMBER_AND_INCR (mcnt, p);
3922     DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3923    
3924     PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3925     break;
3926    
3927    
3928     /* Uses of on_failure_jump:
3929    
3930     Each alternative starts with an on_failure_jump that points
3931     to the beginning of the next alternative. Each alternative
3932     except the last ends with a jump that in effect jumps past
3933     the rest of the alternatives. (They really jump to the
3934     ending jump of the following alternative, because tensioning
3935     these jumps is a hassle.)
3936    
3937     Repeats start with an on_failure_jump that points past both
3938     the repetition text and either the following jump or
3939     pop_failure_jump back to this on_failure_jump. */
3940     case on_failure_jump:
3941     on_failure:
3942     DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3943    
3944     EXTRACT_NUMBER_AND_INCR (mcnt, p);
3945     DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3946    
3947     /* If this on_failure_jump comes right before a group (i.e.,
3948     the original * applied to a group), save the information
3949     for that group and all inner ones, so that if we fail back
3950     to this point, the group's information will be correct.
3951     For example, in \(a*\)*\1, we need the preceding group,
3952     and in \(\(a*\)b*\)\2, we need the inner group. */
3953    
3954     /* We can't use `p' to check ahead because we push
3955     a failure point to `p + mcnt' after we do this. */
3956     p1 = p;
3957    
3958     /* We need to skip no_op's before we look for the
3959     start_memory in case this on_failure_jump is happening as
3960     the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3961     against aba. */
3962     while (p1 < pend && (re_opcode_t) *p1 == no_op)
3963     p1++;
3964    
3965     if (p1 < pend && (re_opcode_t) *p1 == start_memory)
3966     {
3967     /* We have a new highest active register now. This will
3968     get reset at the start_memory we are about to get to,
3969     but we will have saved all the registers relevant to
3970     this repetition op, as described above. */
3971     highest_active_reg = *(p1 + 1) + *(p1 + 2);
3972     if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3973     lowest_active_reg = *(p1 + 1);
3974     }
3975    
3976     DEBUG_PRINT1 (":\n");
3977     PUSH_FAILURE_POINT (p + mcnt, d, -2);
3978     break;
3979    
3980    
3981     /* A smart repeat ends with `maybe_pop_jump'.
3982     We change it to either `pop_failure_jump' or `jump'. */
3983     case maybe_pop_jump:
3984     EXTRACT_NUMBER_AND_INCR (mcnt, p);
3985     DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
3986     {
3987     register unsigned char *p2 = p;
3988    
3989     /* Compare the beginning of the repeat with what in the
3990     pattern follows its end. If we can establish that there
3991     is nothing that they would both match, i.e., that we
3992     would have to backtrack because of (as in, e.g., `a*a')
3993     then we can change to pop_failure_jump, because we'll
3994     never have to backtrack.
3995    
3996     This is not true in the case of alternatives: in
3997     `(a|ab)*' we do need to backtrack to the `ab' alternative
3998     (e.g., if the string was `ab'). But instead of trying to
3999     detect that here, the alternative has put on a dummy
4000     failure point which is what we will end up popping. */
4001    
4002     /* Skip over open/close-group commands. */
4003     while (p2 + 2 < pend
4004     && ((re_opcode_t) *p2 == stop_memory
4005     || (re_opcode_t) *p2 == start_memory))
4006     p2 += 3; /* Skip over args, too. */
4007    
4008     /* If we're at the end of the pattern, we can change. */
4009     if (p2 == pend)
4010     {
4011     /* Consider what happens when matching ":\(.*\)"
4012     against ":/". I don't really understand this code
4013     yet. */
4014     p[-3] = (unsigned char) pop_failure_jump;
4015     DEBUG_PRINT1
4016     (" End of pattern: change to `pop_failure_jump'.\n");
4017     }
4018    
4019     else if ((re_opcode_t) *p2 == exactn
4020     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4021     {
4022     register unsigned char c
4023     = *p2 == (unsigned char) endline ? '\n' : p2[2];
4024     p1 = p + mcnt;
4025    
4026     /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4027     to the `maybe_finalize_jump' of this case. Examine what
4028     follows. */
4029     if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4030     {
4031     p[-3] = (unsigned char) pop_failure_jump;
4032     DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
4033     c, p1[5]);
4034     }
4035    
4036     else if ((re_opcode_t) p1[3] == charset
4037     || (re_opcode_t) p1[3] == charset_not)
4038     {
4039     int not = (re_opcode_t) p1[3] == charset_not;
4040    
4041     if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4042     && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4043     not = !not;
4044    
4045     /* `not' is equal to 1 if c would match, which means
4046     that we can't change to pop_failure_jump. */
4047     if (!not)
4048     {
4049     p[-3] = (unsigned char) pop_failure_jump;
4050     DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
4051     }
4052     }
4053     }
4054     }
4055     p -= 2; /* Point at relative address again. */
4056     if ((re_opcode_t) p[-1] != pop_failure_jump)
4057     {
4058     p[-1] = (unsigned char) jump;
4059     DEBUG_PRINT1 (" Match => jump.\n");
4060     goto unconditional_jump;
4061     }
4062     /* Note fall through. */
4063    
4064    
4065     /* The end of a simple repeat has a pop_failure_jump back to
4066     its matching on_failure_jump, where the latter will push a
4067     failure point. The pop_failure_jump takes off failure
4068     points put on by this pop_failure_jump's matching
4069     on_failure_jump; we got through the pattern to here from the
4070     matching on_failure_jump, so didn't fail. */
4071     case pop_failure_jump:
4072     {
4073     /* We need to pass separate storage for the lowest and
4074     highest registers, even though we don't care about the
4075     actual values. Otherwise, we will restore only one
4076     register from the stack, since lowest will == highest in
4077     `pop_failure_point'. */
4078     unsigned dummy_low_reg, dummy_high_reg;
4079     unsigned char *pdummy;
4080     const char *sdummy;
4081    
4082     DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4083     POP_FAILURE_POINT (sdummy, pdummy,
4084     dummy_low_reg, dummy_high_reg,
4085     reg_dummy, reg_dummy, reg_info_dummy);
4086     }
4087     /* Note fall through. */
4088    
4089    
4090     /* Unconditionally jump (without popping any failure points). */
4091     case jump:
4092     unconditional_jump:
4093     EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
4094     DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4095     p += mcnt; /* Do the jump. */
4096     DEBUG_PRINT2 ("(to 0x%x).\n", p);
4097     break;
4098    
4099    
4100     /* We need this opcode so we can detect where alternatives end
4101     in `group_match_null_string_p' et al. */
4102     case jump_past_alt:
4103     DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4104     goto unconditional_jump;
4105    
4106    
4107     /* Normally, the on_failure_jump pushes a failure point, which
4108     then gets popped at pop_failure_jump. We will end up at
4109     pop_failure_jump, also, and with a pattern of, say, `a+', we
4110     are skipping over the on_failure_jump, so we have to push
4111     something meaningless for pop_failure_jump to pop. */
4112     case dummy_failure_jump:
4113     DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4114     /* It doesn't matter what we push for the string here. What
4115     the code at `fail' tests is the value for the pattern. */
4116     PUSH_FAILURE_POINT (0, 0, -2);
4117     goto unconditional_jump;
4118    
4119    
4120     /* At the end of an alternative, we need to push a dummy failure
4121     point in case we are followed by a `pop_failure_jump', because
4122     we don't want the failure point for the alternative to be
4123     popped. For example, matching `(a|ab)*' against `aab'
4124     requires that we match the `ab' alternative. */
4125     case push_dummy_failure:
4126     DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4127     /* See comments just above at `dummy_failure_jump' about the
4128     two zeroes. */
4129     PUSH_FAILURE_POINT (0, 0, -2);
4130     break;
4131    
4132     /* Have to succeed matching what follows at least n times.
4133     After that, handle like `on_failure_jump'. */
4134     case succeed_n:
4135     EXTRACT_NUMBER (mcnt, p + 2);
4136     DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4137    
4138     assert (mcnt >= 0);
4139     /* Originally, this is how many times we HAVE to succeed. */
4140     if (mcnt > 0)
4141     {
4142     mcnt--;
4143     p += 2;
4144     STORE_NUMBER_AND_INCR (p, mcnt);
4145     DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
4146     }
4147     else if (mcnt == 0)
4148     {
4149     DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
4150     p[2] = (unsigned char) no_op;
4151     p[3] = (unsigned char) no_op;
4152     goto on_failure;
4153     }
4154     break;
4155    
4156     case jump_n:
4157     EXTRACT_NUMBER (mcnt, p + 2);
4158     DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4159    
4160     /* Originally, this is how many times we CAN jump. */
4161     if (mcnt)
4162     {
4163     mcnt--;
4164     STORE_NUMBER (p + 2, mcnt);
4165     goto unconditional_jump;
4166     }
4167     /* If don't have to jump any more, skip over the rest of command. */
4168     else
4169     p += 4;
4170     break;
4171    
4172     case set_number_at:
4173     {
4174     DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4175    
4176     EXTRACT_NUMBER_AND_INCR (mcnt, p);
4177     p1 = p + mcnt;
4178     EXTRACT_NUMBER_AND_INCR (mcnt, p);
4179     DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
4180     STORE_NUMBER (p1, mcnt);
4181     break;
4182     }
4183    
4184     case wordbound:
4185     DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4186     if (AT_WORD_BOUNDARY (d))
4187     break;
4188     goto fail;
4189    
4190     case notwordbound:
4191     DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4192     if (AT_WORD_BOUNDARY (d))
4193     goto fail;
4194     break;
4195    
4196     case wordbeg:
4197     DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4198     if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4199     break;
4200     goto fail;
4201    
4202     case wordend:
4203     DEBUG_PRINT1 ("EXECUTING wordend.\n");
4204     if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4205     && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4206     break;
4207     goto fail;
4208    
4209     #ifdef emacs
4210     #ifdef emacs19
4211     case before_dot:
4212     DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4213     if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4214     goto fail;
4215     break;
4216    
4217     case at_dot:
4218     DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4219     if (PTR_CHAR_POS ((unsigned char *) d) != point)
4220     goto fail;
4221     break;
4222    
4223     case after_dot:
4224     DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4225     if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4226     goto fail;
4227     break;
4228     #else /* not emacs19 */
4229     case at_dot:
4230     DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4231     if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4232     goto fail;
4233     break;
4234     #endif /* not emacs19 */
4235    
4236     case syntaxspec:
4237     DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4238     mcnt = *p++;
4239     goto matchsyntax;
4240    
4241     case wordchar:
4242     DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4243     mcnt = (int) Sword;
4244     matchsyntax:
4245     PREFETCH ();
4246     if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4247     goto fail;
4248     SET_REGS_MATCHED ();
4249     break;
4250    
4251     case notsyntaxspec:
4252     DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4253     mcnt = *p++;
4254     goto matchnotsyntax;
4255    
4256     case notwordchar:
4257     DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4258     mcnt = (int) Sword;
4259     matchnotsyntax:
4260     PREFETCH ();
4261     if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4262     goto fail;
4263     SET_REGS_MATCHED ();
4264     break;
4265    
4266     #else /* not emacs */
4267     case wordchar:
4268     DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4269     PREFETCH ();
4270     if (!WORDCHAR_P (d))
4271     goto fail;
4272     SET_REGS_MATCHED ();
4273     d++;
4274     break;
4275    
4276     case notwordchar:
4277     DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4278     PREFETCH ();
4279     if (WORDCHAR_P (d))
4280     goto fail;
4281     SET_REGS_MATCHED ();
4282     d++;
4283     break;
4284     #endif /* not emacs */
4285    
4286     default:
4287     abort ();
4288     }
4289     continue; /* Successfully executed one pattern command; keep going. */
4290    
4291    
4292     /* We goto here if a matching operation fails. */
4293     fail:
4294     if (!FAIL_STACK_EMPTY ())
4295     { /* A restart point is known. Restore to that state. */
4296     DEBUG_PRINT1 ("\nFAIL:\n");
4297     POP_FAILURE_POINT (d, p,
4298     lowest_active_reg, highest_active_reg,
4299     regstart, regend, reg_info);
4300    
4301     /* If this failure point is a dummy, try the next one. */
4302     if (!p)
4303     goto fail;
4304    
4305     /* If we failed to the end of the pattern, don't examine *p. */
4306     assert (p <= pend);
4307     if (p < pend)
4308     {
4309     boolean is_a_jump_n = false;
4310    
4311     /* If failed to a backwards jump that's part of a repetition
4312     loop, need to pop this failure point and use the next one. */
4313     switch ((re_opcode_t) *p)
4314     {
4315     case jump_n:
4316     is_a_jump_n = true;
4317     case maybe_pop_jump:
4318     case pop_failure_jump:
4319     case jump:
4320     p1 = p + 1;
4321     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4322     p1 += mcnt;
4323    
4324     if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4325     || (!is_a_jump_n
4326     && (re_opcode_t) *p1 == on_failure_jump))
4327     goto fail;
4328     break;
4329     default:
4330     /* do nothing */ ;
4331     }
4332     }
4333    
4334     if (d >= string1 && d <= end1)
4335     dend = end_match_1;
4336     }
4337     else
4338     break; /* Matching at this starting point really fails. */
4339     } /* for (;;) */
4340    
4341     if (best_regs_set)
4342     goto restore_best_regs;
4343    
4344     FREE_VARIABLES ();
4345    
4346     return -1; /* Failure to match. */
4347     } /* re_match_2 */
4348    
4349     /* Subroutine definitions for re_match_2. */
4350    
4351    
4352     /* We are passed P pointing to a register number after a start_memory.
4353    
4354     Return true if the pattern up to the corresponding stop_memory can
4355     match the empty string, and false otherwise.
4356    
4357     If we find the matching stop_memory, sets P to point to one past its number.
4358     Otherwise, sets P to an undefined byte less than or equal to END.
4359    
4360     We don't handle duplicates properly (yet). */
4361    
4362     static boolean
4363     group_match_null_string_p (p, end, reg_info)
4364     unsigned char **p, *end;
4365     register_info_type *reg_info;
4366     {
4367     int mcnt;
4368     /* Point to after the args to the start_memory. */
4369     unsigned char *p1 = *p + 2;
4370    
4371     while (p1 < end)
4372     {
4373     /* Skip over opcodes that can match nothing, and return true or
4374     false, as appropriate, when we get to one that can't, or to the
4375     matching stop_memory. */
4376    
4377     switch ((re_opcode_t) *p1)
4378     {
4379     /* Could be either a loop or a series of alternatives. */
4380     case on_failure_jump:
4381     p1++;
4382     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4383    
4384     /* If the next operation is not a jump backwards in the
4385     pattern. */
4386    
4387     if (mcnt >= 0)
4388     {
4389     /* Go through the on_failure_jumps of the alternatives,
4390     seeing if any of the alternatives cannot match nothing.
4391     The last alternative starts with only a jump,
4392     whereas the rest start with on_failure_jump and end
4393     with a jump, e.g., here is the pattern for `a|b|c':
4394    
4395     /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4396     /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4397     /exactn/1/c
4398    
4399     So, we have to first go through the first (n-1)
4400     alternatives and then deal with the last one separately. */
4401    
4402    
4403     /* Deal with the first (n-1) alternatives, which start
4404     with an on_failure_jump (see above) that jumps to right
4405     past a jump_past_alt. */
4406    
4407     while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4408     {
4409     /* `mcnt' holds how many bytes long the alternative
4410     is, including the ending `jump_past_alt' and
4411     its number. */
4412    
4413     if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4414     reg_info))
4415     return false;
4416    
4417     /* Move to right after this alternative, including the
4418     jump_past_alt. */
4419     p1 += mcnt;
4420    
4421     /* Break if it's the beginning of an n-th alternative
4422     that doesn't begin with an on_failure_jump. */
4423     if ((re_opcode_t) *p1 != on_failure_jump)
4424     break;
4425    
4426     /* Still have to check that it's not an n-th
4427     alternative that starts with an on_failure_jump. */
4428     p1++;
4429     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4430     if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4431     {
4432     /* Get to the beginning of the n-th alternative. */
4433     p1 -= 3;
4434     break;
4435     }
4436     }
4437    
4438     /* Deal with the last alternative: go back and get number
4439     of the `jump_past_alt' just before it. `mcnt' contains
4440     the length of the alternative. */
4441     EXTRACT_NUMBER (mcnt, p1 - 2);
4442    
4443     if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4444     return false;
4445    
4446     p1 += mcnt; /* Get past the n-th alternative. */
4447     } /* if mcnt > 0 */
4448     break;
4449    
4450    
4451     case stop_memory:
4452     assert (p1[1] == **p);
4453     *p = p1 + 2;
4454     return true;
4455    
4456    
4457     default:
4458     if (!common_op_match_null_string_p (&p1, end, reg_info))
4459     return false;
4460     }
4461     } /* while p1 < end */
4462    
4463     return false;
4464     } /* group_match_null_string_p */
4465    
4466    
4467     /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4468     It expects P to be the first byte of a single alternative and END one
4469     byte past the last. The alternative can contain groups. */
4470    
4471     static boolean
4472     alt_match_null_string_p (p, end, reg_info)
4473     unsigned char *p, *end;
4474     register_info_type *reg_info;
4475     {
4476     int mcnt;
4477     unsigned char *p1 = p;
4478    
4479     while (p1 < end)
4480     {
4481     /* Skip over opcodes that can match nothing, and break when we get
4482     to one that can't. */
4483    
4484     switch ((re_opcode_t) *p1)
4485     {
4486     /* It's a loop. */
4487     case on_failure_jump:
4488     p1++;
4489     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4490     p1 += mcnt;
4491     break;
4492    
4493     default:
4494     if (!common_op_match_null_string_p (&p1, end, reg_info))
4495     return false;
4496     }
4497     } /* while p1 < end */
4498    
4499     return true;
4500     } /* alt_match_null_string_p */
4501    
4502    
4503     /* Deals with the ops common to group_match_null_string_p and
4504     alt_match_null_string_p.
4505    
4506     Sets P to one after the op and its arguments, if any. */
4507    
4508     static boolean
4509     common_op_match_null_string_p (p, end, reg_info)
4510     unsigned char **p, *end;
4511     register_info_type *reg_info;
4512     {
4513     int mcnt;
4514     boolean ret;
4515     int reg_no;
4516     unsigned char *p1 = *p;
4517    
4518     switch ((re_opcode_t) *p1++)
4519     {
4520     case no_op:
4521     case begline:
4522     case endline:
4523     case begbuf:
4524     case endbuf:
4525     case wordbeg:
4526     case wordend:
4527     case wordbound:
4528     case notwordbound:
4529     #ifdef emacs
4530     case before_dot:
4531     case at_dot:
4532     case after_dot:
4533     #endif
4534     break;
4535    
4536     case start_memory:
4537     reg_no = *p1;
4538     assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4539     ret = group_match_null_string_p (&p1, end, reg_info);
4540    
4541     /* Have to set this here in case we're checking a group which
4542     contains a group and a back reference to it. */
4543    
4544     if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4545     REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4546    
4547     if (!ret)
4548     return false;
4549     break;
4550    
4551     /* If this is an optimized succeed_n for zero times, make the jump. */
4552     case jump:
4553     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4554     if (mcnt >= 0)
4555     p1 += mcnt;
4556     else
4557     return false;
4558     break;
4559    
4560     case succeed_n:
4561     /* Get to the number of times to succeed. */
4562     p1 += 2;
4563     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4564    
4565     if (mcnt == 0)
4566     {
4567     p1 -= 4;
4568     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4569     p1 += mcnt;
4570     }
4571     else
4572     return false;
4573     break;
4574    
4575     case duplicate:
4576     if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4577     return false;
4578     break;
4579    
4580     case set_number_at:
4581     p1 += 4;
4582    
4583     default:
4584     /* All other opcodes mean we cannot match the empty string. */
4585     return false;
4586     }
4587    
4588     *p = p1;
4589     return true;
4590     } /* common_op_match_null_string_p */
4591    
4592    
4593     /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4594     bytes; nonzero otherwise. */
4595    
4596     static int
4597     bcmp_translate (s1, s2, len, translate)
4598     unsigned char *s1, *s2;
4599     register int len;
4600     char *translate;
4601     {
4602     register unsigned char *p1 = s1, *p2 = s2;
4603     while (len)
4604     {
4605     if (translate[*p1++] != translate[*p2++]) return 1;
4606     len--;
4607     }
4608     return 0;
4609     }
4610    
4611     /* Entry points for GNU code. */
4612    
4613     /* re_compile_pattern is the GNU regular expression compiler: it
4614     compiles PATTERN (of length SIZE) and puts the result in BUFP.
4615     Returns 0 if the pattern was valid, otherwise an error string.
4616    
4617     Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4618     are set in BUFP on entry.
4619    
4620     We call regex_compile to do the actual compilation. */
4621    
4622     const char *
4623     re_compile_pattern (pattern, length, bufp)
4624     const char *pattern;
4625     int length;
4626     struct re_pattern_buffer *bufp;
4627     {
4628     reg_errcode_t ret;
4629    
4630     /* GNU code is written to assume at least RE_NREGS registers will be set
4631     (and at least one extra will be -1). */
4632     bufp->regs_allocated = REGS_UNALLOCATED;
4633    
4634     /* And GNU code determines whether or not to get register information
4635     by passing null for the REGS argument to re_match, etc., not by
4636     setting no_sub. */
4637     bufp->no_sub = 0;
4638    
4639     /* Match anchors at newline. */
4640     bufp->newline_anchor = 1;
4641    
4642     ret = regex_compile (pattern, length, re_syntax_options, bufp);
4643    
4644     return re_error_msg[(int) ret];
4645     }
4646    
4647     /* Entry points compatible with 4.2 BSD regex library. We don't define
4648     them if this is an Emacs or POSIX compilation. */
4649    
4650     #if !defined (emacs) && !defined (_POSIX_SOURCE)
4651    
4652     /* BSD has one and only one pattern buffer. */
4653     static struct re_pattern_buffer re_comp_buf;
4654    
4655     char *
4656     re_comp (s)
4657     const char *s;
4658     {
4659     reg_errcode_t ret;
4660    
4661     if (!s)
4662     {
4663     if (!re_comp_buf.buffer)
4664     return "No previous regular expression";
4665     return 0;
4666     }
4667    
4668     if (!re_comp_buf.buffer)
4669     {
4670     re_comp_buf.buffer = (unsigned char *) malloc (200);
4671     if (re_comp_buf.buffer == NULL)
4672     return "Memory exhausted";
4673     re_comp_buf.allocated = 200;
4674    
4675     re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4676     if (re_comp_buf.fastmap == NULL)
4677     return "Memory exhausted";
4678     }
4679    
4680     /* Since `re_exec' always passes NULL for the `regs' argument, we
4681     don't need to initialize the pattern buffer fields which affect it. */
4682    
4683     /* Match anchors at newlines. */
4684     re_comp_buf.newline_anchor = 1;
4685    
4686     ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4687    
4688     /* Yes, we're discarding `const' here. */
4689     return (char *) re_error_msg[(int) ret];
4690     }
4691    
4692    
4693     int
4694     re_exec (s)
4695     const char *s;
4696     {
4697     const int len = strlen (s);
4698     return
4699     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4700     }
4701     #endif /* not emacs and not _POSIX_SOURCE */
4702    
4703     /* POSIX.2 functions. Don't define these for Emacs. */
4704    
4705     #ifndef emacs
4706    
4707     /* regcomp takes a regular expression as a string and compiles it.
4708    
4709     PREG is a regex_t *. We do not expect any fields to be initialized,
4710     since POSIX says we shouldn't. Thus, we set
4711    
4712     `buffer' to the compiled pattern;
4713     `used' to the length of the compiled pattern;
4714     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4715     REG_EXTENDED bit in CFLAGS is set; otherwise, to
4716     RE_SYNTAX_POSIX_BASIC;
4717     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4718     `fastmap' and `fastmap_accurate' to zero;
4719     `re_nsub' to the number of subexpressions in PATTERN.
4720    
4721     PATTERN is the address of the pattern string.
4722    
4723     CFLAGS is a series of bits which affect compilation.
4724    
4725     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4726     use POSIX basic syntax.
4727    
4728     If REG_NEWLINE is set, then . and [^...] don't match newline.
4729     Also, regexec will try a match beginning after every newline.
4730    
4731     If REG_ICASE is set, then we considers upper- and lowercase
4732     versions of letters to be equivalent when matching.
4733    
4734     If REG_NOSUB is set, then when PREG is passed to regexec, that
4735     routine will report only success or failure, and nothing about the
4736     registers.
4737    
4738     It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
4739     the return codes and their meanings.) */
4740    
4741     int
4742     regcomp (preg, pattern, cflags)
4743     regex_t *preg;
4744     const char *pattern;
4745     int cflags;
4746     {
4747     reg_errcode_t ret;
4748     unsigned syntax
4749     = (cflags & REG_EXTENDED) ?
4750     RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4751    
4752     /* regex_compile will allocate the space for the compiled pattern. */
4753     preg->buffer = 0;
4754     preg->allocated = 0;
4755    
4756     /* Don't bother to use a fastmap when searching. This simplifies the
4757     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4758     characters after newlines into the fastmap. This way, we just try
4759     every character. */
4760     preg->fastmap = 0;
4761    
4762     if (cflags & REG_ICASE)
4763     {
4764     unsigned i;
4765    
4766     preg->translate = (char *) malloc (CHAR_SET_SIZE);
4767     if (preg->translate == NULL)
4768     return (int) REG_ESPACE;
4769    
4770     /* Map uppercase characters to corresponding lowercase ones. */
4771     for (i = 0; i < CHAR_SET_SIZE; i++)
4772     preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4773     }
4774     else
4775     preg->translate = NULL;
4776    
4777     /* If REG_NEWLINE is set, newlines are treated differently. */
4778     if (cflags & REG_NEWLINE)
4779     { /* REG_NEWLINE implies neither . nor [^...] match newline. */
4780     syntax &= ~RE_DOT_NEWLINE;
4781     syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4782     /* It also changes the matching behavior. */
4783     preg->newline_anchor = 1;
4784     }
4785     else
4786     preg->newline_anchor = 0;
4787    
4788     preg->no_sub = !!(cflags & REG_NOSUB);
4789    
4790     /* POSIX says a null character in the pattern terminates it, so we
4791     can use strlen here in compiling the pattern. */
4792     ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4793    
4794     /* POSIX doesn't distinguish between an unmatched open-group and an
4795     unmatched close-group: both are REG_EPAREN. */
4796     if (ret == REG_ERPAREN) ret = REG_EPAREN;
4797    
4798     return (int) ret;
4799     }
4800    
4801    
4802     /* regexec searches for a given pattern, specified by PREG, in the
4803     string STRING.
4804    
4805     If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4806     `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
4807     least NMATCH elements, and we set them to the offsets of the
4808     corresponding matched substrings.
4809    
4810     EFLAGS specifies `execution flags' which affect matching: if
4811     REG_NOTBOL is set, then ^ does not match at the beginning of the
4812     string; if REG_NOTEOL is set, then $ does not match at the end.
4813    
4814     We return 0 if we find a match and REG_NOMATCH if not. */
4815    
4816     int
4817     regexec (preg, string, nmatch, pmatch, eflags)
4818     const regex_t *preg;
4819     const char *string;
4820     size_t nmatch;
4821     regmatch_t pmatch[];
4822     int eflags;
4823     {
4824     int ret;
4825     struct re_registers regs;
4826     regex_t private_preg;
4827     int len = strlen (string);
4828     boolean want_reg_info = !preg->no_sub && nmatch > 0;
4829    
4830     private_preg = *preg;
4831    
4832     private_preg.not_bol = !!(eflags & REG_NOTBOL);
4833     private_preg.not_eol = !!(eflags & REG_NOTEOL);
4834    
4835     /* The user has told us exactly how many registers to return
4836     information about, via `nmatch'. We have to pass that on to the
4837     matching routines. */
4838     private_preg.regs_allocated = REGS_FIXED;
4839    
4840     if (want_reg_info)
4841     {
4842     regs.num_regs = nmatch;
4843     regs.start = TALLOC (nmatch, regoff_t);
4844     regs.end = TALLOC (nmatch, regoff_t);
4845     if (regs.start == NULL || regs.end == NULL)
4846     return (int) REG_NOMATCH;
4847     }
4848    
4849     /* Perform the searching operation. */
4850     ret = re_search (&private_preg, string, len,
4851     /* start: */ 0, /* range: */ len,
4852     want_reg_info ? &regs : (struct re_registers *) 0);
4853    
4854     /* Copy the register information to the POSIX structure. */
4855     if (want_reg_info)
4856     {
4857     if (ret >= 0)
4858     {
4859     unsigned r;
4860    
4861     for (r = 0; r < nmatch; r++)
4862     {
4863     pmatch[r].rm_so = regs.start[r];
4864     pmatch[r].rm_eo = regs.end[r];
4865     }
4866     }
4867    
4868     /* If we needed the temporary register info, free the space now. */
4869     free (regs.start);
4870     free (regs.end);
4871     }
4872    
4873     /* We want zero return to mean success, unlike `re_search'. */
4874     return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4875     }
4876    
4877    
4878     /* Returns a message corresponding to an error code, ERRCODE, returned
4879     from either regcomp or regexec. We don't use PREG here. */
4880    
4881     size_t
4882     regerror (errcode, preg, errbuf, errbuf_size)
4883     int errcode;
4884     const regex_t *preg;
4885     char *errbuf;
4886     size_t errbuf_size;
4887     {
4888     const char *msg;
4889     size_t msg_size;
4890    
4891     if (errcode < 0
4892     || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4893     /* Only error codes returned by the rest of the code should be passed
4894     to this routine. If we are given anything else, or if other regex
4895     code generates an invalid error code, then the program has a bug.
4896     Dump core so we can fix it. */
4897     abort ();
4898    
4899     msg = re_error_msg[errcode];
4900    
4901     /* POSIX doesn't require that we do anything in this case, but why
4902     not be nice. */
4903     if (! msg)
4904     msg = "Success";
4905    
4906     msg_size = strlen (msg) + 1; /* Includes the null. */
4907    
4908     if (errbuf_size != 0)
4909     {
4910     if (msg_size > errbuf_size)
4911     {
4912     strncpy (errbuf, msg, errbuf_size - 1);
4913     errbuf[errbuf_size - 1] = 0;
4914     }
4915     else
4916     strcpy (errbuf, msg);
4917     }
4918    
4919     return msg_size;
4920     }
4921    
4922    
4923     /* Free dynamically allocated space used by PREG. */
4924    
4925     void
4926     regfree (preg)
4927     regex_t *preg;
4928     {
4929     if (preg->buffer != NULL)
4930     free (preg->buffer);
4931     preg->buffer = NULL;
4932    
4933     preg->allocated = 0;
4934     preg->used = 0;
4935    
4936     if (preg->fastmap != NULL)
4937     free (preg->fastmap);
4938     preg->fastmap = NULL;
4939     preg->fastmap_accurate = 0;
4940    
4941     if (preg->translate != NULL)
4942     free (preg->translate);
4943     preg->translate = NULL;
4944     }
4945    
4946     #endif /* not emacs */
4947    
4948     /*
4949     Local variables:
4950     make-backup-files: t
4951     version-control: t
4952     trim-versions-without-asking: nil
4953     End:
4954     */

  ViewVC Help
Powered by ViewVC 1.1.22