regex.cxx
上传用户:hzhsqp
上传日期:2007-01-06
资源大小:1600k
文件大小:164k
- /* Extended regular expression matching and search library,
- version 0.12.
- (Implements POSIX draft P10003.2/D11.2, except for
- internationalization features.)
- Copyright (C) 1993 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* AIX requires this to be the first thing in the file.  */
#if defined (_AIX) && !defined (REGEX_MALLOC)
#pragma alloca
#endif

/* Feature-test macros are only honoured when they are defined before
   the first system header is read, so this has to come ahead of every
   #include below.  The guard avoids a redefinition warning when the
   build system already passes -D_GNU_SOURCE.  */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

/* Technically malloc.h is deprecated on most Unixes and the malloc
   prototypes have moved to stdlib.h.  Continue to use malloc.h except
   on Mac OS X.  */
#if !defined(P_MACOSX)
#include <malloc.h>
#endif

/* The Microsoft C runtime provides the stack allocator under the name
   `_alloca'.  Only map the name there: doing it unconditionally would
   defeat both the AIX pragma above and the alloca selection done later
   in this file for other compilers.  */
#if defined (_WIN32) && !defined (alloca)
#define alloca _alloca
#endif

/* We need this for `regex.h', and perhaps for the Emacs include files.  */
#include <sys/types.h>

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/* The `emacs' switch turns on certain matching commands
   that make sense only in Emacs.  */
#ifdef emacs

#include "lisp.h"
#include "buffer.h"
#include "syntax.h"

/* Emacs uses `NULL' as a predicate.  */
#undef NULL

#else  /* not emacs */

/* We used to test for `BSTRING' here, but only GCC and Emacs define
   `BSTRING', as far as I know, and neither of them use this code.  */
#if HAVE_STRING_H || STDC_HEADERS
#include <string.h>
#ifndef bcmp
#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
#endif
#ifndef bcopy
#define bcopy(s, d, n) memcpy ((d), (s), (n))
#endif
#ifndef bzero
#define bzero(s, n) memset ((s), 0, (n))
#endif
#else
#include <strings.h>
#endif

/* Any ISO C or C++ compiler ships <stdlib.h>; fall back to hand-written
   declarations only for pre-ANSI compilers.  Declaring `char *malloc ()'
   next to a real prototype (for instance the one in <malloc.h>) is a
   conflicting-types error.  */
#if defined (STDC_HEADERS) || defined (__STDC__) || defined (__cplusplus)
#include <stdlib.h>
#else
char *malloc ();
char *realloc ();
#endif

#ifdef __NUCLEUS_PLUS__
#define REGEX_MALLOC malloc
#endif

/* Define the syntax stuff for \<, \>, etc.  */

/* This must be nonzero for the wordchar and notwordchar pattern
   commands in re_match_2.  */
#ifndef Sword
#define Sword 1
#endif

#ifdef SYNTAX_TABLE

extern char *re_syntax_table;

#else /* not SYNTAX_TABLE */

/* How many characters in the character set.  */
#define CHAR_SET_SIZE 256

static char re_syntax_table[CHAR_SET_SIZE];

/* Fill `re_syntax_table' on the first call: letters, digits and `_'
   are marked as word constituents (Sword), everything else is left
   zero.  Later calls return immediately.  */
static void
init_syntax_once (void)
{
  static int done = 0;
  int c;

  if (done)
    return;

  bzero (re_syntax_table, sizeof re_syntax_table);

  for (c = 'a'; c <= 'z'; c++)
    re_syntax_table[c] = Sword;

  for (c = 'A'; c <= 'Z'; c++)
    re_syntax_table[c] = Sword;

  for (c = '0'; c <= '9'; c++)
    re_syntax_table[c] = Sword;

  re_syntax_table['_'] = Sword;

  done = 1;
}

#endif /* not SYNTAX_TABLE */

#define SYNTAX(c) re_syntax_table[c]

#endif /* not emacs */
- /* Get the interface, including the syntax bits. */
- #include "regex.h"
/* isalpha etc. are used for the character classes.  */
#include <ctype.h>

/* Where <ctype.h> offers no `isascii' macro, treat every character as
   ASCII so the wrappers below reduce to the plain ctype test.  */
#ifndef isascii
#define isascii(c) 1
#endif

/* Blank means space or horizontal tab.  The fallback must compare
   against the tab character '\t', not the letter 't'.  */
#ifdef isblank
#define ISBLANK(c) (isascii (c) && isblank (c))
#else
#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif
#ifdef isgraph
#define ISGRAPH(c) (isascii (c) && isgraph (c))
#else
#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
#endif

#define ISPRINT(c) (isascii (c) && isprint (c))
#define ISDIGIT(c) (isascii (c) && isdigit (c))
#define ISALNUM(c) (isascii (c) && isalnum (c))
#define ISALPHA(c) (isascii (c) && isalpha (c))
#define ISCNTRL(c) (isascii (c) && iscntrl (c))
#define ISLOWER(c) (isascii (c) && islower (c))
#define ISPUNCT(c) (isascii (c) && ispunct (c))
#define ISSPACE(c) (isascii (c) && isspace (c))
#define ISUPPER(c) (isascii (c) && isupper (c))
#define ISXDIGIT(c) (isascii (c) && isxdigit (c))

#ifndef NULL
#define NULL 0
#endif
/* We remove any previous definition of `SIGN_EXTEND_CHAR',
   since ours (we hope) works properly with all combinations of
   machines, compilers, `char' and `unsigned char' argument types.
   (Per Bothner suggested the basic approach.)  */
#undef SIGN_EXTEND_CHAR
#if __STDC__
#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
#else  /* not __STDC__ */
/* As in Harbison and Steele.  */
#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
#endif
/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
   use `alloca' instead of `malloc'.  This is because using malloc in
   re_search* or re_match* could cause memory leaks when C-g is used in
   Emacs; also, malloc is slower and causes storage fragmentation.  On
   the other hand, malloc is more portable, and easier to debug.

   Because we sometimes use alloca, some routines have to be macros,
   not functions -- `alloca'-allocated space disappears at the end of the
   function it is called in.  */

#ifdef REGEX_MALLOC

#define REGEX_ALLOCATE malloc
#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)

#else /* not REGEX_MALLOC  */

/* Emacs already defines alloca, sometimes.  */
#ifndef alloca

/* Make alloca work the best possible way.  */
#ifdef __GNUC__
#define alloca __builtin_alloca
#else /* not __GNUC__ */
#if HAVE_ALLOCA_H
#include <alloca.h>
#else /* not __GNUC__ or HAVE_ALLOCA_H */
#ifndef _AIX /* Already did AIX, up at the top.  */
char *alloca ();
#endif /* not _AIX */
#endif /* not HAVE_ALLOCA_H */
#endif /* not __GNUC__ */

#endif /* not alloca */

#define REGEX_ALLOCATE alloca

/* Assumes a `char *destination' variable.  A fresh block of NSIZE
   bytes is taken from the stack, the first OSIZE bytes of SOURCE are
   copied into it, and the expression yields the new block.  */
#define REGEX_REALLOCATE(source, osize, nsize)				\
  (destination = (char *) alloca (nsize),				\
   bcopy (source, destination, osize),					\
   destination)

#endif /* not REGEX_MALLOC */
/* True if `size1' is non-NULL and PTR is pointing anywhere inside
   `string1' or just past its end.  This works if PTR is NULL, which is
   a good thing.  Relies on `string1' and `size1' being in scope at the
   point of use.  */
#define FIRST_STRING_P(ptr) 					\
  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)

/* (Re)Allocate N items of type T using malloc, or fail.  Note that
   RETALLOC assigns the realloc result straight back to ADDR, so ADDR
   becomes NULL when reallocation fails.  */
#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))

#define BYTEWIDTH 8 /* In bits.  */

#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))

/* Both arguments may be evaluated twice; do not pass expressions with
   side effects.  */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

typedef char boolean;
#define false 0
#define true 1
/* These are the command codes that appear in compiled regular
   expressions.  Some opcodes are followed by argument bytes.  A
   command code can specify any interpretation whatsoever for its
   arguments.  Zero bytes may appear in the compiled regular expression.

   The value of `exactn' is needed in search.c (search_buffer) in Emacs.
   So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
   `exactn' we use here must also be 1.  */

typedef enum
{
  no_op = 0,

        /* Followed by one byte giving n, then by n literal bytes.  */
  exactn = 1,

        /* Matches any (more or less) character.  */
  anychar,

        /* Matches any one char belonging to specified set.  First
           following byte is number of bitmap bytes.  Then come bytes
           for a bitmap saying which chars are in.  Bits in each byte
           are ordered low-bit-first.  A character is in the set if its
           bit is 1.  A character too large to have a bit in the map is
           automatically not in the set.  */
  charset,

        /* Same parameters as charset, but match any character that is
           not one of those specified.  */
  charset_not,

        /* Start remembering the text that is matched, for storing in a
           register.  Followed by one byte with the register number, in
           the range 0 to one less than the pattern buffer's re_nsub
           field.  Then followed by one byte with the number of groups
           inner to this one.  (This last has to be part of the
           start_memory only because we need it in the on_failure_jump
           of re_match_2.)  */
  start_memory,

        /* Stop remembering the text that is matched and store it in a
           memory register.  Followed by one byte with the register
           number, in the range 0 to one less than `re_nsub' in the
           pattern buffer, and one byte with the number of inner groups,
           just like `start_memory'.  (We need the number of inner
           groups here because we don't have any easy way of finding the
           corresponding start_memory when we're at a stop_memory.)  */
  stop_memory,

        /* Match a duplicate of something remembered. Followed by one
           byte containing the register number.  */
  duplicate,

        /* Fail unless at beginning of line.  */
  begline,

        /* Fail unless at end of line.  */
  endline,

        /* Succeeds if at beginning of buffer (if emacs) or at beginning
           of string to be matched (if not).  */
  begbuf,

        /* Analogously, for end of buffer/string.  */
  endbuf,

        /* Followed by two byte relative address to which to jump.  */
  jump,

        /* Same as jump, but marks the end of an alternative.  */
  jump_past_alt,

        /* Followed by two-byte relative address of place to resume at
           in case of failure.  */
  on_failure_jump,

        /* Like on_failure_jump, but pushes a placeholder instead of the
           current string position when executed.  */
  on_failure_keep_string_jump,

        /* Throw away latest failure point and then jump to following
           two-byte relative address.  */
  pop_failure_jump,

        /* Change to pop_failure_jump if know won't have to backtrack to
           match; otherwise change to jump.  This is used to jump
           back to the beginning of a repeat.  If what follows this jump
           clearly won't match what the repeat does, such that we can be
           sure that there is no use backtracking out of repetitions
           already matched, then we change it to a pop_failure_jump.
           Followed by two-byte address.  */
  maybe_pop_jump,

        /* Jump to following two-byte address, and push a dummy failure
           point. This failure point will be thrown away if an attempt
           is made to use it for a failure.  A `+' construct makes this
           before the first repeat.  Also used as an intermediary kind
           of jump when compiling an alternative.  */
  dummy_failure_jump,

        /* Push a dummy failure point and continue.  Used at the end of
           alternatives.  */
  push_dummy_failure,

        /* Followed by two-byte relative address and two-byte number n.
           After matching N times, jump to the address upon failure.  */
  succeed_n,

        /* Followed by two-byte relative address, and two-byte number n.
           Jump to the address N times, then fail.  */
  jump_n,

        /* Set the following two-byte relative address to the
           subsequent two-byte number.  The address *includes* the two
           bytes of number.  */
  set_number_at,

  wordchar,	/* Matches any word-constituent character.  */
  notwordchar,	/* Matches any char that is not a word-constituent.  */

  wordbeg,	/* Succeeds if at word beginning.  */
  wordend,	/* Succeeds if at word end.  */

  wordbound,	/* Succeeds if at a word boundary.  */
  notwordbound	/* Succeeds if not at a word boundary.  */

#ifdef emacs
  ,before_dot,	/* Succeeds if before point.  */
  at_dot,	/* Succeeds if at point.  */
  after_dot,	/* Succeeds if after point.  */

	/* Matches any character whose syntax is specified.  Followed by
           a byte which contains a syntax code, e.g., Sword.  */
  syntaxspec,

	/* Matches any character whose syntax is not that specified.  */
  notsyntaxspec
#endif /* emacs */
} re_opcode_t;
/* Common operations on the compiled pattern.  */

/* Store NUMBER in two contiguous bytes starting at DESTINATION,
   low-order byte first.  */

#define STORE_NUMBER(destination, number)				\
  do {									\
    (destination)[0] = (number) & 0377;					\
    (destination)[1] = (number) >> 8;					\
  } while (0)

/* Same as STORE_NUMBER, except increment DESTINATION to
   the byte after where the number is stored.  Therefore, DESTINATION
   must be an lvalue.  */

#define STORE_NUMBER_AND_INCR(destination, number)			\
  do {									\
    STORE_NUMBER (destination, number);					\
    (destination) += 2;							\
  } while (0)
/* Put into DESTINATION a number stored in two contiguous bytes starting
   at SOURCE.  The low-order byte comes first; the second byte is
   sign-extended, so the result ranges over negative values too.  The
   high byte is scaled by multiplication rather than `<< 8' because
   left-shifting a negative value is undefined.  */

#define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * 256;		\
  } while (0)

#ifdef DEBUG
/* Function form of EXTRACT_NUMBER: store in *DEST the signed 16-bit
   number held in the two bytes at SOURCE.  */
static void
extract_number (int *dest, unsigned char *source)
{
  int temp = SIGN_EXTEND_CHAR (*(source + 1));
  *dest = *source & 0377;
  *dest += temp * 256;
}

#ifndef EXTRACT_MACROS /* To debug the macros.  */
#undef EXTRACT_NUMBER
#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
#endif /* not EXTRACT_MACROS */

#endif /* DEBUG */
/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
   SOURCE must be an lvalue.  */

#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
  do {									\
    EXTRACT_NUMBER (destination, source);				\
    (source) += 2; 							\
  } while (0)

#ifdef DEBUG
/* Function form of EXTRACT_NUMBER_AND_INCR: read the number at *SOURCE
   into *DESTINATION and advance *SOURCE past its two bytes.  */
static void
extract_number_and_incr (int *destination, unsigned char **source)
{
  extract_number (destination, *source);
  *source += 2;
}

#ifndef EXTRACT_MACROS
#undef EXTRACT_NUMBER_AND_INCR
#define EXTRACT_NUMBER_AND_INCR(dest, src) \
  extract_number_and_incr (&dest, &src)
#endif /* not EXTRACT_MACROS */

#endif /* DEBUG */
- /* If DEBUG is defined, Regex prints many voluminous messages about what
- it is doing (if the variable `debug' is nonzero). If linked with the
- main program in `iregex.c', you can enter patterns and strings
- interactively. And if linked with the main program in `main.c' and
- the other test files, you can run the already-written tests. */
- #ifdef DEBUG
- /* We use standard I/O for debugging. */
- #include <stdio.h>
- /* It is useful to test things that ``must'' be true when debugging. */
- #include <assert.h>
/* Debug output is produced only while this is nonzero.  */
static int debug = 0;

/* DEBUG_STATEMENT keeps its argument; the DEBUG_PRINTn macros forward
   a format plus n-1 arguments to printf when `debug' is set.  Each
   expands to a bare `if', so do not place one directly before an
   `else'.  */
#define DEBUG_STATEMENT(e) e
#define DEBUG_PRINT1(x) if (debug) printf (x)
#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
/* The first argument (the pattern buffer) is accepted but unused.  */
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
  if (debug) print_partial_compiled_pattern (s, e)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
  if (debug) print_double_string (w, s1, sz1, s2, sz2)

/* Defined outside this file.  */
extern void printchar ();
- /* Print the fastmap in human-readable form. */
- void
- print_fastmap (fastmap)
- char *fastmap;
- {
- unsigned was_a_range = 0;
- unsigned i = 0;
-
- while (i < (1 << BYTEWIDTH))
- {
- if (fastmap[i++])
- {
- was_a_range = 0;
- printchar (i - 1);
- while (i < (1 << BYTEWIDTH) && fastmap[i])
- {
- was_a_range = 1;
- i++;
- }
- if (was_a_range)
- {
- printf ("-");
- printchar (i - 1);
- }
- }
- }
- putchar ('n');
- }
- /* Print a compiled pattern string in human-readable form, starting at
- the START pointer into it and ending just before the pointer END. */
- void
- print_partial_compiled_pattern (start, end)
- unsigned char *start;
- unsigned char *end;
- {
- int mcnt, mcnt2;
- unsigned char *p = start;
- unsigned char *pend = end;
- if (start == NULL)
- {
- printf ("(null)n");
- return;
- }
-
- /* Loop over pattern commands. */
- while (p < pend)
- {
- switch ((re_opcode_t) *p++)
- {
- case no_op:
- printf ("/no_op");
- break;
- case exactn:
- mcnt = *p++;
- printf ("/exactn/%d", mcnt);
- do
- {
- putchar ('/');
- printchar (*p++);
- }
- while (--mcnt);
- break;
- case start_memory:
- mcnt = *p++;
- printf ("/start_memory/%d/%d", mcnt, *p++);
- break;
- case stop_memory:
- mcnt = *p++;
- printf ("/stop_memory/%d/%d", mcnt, *p++);
- break;
- case duplicate:
- printf ("/duplicate/%d", *p++);
- break;
- case anychar:
- printf ("/anychar");
- break;
- case charset:
- case charset_not:
- {
- register int c;
- printf ("/charset%s",
- (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
-
- assert (p + *p < pend);
- for (c = 0; c < *p; c++)
- {
- unsigned bit;
- unsigned char map_byte = p[1 + c];
-
- putchar ('/');
- for (bit = 0; bit < BYTEWIDTH; bit++)
- if (map_byte & (1 << bit))
- printchar (c * BYTEWIDTH + bit);
- }
- p += 1 + *p;
- break;
- }
- case begline:
- printf ("/begline");
- break;
- case endline:
- printf ("/endline");
- break;
- case on_failure_jump:
- extract_number_and_incr (&mcnt, &p);
- printf ("/on_failure_jump/0/%d", mcnt);
- break;
- case on_failure_keep_string_jump:
- extract_number_and_incr (&mcnt, &p);
- printf ("/on_failure_keep_string_jump/0/%d", mcnt);
- break;
- case dummy_failure_jump:
- extract_number_and_incr (&mcnt, &p);
- printf ("/dummy_failure_jump/0/%d", mcnt);
- break;
- case push_dummy_failure:
- printf ("/push_dummy_failure");
- break;
-
- case maybe_pop_jump:
- extract_number_and_incr (&mcnt, &p);
- printf ("/maybe_pop_jump/0/%d", mcnt);
- break;
- case pop_failure_jump:
- extract_number_and_incr (&mcnt, &p);
- printf ("/pop_failure_jump/0/%d", mcnt);
- break;
-
- case jump_past_alt:
- extract_number_and_incr (&mcnt, &p);
- printf ("/jump_past_alt/0/%d", mcnt);
- break;
-
- case jump:
- extract_number_and_incr (&mcnt, &p);
- printf ("/jump/0/%d", mcnt);
- break;
- case succeed_n:
- extract_number_and_incr (&mcnt, &p);
- extract_number_and_incr (&mcnt2, &p);
- printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
- break;
-
- case jump_n:
- extract_number_and_incr (&mcnt, &p);
- extract_number_and_incr (&mcnt2, &p);
- printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
- break;
-
- case set_number_at:
- extract_number_and_incr (&mcnt, &p);
- extract_number_and_incr (&mcnt2, &p);
- printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
- break;
-
- case wordbound:
- printf ("/wordbound");
- break;
- case notwordbound:
- printf ("/notwordbound");
- break;
- case wordbeg:
- printf ("/wordbeg");
- break;
-
- case wordend:
- printf ("/wordend");
-
- #ifdef emacs
- case before_dot:
- printf ("/before_dot");
- break;
- case at_dot:
- printf ("/at_dot");
- break;
- case after_dot:
- printf ("/after_dot");
- break;
- case syntaxspec:
- printf ("/syntaxspec");
- mcnt = *p++;
- printf ("/%d", mcnt);
- break;
-
- case notsyntaxspec:
- printf ("/notsyntaxspec");
- mcnt = *p++;
- printf ("/%d", mcnt);
- break;
- #endif /* emacs */
- case wordchar:
- printf ("/wordchar");
- break;
-
- case notwordchar:
- printf ("/notwordchar");
- break;
- case begbuf:
- printf ("/begbuf");
- break;
- case endbuf:
- printf ("/endbuf");
- break;
- default:
- printf ("?%d", *(p-1));
- }
- }
- printf ("/n");
- }
- void
- print_compiled_pattern (bufp)
- struct re_pattern_buffer *bufp;
- {
- unsigned char *buffer = bufp->buffer;
- print_partial_compiled_pattern (buffer, buffer + bufp->used);
- printf ("%d bytes used/%d bytes allocated.n", bufp->used, bufp->allocated);
- if (bufp->fastmap_accurate && bufp->fastmap)
- {
- printf ("fastmap: ");
- print_fastmap (bufp->fastmap);
- }
- printf ("re_nsub: %dt", bufp->re_nsub);
- printf ("regs_alloc: %dt", bufp->regs_allocated);
- printf ("can_be_null: %dt", bufp->can_be_null);
- printf ("newline_anchor: %dn", bufp->newline_anchor);
- printf ("no_sub: %dt", bufp->no_sub);
- printf ("not_bol: %dt", bufp->not_bol);
- printf ("not_eol: %dt", bufp->not_eol);
- printf ("syntax: %dn", bufp->syntax);
- /* Perhaps we should print the translate table? */
- }
/* Print the text from WHERE to the end of the two-part subject
   STRING1 (SIZE1 bytes) + STRING2 (SIZE2 bytes).  When WHERE lies in
   STRING1, the remainder of STRING1 is printed and then all of
   STRING2; otherwise WHERE is taken to point into STRING2.  A null
   WHERE prints `(null)'.  */

void
print_double_string (const char *where,
                     const char *string1, int size1,
                     const char *string2, int size2)
{
  unsigned this_char;

  if (where == NULL)
    printf ("(null)");
  else
    {
      if (FIRST_STRING_P (where))
        {
          for (this_char = where - string1; this_char < size1; this_char++)
            printchar (string1[this_char]);

          /* Continue from the very start of the second string.  */
          where = string2;
        }

      for (this_char = where - string2; this_char < size2; this_char++)
        printchar (string2[this_char]);
    }
}
- #else /* not DEBUG */
/* Without DEBUG every diagnostic hook, `assert' included, expands to
   nothing.  */
#undef assert
#define assert(e)

#define DEBUG_STATEMENT(e)
#define DEBUG_PRINT1(x)
#define DEBUG_PRINT2(x1, x2)
#define DEBUG_PRINT3(x1, x2, x3)
#define DEBUG_PRINT4(x1, x2, x3, x4)
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
- #endif /* not DEBUG */
- /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
- also be assigned to arbitrarily: each pattern buffer stores its own
- syntax, so it can be changed between regex compilations. */
- reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
- /* Specify the precise syntax of regexps for compilation. This provides
- for compatibility for various utilities which historically have
- different, incompatible syntaxes.
- The argument SYNTAX is a bit mask comprised of the various bits
- defined in regex.h. We return the old syntax. */
- reg_syntax_t
- re_set_syntax (reg_syntax_t syntax)
- {
- reg_syntax_t ret = re_syntax_options;
-
- re_syntax_options = syntax;
- return ret;
- }
/* This table gives an error message for each of the error codes listed
   in regex.h.  Obviously the order here has to be same as there.

   The messages that mention `\(', `\{' and `\)' need a doubled
   backslash in the source: a single one is not a valid escape
   sequence and the backslash would be lost from the message.  */

static const char *re_error_msg[] =
  { NULL,					/* REG_NOERROR */
    "No match",					/* REG_NOMATCH */
    "Invalid regular expression",		/* REG_BADPAT */
    "Invalid collation character",		/* REG_ECOLLATE */
    "Invalid character class name",		/* REG_ECTYPE */
    "Trailing backslash",			/* REG_EESCAPE */
    "Invalid back reference",			/* REG_ESUBREG */
    "Unmatched [ or [^",			/* REG_EBRACK */
    "Unmatched ( or \\(",			/* REG_EPAREN */
    "Unmatched \\{",				/* REG_EBRACE */
    "Invalid content of \\{\\}",		/* REG_BADBR */
    "Invalid range end",			/* REG_ERANGE */
    "Memory exhausted",				/* REG_ESPACE */
    "Invalid preceding regular expression",	/* REG_BADRPT */
    "Premature end of regular expression",	/* REG_EEND */
    "Regular expression too big",		/* REG_ESIZE */
    "Unmatched ) or \\)",			/* REG_ERPAREN */
  };
/* Since we have one byte reserved for the register number argument to
   {start,stop}_memory, the maximum number of groups we can report
   things about is what fits in that byte.  */
#define MAX_REGNUM 255

/* But patterns can have more than `MAX_REGNUM' registers.  We just
   ignore the excess.  */
typedef unsigned regnum_t;


/* Since offsets can go either forwards or backwards, this type needs to
   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
typedef int pattern_offset_t;

/* One saved compilation context.  Positions are kept as offsets rather
   than pointers.  */
typedef struct
{
  pattern_offset_t begalt_offset;
  pattern_offset_t fixup_alt_jump;
  pattern_offset_t inner_group_offset;
  pattern_offset_t laststart_offset;
  regnum_t regnum;
} compile_stack_elt_t;


/* A growable stack of saved contexts.  */
typedef struct
{
  compile_stack_elt_t *stack;
  unsigned size;
  unsigned avail;			/* Offset of next open position.  */
} compile_stack_type;
- /* Subroutine declarations and macros for regex_compile. */
- static void store_op1 (
- re_opcode_t op,
- unsigned char *loc,
- int arg), store_op2 (
- re_opcode_t op,
- unsigned char *loc,
- int arg1, int arg2);
- static void insert_op1 (
- re_opcode_t op,
- unsigned char *loc,
- int arg,
- unsigned char *end), insert_op2 (
- re_opcode_t op,
- unsigned char *loc,
- int arg1, int arg2,
- unsigned char *end);
- static boolean at_begline_loc_p (
- const char *pattern, const char *p,
- reg_syntax_t syntax), at_endline_loc_p (
- const char *p, const char *pend,
- int syntax);
- static boolean group_in_compile_stack (
- compile_stack_type compile_stack,
- regnum_t regnum);
- static reg_errcode_t compile_range (
- const char **p_ptr, const char *pend,
- char *translate,
- reg_syntax_t syntax,
- unsigned char *b);
/* Fetch the next character in the uncompiled pattern---translating it
   if necessary.  Also cast from a signed character in the constant
   string passed to us by the user to an unsigned char that we can use
   as an array index (in, e.g., `translate').

   Uses the caller's `p', `pend' and `translate', and makes the
   enclosing function return REG_EEND when the pattern is exhausted.  */
#define PATFETCH(c)							\
  do {if (p == pend) return REG_EEND;					\
    c = (unsigned char) *p++;						\
    if (translate) c = translate[c];					\
  } while (0)

/* Fetch the next character in the uncompiled pattern, with no
   translation.  */
#define PATFETCH_RAW(c)							\
  do {if (p == pend) return REG_EEND;					\
    c = (unsigned char) *p++; 						\
  } while (0)

/* Go backwards one character in the pattern.  */
#define PATUNFETCH p--


/* If `translate' is non-null, return translate[D], else just D.  We
   cast the subscript to translate because some data is declared as
   `char *', to avoid warnings when a string constant is passed.  But
   when we use a character as a subscript we must make it unsigned.  */
#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
/* Macros for outputting the compiled pattern into `buffer'.  They use
   the caller's `bufp' (the pattern buffer) and `b' (the append
   position inside it).  */

/* If the buffer isn't allocated when it comes in, use this.  */
#define INIT_BUF_SIZE  32

/* Make sure we have at least N more bytes of space in buffer.  */
#define GET_BUFFER_SPACE(n)						\
    while (b - bufp->buffer + (n) > bufp->allocated)			\
      EXTEND_BUFFER ()

/* Make sure we have one more byte of buffer space and then add C to it.  */
#define BUF_PUSH(c)							\
  do {									\
    GET_BUFFER_SPACE (1);						\
    *b++ = (unsigned char) (c);						\
  } while (0)


/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
#define BUF_PUSH_2(c1, c2)						\
  do {									\
    GET_BUFFER_SPACE (2);						\
    *b++ = (unsigned char) (c1);					\
    *b++ = (unsigned char) (c2);					\
  } while (0)


/* As with BUF_PUSH_2, except for three bytes.  */
#define BUF_PUSH_3(c1, c2, c3)						\
  do {									\
    GET_BUFFER_SPACE (3);						\
    *b++ = (unsigned char) (c1);					\
    *b++ = (unsigned char) (c2);					\
    *b++ = (unsigned char) (c3);					\
  } while (0)


/* Store a jump with opcode OP at LOC to location TO.  We store a
   relative address offset by the three bytes the jump itself occupies.  */
#define STORE_JUMP(op, loc, to) \
  store_op1 (op, loc, (to) - (loc) - 3)

/* Likewise, for a two-argument jump.  */
#define STORE_JUMP2(op, loc, to, arg) \
  store_op2 (op, loc, (to) - (loc) - 3, arg)

/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
#define INSERT_JUMP(op, loc, to) \
  insert_op1 (op, loc, (to) - (loc) - 3, b)

/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
#define INSERT_JUMP2(op, loc, to, arg) \
  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)


/* This is not an arbitrary limit: the arguments which represent offsets
   into the pattern are two bytes long.  So if 2^16 bytes turns out to
   be too small, many things would have to change.  */
#define MAX_BUF_SIZE (1L << 16)


/* Extend the buffer by twice its current size via realloc and
   reset the pointers that pointed into the old block to point to the
   correct places in the new one.  If extending the buffer results in it
   being larger than MAX_BUF_SIZE, then flag memory exhausted.

   The realloc result goes through a temporary: on failure the macro
   returns REG_ESPACE with `bufp->buffer' and `bufp->allocated' still
   describing the old, valid block, instead of losing the only pointer
   to it.  */
#define EXTEND_BUFFER()							\
  do { 									\
    unsigned char *old_buffer = bufp->buffer;				\
    unsigned char *new_buffer;						\
    unsigned long new_allocated;					\
    if (bufp->allocated == MAX_BUF_SIZE) 				\
      return REG_ESIZE;							\
    new_allocated = bufp->allocated << 1;				\
    if (new_allocated > MAX_BUF_SIZE)					\
      new_allocated = MAX_BUF_SIZE; 					\
    new_buffer = (unsigned char *) realloc (old_buffer, new_allocated);\
    if (new_buffer == NULL)						\
      return REG_ESPACE;						\
    bufp->buffer = new_buffer;						\
    bufp->allocated = new_allocated;					\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != new_buffer)					\
      {									\
        b = (b - old_buffer) + new_buffer;				\
        begalt = (begalt - old_buffer) + new_buffer;			\
        if (fixup_alt_jump)						\
          fixup_alt_jump = (fixup_alt_jump - old_buffer) + new_buffer;\
        if (laststart)							\
          laststart = (laststart - old_buffer) + new_buffer;		\
        if (pending_exact)						\
          pending_exact = (pending_exact - old_buffer) + new_buffer;	\
      }									\
  } while (0)
/* Macros for the compile stack.  They operate on the caller's
   `compile_stack' variable.  */

#define INIT_COMPILE_STACK_SIZE 32

#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)

/* The next available element.  */
#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])


/* Set the bit for character C in a list.  The bitmap starts at the
   caller's `b'.  C is parenthesised in both uses so that an expression
   argument is cast as a whole.  */
#define SET_LIST_BIT(c)                               \
  (b[((unsigned char) (c)) / BYTEWIDTH]               \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))


/* Get the next unsigned number in the uncompiled pattern.  Digits are
   accumulated into NUM, which is first reset to zero if it is
   negative; NUM is left untouched when no digit is found.  Uses the
   caller's `c', `p' and `pend'.  */
#define GET_UNSIGNED_NUMBER(num) 					\
  { if (p != pend)							\
     {									\
       PATFETCH (c); 							\
       while (ISDIGIT (c)) 						\
         { 								\
           if (num < 0)							\
              num = 0;							\
           num = num * 10 + c - '0'; 					\
           if (p == pend) 						\
              break; 							\
           PATFETCH (c);						\
         } 								\
       } 								\
    }

#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */

#define IS_CHAR_CLASS(string)						\
   (STREQ (string, "alpha") || STREQ (string, "upper")			\
    || STREQ (string, "lower") || STREQ (string, "digit")		\
    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
    || STREQ (string, "space") || STREQ (string, "print")		\
    || STREQ (string, "punct") || STREQ (string, "graph")		\
    || STREQ (string, "cntrl") || STREQ (string, "blank"))
- /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
- Returns one of error codes defined in `regex.h', or zero for success.
- Assumes the `allocated' (and perhaps `buffer') and `translate'
- fields are set in BUFP on entry.
- If it succeeds, results are put in BUFP (if it returns an error, the
- contents of BUFP are undefined):
- `buffer' is the compiled pattern;
- `syntax' is set to SYNTAX;
- `used' is set to the length of the compiled pattern;
- `fastmap_accurate' is zero;
- `re_nsub' is the number of subexpressions in PATTERN;
- `not_bol' and `not_eol' are zero;
-
- The `fastmap' and `newline_anchor' fields are neither
- examined nor set. */
- static reg_errcode_t
- regex_compile (
- const char *pattern,
- int size,
- reg_syntax_t syntax,
- struct re_pattern_buffer *bufp)
- {
- /* We fetch characters from PATTERN here. Even though PATTERN is
- `char *' (i.e., signed), we declare these variables as unsigned, so
- they can be reliably used as array indices. */
- register unsigned char c, c1;
-
- /* A random tempory spot in PATTERN. */
- const char *p1;
- /* Points to the end of the buffer, where we should append. */
- register unsigned char *b;
-
- /* Keeps track of unclosed groups. */
- compile_stack_type compile_stack;
- /* Points to the current (ending) position in the pattern. */
- const char *p = pattern;
- const char *pend = pattern + size;
-
- /* How to translate the characters in the pattern. */
- char *translate = bufp->translate;
- /* Address of the count-byte of the most recently inserted `exactn'
- command. This makes it possible to tell if a new exact-match
- character can be added to that command or if the character requires
- a new `exactn' command. */
- unsigned char *pending_exact = 0;
- /* Address of start of the most recently finished expression.
- This tells, e.g., postfix * where to find the start of its
- operand. Reset at the beginning of groups and alternatives. */
- unsigned char *laststart = 0;
- /* Address of beginning of regexp, or inside of last group. */
- unsigned char *begalt;
- /* Place in the uncompiled pattern (i.e., the {) to
- which to go back if the interval is invalid. */
- const char *beg_interval;
-
- /* Address of the place where a forward jump should go to the end of
- the containing expression. Each alternative of an `or' -- except the
- last -- ends with a forward jump of this sort. */
- unsigned char *fixup_alt_jump = 0;
- /* Counts open-groups as they are encountered. Remembered for the
- matching close-group on the compile stack, so the same register
- number is put in the stop_memory as the start_memory. */
- regnum_t regnum = 0;
- #ifdef DEBUG
- DEBUG_PRINT1 ("nCompiling pattern: ");
- if (debug)
- {
- unsigned debug_count;
-
- for (debug_count = 0; debug_count < size; debug_count++)
- printchar (pattern[debug_count]);
- putchar ('n');
- }
- #endif /* DEBUG */
- /* Initialize the compile stack. */
- compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
- if (compile_stack.stack == NULL)
- return REG_ESPACE;
- compile_stack.size = INIT_COMPILE_STACK_SIZE;
- compile_stack.avail = 0;
- /* Initialize the pattern buffer. */
- bufp->syntax = syntax;
- bufp->fastmap_accurate = 0;
- bufp->not_bol = bufp->not_eol = 0;
- /* Set `used' to zero, so that if we return an error, the pattern
- printer (for debugging) will think there's no pattern. We reset it
- at the end. */
- bufp->used = 0;
-
- /* Always count groups, whether or not bufp->no_sub is set. */
- bufp->re_nsub = 0;
- #if !defined (emacs) && !defined (SYNTAX_TABLE)
- /* Initialize the syntax table. */
- init_syntax_once ();
- #endif
- if (bufp->allocated == 0)
- {
- if (bufp->buffer)
- { /* If zero allocated, but buffer is non-null, try to realloc
- enough space. This loses if buffer's address is bogus, but
- that is the user's responsibility. */
- RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
- }
- else
- { /* Caller did not allocate a buffer. Do it for them. */
- bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
- }
- if (!bufp->buffer) return REG_ESPACE;
- bufp->allocated = INIT_BUF_SIZE;
- }
- begalt = b = bufp->buffer;
- /* Loop through the uncompiled pattern until we're at the end. */
- while (p != pend)
- {
- PATFETCH (c);
- switch (c)
- {
- case '^':
- {
- if ( /* If at start of pattern, it's an operator. */
- p == pattern + 1
- /* If context independent, it's an operator. */
- || syntax & RE_CONTEXT_INDEP_ANCHORS
- /* Otherwise, depends on what's come before. */
- || at_begline_loc_p (pattern, p, syntax))
- BUF_PUSH (begline);
- else
- goto normal_char;
- }
- break;
- case '$':
- {
- if ( /* If at end of pattern, it's an operator. */
- p == pend
- /* If context independent, it's an operator. */
- || syntax & RE_CONTEXT_INDEP_ANCHORS
- /* Otherwise, depends on what's next. */
- || at_endline_loc_p (p, pend, syntax))
- BUF_PUSH (endline);
- else
- goto normal_char;
- }
- break;
- case '+':
- case '?':
- if ((syntax & RE_BK_PLUS_QM)
- || (syntax & RE_LIMITED_OPS))
- goto normal_char;
- handle_plus:
- case '*':
- /* If there is no previous pattern... */
- if (!laststart)
- {
- if (syntax & RE_CONTEXT_INVALID_OPS)
- return REG_BADRPT;
- else if (!(syntax & RE_CONTEXT_INDEP_OPS))
- goto normal_char;
- }
- {
- /* Are we optimizing this jump? */
- boolean keep_string_p = false;
-
- /* 1 means zero (many) matches is allowed. */
- char zero_times_ok = 0, many_times_ok = 0;
- /* If there is a sequence of repetition chars, collapse it
- down to just one (the right one). We can't combine
- interval operators with these because of, e.g., `a{2}*',
- which should only match an even number of `a's. */
- for (;;)
- {
- zero_times_ok |= c != '+';
- many_times_ok |= c != '?';
- if (p == pend)
- break;
- PATFETCH (c);
- if (c == '*'
- || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
- ;
- else if (syntax & RE_BK_PLUS_QM && c == '\')
- {
- if (p == pend) return REG_EESCAPE;
- PATFETCH (c1);
- if (!(c1 == '+' || c1 == '?'))
- {
- PATUNFETCH;
- PATUNFETCH;
- break;
- }
- c = c1;
- }
- else
- {
- PATUNFETCH;
- break;
- }
- /* If we get here, we found another repeat character. */
- }
- /* Star, etc. applied to an empty pattern is equivalent
- to an empty pattern. */
- if (!laststart)
- break;
- /* Now we know whether or not zero matches is allowed
- and also whether or not two or more matches is allowed. */
- if (many_times_ok)
- { /* More than one repetition is allowed, so put in at the
- end a backward relative jump from `b' to before the next
- jump we're going to put in below (which jumps from
- laststart to after this jump).
- But if we are at the `*' in the exact sequence `.*n',
- insert an unconditional jump backwards to the .,
- instead of the beginning of the loop. This way we only
- push a failure point once, instead of every time
- through the loop. */
- assert (p - 1 > pattern);
- /* Allocate the space for the jump. */
- GET_BUFFER_SPACE (3);
- /* We know we are not at the first character of the pattern,
- because laststart was nonzero. And we've already
- incremented `p', by the way, to be the character after
- the `*'. Do we have to do something analogous here
- for null bytes, because of RE_DOT_NOT_NULL? */
- if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
- && zero_times_ok
- && p < pend && TRANSLATE (*p) == TRANSLATE ('n')
- && !(syntax & RE_DOT_NEWLINE))
- { /* We have .*n. */
- STORE_JUMP (jump, b, laststart);
- keep_string_p = true;
- }
- else
- /* Anything else. */
- STORE_JUMP (maybe_pop_jump, b, laststart - 3);
- /* We've added more stuff to the buffer. */
- b += 3;
- }
- /* On failure, jump from laststart to b + 3, which will be the
- end of the buffer after this jump is inserted. */
- GET_BUFFER_SPACE (3);
- INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
- : on_failure_jump,
- laststart, b + 3);
- pending_exact = 0;
- b += 3;
- if (!zero_times_ok)
- {
- /* At least one repetition is required, so insert a
- `dummy_failure_jump' before the initial
- `on_failure_jump' instruction of the loop. This
- effects a skip over that instruction the first time
- we hit that loop. */
- GET_BUFFER_SPACE (3);
- INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
- b += 3;
- }
- }
- break;
- case '.':
- laststart = b;
- BUF_PUSH (anychar);
- break;
- case '[':
- {
- boolean had_char_class = false;
- if (p == pend) return REG_EBRACK;
- /* Ensure that we have enough space to push a charset: the
- opcode, the length count, and the bitset; 34 bytes in all. */
- GET_BUFFER_SPACE (34);
- laststart = b;
- /* We test `*p == '^' twice, instead of using an if
- statement, so we only need one BUF_PUSH. */
- BUF_PUSH (*p == '^' ? charset_not : charset);
- if (*p == '^')
- p++;
- /* Remember the first position in the bracket expression. */
- p1 = p;
- /* Push the number of bytes in the bitmap. */
- BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
- /* Clear the whole map. */
- bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
- /* charset_not matches newline according to a syntax bit. */
- if ((re_opcode_t) b[-2] == charset_not
- && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
- SET_LIST_BIT ('n');
- /* Read in characters and ranges, setting map bits. */
- for (;;)
- {
- if (p == pend) return REG_EBRACK;
- PATFETCH (c);
- /* might escape characters inside [...] and [^...]. */
- if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\')
- {
- if (p == pend) return REG_EESCAPE;
- PATFETCH (c1);
- SET_LIST_BIT (c1);
- continue;
- }
- /* Could be the end of the bracket expression. If it's
- not (i.e., when the bracket expression is `[]' so
- far), the ']' character bit gets set way below. */
- if (c == ']' && p != p1 + 1)
- break;
- /* Look ahead to see if it's a range when the last thing
- was a character class. */
- if (had_char_class && c == '-' && *p != ']')
- return REG_ERANGE;
- /* Look ahead to see if it's a range when the last thing
- was a character: if this is a hyphen not at the
- beginning or the end of a list, then it's the range
- operator. */
- if (c == '-'
- && !(p - 2 >= pattern && p[-2] == '[')
- && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
- && *p != ']')
- {
- reg_errcode_t ret
- = compile_range (&p, pend, translate, syntax, b);
- if (ret != REG_NOERROR) return ret;
- }
- else if (p[0] == '-' && p[1] != ']')
- { /* This handles ranges made up of characters only. */
- reg_errcode_t ret;
- /* Move past the `-'. */
- PATFETCH (c1);
-
- ret = compile_range (&p, pend, translate, syntax, b);
- if (ret != REG_NOERROR) return ret;
- }
- /* See if we're at the beginning of a possible character
- class. */
- else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
- { /* Leave room for the null. */
- char str[CHAR_CLASS_MAX_LENGTH + 1];
- PATFETCH (c);
- c1 = 0;
- /* If pattern is `[[:'. */
- if (p == pend) return REG_EBRACK;
- for (;;)
- {
- PATFETCH (c);
- if (c == ':' || c == ']' || p == pend
- || c1 == CHAR_CLASS_MAX_LENGTH)
- break;
- str[c1++] = c;
- }
- str[c1] = ' ';
- /* If isn't a word bracketed by `[:' and:`]':
- undo the ending character, the letters, and leave
- the leading `:' and `[' (but set bits for them). */
- if (c == ':' && *p == ']')
- {
- int ch;
- boolean is_alnum = STREQ (str, "alnum");
- boolean is_alpha = STREQ (str, "alpha");
- boolean is_blank = STREQ (str, "blank");
- boolean is_cntrl = STREQ (str, "cntrl");
- boolean is_digit = STREQ (str, "digit");
- boolean is_graph = STREQ (str, "graph");
- boolean is_lower = STREQ (str, "lower");
- boolean is_print = STREQ (str, "print");
- boolean is_punct = STREQ (str, "punct");
- boolean is_space = STREQ (str, "space");
- boolean is_upper = STREQ (str, "upper");
- boolean is_xdigit = STREQ (str, "xdigit");
-
- if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
- /* Throw away the ] at the end of the character
- class. */
- PATFETCH (c);
- if (p == pend) return REG_EBRACK;
- for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
- {
- if ( (is_alnum && ISALNUM (ch))
- || (is_alpha && ISALPHA (ch))
- || (is_blank && ISBLANK (ch))
- || (is_cntrl && ISCNTRL (ch))
- || (is_digit && ISDIGIT (ch))
- || (is_graph && ISGRAPH (ch))
- || (is_lower && ISLOWER (ch))
- || (is_print && ISPRINT (ch))
- || (is_punct && ISPUNCT (ch))
- || (is_space && ISSPACE (ch))
- || (is_upper && ISUPPER (ch))
- || (is_xdigit && ISXDIGIT (ch)))
- SET_LIST_BIT (ch);
- }
- had_char_class = true;
- }
- else
- {
- c1++;
- while (c1--)
- PATUNFETCH;
- SET_LIST_BIT ('[');
- SET_LIST_BIT (':');
- had_char_class = false;
- }
- }
- else
- {
- had_char_class = false;
- SET_LIST_BIT (c);
- }
- }
- /* Discard any (non)matching list bytes that are all 0 at the
- end of the map. Decrease the map-length byte too. */
- while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
- b[-1]--;
- b += b[-1];
- }
- break;
- case '(':
- if (syntax & RE_NO_BK_PARENS)
- goto handle_open;
- else
- goto normal_char;
- case ')':
- if (syntax & RE_NO_BK_PARENS)
- goto handle_close;
- else
- goto normal_char;
- case 'n':
- if (syntax & RE_NEWLINE_ALT)
- goto handle_alt;
- else
- goto normal_char;
- case '|':
- if (syntax & RE_NO_BK_VBAR)
- goto handle_alt;
- else
- goto normal_char;
- case '{':
- if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
- goto handle_interval;
- else
- goto normal_char;
- case '\':
- if (p == pend) return REG_EESCAPE;
- /* Do not translate the character after the , so that we can
- distinguish, e.g., B from b, even if we normally would
- translate, e.g., B to b. */
- PATFETCH_RAW (c);
- switch (c)
- {
- case '(':
- if (syntax & RE_NO_BK_PARENS)
- goto normal_backslash;
- handle_open:
- bufp->re_nsub++;
- regnum++;
- if (COMPILE_STACK_FULL)
- {
- RETALLOC (compile_stack.stack, compile_stack.size << 1,
- compile_stack_elt_t);
- if (compile_stack.stack == NULL) return REG_ESPACE;
- compile_stack.size <<= 1;
- }
- /* These are the values to restore when we hit end of this
- group. They are all relative offsets, so that if the
- whole pattern moves because of realloc, they will still
- be valid. */
- COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
- COMPILE_STACK_TOP.fixup_alt_jump
- = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
- COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
- COMPILE_STACK_TOP.regnum = regnum;
- /* We will eventually replace the 0 with the number of
- groups inner to this one. But do not push a
- start_memory for groups beyond the last one we can
- represent in the compiled pattern. */
- if (regnum <= MAX_REGNUM)
- {
- COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
- BUF_PUSH_3 (start_memory, regnum, 0);
- }
-
- compile_stack.avail++;
- fixup_alt_jump = 0;
- laststart = 0;
- begalt = b;
- /* If we've reached MAX_REGNUM groups, then this open
- won't actually generate any code, so we'll have to
- clear pending_exact explicitly. */
- pending_exact = 0;
- break;
- case ')':
- if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
- if (COMPILE_STACK_EMPTY)
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
- goto normal_backslash;
- else
- return REG_ERPAREN;
- handle_close:
- if (fixup_alt_jump)
- { /* Push a dummy failure point at the end of the
- alternative for a possible future
- `pop_failure_jump' to pop. See comments at
- `push_dummy_failure' in `re_match_2'. */
- BUF_PUSH (push_dummy_failure);
-
- /* We allocated space for this jump when we assigned
- to `fixup_alt_jump', in the `handle_alt' case below. */
- STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
- }
- /* See similar code for backslashed left paren above. */
- if (COMPILE_STACK_EMPTY)
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
- goto normal_char;
- else
- return REG_ERPAREN;
- /* Since we just checked for an empty stack above, this
- ``can't happen''. */
- assert (compile_stack.avail != 0);
- {
- /* We don't just want to restore into `regnum', because
- later groups should continue to be numbered higher,
- as in `(ab)c(de)' -- the second group is #2. */
- regnum_t this_group_regnum;
- compile_stack.avail--;
- begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
- fixup_alt_jump
- = COMPILE_STACK_TOP.fixup_alt_jump
- ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
- : 0;
- laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
- this_group_regnum = COMPILE_STACK_TOP.regnum;
- /* If we've reached MAX_REGNUM groups, then this open
- won't actually generate any code, so we'll have to
- clear pending_exact explicitly. */
- pending_exact = 0;
- /* We're at the end of the group, so now we know how many
- groups were inside this one. */
- if (this_group_regnum <= MAX_REGNUM)
- {
- unsigned char *inner_group_loc
- = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
-
- *inner_group_loc = regnum - this_group_regnum;
- BUF_PUSH_3 (stop_memory, this_group_regnum,
- regnum - this_group_regnum);
- }
- }
- break;
- case '|': /* `|'. */
- if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
- goto normal_backslash;
- handle_alt:
- if (syntax & RE_LIMITED_OPS)
- goto normal_char;
- /* Insert before the previous alternative a jump which
- jumps to this alternative if the former fails. */
- GET_BUFFER_SPACE (3);
- INSERT_JUMP (on_failure_jump, begalt, b + 6);
- pending_exact = 0;
- b += 3;
- /* The alternative before this one has a jump after it
- which gets executed if it gets matched. Adjust that
- jump so it will jump to this alternative's analogous
- jump (put in below, which in turn will jump to the next
- (if any) alternative's such jump, etc.). The last such
- jump jumps to the correct final destination. A picture:
- _____ _____
- | | | |
- | v | v
- a | b | c
- If we are at `b', then fixup_alt_jump right now points to a
- three-byte space after `a'. We'll put in the jump, set
- fixup_alt_jump to right after `b', and leave behind three
- bytes which we'll fill in when we get to after `c'. */
- if (fixup_alt_jump)
- STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
- /* Mark and leave space for a jump after this alternative,
- to be filled in later either by next alternative or
- when know we're at the end of a series of alternatives. */
- fixup_alt_jump = b;
- GET_BUFFER_SPACE (3);
- b += 3;
- laststart = 0;
- begalt = b;
- break;
- case '{':
- /* If { is a literal. */
- if (!(syntax & RE_INTERVALS)
- /* If we're at `{' and it's not the open-interval
- operator. */
- || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
- || (p - 2 == pattern && p == pend))
- goto normal_backslash;
- handle_interval:
- {
- /* If got here, then the syntax allows intervals. */
- /* At least (most) this many matches must be made. */
- int lower_bound = -1, upper_bound = -1;
- beg_interval = p - 1;
- if (p == pend)
- {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
- else
- return REG_EBRACE;
- }
- GET_UNSIGNED_NUMBER (lower_bound);
- if (c == ',')
- {
- GET_UNSIGNED_NUMBER (upper_bound);
- if (upper_bound < 0) upper_bound = RE_DUP_MAX;
- }
- else
- /* Interval such as `{1}' => match exactly once. */
- upper_bound = lower_bound;
- if (lower_bound < 0 || upper_bound > RE_DUP_MAX
- || lower_bound > upper_bound)
- {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
- else
- return REG_BADBR;
- }
- if (!(syntax & RE_NO_BK_BRACES))
- {
- if (c != '\') return REG_EBRACE;
- PATFETCH (c);
- }
- if (c != '}')
- {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
- else
- return REG_BADBR;
- }
- /* We just parsed a valid interval. */
- /* If it's invalid to have no preceding re. */
- if (!laststart)
- {
- if (syntax & RE_CONTEXT_INVALID_OPS)
- return REG_BADRPT;
- else if (syntax & RE_CONTEXT_INDEP_OPS)
- laststart = b;
- else
- goto unfetch_interval;
- }
- /* If the upper bound is zero, don't want to succeed at
- all; jump from `laststart' to `b + 3', which will be
- the end of the buffer after we insert the jump. */
- if (upper_bound == 0)
- {
- GET_BUFFER_SPACE (3);
- INSERT_JUMP (jump, laststart, b + 3);
- b += 3;
- }
- /* Otherwise, we have a nontrivial interval. When
- we're all done, the pattern will look like:
- set_number_at <jump count> <upper bound>
- set_number_at <succeed_n count> <lower bound>
- succeed_n <after jump addr> <succed_n count>
- <body of loop>
- jump_n <succeed_n addr> <jump count>
- (The upper bound and `jump_n' are omitted if
- `upper_bound' is 1, though.) */
- else
- { /* If the upper bound is > 1, we need to insert
- more at the end of the loop. */
- unsigned nbytes = 10 + (upper_bound > 1) * 10;
- GET_BUFFER_SPACE (nbytes);
- /* Initialize lower bound of the `succeed_n', even
- though it will be set during matching by its
- attendant `set_number_at' (inserted next),
- because `re_compile_fastmap' needs to know.
- Jump to the `jump_n' we might insert below. */
- INSERT_JUMP2 (succeed_n, laststart,
- b + 5 + (upper_bound > 1) * 5,
- lower_bound);
- b += 5;
- /* Code to initialize the lower bound. Insert
- before the `succeed_n'. The `5' is the last two
- bytes of this `set_number_at', plus 3 bytes of
- the following `succeed_n'. */
- insert_op2 (set_number_at, laststart, 5, lower_bound, b);
- b += 5;
- if (upper_bound > 1)
- { /* More than one repetition is allowed, so
- append a backward jump to the `succeed_n'
- that starts this interval.
-
- When we've reached this during matching,
- we'll have matched the interval once, so
- jump back only `upper_bound - 1' times. */
- STORE_JUMP2 (jump_n, b, laststart + 5,
- upper_bound - 1);
- b += 5;
- /* The location we want to set is the second
- parameter of the `jump_n'; that is `b-2' as
- an absolute address. `laststart' will be
- the `set_number_at' we're about to insert;
- `laststart+3' the number to set, the source
- for the relative address. But we are
- inserting into the middle of the pattern --
- so everything is getting moved up by 5.
- Conclusion: (b - 2) - (laststart + 3) + 5,
- i.e., b - laststart.
-
- We insert this at the beginning of the loop
- so that if we fail during matching, we'll
- reinitialize the bounds. */
- insert_op2 (set_number_at, laststart, b - laststart,
- upper_bound - 1, b);
- b += 5;
- }
- }
- pending_exact = 0;
- beg_interval = NULL;
- }
- break;
- unfetch_interval:
- /* If an invalid interval, match the characters as literals. */
- assert (beg_interval);
- p = beg_interval;
- beg_interval = NULL;
- /* normal_char and normal_backslash need `c'. */
- PATFETCH (c);
- if (!(syntax & RE_NO_BK_BRACES))
- {
- if (p > pattern && p[-1] == '\')
- goto normal_backslash;
- }
- goto normal_char;
- #ifdef emacs
- /* There is no way to specify the before_dot and after_dot
- operators. rms says this is ok. --karl */
- case '=':
- BUF_PUSH (at_dot);
- break;
- case 's':
- laststart = b;
- PATFETCH (c);
- BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
- break;
- case 'S':
- laststart = b;
- PATFETCH (c);
- BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
- break;
- #endif /* emacs */
- case 'w':
- laststart = b;
- BUF_PUSH (wordchar);
- break;
- case 'W':
- laststart = b;
- BUF_PUSH (notwordchar);
- break;
- case '<':
- BUF_PUSH (wordbeg);
- break;
- case '>':
- BUF_PUSH (wordend);
- break;
- case 'b':
- BUF_PUSH (wordbound);
- break;
- case 'B':
- BUF_PUSH (notwordbound);
- break;
- case '`':
- BUF_PUSH (begbuf);
- break;
- case ''':
- BUF_PUSH (endbuf);
- break;
- case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9':
- if (syntax & RE_NO_BK_REFS)
- goto normal_char;
- c1 = c - '0';
- if (c1 > regnum)
- return REG_ESUBREG;
- /* Can't back reference to a subexpression if inside of it. */
- if (group_in_compile_stack (compile_stack, c1))
- goto normal_char;
- laststart = b;
- BUF_PUSH_2 (duplicate, c1);
- break;
- case '+':
- case '?':
- if (syntax & RE_BK_PLUS_QM)
- goto handle_plus;
- else
- goto normal_backslash;
- default:
- normal_backslash:
- /* You might think it would be useful for to mean
- not to translate; but if we don't translate it
- it will never match anything. */
- c = TRANSLATE (c);
- goto normal_char;
- }
- break;
- default:
- /* Expects the character in `c'. */
- normal_char:
- /* If no exactn currently being built. */
- if (!pending_exact
- /* If last exactn not at current position. */
- || pending_exact + *pending_exact + 1 != b
-
- /* We have only one byte following the exactn for the count. */
- || *pending_exact == (1 << BYTEWIDTH) - 1
- /* If followed by a repetition operator. */
- || *p == '*' || *p == '^'
- || ((syntax & RE_BK_PLUS_QM)
- ? *p == '\' && (p[1] == '+' || p[1] == '?')
- : (*p == '+' || *p == '?'))
- || ((syntax & RE_INTERVALS)
- && ((syntax & RE_NO_BK_BRACES)
- ? *p == '{'
- : (p[0] == '\' && p[1] == '{'))))
- {
- /* Start building a new exactn. */
-
- laststart = b;
- BUF_PUSH_2 (exactn, 0);
- pending_exact = b - 1;
- }
-
- BUF_PUSH (c);
- (*pending_exact)++;
- break;
- } /* switch (c) */
- } /* while p != pend */
-
- /* Through the pattern now. */
-
- if (fixup_alt_jump)
- STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
- if (!COMPILE_STACK_EMPTY)
- return REG_EPAREN;
- free (compile_stack.stack);
- /* We have succeeded; set the length of the buffer. */
- bufp->used = b - bufp->buffer;
- #ifdef DEBUG
- if (debug)
- {
- DEBUG_PRINT1 ("nCompiled pattern: ");
- print_compiled_pattern (bufp);
- }
- #endif /* DEBUG */
- return REG_NOERROR;
- } /* regex_compile */
- /* Subroutines for `regex_compile'. */
- /* Store OP at LOC followed by two-byte integer parameter ARG. */
- static void
- store_op1 (
- re_opcode_t op,
- unsigned char *loc,
- int arg)
- {
- *loc = (unsigned char) op;
- STORE_NUMBER (loc + 1, arg);
- }
- /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
- static void
- store_op2 (
- re_opcode_t op,
- unsigned char *loc,
- int arg1, int arg2)
- {
- *loc = (unsigned char) op;
- STORE_NUMBER (loc + 1, arg1);
- STORE_NUMBER (loc + 3, arg2);
- }
- /* Copy the bytes from LOC to END to open up three bytes of space at LOC
- for OP followed by two-byte integer parameter ARG. */
- static void
- insert_op1 (
- re_opcode_t op,
- unsigned char *loc,
- int arg,
- unsigned char *end)
- {
- register unsigned char *pfrom = end;
- register unsigned char *pto = end + 3;
- while (pfrom != loc)
- *--pto = *--pfrom;
-
- store_op1 (op, loc, arg);
- }
- /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
- static void
- insert_op2 (
- re_opcode_t op,
- unsigned char *loc,
- int arg1, int arg2,
- unsigned char *end)
- {
- register unsigned char *pfrom = end;
- register unsigned char *pto = end + 5;
- while (pfrom != loc)
- *--pto = *--pfrom;
-
- store_op2 (op, loc, arg1, arg2);
- }
- /* P points to just after a ^ in PATTERN. Return true if that ^ comes
- after an alternative or a begin-subexpression. We assume there is at
- least one character before the ^. */
- static boolean
- at_begline_loc_p (
- const char *pattern, const char *p,
- reg_syntax_t syntax)
- {
- const char *prev = p - 2;
- boolean prev_prev_backslash = prev > pattern && prev[-1] == '\';
-
- return
- /* After a subexpression? */
- (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
- /* After an alternative? */
- || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
- }
- /* The dual of at_begline_loc_p. This one is for $. We assume there is
- at least one character after the $, i.e., `P < PEND'. */
- static boolean
- at_endline_loc_p (
- const char *p, const char *pend,
- int syntax)
- {
- const char *next = p;
- boolean next_backslash = *next == '\';
- const char *next_next = p + 1 < pend ? p + 1 : NULL;
-
- return
- /* Before a subexpression? */
- (syntax & RE_NO_BK_PARENS ? *next == ')'
- : next_backslash && next_next && *next_next == ')')
- /* Before an alternative? */
- || (syntax & RE_NO_BK_VBAR ? *next == '|'
- : next_backslash && next_next && *next_next == '|');
- }
- /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
- false if it's not. */
- static boolean
- group_in_compile_stack (
- compile_stack_type compile_stack,
- regnum_t regnum)
- {
- int this_element;
- for (this_element = compile_stack.avail - 1;
- this_element >= 0;
- this_element--)
- if (compile_stack.stack[this_element].regnum == regnum)
- return true;
- return false;
- }
- /* Read the ending character of a range (in a bracket expression) from the
- uncompiled pattern *P_PTR (which ends at PEND). We assume the
- starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
- Then we set the translation of all bits between the starting and
- ending characters (inclusive) in the compiled pattern B.
-
- Return an error code.
-
- We use these short variable names so we can use the same macros as
- `regex_compile' itself. */
- static reg_errcode_t
- compile_range (
- const char **p_ptr, const char *pend,
- char *translate,
- reg_syntax_t syntax,
- unsigned char *b)
- {
- unsigned this_char;
- const char *p = *p_ptr;
- int range_start, range_end;
-
- if (p == pend)
- return REG_ERANGE;
- /* Even though the pattern is a signed `char *', we need to fetch
- with unsigned char *'s; if the high bit of the pattern character
- is set, the range endpoints will be negative if we fetch using a
- signed char *.
- We also want to fetch the endpoints without translating them; the
- appropriate translation is done in the bit-setting loop below. */
- range_start = ((unsigned char *) p)[-2];
- range_end = ((unsigned char *) p)[0];
- /* Have to increment the pointer into the pattern string, so the
- caller isn't still at the ending character. */
- (*p_ptr)++;
- /* If the start is after the end, the range is empty. */
- if (range_start > range_end)
- return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
- /* Here we see why `this_char' has to be larger than an `unsigned
- char' -- the range is inclusive, so if `range_end' == 0xff
- (assuming 8-bit characters), we would otherwise go into an infinite
- loop, since all characters <= 0xff. */
- for (this_char = range_start; this_char <= range_end; this_char++)
- {
- SET_LIST_BIT (TRANSLATE (this_char));
- }
-
- return REG_NOERROR;
- }
/* Failure stack declarations and macros; both re_compile_fastmap and
   re_match_2 use a failure stack.  These have to be macros because of
   REGEX_ALLOCATE.  */

/* Number of failure points for which to initially allocate space
   when matching.  If this number is exceeded, we allocate more
   space, so it is not a hard limit.  */
#ifndef INIT_FAILURE_ALLOC
#define INIT_FAILURE_ALLOC 5
#endif

/* Roughly the maximum number of failure points on the stack.  Would be
   exactly that if always used MAX_FAILURE_SPACE each time we failed.
   This is a variable only so users of regex can assign to it; we never
   change it ourselves.
   NOTE(review): it is declared `static' here, so only code in this
   file can actually assign to it.  */
static int re_max_failures = 2000;

/* One slot of the failure stack.  */
typedef const unsigned char *fail_stack_elt_t;

typedef struct
{
  fail_stack_elt_t *stack;	/* The slots themselves.  */
  unsigned size;		/* Number of slots allocated.  */
  unsigned avail;		/* Offset of next open position.  */
} fail_stack_type;

/* These all operate on a variable that must be named `fail_stack'
   (or, for the PTR variant, a pointer named `fail_stack_ptr').
   Note that FAIL_STACK_TOP yields the slot at index `avail', i.e. the
   next open position, not the most recently pushed item.  */
#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
#define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])

/* Initialize `fail_stack'.  Do `return -2' if the alloc fails, so this
   can only be used inside a function for which that return statement
   is valid.  On success the stack has INIT_FAILURE_ALLOC slots and is
   empty.  */
#define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.stack = (fail_stack_elt_t *)				\
      REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
									\
    if (fail_stack.stack == NULL)					\
      return -2;							\
									\
    fail_stack.size = INIT_FAILURE_ALLOC;				\
    fail_stack.avail = 0;						\
  } while (0)
/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
   Return 1 if succeeds, and 0 if either ran out of memory
   allocating space for it or it was already too large.

   "Too large" means the current size already exceeds
   re_max_failures * MAX_FAILURE_ITEMS; in that case nothing is
   touched.  Otherwise the stack pointer is overwritten with the result
   of the reallocation before it is tested against NULL, and the size
   is doubled only when that result is non-null.
   NOTE(review): whether the old block is lost on failure depends on
   how REGEX_REALLOCATE is defined -- confirm against its definition.

   REGEX_REALLOCATE requires `destination' be declared.  */
#define DOUBLE_FAIL_STACK(fail_stack)					\
  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
   ? 0									\
   : ((fail_stack).stack = (fail_stack_elt_t *)				\
        REGEX_REALLOCATE ((fail_stack).stack,				\
          (fail_stack).size * sizeof (fail_stack_elt_t),		\
          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
									\
      (fail_stack).stack == NULL					\
      ? 0								\
      : ((fail_stack).size <<= 1,					\
         1)))
/* Push PATTERN_OP on FAIL_STACK.
   Return 1 if was able to do so and 0 if ran out of memory allocating
   space to do so.

   When the stack is full it is first grown with DOUBLE_FAIL_STACK; if
   that fails nothing is pushed.  The fullness test goes through
   FAIL_STACK_FULL, which looks at the variable literally named
   `fail_stack' rather than at the macro argument, so the stack passed
   in is expected to be that variable.  */
#define PUSH_PATTERN_OP(pattern_op, fail_stack)				\
  ((FAIL_STACK_FULL ()							\
    && !DOUBLE_FAIL_STACK (fail_stack))					\
    ? 0									\
    : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,		\
       1))
/* This pushes an item onto the failure stack.  Must be a four-byte
   value.  Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.

   The item is cast to `fail_stack_elt_t' and stored in the next open
   slot; no check is made that a slot is available.  */
#define PUSH_FAILURE_ITEM(item)						\
  fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item

/* The complement operation.  Assumes `fail_stack' is nonempty.  */
#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]

/* Used to omit pushing failure point id's when we're not debugging.
   Without DEBUG both macros expand to nothing.  */
#ifdef DEBUG
#define DEBUG_PUSH PUSH_FAILURE_ITEM
#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
#else
#define DEBUG_PUSH(item)
#define DEBUG_POP(item_addr)
#endif
/* Push the information about the state we will need
   if we ever fail back to it.

   Requires variables fail_stack, regstart, regend, reg_info, and
   num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
   declared.

   Does `return FAILURE_CODE' if runs out of memory.

   The items are pushed in this order: for each register from
   lowest_active_reg to highest_active_reg its regstart, regend and
   reg_info word; then lowest_active_reg, highest_active_reg,
   PATTERN_PLACE and STRING_PLACE; and finally (through DEBUG_PUSH,
   i.e. only when debugging) the failure id.  */
#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
  do {									\
    char *destination;							\
    /* Must be int, so when we don't save any registers, the arithmetic	\
       of 0 + -1 isn't done as unsigned.  */				\
    int this_reg;							\
									\
    DEBUG_STATEMENT (failure_id++);					\
    DEBUG_STATEMENT (nfailure_points_pushed++);				\
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
    DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
    DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);			\
									\
    DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS);		\
    DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS);		\
									\
    /* Ensure we have enough space allocated for what we will push.  */	\
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
      {									\
        if (!DOUBLE_FAIL_STACK (fail_stack))				\
          return failure_code;						\
									\
        DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n",		\
                      (fail_stack).size);				\
        DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);	\
      }									\
									\
    /* Push the info, starting with the registers.  */			\
    DEBUG_PRINT1 ("\n");						\
									\
    for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;	\
         this_reg++)							\
      {									\
        DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg);			\
        DEBUG_STATEMENT (num_regs_pushed++);				\
									\
        DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]);		\
        PUSH_FAILURE_ITEM (regstart[this_reg]);				\
									\
        DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]);		\
        PUSH_FAILURE_ITEM (regend[this_reg]);				\
									\
        DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]);		\
        DEBUG_PRINT2 (" match_null=%d",					\
                      REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
        DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
        DEBUG_PRINT2 (" matched_something=%d",				\
                      MATCHED_SOMETHING (reg_info[this_reg]));		\
        DEBUG_PRINT2 (" ever_matched=%d",				\
                      EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
        DEBUG_PRINT1 ("\n");						\
        PUSH_FAILURE_ITEM (reg_info[this_reg].word);			\
      }									\
									\
    DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);	\
    PUSH_FAILURE_ITEM (lowest_active_reg);				\
									\
    DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
    PUSH_FAILURE_ITEM (highest_active_reg);				\
									\
    DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place);		\
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
    PUSH_FAILURE_ITEM (pattern_place);					\
									\
    DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place);		\
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,	\
                               size2);					\
    DEBUG_PRINT1 ("'\n");						\
    PUSH_FAILURE_ITEM (string_place);					\
									\
    DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id);		\
    DEBUG_PUSH (failure_id);						\
  } while (0)
/* This is the number of items that are pushed and popped on the stack
   for each register.  */
#define NUM_REG_ITEMS  3

/* Individual items aside from the registers.  */
#ifdef DEBUG
#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
#else
#define NUM_NONREG_ITEMS 4
#endif

/* We push at most this many items on the stack.  Assumes the variable
   `num_regs'.  */
#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)

/* We actually push this many items.  Assumes the variables
   `lowest_active_reg' and `highest_active_reg'.  */
#define NUM_FAILURE_ITEMS						\
  ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS	\
    + NUM_NONREG_ITEMS)

/* How many items can still be added to the stack without overflowing it.  */
#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
- /* Pops what PUSH_FAIL_STACK pushes.
- We restore into the parameters, all of which should be lvalues:
- STR -- the saved data position.
- PAT -- the saved pattern position.
- LOW_REG, HIGH_REG -- the highest and lowest active registers.
- REGSTART, REGEND -- arrays of string positions.
- REG_INFO -- array of information about each subexpression.
-
- Also assumes the variables `fail_stack' and (if debugging), `bufp',
- `pend', `string1', `size1', `string2', and `size2'. */
- #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)
- {