/* GNUregex.c
 * Uploaded by: liugui
 * Upload date: 2007-01-04
 * Archive size: 822k
 * File size: 140k
 */
- /*
- * $Id: GNUregex.c,v 1.11 1998/09/23 17:14:20 wessels Exp $
- */
- /* Extended regular expression matching and search library,
- * version 0.12.
- * (Implements POSIX draft P10003.2/D11.2, except for
- * internationalization features.)
- *
- * Copyright (C) 1993 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. */
- /* AIX requires this to be the first thing in the file. */
- #if defined (_AIX) && !defined (REGEX_MALLOC)
- #pragma alloca
- #endif
- #ifndef _GNU_SOURCE
- #define _GNU_SOURCE 1
- #endif
- #include "config.h"
- #if !HAVE_ALLOCA
- #define REGEX_MALLOC 1
- #endif
- /* The `emacs' switch turns on certain matching commands
- * that make sense only in Emacs. */
- #ifdef emacs
- #include "lisp.h"
- #include "buffer.h"
- #include "syntax.h"
- /* Emacs uses `NULL' as a predicate. */
- #undef NULL
- #else /* not emacs */
- /* We used to test for `BSTRING' here, but only GCC and Emacs define
- * `BSTRING', as far as I know, and neither of them use this code. */
- #if HAVE_STRING_H || STDC_HEADERS
- #include <string.h>
- #else
- #include <strings.h>
- #endif
- #ifdef STDC_HEADERS
- #include <stdlib.h>
- #else
- char *malloc();
- char *realloc();
- #endif
/* Define the syntax stuff for \<, \>, etc. */
/* This must be nonzero for the wordchar and notwordchar pattern
 * commands in re_match_2. */
#ifndef Sword
#define Sword 1
#endif
#ifdef SYNTAX_TABLE
extern char *re_syntax_table;
#else /* not SYNTAX_TABLE */
/* How many characters in the character set. */
#define CHAR_SET_SIZE 256
/* Syntax class of each character, indexed by character code: Sword for
 * word constituents, zero for everything else. */
static char re_syntax_table[CHAR_SET_SIZE];

/* Fill in re_syntax_table the first time this is called; every later
 * call returns immediately.  Letters, digits and `_' are marked as
 * Sword, all other entries are cleared to zero. */
static void
init_syntax_once()
{
    register int c;
    static int done = 0;

    if (done)
        return;

    memset(re_syntax_table, 0, sizeof re_syntax_table);

    for (c = 'a'; c <= 'z'; c++)
        re_syntax_table[c] = Sword;
    for (c = 'A'; c <= 'Z'; c++)
        re_syntax_table[c] = Sword;
    for (c = '0'; c <= '9'; c++)
        re_syntax_table[c] = Sword;
    re_syntax_table['_'] = Sword;

    done = 1;
}
#endif /* not SYNTAX_TABLE */
/* Look up the syntax class of character C. */
#define SYNTAX(c) re_syntax_table[c]
- #endif /* not emacs */
- /* Get the interface, including the syntax bits. */
- #include "GNUregex.h"
- /* isalpha etc. are used for the character classes. */
- #include <ctype.h>
/* Character-class predicates.  Each one first checks isascii so that,
 * where isascii is a real macro, the <ctype.h> classifier is only
 * consulted for ASCII values.  When the system does not provide an
 * isascii macro, every character is accepted. */
#ifndef isascii
#define isascii(c) 1
#endif
#ifdef isblank
#define ISBLANK(c) (isascii (c) && isblank (c))
#else
/* No isblank macro available: a blank is a space or a horizontal tab. */
#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif
#ifdef isgraph
#define ISGRAPH(c) (isascii (c) && isgraph (c))
#else
#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
#endif
#define ISPRINT(c) (isascii (c) && isprint (c))
#define ISDIGIT(c) (isascii (c) && isdigit (c))
#define ISALNUM(c) (isascii (c) && isalnum (c))
#define ISALPHA(c) (isascii (c) && isalpha (c))
#define ISCNTRL(c) (isascii (c) && iscntrl (c))
#define ISLOWER(c) (isascii (c) && islower (c))
#define ISPUNCT(c) (isascii (c) && ispunct (c))
#define ISSPACE(c) (isascii (c) && isspace (c))
#define ISUPPER(c) (isascii (c) && isupper (c))
#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
#ifndef NULL
#define NULL 0
#endif
/* We remove any previous definition of `SIGN_EXTEND_CHAR',
 * since ours (we hope) works properly with all combinations of
 * machines, compilers, `char' and `unsigned char' argument types.
 * (Per Bothner suggested the basic approach.)
 *
 * The macro yields the low eight bits of C reinterpreted as a signed
 * quantity, i.e. a value in the range -128..127. */
#undef SIGN_EXTEND_CHAR
#ifdef __STDC__
#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
#else /* not __STDC__ */
/* As in Harbison and Steele. */
#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
#endif
- /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
- * use `alloca' instead of `malloc'. This is because using malloc in
- * re_search* or re_match* could cause memory leaks when C-g is used in
- * Emacs; also, malloc is slower and causes storage fragmentation. On
- * the other hand, malloc is more portable, and easier to debug.
- *
- * Because we sometimes use alloca, some routines have to be macros,
- * not functions -- `alloca'-allocated space disappears at the end of the
- * function it is called in. */
/* Scratch-memory primitives used by the matcher.  With REGEX_MALLOC
 * they map onto malloc/realloc; otherwise they map onto alloca, in
 * which case "reallocation" is a fresh alloca block plus a copy. */
#ifdef REGEX_MALLOC
#define REGEX_ALLOCATE malloc
#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
#else /* not REGEX_MALLOC */
/* Emacs already defines alloca, sometimes. */
#ifndef alloca
/* Make alloca work the best possible way. */
#ifdef __GNUC__
#define alloca __builtin_alloca
#else /* not __GNUC__ */
#if HAVE_ALLOCA_H
#include <alloca.h>
#else /* not __GNUC__ or HAVE_ALLOCA_H */
#ifndef _AIX			/* Already did AIX, up at the top. */
char *alloca();
#endif /* not _AIX */
#endif /* not HAVE_ALLOCA_H */
#endif /* not __GNUC__ */
#endif /* not alloca */
#define REGEX_ALLOCATE alloca
/* Assumes a `char *destination' variable.  Allocates NSIZE bytes,
 * copies the first OSIZE bytes of SOURCE into them and yields the new
 * block; SOURCE itself is left untouched. */
#define REGEX_REALLOCATE(source, osize, nsize)				\
  (destination = (char *) alloca (nsize),				\
   xmemcpy (destination, source, osize),				\
   destination)
#endif /* not REGEX_MALLOC */
/* True if `size1' is nonzero and PTR is pointing anywhere inside
 * `string1' or just past its end.  This works if PTR is NULL, which is
 * a good thing.  Relies on `string1' and `size1' being in scope at the
 * point of use. */
#define FIRST_STRING_P(ptr)						\
  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
/* (Re)Allocate N items of type T using malloc, or fail.  "Fail" means
 * the expression evaluates to a null pointer; callers must check. */
#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
/* Same as TALLOC, but through REGEX_ALLOCATE (malloc or alloca). */
#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))

#define BYTEWIDTH 8		/* In bits. */

/* Nonzero when the two C strings compare equal. */
#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))

/* Note: both macros evaluate one of their arguments twice. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

typedef char boolean;
#define false 0
#define true 1
/* These are the command codes that appear in compiled regular
 * expressions.  Some opcodes are followed by argument bytes.  A
 * command code can specify any interpretation whatsoever for its
 * arguments.  Zero bytes may appear in the compiled regular expression.
 *
 * The value of `exactn' is needed in search.c (search_buffer) in Emacs.
 * So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
 * `exactn' we use here must also be 1. */
typedef enum {
    no_op = 0,

    /* Followed by one byte giving n, then by n literal bytes. */
    exactn = 1,

    /* Matches any (more or less) character. */
    anychar,

    /* Matches any one char belonging to specified set.  First
     * following byte is number of bitmap bytes.  Then come bytes
     * for a bitmap saying which chars are in.  Bits in each byte
     * are ordered low-bit-first.  A character is in the set if its
     * bit is 1.  A character too large to have a bit in the map is
     * automatically not in the set. */
    charset,

    /* Same parameters as charset, but match any character that is
     * not one of those specified. */
    charset_not,

    /* Start remembering the text that is matched, for storing in a
     * register.  Followed by one byte with the register number, in
     * the range 0 to one less than the pattern buffer's re_nsub
     * field.  Then followed by one byte with the number of groups
     * inner to this one.  (This last has to be part of the
     * start_memory only because we need it in the on_failure_jump
     * of re_match_2.) */
    start_memory,

    /* Stop remembering the text that is matched and store it in a
     * memory register.  Followed by one byte with the register
     * number, in the range 0 to one less than `re_nsub' in the
     * pattern buffer, and one byte with the number of inner groups,
     * just like `start_memory'.  (We need the number of inner
     * groups here because we don't have any easy way of finding the
     * corresponding start_memory when we're at a stop_memory.) */
    stop_memory,

    /* Match a duplicate of something remembered.  Followed by one
     * byte containing the register number. */
    duplicate,

    /* Fail unless at beginning of line. */
    begline,

    /* Fail unless at end of line. */
    endline,

    /* Succeeds if at beginning of buffer (if emacs) or at beginning
     * of string to be matched (if not). */
    begbuf,

    /* Analogously, for end of buffer/string. */
    endbuf,

    /* Followed by two byte relative address to which to jump. */
    jump,

    /* Same as jump, but marks the end of an alternative. */
    jump_past_alt,

    /* Followed by two-byte relative address of place to resume at
     * in case of failure. */
    on_failure_jump,

    /* Like on_failure_jump, but pushes a placeholder instead of the
     * current string position when executed. */
    on_failure_keep_string_jump,

    /* Throw away latest failure point and then jump to following
     * two-byte relative address. */
    pop_failure_jump,

    /* Change to pop_failure_jump if know won't have to backtrack to
     * match; otherwise change to jump.  This is used to jump
     * back to the beginning of a repeat.  If what follows this jump
     * clearly won't match what the repeat does, such that we can be
     * sure that there is no use backtracking out of repetitions
     * already matched, then we change it to a pop_failure_jump.
     * Followed by two-byte address. */
    maybe_pop_jump,

    /* Jump to following two-byte address, and push a dummy failure
     * point.  This failure point will be thrown away if an attempt
     * is made to use it for a failure.  A `+' construct makes this
     * before the first repeat.  Also used as an intermediary kind
     * of jump when compiling an alternative. */
    dummy_failure_jump,

    /* Push a dummy failure point and continue.  Used at the end of
     * alternatives. */
    push_dummy_failure,

    /* Followed by two-byte relative address and two-byte number n.
     * After matching N times, jump to the address upon failure. */
    succeed_n,

    /* Followed by two-byte relative address, and two-byte number n.
     * Jump to the address N times, then fail. */
    jump_n,

    /* Set the following two-byte relative address to the
     * subsequent two-byte number.  The address *includes* the two
     * bytes of number. */
    set_number_at,

    wordchar,			/* Matches any word-constituent character. */
    notwordchar,		/* Matches any char that is not a word-constituent. */

    wordbeg,			/* Succeeds if at word beginning. */
    wordend,			/* Succeeds if at word end. */

    wordbound,			/* Succeeds if at a word boundary. */
    notwordbound		/* Succeeds if not at a word boundary. */

#ifdef emacs
    ,before_dot,		/* Succeeds if before point. */
    at_dot,			/* Succeeds if at point. */
    after_dot,			/* Succeeds if after point. */

    /* Matches any character whose syntax is specified.  Followed by
     * a byte which contains a syntax code, e.g., Sword. */
    syntaxspec,

    /* Matches any character whose syntax is not that specified. */
    notsyntaxspec
#endif /* emacs */
} re_opcode_t;
/* Common operations on the compiled pattern. */

/* Store NUMBER in two contiguous bytes starting at DESTINATION.  The
 * low-order byte goes first. */
#define STORE_NUMBER(destination, number)				\
  do {									\
    (destination)[0] = (number) & 0377;					\
    (destination)[1] = (number) >> 8;					\
  } while (0)

/* Same as STORE_NUMBER, except increment DESTINATION to
 * the byte after where the number is stored.  Therefore, DESTINATION
 * must be an lvalue. */
#define STORE_NUMBER_AND_INCR(destination, number)			\
  do {									\
    STORE_NUMBER (destination, number);					\
    (destination) += 2;							\
  } while (0)

/* Put into DESTINATION a number stored in two contiguous bytes starting
 * at SOURCE.  The second byte is sign-extended, so negative offsets
 * round-trip.  The high byte is scaled by multiplication rather than
 * `<< 8' because left-shifting a negative value is undefined. */
#define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * (1 << 8);	\
  } while (0)
#ifdef DEBUG
/* Function form of EXTRACT_NUMBER, handy in a debugger: read the
 * two-byte number at SOURCE (low byte first, high byte sign-extended)
 * and store it in *DEST. */
static void
extract_number(dest, source)
     int *dest;
     unsigned char *source;
{
    int temp = SIGN_EXTEND_CHAR(*(source + 1));

    *dest = *source & 0377;
    /* Multiply instead of `temp << 8': TEMP may be negative, and
     * left-shifting a negative value is undefined. */
    *dest += temp * (1 << 8);
}
#ifndef EXTRACT_MACROS		/* To debug the macros. */
#undef EXTRACT_NUMBER
#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
#endif /* not EXTRACT_MACROS */
#endif /* DEBUG */
/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
 * SOURCE must be an lvalue. */
#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
  do {									\
    EXTRACT_NUMBER (destination, source);				\
    (source) += 2;							\
  } while (0)
#ifdef DEBUG
/* Function form of EXTRACT_NUMBER_AND_INCR: store the two-byte number
 * at *SOURCE in *DESTINATION, then advance *SOURCE past it. */
static void
extract_number_and_incr(destination, source)
     int *destination;
     unsigned char **source;
{
    extract_number(destination, *source);
    *source += 2;
}
#ifndef EXTRACT_MACROS
#undef EXTRACT_NUMBER_AND_INCR
#define EXTRACT_NUMBER_AND_INCR(dest, src)				\
  extract_number_and_incr (&dest, &src)
#endif /* not EXTRACT_MACROS */
#endif /* DEBUG */
- /* If DEBUG is defined, Regex prints many voluminous messages about what
- * it is doing (if the variable `debug' is nonzero). If linked with the
- * main program in `iregex.c', you can enter patterns and strings
- * interactively. And if linked with the main program in `main.c' and
- * the other test files, you can run the already-written tests. */
- #ifdef DEBUG
/* We use standard I/O for debugging. */
#include <stdio.h>
/* It is useful to test things that ``must'' be true when debugging. */
#include <assert.h>

/* Debug output is produced only while this is nonzero. */
static int debug = 0;

/* Compile E in debugging builds. */
#define DEBUG_STATEMENT(e) e
/* printf wrappers taking a format plus zero to three arguments.  They
 * expand to a bare `if', so use them only as complete statements. */
#define DEBUG_PRINT1(x) if (debug) printf (x)
#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
/* Dump the compiled pattern between S and E; P is not used. */
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)				\
  if (debug) print_partial_compiled_pattern (s, e)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
  if (debug) print_double_string (w, s1, sz1, s2, sz2)

extern void printchar();
- /* Print the fastmap in human-readable form. */
- void
- print_fastmap(fastmap)
- char *fastmap;
- {
- unsigned was_a_range = 0;
- unsigned i = 0;
- while (i < (1 << BYTEWIDTH)) {
- if (fastmap[i++]) {
- was_a_range = 0;
- printchar(i - 1);
- while (i < (1 << BYTEWIDTH) && fastmap[i]) {
- was_a_range = 1;
- i++;
- }
- if (was_a_range) {
- printf("-");
- printchar(i - 1);
- }
- }
- }
- putchar('n');
- }
- /* Print a compiled pattern string in human-readable form, starting at
- * the START pointer into it and ending just before the pointer END. */
- void
- print_partial_compiled_pattern(start, end)
- unsigned char *start;
- unsigned char *end;
- {
- int mcnt, mcnt2;
- unsigned char *p = start;
- unsigned char *pend = end;
- if (start == NULL) {
- printf("(null)n");
- return;
- }
- /* Loop over pattern commands. */
- while (p < pend) {
- switch ((re_opcode_t) * p++) {
- case no_op:
- printf("/no_op");
- break;
- case exactn:
- mcnt = *p++;
- printf("/exactn/%d", mcnt);
- do {
- putchar('/');
- printchar(*p++);
- }
- while (--mcnt);
- break;
- case start_memory:
- mcnt = *p++;
- printf("/start_memory/%d/%d", mcnt, *p++);
- break;
- case stop_memory:
- mcnt = *p++;
- printf("/stop_memory/%d/%d", mcnt, *p++);
- break;
- case duplicate:
- printf("/duplicate/%d", *p++);
- break;
- case anychar:
- printf("/anychar");
- break;
- case charset:
- case charset_not:
- {
- register int c;
- printf("/charset%s",
- (re_opcode_t) * (p - 1) == charset_not ? "_not" : "");
- assert(p + *p < pend);
- for (c = 0; c < *p; c++) {
- unsigned bit;
- unsigned char map_byte = p[1 + c];
- putchar('/');
- for (bit = 0; bit < BYTEWIDTH; bit++)
- if (map_byte & (1 << bit))
- printchar(c * BYTEWIDTH + bit);
- }
- p += 1 + *p;
- break;
- }
- case begline:
- printf("/begline");
- break;
- case endline:
- printf("/endline");
- break;
- case on_failure_jump:
- extract_number_and_incr(&mcnt, &p);
- printf("/on_failure_jump/0/%d", mcnt);
- break;
- case on_failure_keep_string_jump:
- extract_number_and_incr(&mcnt, &p);
- printf("/on_failure_keep_string_jump/0/%d", mcnt);
- break;
- case dummy_failure_jump:
- extract_number_and_incr(&mcnt, &p);
- printf("/dummy_failure_jump/0/%d", mcnt);
- break;
- case push_dummy_failure:
- printf("/push_dummy_failure");
- break;
- case maybe_pop_jump:
- extract_number_and_incr(&mcnt, &p);
- printf("/maybe_pop_jump/0/%d", mcnt);
- break;
- case pop_failure_jump:
- extract_number_and_incr(&mcnt, &p);
- printf("/pop_failure_jump/0/%d", mcnt);
- break;
- case jump_past_alt:
- extract_number_and_incr(&mcnt, &p);
- printf("/jump_past_alt/0/%d", mcnt);
- break;
- case jump:
- extract_number_and_incr(&mcnt, &p);
- printf("/jump/0/%d", mcnt);
- break;
- case succeed_n:
- extract_number_and_incr(&mcnt, &p);
- extract_number_and_incr(&mcnt2, &p);
- printf("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
- break;
- case jump_n:
- extract_number_and_incr(&mcnt, &p);
- extract_number_and_incr(&mcnt2, &p);
- printf("/jump_n/0/%d/0/%d", mcnt, mcnt2);
- break;
- case set_number_at:
- extract_number_and_incr(&mcnt, &p);
- extract_number_and_incr(&mcnt2, &p);
- printf("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
- break;
- case wordbound:
- printf("/wordbound");
- break;
- case notwordbound:
- printf("/notwordbound");
- break;
- case wordbeg:
- printf("/wordbeg");
- break;
- case wordend:
- printf("/wordend");
- #ifdef emacs
- case before_dot:
- printf("/before_dot");
- break;
- case at_dot:
- printf("/at_dot");
- break;
- case after_dot:
- printf("/after_dot");
- break;
- case syntaxspec:
- printf("/syntaxspec");
- mcnt = *p++;
- printf("/%d", mcnt);
- break;
- case notsyntaxspec:
- printf("/notsyntaxspec");
- mcnt = *p++;
- printf("/%d", mcnt);
- break;
- #endif /* emacs */
- case wordchar:
- printf("/wordchar");
- break;
- case notwordchar:
- printf("/notwordchar");
- break;
- case begbuf:
- printf("/begbuf");
- break;
- case endbuf:
- printf("/endbuf");
- break;
- default:
- printf("?%d", *(p - 1));
- }
- }
- printf("/n");
- }
- void
- print_compiled_pattern(bufp)
- struct re_pattern_buffer *bufp;
- {
- unsigned char *buffer = bufp->buffer;
- print_partial_compiled_pattern(buffer, buffer + bufp->used);
- printf("%d bytes used/%d bytes allocated.n", bufp->used, bufp->allocated);
- if (bufp->fastmap_accurate && bufp->fastmap) {
- printf("fastmap: ");
- print_fastmap(bufp->fastmap);
- }
- printf("re_nsub: %dt", bufp->re_nsub);
- printf("regs_alloc: %dt", bufp->regs_allocated);
- printf("can_be_null: %dt", bufp->can_be_null);
- printf("newline_anchor: %dn", bufp->newline_anchor);
- printf("no_sub: %dt", bufp->no_sub);
- printf("not_bol: %dt", bufp->not_bol);
- printf("not_eol: %dt", bufp->not_eol);
- printf("syntax: %dn", bufp->syntax);
- /* Perhaps we should print the translate table? */
- }
/* Print the text from WHERE to the end of the two-part subject string.
 *
 * If WHERE lies in STRING1 (per FIRST_STRING_P), the rest of STRING1
 * is printed and then all of STRING2; otherwise WHERE is taken to
 * point into STRING2 and only the rest of STRING2 is printed.  A null
 * WHERE prints "(null)". */
void
print_double_string(where, string1, size1, string2, size2)
     const char *where;
     const char *string1;
     const char *string2;
     int size1;
     int size2;
{
    unsigned this_char;

    if (where == NULL)
        printf("(null)");
    else {
        if (FIRST_STRING_P(where)) {
            for (this_char = where - string1; this_char < size1; this_char++)
                printchar(string1[this_char]);

            /* Continue from the very start of the second string. */
            where = string2;
        }
        for (this_char = where - string2; this_char < size2; this_char++)
            printchar(string2[this_char]);
    }
}
- #else /* not DEBUG */
/* Non-debugging build: assertions and every debug helper expand to
 * nothing, so their arguments are never evaluated. */
#undef assert
#define assert(e)

#define DEBUG_STATEMENT(e)
#define DEBUG_PRINT1(x)
#define DEBUG_PRINT2(x1, x2)
#define DEBUG_PRINT3(x1, x2, x3)
#define DEBUG_PRINT4(x1, x2, x3, x4)
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
- #endif /* not DEBUG */
- /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
- * also be assigned to arbitrarily: each pattern buffer stores its own
- * syntax, so it can be changed between regex compilations. */
- reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
- /* Specify the precise syntax of regexps for compilation. This provides
- * for compatibility for various utilities which historically have
- * different, incompatible syntaxes.
- *
- * The argument SYNTAX is a bit mask comprised of the various bits
- * defined in regex.h. We return the old syntax. */
- reg_syntax_t
- re_set_syntax(syntax)
- reg_syntax_t syntax;
- {
- reg_syntax_t ret = re_syntax_options;
- re_syntax_options = syntax;
- return ret;
- }
/* This table gives an error message for each of the error codes listed
 * in regex.h.  Obviously the order here has to be same as there.  The
 * doubled backslashes produce a single literal backslash in the
 * message text. */
static const char *re_error_msg[] =
{
    NULL,					/* REG_NOERROR */
    "No match",					/* REG_NOMATCH */
    "Invalid regular expression",		/* REG_BADPAT */
    "Invalid collation character",		/* REG_ECOLLATE */
    "Invalid character class name",		/* REG_ECTYPE */
    "Trailing backslash",			/* REG_EESCAPE */
    "Invalid back reference",			/* REG_ESUBREG */
    "Unmatched [ or [^",			/* REG_EBRACK */
    "Unmatched ( or \\(",			/* REG_EPAREN */
    "Unmatched \\{",				/* REG_EBRACE */
    "Invalid content of \\{\\}",		/* REG_BADBR */
    "Invalid range end",			/* REG_ERANGE */
    "Memory exhausted",				/* REG_ESPACE */
    "Invalid preceding regular expression",	/* REG_BADRPT */
    "Premature end of regular expression",	/* REG_EEND */
    "Regular expression too big",		/* REG_ESIZE */
    "Unmatched ) or \\)",			/* REG_ERPAREN */
};
- /* Subroutine declarations and macros for regex_compile. */
- static void store_op1(), store_op2();
- static void insert_op1(), insert_op2();
- static boolean at_begline_loc_p(), at_endline_loc_p();
- static boolean group_in_compile_stack();
- static reg_errcode_t compile_range();
/* Fetch the next character in the uncompiled pattern---translating it
 * if necessary.  Also cast from a signed character in the constant
 * string passed to us by the user to an unsigned char that we can use
 * as an array index (in, e.g., `translate').  Makes the enclosing
 * function return REG_EEND when the pattern is exhausted; relies on
 * `p', `pend' and `translate' being in scope. */
#define PATFETCH(c)							\
  do {if (p == pend) return REG_EEND;					\
    c = (unsigned char) *p++;						\
    if (translate) c = translate[c];					\
  } while (0)

/* Fetch the next character in the uncompiled pattern, with no
 * translation. */
#define PATFETCH_RAW(c)							\
  do {if (p == pend) return REG_EEND;					\
    c = (unsigned char) *p++;						\
  } while (0)

/* Go backwards one character in the pattern. */
#define PATUNFETCH p--

/* If `translate' is non-null, return translate[D], else just D.  We
 * cast the subscript to translate because some data is declared as
 * `char *', to avoid warnings when a string constant is passed.  But
 * when we use a character as a subscript we must make it unsigned. */
#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
/* Macros for outputting the compiled pattern into `buffer'.  They all
 * rely on the compiler's local state being in scope: `bufp' and `b',
 * and for EXTEND_BUFFER also `begalt', `fixup_alt_jump', `laststart'
 * and `pending_exact'. */

/* If the buffer isn't allocated when it comes in, use this. */
#define INIT_BUF_SIZE 32

/* Make sure we have at least N more bytes of space in buffer. */
#define GET_BUFFER_SPACE(n)						\
  while (b - bufp->buffer + (n) > bufp->allocated)			\
    EXTEND_BUFFER ()

/* Make sure we have one more byte of buffer space and then add C to it. */
#define BUF_PUSH(c)							\
  do {									\
    GET_BUFFER_SPACE (1);						\
    *b++ = (unsigned char) (c);						\
  } while (0)

/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
#define BUF_PUSH_2(c1, c2)						\
  do {									\
    GET_BUFFER_SPACE (2);						\
    *b++ = (unsigned char) (c1);					\
    *b++ = (unsigned char) (c2);					\
  } while (0)

/* As with BUF_PUSH_2, except for three bytes. */
#define BUF_PUSH_3(c1, c2, c3)						\
  do {									\
    GET_BUFFER_SPACE (3);						\
    *b++ = (unsigned char) (c1);					\
    *b++ = (unsigned char) (c2);					\
    *b++ = (unsigned char) (c3);					\
  } while (0)

/* Store a jump with opcode OP at LOC to location TO.  We store a
 * relative address offset by the three bytes the jump itself occupies. */
#define STORE_JUMP(op, loc, to)						\
  store_op1 (op, loc, (to) - (loc) - 3)

/* Likewise, for a two-argument jump. */
#define STORE_JUMP2(op, loc, to, arg)					\
  store_op2 (op, loc, (to) - (loc) - 3, arg)

/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end. */
#define INSERT_JUMP(op, loc, to)					\
  insert_op1 (op, loc, (to) - (loc) - 3, b)

/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end. */
#define INSERT_JUMP2(op, loc, to, arg)					\
  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)

/* This is not an arbitrary limit: the arguments which represent offsets
 * into the pattern are two bytes long.  So if 2^16 bytes turns out to
 * be too small, many things would have to change. */
#define MAX_BUF_SIZE (1L << 16)

/* Extend the buffer by twice its current size via realloc and
 * reset the pointers that pointed into the old block to point to the
 * correct places in the new one.  If extending the buffer results in it
 * being larger than MAX_BUF_SIZE, then flag memory exhausted.
 *
 * Makes the enclosing function return REG_ESIZE when the buffer is
 * already at MAX_BUF_SIZE, and REG_ESPACE when realloc fails.  The
 * realloc result is held in a temporary so that a failure leaves
 * bufp->buffer and bufp->allocated unchanged rather than leaking the
 * old block. */
#define EXTEND_BUFFER()							\
  do {									\
    unsigned char *old_buffer = bufp->buffer;				\
    unsigned char *extended_buffer;					\
    unsigned long extended_size;					\
    if (bufp->allocated == MAX_BUF_SIZE)				\
      return REG_ESIZE;							\
    extended_size = bufp->allocated << 1;				\
    if (extended_size > MAX_BUF_SIZE)					\
      extended_size = MAX_BUF_SIZE;					\
    extended_buffer = (unsigned char *) realloc (old_buffer, extended_size); \
    if (extended_buffer == NULL)					\
      return REG_ESPACE;						\
    bufp->buffer = extended_buffer;					\
    bufp->allocated = extended_size;					\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != bufp->buffer)					\
      {									\
        b = (b - old_buffer) + bufp->buffer;				\
        begalt = (begalt - old_buffer) + bufp->buffer;			\
        if (fixup_alt_jump)						\
          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \
        if (laststart)							\
          laststart = (laststart - old_buffer) + bufp->buffer;		\
        if (pending_exact)						\
          pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
      }									\
  } while (0)
/* Since we have one byte reserved for the register number argument to
 * {start,stop}_memory, the maximum number of groups we can report
 * things about is what fits in that byte. */
#define MAX_REGNUM 255

/* But patterns can have more than `MAX_REGNUM' registers.  We just
 * ignore the excess. */
typedef unsigned regnum_t;

/* Macros for the compile stack. */

/* Since offsets can go either forwards or backwards, this type needs to
 * be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
typedef int pattern_offset_t;

/* One saved compiler state, stored as offsets rather than pointers. */
typedef struct {
    pattern_offset_t begalt_offset;
    pattern_offset_t fixup_alt_jump;
    pattern_offset_t inner_group_offset;
    pattern_offset_t laststart_offset;
    regnum_t regnum;
} compile_stack_elt_t;

typedef struct {
    compile_stack_elt_t *stack;
    unsigned size;
    unsigned avail;		/* Offset of next open position. */
} compile_stack_type;

#define INIT_COMPILE_STACK_SIZE 32

/* These all operate on a variable named `compile_stack' in scope. */
#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)

/* The next available element. */
#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
/* Set the bit for character C in a list.  `b' must point at the start
 * of the bitmap; bits within each byte are numbered low-bit-first. */
#define SET_LIST_BIT(c)							\
  (b[((unsigned char) (c)) / BYTEWIDTH]					\
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
/* Get the next unsigned number in the uncompiled pattern.
 *
 * Reads decimal digits through PATFETCH into NUM, using the variable
 * `c' as scratch.  NUM is left untouched when no digit is found; a
 * negative NUM is reset to zero before the first digit is added.  On
 * exit `c' holds the last character fetched, which has been consumed
 * from the pattern even if it was not a digit. */
#define GET_UNSIGNED_NUMBER(num)					\
  { if (p != pend)							\
      {									\
        PATFETCH (c);							\
        while (ISDIGIT (c))						\
          {								\
            if (num < 0)						\
              num = 0;							\
            num = num * 10 + c - '0';					\
            if (p == pend)						\
              break;							\
            PATFETCH (c);						\
          }								\
      }									\
  }
#define CHAR_CLASS_MAX_LENGTH 6	/* Namely, `xdigit'. */

/* Nonzero when STRING is exactly one of the twelve character class
 * names listed below. */
#define IS_CHAR_CLASS(string)						\
  (STREQ (string, "alpha") || STREQ (string, "upper")			\
   || STREQ (string, "lower") || STREQ (string, "digit")		\
   || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
   || STREQ (string, "space") || STREQ (string, "print")		\
   || STREQ (string, "punct") || STREQ (string, "graph")		\
   || STREQ (string, "cntrl") || STREQ (string, "blank"))
- /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
- * Returns one of error codes defined in `regex.h', or zero for success.
- *
- * Assumes the `allocated' (and perhaps `buffer') and `translate'
- * fields are set in BUFP on entry.
- *
- * If it succeeds, results are put in BUFP (if it returns an error, the
- * contents of BUFP are undefined):
- * `buffer' is the compiled pattern;
- * `syntax' is set to SYNTAX;
- * `used' is set to the length of the compiled pattern;
- * `fastmap_accurate' is zero;
- * `re_nsub' is the number of subexpressions in PATTERN;
- * `not_bol' and `not_eol' are zero;
- *
- * The `fastmap' and `newline_anchor' fields are neither
- * examined nor set. */
- static reg_errcode_t
- regex_compile(pattern, size, syntax, bufp)
- const char *pattern;
- int size;
- reg_syntax_t syntax;
- struct re_pattern_buffer *bufp;
- {
- /* We fetch characters from PATTERN here. Even though PATTERN is
- * `char *' (i.e., signed), we declare these variables as unsigned, so
- * they can be reliably used as array indices. */
- register unsigned char c, c1;
- /* A random tempory spot in PATTERN. */
- const char *p1;
- /* Points to the end of the buffer, where we should append. */
- register unsigned char *b;
- /* Keeps track of unclosed groups. */
- compile_stack_type compile_stack;
- /* Points to the current (ending) position in the pattern. */
- const char *p = pattern;
- const char *pend = pattern + size;
- /* How to translate the characters in the pattern. */
- char *translate = bufp->translate;
- /* Address of the count-byte of the most recently inserted `exactn'
- * command. This makes it possible to tell if a new exact-match
- * character can be added to that command or if the character requires
- * a new `exactn' command. */
- unsigned char *pending_exact = 0;
- /* Address of start of the most recently finished expression.
- * This tells, e.g., postfix * where to find the start of its
- * operand. Reset at the beginning of groups and alternatives. */
- unsigned char *laststart = 0;
- /* Address of beginning of regexp, or inside of last group. */
- unsigned char *begalt;
- /* Place in the uncompiled pattern (i.e., the {) to
- * which to go back if the interval is invalid. */
- const char *beg_interval;
- /* Address of the place where a forward jump should go to the end of
- * the containing expression. Each alternative of an `or' -- except the
- * last -- ends with a forward jump of this sort. */
- unsigned char *fixup_alt_jump = 0;
- /* Counts open-groups as they are encountered. Remembered for the
- * matching close-group on the compile stack, so the same register
- * number is put in the stop_memory as the start_memory. */
- regnum_t regnum = 0;
- #ifdef DEBUG
- DEBUG_PRINT1("nCompiling pattern: ");
- if (debug) {
- unsigned debug_count;
- for (debug_count = 0; debug_count < size; debug_count++)
- printchar(pattern[debug_count]);
- putchar('n');
- }
- #endif /* DEBUG */
- /* Initialize the compile stack. */
- compile_stack.stack = TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
- if (compile_stack.stack == NULL)
- return REG_ESPACE;
- compile_stack.size = INIT_COMPILE_STACK_SIZE;
- compile_stack.avail = 0;
- /* Initialize the pattern buffer. */
- bufp->syntax = syntax;
- bufp->fastmap_accurate = 0;
- bufp->not_bol = bufp->not_eol = 0;
- /* Set `used' to zero, so that if we return an error, the pattern
- * printer (for debugging) will think there's no pattern. We reset it
- * at the end. */
- bufp->used = 0;
- /* Always count groups, whether or not bufp->no_sub is set. */
- bufp->re_nsub = 0;
- #if !defined (emacs) && !defined (SYNTAX_TABLE)
- /* Initialize the syntax table. */
- init_syntax_once();
- #endif
- if (bufp->allocated == 0) {
- if (bufp->buffer) { /* If zero allocated, but buffer is non-null, try to realloc
- * enough space. This loses if buffer's address is bogus, but
- * that is the user's responsibility. */
- RETALLOC(bufp->buffer, INIT_BUF_SIZE, unsigned char);
- } else { /* Caller did not allocate a buffer. Do it for them. */
- bufp->buffer = TALLOC(INIT_BUF_SIZE, unsigned char);
- }
- if (!bufp->buffer)
- return REG_ESPACE;
- bufp->allocated = INIT_BUF_SIZE;
- }
- begalt = b = bufp->buffer;
- /* Loop through the uncompiled pattern until we're at the end. */
- while (p != pend) {
- PATFETCH(c);
- switch (c) {
- case '^':
- {
- if ( /* If at start of pattern, it's an operator. */
- p == pattern + 1
- /* If context independent, it's an operator. */
- || syntax & RE_CONTEXT_INDEP_ANCHORS
- /* Otherwise, depends on what's come before. */
- || at_begline_loc_p(pattern, p, syntax))
- BUF_PUSH(begline);
- else
- goto normal_char;
- }
- break;
- case '$':
- {
- if ( /* If at end of pattern, it's an operator. */
- p == pend
- /* If context independent, it's an operator. */
- || syntax & RE_CONTEXT_INDEP_ANCHORS
- /* Otherwise, depends on what's next. */
- || at_endline_loc_p(p, pend, syntax))
- BUF_PUSH(endline);
- else
- goto normal_char;
- }
- break;
- case '+':
- case '?':
- if ((syntax & RE_BK_PLUS_QM)
- || (syntax & RE_LIMITED_OPS))
- goto normal_char;
- handle_plus:
- case '*':
- /* If there is no previous pattern... */
- if (!laststart) {
- if (syntax & RE_CONTEXT_INVALID_OPS)
- return REG_BADRPT;
- else if (!(syntax & RE_CONTEXT_INDEP_OPS))
- goto normal_char;
- } {
- /* Are we optimizing this jump? */
- boolean keep_string_p = false;
- /* 1 means zero (many) matches is allowed. */
- char zero_times_ok = 0, many_times_ok = 0;
- /* If there is a sequence of repetition chars, collapse it
- * down to just one (the right one). We can't combine
- * interval operators with these because of, e.g., `a{2}*',
- * which should only match an even number of `a's. */
- for (;;) {
- zero_times_ok |= c != '+';
- many_times_ok |= c != '?';
- if (p == pend)
- break;
- PATFETCH(c);
- if (c == '*'
- || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')));
- else if (syntax & RE_BK_PLUS_QM && c == '\') {
- if (p == pend)
- return REG_EESCAPE;
- PATFETCH(c1);
- if (!(c1 == '+' || c1 == '?')) {
- PATUNFETCH;
- PATUNFETCH;
- break;
- }
- c = c1;
- } else {
- PATUNFETCH;
- break;
- }
- /* If we get here, we found another repeat character. */
- }
- /* Star, etc. applied to an empty pattern is equivalent
- * to an empty pattern. */
- if (!laststart)
- break;
- /* Now we know whether or not zero matches is allowed
- * and also whether or not two or more matches is allowed. */
- if (many_times_ok) { /* More than one repetition is allowed, so put in at the
- * end a backward relative jump from `b' to before the next
- * jump we're going to put in below (which jumps from
- * laststart to after this jump).
- *
- * But if we are at the `*' in the exact sequence `.*n',
- * insert an unconditional jump backwards to the .,
- * instead of the beginning of the loop. This way we only
- * push a failure point once, instead of every time
- * through the loop. */
- assert(p - 1 > pattern);
- /* Allocate the space for the jump. */
- GET_BUFFER_SPACE(3);
- /* We know we are not at the first character of the pattern,
- * because laststart was nonzero. And we've already
- * incremented `p', by the way, to be the character after
- * the `*'. Do we have to do something analogous here
- * for null bytes, because of RE_DOT_NOT_NULL? */
- if (TRANSLATE(*(p - 2)) == TRANSLATE('.')
- && zero_times_ok
- && p < pend && TRANSLATE(*p) == TRANSLATE('n')
- && !(syntax & RE_DOT_NEWLINE)) { /* We have .*n. */
- STORE_JUMP(jump, b, laststart);
- keep_string_p = true;
- } else
- /* Anything else. */
- STORE_JUMP(maybe_pop_jump, b, laststart - 3);
- /* We've added more stuff to the buffer. */
- b += 3;
- }
- /* On failure, jump from laststart to b + 3, which will be the
- * end of the buffer after this jump is inserted. */
- GET_BUFFER_SPACE(3);
- INSERT_JUMP(keep_string_p ? on_failure_keep_string_jump
- : on_failure_jump,
- laststart, b + 3);
- pending_exact = 0;
- b += 3;
- if (!zero_times_ok) {
- /* At least one repetition is required, so insert a
- * `dummy_failure_jump' before the initial
- * `on_failure_jump' instruction of the loop. This
- * effects a skip over that instruction the first time
- * we hit that loop. */
- GET_BUFFER_SPACE(3);
- INSERT_JUMP(dummy_failure_jump, laststart, laststart + 6);
- b += 3;
- }
- }
- break;
- case '.':
- laststart = b;
- BUF_PUSH(anychar);
- break;
- case '[':
- {
- boolean had_char_class = false;
- if (p == pend)
- return REG_EBRACK;
- /* Ensure that we have enough space to push a charset: the
- * opcode, the length count, and the bitset; 34 bytes in all. */
- GET_BUFFER_SPACE(34);
- laststart = b;
- /* We test `*p == '^' twice, instead of using an if
- * statement, so we only need one BUF_PUSH. */
- BUF_PUSH(*p == '^' ? charset_not : charset);
- if (*p == '^')
- p++;
- /* Remember the first position in the bracket expression. */
- p1 = p;
- /* Push the number of bytes in the bitmap. */
- BUF_PUSH((1 << BYTEWIDTH) / BYTEWIDTH);
- /* Clear the whole map. */
- memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
- /* charset_not matches newline according to a syntax bit. */
- if ((re_opcode_t) b[-2] == charset_not
- && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
- SET_LIST_BIT('n');
- /* Read in characters and ranges, setting map bits. */
- for (;;) {
- if (p == pend)
- return REG_EBRACK;
- PATFETCH(c);
- /* might escape characters inside [...] and [^...]. */
- if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\') {
- if (p == pend)
- return REG_EESCAPE;
- PATFETCH(c1);
- SET_LIST_BIT(c1);
- continue;
- }
- /* Could be the end of the bracket expression. If it's
- * not (i.e., when the bracket expression is `[]' so
- * far), the ']' character bit gets set way below. */
- if (c == ']' && p != p1 + 1)
- break;
- /* Look ahead to see if it's a range when the last thing
- * was a character class. */
- if (had_char_class && c == '-' && *p != ']')
- return REG_ERANGE;
- /* Look ahead to see if it's a range when the last thing
- * was a character: if this is a hyphen not at the
- * beginning or the end of a list, then it's the range
- * operator. */
- if (c == '-'
- && !(p - 2 >= pattern && p[-2] == '[')
- && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
- && *p != ']') {
- reg_errcode_t ret
- = compile_range(&p, pend, translate, syntax, b);
- if (ret != REG_NOERROR)
- return ret;
- } else if (p[0] == '-' && p[1] != ']') { /* This handles ranges made up of characters only. */
- reg_errcode_t ret;
- /* Move past the `-'. */
- PATFETCH(c1);
- ret = compile_range(&p, pend, translate, syntax, b);
- if (ret != REG_NOERROR)
- return ret;
- }
- /* See if we're at the beginning of a possible character
- * class. */
- else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') { /* Leave room for the null. */
- char str[CHAR_CLASS_MAX_LENGTH + 1];
- PATFETCH(c);
- c1 = 0;
- /* If pattern is `[[:'. */
- if (p == pend)
- return REG_EBRACK;
- for (;;) {
- PATFETCH(c);
- if (c == ':' || c == ']' || p == pend
- || c1 == CHAR_CLASS_MAX_LENGTH)
- break;
- str[c1++] = c;
- }
- str[c1] = ' ';
- /* If isn't a word bracketed by `[:' and:`]':
- * undo the ending character, the letters, and leave
- * the leading `:' and `[' (but set bits for them). */
- if (c == ':' && *p == ']') {
- int ch;
- boolean is_alnum = STREQ(str, "alnum");
- boolean is_alpha = STREQ(str, "alpha");
- boolean is_blank = STREQ(str, "blank");
- boolean is_cntrl = STREQ(str, "cntrl");
- boolean is_digit = STREQ(str, "digit");
- boolean is_graph = STREQ(str, "graph");
- boolean is_lower = STREQ(str, "lower");
- boolean is_print = STREQ(str, "print");
- boolean is_punct = STREQ(str, "punct");
- boolean is_space = STREQ(str, "space");
- boolean is_upper = STREQ(str, "upper");
- boolean is_xdigit = STREQ(str, "xdigit");
- if (!IS_CHAR_CLASS(str))
- return REG_ECTYPE;
- /* Throw away the ] at the end of the character
- * class. */
- PATFETCH(c);
- if (p == pend)
- return REG_EBRACK;
- for (ch = 0; ch < 1 << BYTEWIDTH; ch++) {
- if ((is_alnum && ISALNUM(ch))
- || (is_alpha && ISALPHA(ch))
- || (is_blank && ISBLANK(ch))
- || (is_cntrl && ISCNTRL(ch))
- || (is_digit && ISDIGIT(ch))
- || (is_graph && ISGRAPH(ch))
- || (is_lower && ISLOWER(ch))
- || (is_print && ISPRINT(ch))
- || (is_punct && ISPUNCT(ch))
- || (is_space && ISSPACE(ch))
- || (is_upper && ISUPPER(ch))
- || (is_xdigit && ISXDIGIT(ch)))
- SET_LIST_BIT(ch);
- }
- had_char_class = true;
- } else {
- c1++;
- while (c1--)
- PATUNFETCH;
- SET_LIST_BIT('[');
- SET_LIST_BIT(':');
- had_char_class = false;
- }
- } else {
- had_char_class = false;
- SET_LIST_BIT(c);
- }
- }
- /* Discard any (non)matching list bytes that are all 0 at the
- * end of the map. Decrease the map-length byte too. */
- while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
- b[-1]--;
- b += b[-1];
- }
- break;
- case '(':
- if (syntax & RE_NO_BK_PARENS)
- goto handle_open;
- else
- goto normal_char;
- case ')':
- if (syntax & RE_NO_BK_PARENS)
- goto handle_close;
- else
- goto normal_char;
- case 'n':
- if (syntax & RE_NEWLINE_ALT)
- goto handle_alt;
- else
- goto normal_char;
- case '|':
- if (syntax & RE_NO_BK_VBAR)
- goto handle_alt;
- else
- goto normal_char;
- case '{':
- if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
- goto handle_interval;
- else
- goto normal_char;
- case '\':
- if (p == pend)
- return REG_EESCAPE;
- /* Do not translate the character after the , so that we can
- * distinguish, e.g., B from b, even if we normally would
- * translate, e.g., B to b. */
- PATFETCH_RAW(c);
- switch (c) {
- case '(':
- if (syntax & RE_NO_BK_PARENS)
- goto normal_backslash;
- handle_open:
- bufp->re_nsub++;
- regnum++;
- if (COMPILE_STACK_FULL) {
- RETALLOC(compile_stack.stack, compile_stack.size << 1,
- compile_stack_elt_t);
- if (compile_stack.stack == NULL)
- return REG_ESPACE;
- compile_stack.size <<= 1;
- }
- /* These are the values to restore when we hit end of this
- * group. They are all relative offsets, so that if the
- * whole pattern moves because of realloc, they will still
- * be valid. */
- COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
- COMPILE_STACK_TOP.fixup_alt_jump
- = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
- COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
- COMPILE_STACK_TOP.regnum = regnum;
- /* We will eventually replace the 0 with the number of
- * groups inner to this one. But do not push a
- * start_memory for groups beyond the last one we can
- * represent in the compiled pattern. */
- if (regnum <= MAX_REGNUM) {
- COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
- BUF_PUSH_3(start_memory, regnum, 0);
- }
- compile_stack.avail++;
- fixup_alt_jump = 0;
- laststart = 0;
- begalt = b;
- /* If we've reached MAX_REGNUM groups, then this open
- * won't actually generate any code, so we'll have to
- * clear pending_exact explicitly. */
- pending_exact = 0;
- break;
- case ')':
- if (syntax & RE_NO_BK_PARENS)
- goto normal_backslash;
- if (COMPILE_STACK_EMPTY) {
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
- goto normal_backslash;
- else
- return REG_ERPAREN;
- }
- handle_close:
- if (fixup_alt_jump) { /* Push a dummy failure point at the end of the
- * alternative for a possible future
- * `pop_failure_jump' to pop. See comments at
- * `push_dummy_failure' in `re_match_2'. */
- BUF_PUSH(push_dummy_failure);
- /* We allocated space for this jump when we assigned
- * to `fixup_alt_jump', in the `handle_alt' case below. */
- STORE_JUMP(jump_past_alt, fixup_alt_jump, b - 1);
- }
- /* See similar code for backslashed left paren above. */
- if (COMPILE_STACK_EMPTY) {
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
- goto normal_char;
- else
- return REG_ERPAREN;
- }
- /* Since we just checked for an empty stack above, this
- * ``can't happen''. */
- assert(compile_stack.avail != 0);
- {
- /* We don't just want to restore into `regnum', because
- * later groups should continue to be numbered higher,
- * as in `(ab)c(de)' -- the second group is #2. */
- regnum_t this_group_regnum;
- compile_stack.avail--;
- begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
- fixup_alt_jump
- = COMPILE_STACK_TOP.fixup_alt_jump
- ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
- : 0;
- laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
- this_group_regnum = COMPILE_STACK_TOP.regnum;
- /* If we've reached MAX_REGNUM groups, then this open
- * won't actually generate any code, so we'll have to
- * clear pending_exact explicitly. */
- pending_exact = 0;
- /* We're at the end of the group, so now we know how many
- * groups were inside this one. */
- if (this_group_regnum <= MAX_REGNUM) {
- unsigned char *inner_group_loc
- = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
- *inner_group_loc = regnum - this_group_regnum;
- BUF_PUSH_3(stop_memory, this_group_regnum,
- regnum - this_group_regnum);
- }
- }
- break;
- case '|': /* `|'. */
- if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
- goto normal_backslash;
- handle_alt:
- if (syntax & RE_LIMITED_OPS)
- goto normal_char;
- /* Insert before the previous alternative a jump which
- * jumps to this alternative if the former fails. */
- GET_BUFFER_SPACE(3);
- INSERT_JUMP(on_failure_jump, begalt, b + 6);
- pending_exact = 0;
- b += 3;
- /* The alternative before this one has a jump after it
- * which gets executed if it gets matched. Adjust that
- * jump so it will jump to this alternative's analogous
- * jump (put in below, which in turn will jump to the next
- * (if any) alternative's such jump, etc.). The last such
- * jump jumps to the correct final destination. A picture:
- * _____ _____
- * | | | |
- * | v | v
- * a | b | c
- *
- * If we are at `b', then fixup_alt_jump right now points to a
- * three-byte space after `a'. We'll put in the jump, set
- * fixup_alt_jump to right after `b', and leave behind three
- * bytes which we'll fill in when we get to after `c'. */
- if (fixup_alt_jump)
- STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
- /* Mark and leave space for a jump after this alternative,
- * to be filled in later either by next alternative or
- * when know we're at the end of a series of alternatives. */
- fixup_alt_jump = b;
- GET_BUFFER_SPACE(3);
- b += 3;
- laststart = 0;
- begalt = b;
- break;
- case '{':
- /* If { is a literal. */
- if (!(syntax & RE_INTERVALS)
- /* If we're at `{' and it's not the open-interval
- * operator. */
- || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
- || (p - 2 == pattern && p == pend))
- goto normal_backslash;
- handle_interval:
- {
- /* If got here, then the syntax allows intervals. */
- /* At least (most) this many matches must be made. */
- int lower_bound = -1, upper_bound = -1;
- beg_interval = p - 1;
- if (p == pend) {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
- else
- return REG_EBRACE;
- }
- GET_UNSIGNED_NUMBER(lower_bound);
- if (c == ',') {
- GET_UNSIGNED_NUMBER(upper_bound);
- if (upper_bound < 0)
- upper_bound = RE_DUP_MAX;
- } else
- /* Interval such as `{1}' => match exactly once. */
- upper_bound = lower_bound;
- if (lower_bound < 0 || upper_bound > RE_DUP_MAX
- || lower_bound > upper_bound) {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
- else
- return REG_BADBR;
- }
- if (!(syntax & RE_NO_BK_BRACES)) {
- if (c != '\')
- return REG_EBRACE;
- PATFETCH(c);
- }
- if (c != '}') {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
- else
- return REG_BADBR;
- }
- /* We just parsed a valid interval. */
- /* If it's invalid to have no preceding re. */
- if (!laststart) {
- if (syntax & RE_CONTEXT_INVALID_OPS)
- return REG_BADRPT;
- else if (syntax & RE_CONTEXT_INDEP_OPS)
- laststart = b;
- else
- goto unfetch_interval;
- }
- /* If the upper bound is zero, don't want to succeed at
- * all; jump from `laststart' to `b + 3', which will be
- * the end of the buffer after we insert the jump. */
- if (upper_bound == 0) {
- GET_BUFFER_SPACE(3);
- INSERT_JUMP(jump, laststart, b + 3);
- b += 3;
- }
- /* Otherwise, we have a nontrivial interval. When
- * we're all done, the pattern will look like:
- * set_number_at <jump count> <upper bound>
- * set_number_at <succeed_n count> <lower bound>
- * succeed_n <after jump addr> <succed_n count>
- * <body of loop>
- * jump_n <succeed_n addr> <jump count>
- * (The upper bound and `jump_n' are omitted if
- * `upper_bound' is 1, though.) */
- else { /* If the upper bound is > 1, we need to insert
- * more at the end of the loop. */
- unsigned nbytes = 10 + (upper_bound > 1) * 10;
- GET_BUFFER_SPACE(nbytes);
- /* Initialize lower bound of the `succeed_n', even
- * though it will be set during matching by its
- * attendant `set_number_at' (inserted next),
- * because `re_compile_fastmap' needs to know.
- * Jump to the `jump_n' we might insert below. */
- INSERT_JUMP2(succeed_n, laststart,
- b + 5 + (upper_bound > 1) * 5,
- lower_bound);
- b += 5;
- /* Code to initialize the lower bound. Insert
- * before the `succeed_n'. The `5' is the last two
- * bytes of this `set_number_at', plus 3 bytes of
- * the following `succeed_n'. */
- insert_op2(set_number_at, laststart, 5, lower_bound, b);
- b += 5;
- if (upper_bound > 1) { /* More than one repetition is allowed, so
- * append a backward jump to the `succeed_n'
- * that starts this interval.
- *
- * When we've reached this during matching,
- * we'll have matched the interval once, so
- * jump back only `upper_bound - 1' times. */
- STORE_JUMP2(jump_n, b, laststart + 5,
- upper_bound - 1);
- b += 5;
- /* The location we want to set is the second
- * parameter of the `jump_n'; that is `b-2' as
- * an absolute address. `laststart' will be
- * the `set_number_at' we're about to insert;
- * `laststart+3' the number to set, the source
- * for the relative address. But we are
- * inserting into the middle of the pattern --
- * so everything is getting moved up by 5.
- * Conclusion: (b - 2) - (laststart + 3) + 5,
- * i.e., b - laststart.
- *
- * We insert this at the beginning of the loop
- * so that if we fail during matching, we'll
- * reinitialize the bounds. */
- insert_op2(set_number_at, laststart, b - laststart,
- upper_bound - 1, b);
- b += 5;
- }
- }
- pending_exact = 0;
- beg_interval = NULL;
- }
- break;
- unfetch_interval:
- /* If an invalid interval, match the characters as literals. */
- assert(beg_interval);
- p = beg_interval;
- beg_interval = NULL;
- /* normal_char and normal_backslash need `c'. */
- PATFETCH(c);
- if (!(syntax & RE_NO_BK_BRACES)) {
- if (p > pattern && p[-1] == '\')
- goto normal_backslash;
- }
- goto normal_char;
- #ifdef emacs
- /* There is no way to specify the before_dot and after_dot
- * operators. rms says this is ok. --karl */
- case '=':
- BUF_PUSH(at_dot);
- break;
- case 's':
- laststart = b;
- PATFETCH(c);
- BUF_PUSH_2(syntaxspec, syntax_spec_code[c]);
- break;
- case 'S':
- laststart = b;
- PATFETCH(c);
- BUF_PUSH_2(notsyntaxspec, syntax_spec_code[c]);
- break;
- #endif /* emacs */
- case 'w':
- laststart = b;
- BUF_PUSH(wordchar);
- break;
- case 'W':
- laststart = b;
- BUF_PUSH(notwordchar);
- break;
- case '<':
- BUF_PUSH(wordbeg);
- break;
- case '>':
- BUF_PUSH(wordend);
- break;
- case 'b':
- BUF_PUSH(wordbound);
- break;
- case 'B':
- BUF_PUSH(notwordbound);
- break;
- case '`':
- BUF_PUSH(begbuf);
- break;
- case ''':
- BUF_PUSH(endbuf);
- break;
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if (syntax & RE_NO_BK_REFS)
- goto normal_char;
- c1 = c - '0';
- if (c1 > regnum)
- return REG_ESUBREG;
- /* Can't back reference to a subexpression if inside of it. */
- if (group_in_compile_stack(compile_stack, c1))
- goto normal_char;
- laststart = b;
- BUF_PUSH_2(duplicate, c1);
- break;
- case '+':
- case '?':
- if (syntax & RE_BK_PLUS_QM)
- goto handle_plus;
- else
- goto normal_backslash;
- default:
- normal_backslash:
- /* You might think it would be useful for to mean
- * not to translate; but if we don't translate it
- * it will never match anything. */
- c = TRANSLATE(c);
- goto normal_char;
- }
- break;
- default:
- /* Expects the character in `c'. */
- normal_char:
- /* If no exactn currently being built. */
- if (!pending_exact
- /* If last exactn not at current position. */
- || pending_exact + *pending_exact + 1 != b
- /* We have only one byte following the exactn for the count. */
- || *pending_exact == (1 << BYTEWIDTH) - 1
- /* If followed by a repetition operator. */
- || *p == '*' || *p == '^'
- || ((syntax & RE_BK_PLUS_QM)
- ? *p == '\' && (p[1] == '+' || p[1] == '?')
- : (*p == '+' || *p == '?'))
- || ((syntax & RE_INTERVALS)
- && ((syntax & RE_NO_BK_BRACES)
- ? *p == '{'
- : (p[0] == '\' && p[1] == '{')))) {
- /* Start building a new exactn. */
- laststart = b;
- BUF_PUSH_2(exactn, 0);
- pending_exact = b - 1;
- }
- BUF_PUSH(c);
- (*pending_exact)++;
- break;
- } /* switch (c) */
- } /* while p != pend */
- /* Through the pattern now. */
- if (fixup_alt_jump)
- STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
- if (!COMPILE_STACK_EMPTY)
- return REG_EPAREN;
- free(compile_stack.stack);
- /* We have succeeded; set the length of the buffer. */
- bufp->used = b - bufp->buffer;
- #ifdef DEBUG
- if (debug) {
- DEBUG_PRINT1("nCompiled pattern: ");
- print_compiled_pattern(bufp);
- }
- #endif /* DEBUG */
- return REG_NOERROR;
- } /* regex_compile */
- /* Subroutines for `regex_compile'. */
- /* Store OP at LOC followed by two-byte integer parameter ARG. */
- static void
- store_op1(op, loc, arg)
- re_opcode_t op;
- unsigned char *loc;
- int arg;
- {
- *loc = (unsigned char) op;
- STORE_NUMBER(loc + 1, arg);
- }
- /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
- static void
- store_op2(op, loc, arg1, arg2)
- re_opcode_t op;
- unsigned char *loc;
- int arg1, arg2;
- {
- *loc = (unsigned char) op;
- STORE_NUMBER(loc + 1, arg1);
- STORE_NUMBER(loc + 3, arg2);
- }
- /* Copy the bytes from LOC to END to open up three bytes of space at LOC
- * for OP followed by two-byte integer parameter ARG. */
- static void
- insert_op1(op, loc, arg, end)
- re_opcode_t op;
- unsigned char *loc;
- int arg;
- unsigned char *end;
- {
- register unsigned char *pfrom = end;
- register unsigned char *pto = end + 3;
- while (pfrom != loc)
- *--pto = *--pfrom;
- store_op1(op, loc, arg);
- }
- /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
- static void
- insert_op2(op, loc, arg1, arg2, end)
- re_opcode_t op;
- unsigned char *loc;
- int arg1, arg2;
- unsigned char *end;
- {
- register unsigned char *pfrom = end;
- register unsigned char *pto = end + 5;
- while (pfrom != loc)
- *--pto = *--pfrom;
- store_op2(op, loc, arg1, arg2);
- }
- /* P points to just after a ^ in PATTERN. Return true if that ^ comes
- * after an alternative or a begin-subexpression. We assume there is at
- * least one character before the ^. */
- static boolean
- at_begline_loc_p(pattern, p, syntax)
- const char *pattern, *p;
- reg_syntax_t syntax;
- {
- const char *prev = p - 2;
- boolean prev_prev_backslash = prev > pattern && prev[-1] == '\';
- return
- /* After a subexpression? */
- (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
- /* After an alternative? */
- || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
- }
- /* The dual of at_begline_loc_p. This one is for $. We assume there is
- * at least one character after the $, i.e., `P < PEND'. */
- static boolean
- at_endline_loc_p(p, pend, syntax)
- const char *p, *pend;
- int syntax;
- {
- const char *next = p;
- boolean next_backslash = *next == '\';
- const char *next_next = p + 1 < pend ? p + 1 : NULL;
- return
- /* Before a subexpression? */
- (syntax & RE_NO_BK_PARENS ? *next == ')'
- : next_backslash && next_next && *next_next == ')')
- /* Before an alternative? */
- || (syntax & RE_NO_BK_VBAR ? *next == '|'
- : next_backslash && next_next && *next_next == '|');
- }
- /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
- * false if it's not. */
- static boolean
- group_in_compile_stack(compile_stack, regnum)
- compile_stack_type compile_stack;
- regnum_t regnum;
- {
- int this_element;
- for (this_element = compile_stack.avail - 1;
- this_element >= 0;
- this_element--)
- if (compile_stack.stack[this_element].regnum == regnum)
- return true;
- return false;
- }
- /* Read the ending character of a range (in a bracket expression) from the
- * uncompiled pattern *P_PTR (which ends at PEND). We assume the
- * starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
- * Then we set the translation of all bits between the starting and
- * ending characters (inclusive) in the compiled pattern B.
- *
- * Return an error code.
- *
- * We use these short variable names so we can use the same macros as
- * `regex_compile' itself. */
- static reg_errcode_t
- compile_range(p_ptr, pend, translate, syntax, b)
- const char **p_ptr, *pend;
- char *translate;
- reg_syntax_t syntax;
- unsigned char *b;
- {
- unsigned this_char;
- const char *p = *p_ptr;
- int range_start, range_end;
- if (p == pend)
- return REG_ERANGE;
- /* Even though the pattern is a signed `char *', we need to fetch
- * with unsigned char *'s; if the high bit of the pattern character
- * is set, the range endpoints will be negative if we fetch using a
- * signed char *.
- *
- * We also want to fetch the endpoints without translating them; the
- * appropriate translation is done in the bit-setting loop below. */
- range_start = ((unsigned char *) p)[-2];
- range_end = ((unsigned char *) p)[0];
- /* Have to increment the pointer into the pattern string, so the
- * caller isn't still at the ending character. */
- (*p_ptr)++;
- /* If the start is after the end, the range is empty. */
- if (range_start > range_end)
- return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
- /* Here we see why `this_char' has to be larger than an `unsigned
- * char' -- the range is inclusive, so if `range_end' == 0xff
- * (assuming 8-bit characters), we would otherwise go into an infinite
- * loop, since all characters <= 0xff. */
- for (this_char = range_start; this_char <= range_end; this_char++) {
- SET_LIST_BIT(TRANSLATE(this_char));
- }
- return REG_NOERROR;
- }
/* Failure stack declarations and macros; both re_compile_fastmap and
 * re_match_2 use a failure stack.  These have to be macros because of
 * REGEX_ALLOCATE. */

/* Number of failure points for which to initially allocate space
 * when matching.  If this number is exceeded, we allocate more
 * space, so it is not a hard limit. */
#ifndef INIT_FAILURE_ALLOC
#define INIT_FAILURE_ALLOC 5
#endif

/* Roughly the maximum number of failure points on the stack.  Would be
 * exactly that if always used MAX_FAILURE_SPACE each time we failed.
 * This is a variable only so users of regex can assign to it; we never
 * change it ourselves. */
int re_max_failures = 2000;

/* One slot of the failure stack. */
typedef const unsigned char *fail_stack_elt_t;

typedef struct {
    fail_stack_elt_t *stack;    /* The slots themselves. */
    unsigned size;              /* Number of slots allocated. */
    unsigned avail;             /* Offset of next open position. */
} fail_stack_type;

/* These expect a variable named `fail_stack' (or, for the PTR variant,
 * a pointer named `fail_stack_ptr') to be in scope where they are
 * expanded. */
#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
#define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])

/* Initialize `fail_stack'.  Do `return -2' if the alloc fails.
 *
 * Because of that `return', this may only be expanded inside a function
 * whose return type can hold -2. */
#define INIT_FAIL_STACK() \
    do { \
        fail_stack.stack = (fail_stack_elt_t *) \
            REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
        \
        if (fail_stack.stack == NULL) \
            return -2; \
        \
        fail_stack.size = INIT_FAILURE_ALLOC; \
        fail_stack.avail = 0; \
    } while (0)
/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
 *
 * Return 1 if succeeds, and 0 if either ran out of memory
 * allocating space for it or it was already too large.
 *
 * REGEX_REALLOCATE requires `destination' be declared.
 *
 * FAIL_STACK is evaluated several times, so it must be a plain lvalue
 * without side effects.
 *
 * NOTE(review): the result of REGEX_REALLOCATE is stored straight back
 * into (fail_stack).stack, so on failure the old pointer is overwritten
 * with NULL.  Whether that loses memory depends on how REGEX_REALLOCATE
 * is defined, which is not visible here -- confirm. */
#define DOUBLE_FAIL_STACK(fail_stack) \
    ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
     ? 0 \
     : ((fail_stack).stack = (fail_stack_elt_t *) \
            REGEX_REALLOCATE ((fail_stack).stack, \
                (fail_stack).size * sizeof (fail_stack_elt_t), \
                ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
        \
        (fail_stack).stack == NULL \
        ? 0 \
        : ((fail_stack).size <<= 1, \
           1)))
/* Push PATTERN_OP on FAIL_STACK.
 *
 * Return 1 if was able to do so and 0 if ran out of memory allocating
 * space to do so.
 *
 * The fullness test goes through FAIL_STACK_FULL (), which looks at the
 * variable literally named `fail_stack' rather than at the FAIL_STACK
 * argument; the stack is grown with DOUBLE_FAIL_STACK only when that
 * test says it is full. */
#define PUSH_PATTERN_OP(pattern_op, fail_stack) \
    ((FAIL_STACK_FULL () \
      && !DOUBLE_FAIL_STACK (fail_stack)) \
     ? 0 \
     : ((fail_stack).stack[(fail_stack).avail++] = (pattern_op), \
        1))
/* This pushes an item onto the failure stack.  Must be a four-byte
 * value.  Assumes the variable `fail_stack'.  Probably should only
 * be called from within `PUSH_FAILURE_POINT'.
 *
 * No bounds check is made here; the caller must already have made sure
 * there is a free slot. */
#define PUSH_FAILURE_ITEM(item) \
    fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) (item)

/* The complement operation.  Assumes `fail_stack' is nonempty. */
#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]

/* Used to omit pushing failure point id's when we're not debugging. */
#ifdef DEBUG
#define DEBUG_PUSH PUSH_FAILURE_ITEM
#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
#else
#define DEBUG_PUSH(item)
#define DEBUG_POP(item_addr)
#endif
/* Push the information about the state we will need
 * if we ever fail back to it.
 *
 * Requires variables fail_stack, regstart, regend, reg_info, and
 * num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
 * declared.  The expansion also reads lowest_active_reg and
 * highest_active_reg, and its debug statements name failure_id,
 * nfailure_points_pushed, num_regs_pushed, bufp, pend, string1, size1,
 * string2 and size2.
 *
 * Items are pushed in this order: for every register from
 * lowest_active_reg to highest_active_reg its start, end and info
 * word; then lowest_active_reg, highest_active_reg, PATTERN_PLACE and
 * STRING_PLACE; and finally, through DEBUG_PUSH, the failure id.
 *
 * Does `return FAILURE_CODE' if runs out of memory. */
#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
    do { \
        char *destination; \
        /* Must be int, so when we don't save any registers, the */ \
        /* arithmetic of 0 + -1 isn't done as unsigned. */ \
        int this_reg; \
        \
        DEBUG_STATEMENT (failure_id++); \
        DEBUG_STATEMENT (nfailure_points_pushed++); \
        DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
        DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail); \
        DEBUG_PRINT2 (" size: %d\n", (fail_stack).size); \
        \
        DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
        DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
        \
        /* Ensure we have enough space allocated for what we will push. */ \
        while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
          { \
            if (!DOUBLE_FAIL_STACK (fail_stack)) \
                return failure_code; \
            \
            DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
                          (fail_stack).size); \
            DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS); \
          } \
        \
        /* Push the info, starting with the registers. */ \
        DEBUG_PRINT1 ("\n"); \
        \
        for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
             this_reg++) \
          { \
            DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
            DEBUG_STATEMENT (num_regs_pushed++); \
            \
            DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
            PUSH_FAILURE_ITEM (regstart[this_reg]); \
            \
            DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
            PUSH_FAILURE_ITEM (regend[this_reg]); \
            \
            DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
            DEBUG_PRINT2 (" match_null=%d", \
                          REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
            DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
            DEBUG_PRINT2 (" matched_something=%d", \
                          MATCHED_SOMETHING (reg_info[this_reg])); \
            DEBUG_PRINT2 (" ever_matched=%d", \
                          EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
            DEBUG_PRINT1 ("\n"); \
            PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
          } \
        \
        DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg); \
        PUSH_FAILURE_ITEM (lowest_active_reg); \
        \
        DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg); \
        PUSH_FAILURE_ITEM (highest_active_reg); \
        \
        DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
        DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
        PUSH_FAILURE_ITEM (pattern_place); \
        \
        DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
        DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
                                   size2); \
        DEBUG_PRINT1 ("'\n"); \
        PUSH_FAILURE_ITEM (string_place); \
        \
        DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
        DEBUG_PUSH (failure_id); \
    } while (0)
/* This is the number of items that are pushed and popped on the stack
 * for each register. */
#define NUM_REG_ITEMS 3

/* Individual items aside from the registers. */
#ifdef DEBUG
#define NUM_NONREG_ITEMS 5      /* Includes failure point id. */
#else
#define NUM_NONREG_ITEMS 4
#endif

/* We push at most this many items on the stack.  Expects `num_regs' to
 * be in scope where it is expanded. */
#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)

/* We actually push this many items.  Expects `highest_active_reg' and
 * `lowest_active_reg' to be in scope where it is expanded. */
#define NUM_FAILURE_ITEMS \
    ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
     + NUM_NONREG_ITEMS)

/* How many items can still be added to the stack without overflowing it. */
#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)