study.c
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:11k
- /*
- * ===========================================================================
- * PRODUCTION $Log: study.c,v $
- * PRODUCTION Revision 1000.0 2003/10/29 15:56:20 gouriano
- * PRODUCTION PRODUCTION: IMPORTED [ORIGINAL] Dev-tree R1.1
- * PRODUCTION
- * ===========================================================================
- */
- /*************************************************
- * Perl-Compatible Regular Expressions *
- *************************************************/
- /*
- This is a library of functions to support regular expressions whose syntax
- and semantics are as close as possible to those of the Perl 5 language. See
- the file Tech.Notes for some information on the internals.
- Written by: Philip Hazel <ph10@cam.ac.uk>
- Copyright (c) 1997-2001 University of Cambridge
- -----------------------------------------------------------------------------
- Permission is granted to anyone to use this software for any purpose on any
- computer system, and to redistribute it freely, subject to the following
- restrictions:
- 1. This software is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- 2. The origin of this software must not be misrepresented, either by
- explicit claim or by omission.
- 3. Altered versions must be plainly marked as such, and must not be
- misrepresented as being the original software.
- 4. If PCRE is embedded in any software that is released under the GNU
- General Purpose Licence (GPL), then the terms of that licence shall
- supersede any condition above with which it is incompatible.
- -----------------------------------------------------------------------------
- */
- /* Include the internals header, which itself includes Standard C headers plus
- the external pcre header. */
- #include "pcre_internal.h"
- /*************************************************
- * Set a bit and maybe its alternate case *
- *************************************************/
- /* Given a character, set its bit in the table, and also the bit for the other
- version of a letter if we are caseless.
- Arguments:
- start_bits points to the bit map
- c is the character
- caseless the caseless flag
- cd the block with char table pointers
- Returns: nothing
- */
- static void
- set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd)
- {
- start_bits[c/8] |= (1 << (c&7));
- if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
- start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
- }
- /*************************************************
- * Create bitmap of starting chars *
- *************************************************/
- /* This function scans a compiled unanchored expression and attempts to build a
- bitmap of the set of initial characters. If it can't, it returns FALSE. As time
- goes by, we may be able to get more clever at doing this.
- Arguments:
- code points to an expression
- start_bits points to a 32-byte table, initialized to 0
- caseless the current state of the caseless flag
- cd the block with char table pointers
- Returns: TRUE if table built, FALSE otherwise
- */
- static BOOL
- set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
- compile_data *cd)
- {
- register int c;
- /* This next statement and the later reference to dummy are here in order to
- trick the optimizer of the IBM C compiler for OS/2 into generating correct
- code. Apparently IBM isn't going to fix the problem, and we would rather not
- disable optimization (in this module it actually makes a big difference, and
- the pcre module can use all the optimization it can get). */
- volatile int dummy;
- do
- {
- const uschar *tcode = code + 3;
- BOOL try_next = TRUE;
- while (try_next)
- {
- /* If a branch starts with a bracket or a positive lookahead assertion,
- recurse to set bits from within them. That's all for this branch. */
- if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
- {
- if (!set_start_bits(tcode, start_bits, caseless, cd))
- return FALSE;
- try_next = FALSE;
- }
- else switch(*tcode)
- {
- default:
- return FALSE;
- /* Skip over extended extraction bracket number */
- case OP_BRANUMBER:
- tcode += 3;
- break;
- /* Skip over lookbehind and negative lookahead assertions */
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
- tcode += 3;
- break;
- /* Skip over an option setting, changing the caseless flag */
- case OP_OPT:
- caseless = (tcode[1] & PCRE_CASELESS) != 0;
- tcode += 2;
- break;
- /* BRAZERO does the bracket, but carries on. */
- case OP_BRAZERO:
- case OP_BRAMINZERO:
- if (!set_start_bits(++tcode, start_bits, caseless, cd))
- return FALSE;
- dummy = 1;
- do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
- tcode += 3;
- break;
- /* Single-char * or ? sets the bit and tries the next item */
- case OP_STAR:
- case OP_MINSTAR:
- case OP_QUERY:
- case OP_MINQUERY:
- set_bit(start_bits, tcode[1], caseless, cd);
- tcode += 2;
- break;
- /* Single-char upto sets the bit and tries the next */
- case OP_UPTO:
- case OP_MINUPTO:
- set_bit(start_bits, tcode[3], caseless, cd);
- tcode += 4;
- break;
- /* At least one single char sets the bit and stops */
- case OP_EXACT: /* Fall through */
- tcode++;
- case OP_CHARS: /* Fall through */
- tcode++;
- case OP_PLUS:
- case OP_MINPLUS:
- set_bit(start_bits, tcode[1], caseless, cd);
- try_next = FALSE;
- break;
- /* Single character type sets the bits and stops */
- case OP_NOT_DIGIT:
- for (c = 0; c < 32; c++)
- start_bits[c] |= ~cd->cbits[c+cbit_digit];
- try_next = FALSE;
- break;
- case OP_DIGIT:
- for (c = 0; c < 32; c++)
- start_bits[c] |= cd->cbits[c+cbit_digit];
- try_next = FALSE;
- break;
- case OP_NOT_WHITESPACE:
- for (c = 0; c < 32; c++)
- start_bits[c] |= ~cd->cbits[c+cbit_space];
- try_next = FALSE;
- break;
- case OP_WHITESPACE:
- for (c = 0; c < 32; c++)
- start_bits[c] |= cd->cbits[c+cbit_space];
- try_next = FALSE;
- break;
- case OP_NOT_WORDCHAR:
- for (c = 0; c < 32; c++)
- start_bits[c] |= ~cd->cbits[c+cbit_word];
- try_next = FALSE;
- break;
- case OP_WORDCHAR:
- for (c = 0; c < 32; c++)
- start_bits[c] |= cd->cbits[c+cbit_word];
- try_next = FALSE;
- break;
- /* One or more character type fudges the pointer and restarts, knowing
- it will hit a single character type and stop there. */
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- tcode++;
- break;
- case OP_TYPEEXACT:
- tcode += 3;
- break;
- /* Zero or more repeats of character types set the bits and then
- try again. */
- case OP_TYPEUPTO:
- case OP_TYPEMINUPTO:
- tcode += 2; /* Fall through */
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- switch(tcode[1])
- {
- case OP_NOT_DIGIT:
- for (c = 0; c < 32; c++)
- start_bits[c] |= ~cd->cbits[c+cbit_digit];
- break;
- case OP_DIGIT:
- for (c = 0; c < 32; c++)
- start_bits[c] |= cd->cbits[c+cbit_digit];
- break;
- case OP_NOT_WHITESPACE:
- for (c = 0; c < 32; c++)
- start_bits[c] |= ~cd->cbits[c+cbit_space];
- break;
- case OP_WHITESPACE:
- for (c = 0; c < 32; c++)
- start_bits[c] |= cd->cbits[c+cbit_space];
- break;
- case OP_NOT_WORDCHAR:
- for (c = 0; c < 32; c++)
- start_bits[c] |= ~cd->cbits[c+cbit_word];
- break;
- case OP_WORDCHAR:
- for (c = 0; c < 32; c++)
- start_bits[c] |= cd->cbits[c+cbit_word];
- break;
- }
- tcode += 2;
- break;
- /* Character class: set the bits and either carry on or not,
- according to the repeat count. */
- case OP_CLASS:
- {
- tcode++;
- for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
- tcode += 32;
- switch (*tcode)
- {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
- tcode++;
- break;
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
- else try_next = FALSE;
- break;
- default:
- try_next = FALSE;
- break;
- }
- }
- break; /* End of class handling */
- } /* End of switch */
- } /* End of try_next loop */
- code += (code[1] << 8) + code[2]; /* Advance to next branch */
- }
- while (*code == OP_ALT);
- return TRUE;
- }
- /*************************************************
- * Study a compiled expression *
- *************************************************/
- /* This function is handed a compiled expression that it must study to produce
- information that will speed up the matching. It returns a pcre_extra block
- which then gets handed back to pcre_exec().
- Arguments:
- re points to the compiled expression
- options contains option bits
- errorptr points to where to place error messages;
- set NULL unless error
- Returns: pointer to a pcre_extra block,
- NULL on error or if no optimization possible
- */
- pcre_extra *
- pcre_study(const pcre *external_re, int options, const char **errorptr)
- {
- uschar start_bits[32];
- real_pcre_extra *extra;
- const real_pcre *re = (const real_pcre *)external_re;
- compile_data compile_block;
- *errorptr = NULL;
- if (re == NULL || re->magic_number != MAGIC_NUMBER)
- {
- *errorptr = "argument is not a compiled regular expression";
- return NULL;
- }
- if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
- {
- *errorptr = "unknown or incorrect option bit(s) set";
- return NULL;
- }
- /* For an anchored pattern, or an unchored pattern that has a first char, or a
- multiline pattern that matches only at "line starts", no further processing at
- present. */
- if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
- return NULL;
- /* Set the character tables in the block which is passed around */
- compile_block.lcc = re->tables + lcc_offset;
- compile_block.fcc = re->tables + fcc_offset;
- compile_block.cbits = re->tables + cbits_offset;
- compile_block.ctypes = re->tables + ctypes_offset;
- /* See if we can find a fixed set of initial characters for the pattern. */
- memset(start_bits, 0, 32 * sizeof(uschar));
- if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0,
- &compile_block)) return NULL;
- /* Get an "extra" block and put the information therein. */
- extra = (real_pcre_extra *)(pcre_malloc)(sizeof(real_pcre_extra));
- if (extra == NULL)
- {
- *errorptr = "failed to get memory";
- return NULL;
- }
- extra->options = PCRE_STUDY_MAPPED;
- memcpy(extra->start_bits, start_bits, sizeof(start_bits));
- return (pcre_extra *)extra;
- }
- /* End of study.c */