tclRegexp.c
上传用户:rrhhcc
上传日期:2015-12-11
资源大小:54129k
文件大小:30k
- /*
- * tclRegexp.c --
- *
- * This file contains the public interfaces to the Tcl regular
- * expression mechanism.
- *
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- * Copyright (c) 1998-1999 by Scriptics Corporation.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: tclRegexp.c,v 1.14.4.2 2006/04/07 01:14:28 hobbs Exp $
- */
- #include "tclInt.h"
- #include "tclPort.h"
- #include "tclRegexp.h"
- /*
- *----------------------------------------------------------------------
- * The routines in this file use Henry Spencer's regular expression
- * package contained in the following additional source files:
- *
- * regc_color.c regc_cvec.c regc_lex.c
- * regc_nfa.c regcomp.c regcustom.h
- * rege_dfa.c regerror.c regerrs.h
- * regex.h regexec.c regfree.c
- * regfronts.c regguts.h
- *
- * Copyright (c) 1998 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * *** NOTE: this code has been altered slightly for use in Tcl: ***
- * *** 1. Names have been changed, e.g. from re_comp to ***
- * *** TclRegComp, to avoid clashes with other ***
- * *** regexp implementations used by applications. ***
- */
- /*
- * Thread local storage used to maintain a per-thread cache of compiled
- * regular expressions.
- */
- #define NUM_REGEXPS 30
- typedef struct ThreadSpecificData {
- int initialized; /* Set to 1 when the module is initialized. */
- char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled
- * regular expression patterns. NULL
- * means that this slot isn't used.
- * Malloc-ed. */
- int patLengths[NUM_REGEXPS];/* Number of non-null characters in
- * corresponding entry in patterns.
- * -1 means entry isn't used. */
- struct TclRegexp *regexps[NUM_REGEXPS];
- /* Compiled forms of above strings. Also
- * malloc-ed, or NULL if not in use yet. */
- } ThreadSpecificData;
- static Tcl_ThreadDataKey dataKey;
- /*
- * Declarations for functions used only in this file.
- */
- static TclRegexp * CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,
- CONST char *pattern, int length, int flags));
- static void DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
- Tcl_Obj *copyPtr));
- static void FinalizeRegexp _ANSI_ARGS_((ClientData clientData));
- static void FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));
- static void FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
- static int RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
- Tcl_RegExp re, CONST Tcl_UniChar *uniString,
- int numChars, int nmatches, int flags));
- static int SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,
- Tcl_Obj *objPtr));
- /*
- * The regular expression Tcl object type. This serves as a cache
- * of the compiled form of the regular expression.
- */
- static Tcl_ObjType tclRegexpType = {
- "regexp", /* name */
- FreeRegexpInternalRep, /* freeIntRepProc */
- DupRegexpInternalRep, /* dupIntRepProc */
- NULL, /* updateStringProc */
- SetRegexpFromAny /* setFromAnyProc */
- };
- /*
- *----------------------------------------------------------------------
- *
- * Tcl_RegExpCompile --
- *
- * Compile a regular expression into a form suitable for fast
- * matching. This procedure is DEPRECATED in favor of the
- * object version of the command.
- *
- * Results:
- * The return value is a pointer to the compiled form of string,
- * suitable for passing to Tcl_RegExpExec. This compiled form
- * is only valid up until the next call to this procedure, so
- * don't keep these around for a long time! If an error occurred
- * while compiling the pattern, then NULL is returned and an error
- * message is left in the interp's result.
- *
- * Side effects:
- * Updates the cache of compiled regexps.
- *
- *----------------------------------------------------------------------
- */
- Tcl_RegExp
- Tcl_RegExpCompile(interp, string)
- Tcl_Interp *interp; /* For use in error reporting and
- * to access the interp regexp cache. */
- CONST char *string; /* String for which to produce
- * compiled regular expression. */
- {
- return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),
- REG_ADVANCED);
- }
- /*
- *----------------------------------------------------------------------
- *
- * Tcl_RegExpExec --
- *
- * Execute the regular expression matcher using a compiled form
- * of a regular expression and save information about any match
- * that is found.
- *
- * Results:
- * If an error occurs during the matching operation then -1
- * is returned and the interp's result contains an error message.
- * Otherwise the return value is 1 if a matching range is
- * found and 0 if there is no matching range.
- *
- * Side effects:
- * None.
- *
- *----------------------------------------------------------------------
- */
- int
- Tcl_RegExpExec(interp, re, string, start)
- Tcl_Interp *interp; /* Interpreter to use for error reporting. */
- Tcl_RegExp re; /* Compiled regular expression; must have
- * been returned by previous call to
- * Tcl_GetRegExpFromObj. */
- CONST char *string; /* String against which to match re. */
- CONST char *start; /* If string is part of a larger string,
- * this identifies beginning of larger
- * string, so that "^" won't match. */
- {
- int flags, result, numChars;
- TclRegexp *regexp = (TclRegexp *)re;
- Tcl_DString ds;
- CONST Tcl_UniChar *ustr;
- /*
- * If the starting point is offset from the beginning of the buffer,
- * then we need to tell the regexp engine not to match "^".
- */
- if (string > start) {
- flags = REG_NOTBOL;
- } else {
- flags = 0;
- }
- /*
- * Remember the string for use by Tcl_RegExpRange().
- */
- regexp->string = string;
- regexp->objPtr = NULL;
- /*
- * Convert the string to Unicode and perform the match.
- */
- Tcl_DStringInit(&ds);
- ustr = Tcl_UtfToUniCharDString(string, -1, &ds);
- numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);
- result = RegExpExecUniChar(interp, re, ustr, numChars,
- -1 /* nmatches */, flags);
- Tcl_DStringFree(&ds);
- return result;
- }
- /*
- *---------------------------------------------------------------------------
- *
- * Tcl_RegExpRange --
- *
- * Returns pointers describing the range of a regular expression match,
- * or one of the subranges within the match.
- *
- * Results:
- * The variables at *startPtr and *endPtr are modified to hold the
- * addresses of the endpoints of the range given by index. If the
- * specified range doesn't exist then NULLs are returned.
- *
- * Side effects:
- * None.
- *
- *---------------------------------------------------------------------------
- */
- void
- Tcl_RegExpRange(re, index, startPtr, endPtr)
- Tcl_RegExp re; /* Compiled regular expression that has
- * been passed to Tcl_RegExpExec. */
- int index; /* 0 means give the range of the entire
- * match, > 0 means give the range of
- * a matching subrange. */
- CONST char **startPtr; /* Store address of first character in
- * (sub-) range here. */
- CONST char **endPtr; /* Store address of character just after last
- * in (sub-) range here. */
- {
- TclRegexp *regexpPtr = (TclRegexp *) re;
- CONST char *string;
- if ((size_t) index > regexpPtr->re.re_nsub) {
- *startPtr = *endPtr = NULL;
- } else if (regexpPtr->matches[index].rm_so < 0) {
- *startPtr = *endPtr = NULL;
- } else {
- if (regexpPtr->objPtr) {
- string = Tcl_GetString(regexpPtr->objPtr);
- } else {
- string = regexpPtr->string;
- }
- *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
- *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
- }
- }
- /*
- *---------------------------------------------------------------------------
- *
- * RegExpExecUniChar --
- *
- * Execute the regular expression matcher using a compiled form of a
- * regular expression and save information about any match that is
- * found.
- *
- * Results:
- * If an error occurs during the matching operation then -1 is
- * returned and an error message is left in interp's result.
- * Otherwise the return value is 1 if a matching range was found or
- * 0 if there was no matching range.
- *
- * Side effects:
- * None.
- *
- *----------------------------------------------------------------------
- */
- static int
- RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
- Tcl_Interp *interp; /* Interpreter to use for error reporting. */
- Tcl_RegExp re; /* Compiled regular expression; returned by
- * a previous call to Tcl_GetRegExpFromObj */
- CONST Tcl_UniChar *wString; /* String against which to match re. */
- int numChars; /* Length of Tcl_UniChar string (must
- * be >= 0). */
- int nmatches; /* How many subexpression matches (counting
- * the whole match as subexpression 0) are
- * of interest. -1 means "don't know". */
- int flags; /* Regular expression flags. */
- {
- int status;
- TclRegexp *regexpPtr = (TclRegexp *) re;
- size_t last = regexpPtr->re.re_nsub + 1;
- size_t nm = last;
- if (nmatches >= 0 && (size_t) nmatches < nm) {
- nm = (size_t) nmatches;
- }
- status = TclReExec(®expPtr->re, wString, (size_t) numChars,
- ®expPtr->details, nm, regexpPtr->matches, flags);
- /*
- * Check for errors.
- */
- if (status != REG_OKAY) {
- if (status == REG_NOMATCH) {
- return 0;
- }
- if (interp != NULL) {
- TclRegError(interp, "error while matching regular expression: ",
- status);
- }
- return -1;
- }
- return 1;
- }
- /*
- *---------------------------------------------------------------------------
- *
- * TclRegExpRangeUniChar --
- *
- * Returns pointers describing the range of a regular expression match,
- * or one of the subranges within the match, or the hypothetical range
- * represented by the rm_extend field of the rm_detail_t.
- *
- * Results:
- * The variables at *startPtr and *endPtr are modified to hold the
- * offsets of the endpoints of the range given by index. If the
- * specified range doesn't exist then -1s are supplied.
- *
- * Side effects:
- * None.
- *
- *---------------------------------------------------------------------------
- */
- void
- TclRegExpRangeUniChar(re, index, startPtr, endPtr)
- Tcl_RegExp re; /* Compiled regular expression that has
- * been passed to Tcl_RegExpExec. */
- int index; /* 0 means give the range of the entire
- * match, > 0 means give the range of
- * a matching subrange, -1 means the
- * range of the rm_extend field. */
- int *startPtr; /* Store address of first character in
- * (sub-) range here. */
- int *endPtr; /* Store address of character just after last
- * in (sub-) range here. */
- {
- TclRegexp *regexpPtr = (TclRegexp *) re;
- if ((regexpPtr->flags®_EXPECT) && index == -1) {
- *startPtr = regexpPtr->details.rm_extend.rm_so;
- *endPtr = regexpPtr->details.rm_extend.rm_eo;
- } else if ((size_t) index > regexpPtr->re.re_nsub) {
- *startPtr = -1;
- *endPtr = -1;
- } else {
- *startPtr = regexpPtr->matches[index].rm_so;
- *endPtr = regexpPtr->matches[index].rm_eo;
- }
- }
- /*
- *----------------------------------------------------------------------
- *
- * Tcl_RegExpMatch --
- *
- * See if a string matches a regular expression.
- *
- * Results:
- * If an error occurs during the matching operation then -1
- * is returned and the interp's result contains an error message.
- * Otherwise the return value is 1 if "string" matches "pattern"
- * and 0 otherwise.
- *
- * Side effects:
- * None.
- *
- *----------------------------------------------------------------------
- */
- int
- Tcl_RegExpMatch(interp, string, pattern)
- Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
- CONST char *string; /* String. */
- CONST char *pattern; /* Regular expression to match against
- * string. */
- {
- Tcl_RegExp re;
- re = Tcl_RegExpCompile(interp, pattern);
- if (re == NULL) {
- return -1;
- }
- return Tcl_RegExpExec(interp, re, string, string);
- }
- /*
- *----------------------------------------------------------------------
- *
- * Tcl_RegExpExecObj --
- *
- * Execute a precompiled regexp against the given object.
- *
- * Results:
- * If an error occurs during the matching operation then -1
- * is returned and the interp's result contains an error message.
- * Otherwise the return value is 1 if "string" matches "pattern"
- * and 0 otherwise.
- *
- * Side effects:
- * Converts the object to a Unicode object.
- *
- *----------------------------------------------------------------------
- */
- int
- Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)
- Tcl_Interp *interp; /* Interpreter to use for error reporting. */
- Tcl_RegExp re; /* Compiled regular expression; must have
- * been returned by previous call to
- * Tcl_GetRegExpFromObj. */
- Tcl_Obj *objPtr; /* String against which to match re. */
- int offset; /* Character index that marks where matching
- * should begin. */
- int nmatches; /* How many subexpression matches (counting
- * the whole match as subexpression 0) are
- * of interest. -1 means all of them. */
- int flags; /* Regular expression execution flags. */
- {
- TclRegexp *regexpPtr = (TclRegexp *) re;
- Tcl_UniChar *udata;
- int length;
- /*
- * Save the target object so we can extract strings from it later.
- */
- regexpPtr->string = NULL;
- regexpPtr->objPtr = objPtr;
- udata = Tcl_GetUnicodeFromObj(objPtr, &length);
- if (offset > length) {
- offset = length;
- }
- udata += offset;
- length -= offset;
-
- return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);
- }
- /*
- *----------------------------------------------------------------------
- *
- * Tcl_RegExpMatchObj --
- *
- * See if an object matches a regular expression.
- *
- * Results:
- * If an error occurs during the matching operation then -1
- * is returned and the interp's result contains an error message.
- * Otherwise the return value is 1 if "string" matches "pattern"
- * and 0 otherwise.
- *
- * Side effects:
- * Changes the internal rep of the pattern and string objects.
- *
- *----------------------------------------------------------------------
- */
- int
- Tcl_RegExpMatchObj(interp, stringObj, patternObj)
- Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
- Tcl_Obj *stringObj; /* Object containing the String to search. */
- Tcl_Obj *patternObj; /* Regular expression to match against
- * string. */
- {
- Tcl_RegExp re;
- re = Tcl_GetRegExpFromObj(interp, patternObj,
- TCL_REG_ADVANCED | TCL_REG_NOSUB);
- if (re == NULL) {
- return -1;
- }
- return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,
- 0 /* nmatches */, 0 /* flags */);
- }
- /*
- *----------------------------------------------------------------------
- *
- * Tcl_RegExpGetInfo --
- *
- * Retrieve information about the current match.
- *
- * Results:
- * None.
- *
- * Side effects:
- * None.
- *
- *----------------------------------------------------------------------
- */
- void
- Tcl_RegExpGetInfo(regexp, infoPtr)
- Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */
- Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */
- {
- TclRegexp *regexpPtr = (TclRegexp *) regexp;
- infoPtr->nsubs = regexpPtr->re.re_nsub;
- infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
- infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
- }
- /*
- *----------------------------------------------------------------------
- *
- * Tcl_GetRegExpFromObj --
- *
- * Compile a regular expression into a form suitable for fast
- * matching. This procedure caches the result in a Tcl_Obj.
- *
- * Results:
- * The return value is a pointer to the compiled form of string,
- * suitable for passing to Tcl_RegExpExec. If an error occurred
- * while compiling the pattern, then NULL is returned and an error
- * message is left in the interp's result.
- *
- * Side effects:
- * Updates the native rep of the Tcl_Obj.
- *
- *----------------------------------------------------------------------
- */
- Tcl_RegExp
- Tcl_GetRegExpFromObj(interp, objPtr, flags)
- Tcl_Interp *interp; /* For use in error reporting, and to access
- * the interp regexp cache. */
- Tcl_Obj *objPtr; /* Object whose string rep contains regular
- * expression pattern. Internal rep will be
- * changed to compiled form of this regular
- * expression. */
- int flags; /* Regular expression compilation flags. */
- {
- int length;
- Tcl_ObjType *typePtr;
- TclRegexp *regexpPtr;
- char *pattern;
- typePtr = objPtr->typePtr;
- regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
- if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
- pattern = Tcl_GetStringFromObj(objPtr, &length);
- regexpPtr = CompileRegexp(interp, pattern, length, flags);
- if (regexpPtr == NULL) {
- return NULL;
- }
- /*
- * Add a reference to the regexp so it will persist even if it is
- * pushed out of the current thread's regexp cache. This reference
- * will be removed when the object's internal rep is freed.
- */
- regexpPtr->refCount++;
- /*
- * Free the old representation and set our type.
- */
- if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {
- (*typePtr->freeIntRepProc)(objPtr);
- }
- objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr;
- objPtr->typePtr = &tclRegexpType;
- }
- return (Tcl_RegExp) regexpPtr;
- }
- /*
- *----------------------------------------------------------------------
- *
- * TclRegAbout --
- *
- * Return information about a compiled regular expression.
- *
- * Results:
- * The return value is -1 for failure, 0 for success, although at
- * the moment there's nothing that could fail. On success, a list
- * is left in the interp's result: first element is the subexpression
- * count, second is a list of re_info bit names.
- *
- * Side effects:
- * None.
- *
- *----------------------------------------------------------------------
- */
- int
- TclRegAbout(interp, re)
- Tcl_Interp *interp; /* For use in variable assignment. */
- Tcl_RegExp re; /* The compiled regular expression. */
- {
- TclRegexp *regexpPtr = (TclRegexp *)re;
- char buf[TCL_INTEGER_SPACE];
- static struct infoname {
- int bit;
- char *text;
- } infonames[] = {
- {REG_UBACKREF, "REG_UBACKREF"},
- {REG_ULOOKAHEAD, "REG_ULOOKAHEAD"},
- {REG_UBOUNDS, "REG_UBOUNDS"},
- {REG_UBRACES, "REG_UBRACES"},
- {REG_UBSALNUM, "REG_UBSALNUM"},
- {REG_UPBOTCH, "REG_UPBOTCH"},
- {REG_UBBS, "REG_UBBS"},
- {REG_UNONPOSIX, "REG_UNONPOSIX"},
- {REG_UUNSPEC, "REG_UUNSPEC"},
- {REG_UUNPORT, "REG_UUNPORT"},
- {REG_ULOCALE, "REG_ULOCALE"},
- {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
- {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
- {REG_USHORTEST, "REG_USHORTEST"},
- {0, ""}
- };
- struct infoname *inf;
- int n;
- Tcl_ResetResult(interp);
- sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
- Tcl_AppendElement(interp, buf);
- /*
- * Must count bits before generating list, because we must know
- * whether {} are needed before we start appending names.
- */
- n = 0;
- for (inf = infonames; inf->bit != 0; inf++) {
- if (regexpPtr->re.re_info&inf->bit) {
- n++;
- }
- }
- if (n != 1) {
- Tcl_AppendResult(interp, " {", NULL);
- }
- for (inf = infonames; inf->bit != 0; inf++) {
- if (regexpPtr->re.re_info&inf->bit) {
- Tcl_AppendElement(interp, inf->text);
- }
- }
- if (n != 1) {
- Tcl_AppendResult(interp, "}", NULL);
- }
- return 0;
- }
- /*
- *----------------------------------------------------------------------
- *
- * TclRegError --
- *
- * Generate an error message based on the regexp status code.
- *
- * Results:
- * Places an error in the interpreter.
- *
- * Side effects:
- * Sets errorCode as well.
- *
- *----------------------------------------------------------------------
- */
- void
- TclRegError(interp, msg, status)
- Tcl_Interp *interp; /* Interpreter for error reporting. */
- CONST char *msg; /* Message to prepend to error. */
- int status; /* Status code to report. */
- {
- char buf[100]; /* ample in practice */
- char cbuf[100]; /* lots in practice */
- size_t n;
- char *p;
- Tcl_ResetResult(interp);
- n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));
- p = (n > sizeof(buf)) ? "..." : "";
- Tcl_AppendResult(interp, msg, buf, p, NULL);
- sprintf(cbuf, "%d", status);
- (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
- Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
- }
- /*
- *----------------------------------------------------------------------
- *
- * FreeRegexpInternalRep --
- *
- * Deallocate the storage associated with a regexp object's internal
- * representation.
- *
- * Results:
- * None.
- *
- * Side effects:
- * Frees the compiled regular expression.
- *
- *----------------------------------------------------------------------
- */
- static void
- FreeRegexpInternalRep(objPtr)
- Tcl_Obj *objPtr; /* Regexp object with internal rep to free. */
- {
- TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
- /*
- * If this is the last reference to the regexp, free it.
- */
- if (--(regexpRepPtr->refCount) <= 0) {
- FreeRegexp(regexpRepPtr);
- }
- }
- /*
- *----------------------------------------------------------------------
- *
- * DupRegexpInternalRep --
- *
- * We copy the reference to the compiled regexp and bump its
- * reference count.
- *
- * Results:
- * None.
- *
- * Side effects:
- * Increments the reference count of the regexp.
- *
- *----------------------------------------------------------------------
- */
- static void
- DupRegexpInternalRep(srcPtr, copyPtr)
- Tcl_Obj *srcPtr; /* Object with internal rep to copy. */
- Tcl_Obj *copyPtr; /* Object with internal rep to set. */
- {
- TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;
- regexpPtr->refCount++;
- copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;
- copyPtr->typePtr = &tclRegexpType;
- }
- /*
- *----------------------------------------------------------------------
- *
- * SetRegexpFromAny --
- *
- * Attempt to generate a compiled regular expression for the Tcl object
- * "objPtr".
- *
- * Results:
- * The return value is TCL_OK or TCL_ERROR. If an error occurs during
- * conversion, an error message is left in the interpreter's result
- * unless "interp" is NULL.
- *
- * Side effects:
- * If no error occurs, a regular expression is stored as "objPtr"s
- * internal representation.
- *
- *----------------------------------------------------------------------
- */
- static int
- SetRegexpFromAny(interp, objPtr)
- Tcl_Interp *interp; /* Used for error reporting if not NULL. */
- Tcl_Obj *objPtr; /* The object to convert. */
- {
- if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) {
- return TCL_ERROR;
- }
- return TCL_OK;
- }
- /*
- *---------------------------------------------------------------------------
- *
- * CompileRegexp --
- *
- * Attempt to compile the given regexp pattern. If the compiled
- * regular expression can be found in the per-thread cache, it
- * will be used instead of compiling a new copy.
- *
- * Results:
- * The return value is a pointer to a newly allocated TclRegexp
- * that represents the compiled pattern, or NULL if the pattern
- * could not be compiled. If NULL is returned, an error message is
- * left in the interp's result.
- *
- * Side effects:
- * The thread-local regexp cache is updated and a new TclRegexp may
- * be allocated.
- *
- *----------------------------------------------------------------------
- */
- static TclRegexp *
- CompileRegexp(interp, string, length, flags)
- Tcl_Interp *interp; /* Used for error reporting if not NULL. */
- CONST char *string; /* The regexp to compile (UTF-8). */
- int length; /* The length of the string in bytes. */
- int flags; /* Compilation flags. */
- {
- TclRegexp *regexpPtr;
- CONST Tcl_UniChar *uniString;
- int numChars;
- Tcl_DString stringBuf;
- int status, i;
- ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
-
- if (!tsdPtr->initialized) {
- tsdPtr->initialized = 1;
- Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL);
- }
- /*
- * This routine maintains a second-level regular expression cache in
- * addition to the per-object regexp cache. The per-thread cache is needed
- * to handle the case where for various reasons the object is lost between
- * invocations of the regexp command, but the literal pattern is the same.
- */
- /*
- * Check the per-thread compiled regexp cache. We can only reuse
- * a regexp if it has the same pattern and the same flags.
- */
- for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
- if ((length == tsdPtr->patLengths[i])
- && (tsdPtr->regexps[i]->flags == flags)
- && (strcmp(string, tsdPtr->patterns[i]) == 0)) {
- /*
- * Move the matched pattern to the first slot in the
- * cache and shift the other patterns down one position.
- */
- if (i != 0) {
- int j;
- char *cachedString;
- cachedString = tsdPtr->patterns[i];
- regexpPtr = tsdPtr->regexps[i];
- for (j = i-1; j >= 0; j--) {
- tsdPtr->patterns[j+1] = tsdPtr->patterns[j];
- tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j];
- tsdPtr->regexps[j+1] = tsdPtr->regexps[j];
- }
- tsdPtr->patterns[0] = cachedString;
- tsdPtr->patLengths[0] = length;
- tsdPtr->regexps[0] = regexpPtr;
- }
- return tsdPtr->regexps[0];
- }
- }
- /*
- * This is a new expression, so compile it and add it to the cache.
- */
-
- regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
- regexpPtr->objPtr = NULL;
- regexpPtr->string = NULL;
- regexpPtr->details.rm_extend.rm_so = -1;
- regexpPtr->details.rm_extend.rm_eo = -1;
- /*
- * Get the up-to-date string representation and map to unicode.
- */
- Tcl_DStringInit(&stringBuf);
- uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf);
- numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar);
- /*
- * Compile the string and check for errors.
- */
- regexpPtr->flags = flags;
- status = TclReComp(®expPtr->re, uniString, (size_t) numChars, flags);
- Tcl_DStringFree(&stringBuf);
- if (status != REG_OKAY) {
- /*
- * Clean up and report errors in the interpreter, if possible.
- */
- ckfree((char *)regexpPtr);
- if (interp) {
- TclRegError(interp,
- "couldn't compile regular expression pattern: ",
- status);
- }
- return NULL;
- }
- /*
- * Allocate enough space for all of the subexpressions, plus one
- * extra for the entire pattern.
- */
- regexpPtr->matches = (regmatch_t *) ckalloc(
- sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
- /*
- * Initialize the refcount to one initially, since it is in the cache.
- */
- regexpPtr->refCount = 1;
- /*
- * Free the last regexp, if necessary, and make room at the head of the
- * list for the new regexp.
- */
- if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
- TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
- if (--(oldRegexpPtr->refCount) <= 0) {
- FreeRegexp(oldRegexpPtr);
- }
- ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);
- }
- for (i = NUM_REGEXPS - 2; i >= 0; i--) {
- tsdPtr->patterns[i+1] = tsdPtr->patterns[i];
- tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
- tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
- }
- tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
- strcpy(tsdPtr->patterns[0], string);
- tsdPtr->patLengths[0] = length;
- tsdPtr->regexps[0] = regexpPtr;
- return regexpPtr;
- }
- /*
- *----------------------------------------------------------------------
- *
- * FreeRegexp --
- *
- * Release the storage associated with a TclRegexp.
- *
- * Results:
- * None.
- *
- * Side effects:
- * None.
- *
- *----------------------------------------------------------------------
- */
- static void
- FreeRegexp(regexpPtr)
- TclRegexp *regexpPtr; /* Compiled regular expression to free. */
- {
- TclReFree(®expPtr->re);
- if (regexpPtr->matches) {
- ckfree((char *) regexpPtr->matches);
- }
- ckfree((char *) regexpPtr);
- }
- /*
- *----------------------------------------------------------------------
- *
- * FinalizeRegexp --
- *
- * Release the storage associated with the per-thread regexp
- * cache.
- *
- * Results:
- * None.
- *
- * Side effects:
- * None.
- *
- *----------------------------------------------------------------------
- */
- static void
- FinalizeRegexp(clientData)
- ClientData clientData; /* Not used. */
- {
- int i;
- TclRegexp *regexpPtr;
- ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
- for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
- regexpPtr = tsdPtr->regexps[i];
- if (--(regexpPtr->refCount) <= 0) {
- FreeRegexp(regexpPtr);
- }
- ckfree(tsdPtr->patterns[i]);
- tsdPtr->patterns[i] = NULL;
- }
- /*
- * We may find ourselves reinitialized if another finalization routine
- * invokes regexps.
- */
- tsdPtr->initialized = 0;
- }