reg.test
上传用户:rrhhcc
上传日期:2015-12-11
资源大小:54129k
文件大小:30k
- # reg.test --
- #
- # This file contains a collection of tests for one or more of the Tcl
- # built-in commands. Sourcing this file into Tcl runs the tests and
- # generates output for errors. No output means no errors were found.
- # (Don't panic if you are seeing this as part of the reg distribution
- # and aren't using Tcl -- reg's own regression tester also knows how
- # to read this file, ignoring the Tcl-isms.)
- #
- # Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- #
- # RCS: @(#) $Id: reg.test,v 1.16.2.4 2007/12/18 11:23:16 dkf Exp $
- if {[lsearch [namespace children] ::tcltest] == -1} {
- package require tcltest 2
- namespace import -force ::tcltest::*
- }
- # All tests require the testregexp command, return if this
- # command doesn't exist
- ::tcltest::testConstraint testregexp
- [expr {[info commands testregexp] != {}}]
- ::tcltest::testConstraint localeRegexp 0
- # This file uses some custom procedures, defined below, for regexp regression
- # testing. The name of the procedure indicates the general nature of the
- # test:
- # e compile error expected
- # f match failure expected
- # m successful match
- # i successful match with -indices (used in checking things like
- # nonparticipating subexpressions)
- # p unsuccessful match with -indices (!!) (used in checking
- # partial-match reporting)
- # There is also "doing" which sets up title and major test number for each
- # block of tests.
- # The first 3 arguments are constant: a minor number (which often gets
- # a letter or two suffixed to it internally), some flags, and the RE itself.
- # For e, the remaining argument is the name of the compile error expected,
- # less the leading "REG_". For the rest, the next argument is the string
- # to try the match against. Remaining arguments are the substring expected
- # to be matched, and any substrings expected to be matched by subexpressions.
- # (For f, these arguments are optional, and if present are ignored except
- # that they indicate how many subexpressions should be present in the RE.)
- # It is an error for the number of subexpression arguments to be wrong.
- # Cases involving nonparticipating subexpressions, checking where empty
- # substrings are located, etc. should be done using i and p.
- # The flag characters are complex and a bit eclectic. Generally speaking,
- # lowercase letters are compile options, uppercase are expected re_info
- # bits, and nonalphabetics are match options, controls for how the test is
- # run, or testing options. The one small surprise is that AREs are the
- # default, and you must explicitly request lesser flavors of RE. The flags
- # are as follows. It is admitted that some are not very mnemonic.
- # There are some others which are purely debugging tools and are not
- # useful in this file.
- #
- # - no-op (placeholder)
- # + provide fake xy equivalence class and ch collating element
- # % force small state-set cache in matcher (to test cache replace)
- # ^ beginning of string is not beginning of line
- # $ end of string is not end of line
- # * test is Unicode-specific, needs big character set
- #
- # & test as both ARE and BRE
- # b BRE
- # e ERE
- # a turn advanced-features bit on (error unless ERE already)
- # q literal string, no metacharacters at all
- #
- # i case-independent matching
- # o ("opaque") no subexpression capture
- # p newlines are half-magic, excluded from . and [^ only
- # w newlines are half-magic, significant to ^ and $ only
- # n newlines are fully magic, both effects
- # x expanded RE syntax
- # t incomplete-match reporting
- #
- # A backslash-_a_lphanumeric seen
- # B ERE/ARE literal-_b_race heuristic used
- # E backslash (_e_scape) seen within []
- # H looka_h_ead constraint seen
- # I _i_mpossible to match
- # L _l_ocale-specific construct seen
- # M unportable (_m_achine-specific) construct seen
- # N RE can match empty (_n_ull) string
- # P non-_P_OSIX construct seen
- # Q {} _q_uantifier seen
- # R back _r_eference seen
- # S POSIX-un_s_pecified syntax seen
- # T prefers shortest (_t_iny)
- # U saw original-POSIX botch: unmatched right paren in ERE (_u_gh)
- # The one area we can't easily test is memory-allocation failures (which
- # are hard to provoke on command). Embedded NULs also are not tested at
- # the moment, but this is a historical accident which should be fixed.
- # test procedures and related
- set ask "about"
- set xflags "xflags"
- set testbypassed 0
- # re_info abbreviation mapping table
- set infonames(A) "REG_UBSALNUM"
- set infonames(B) "REG_UBRACES"
- set infonames(E) "REG_UBBS"
- set infonames(H) "REG_ULOOKAHEAD"
- set infonames(I) "REG_UIMPOSSIBLE"
- set infonames(L) "REG_ULOCALE"
- set infonames(M) "REG_UUNPORT"
- set infonames(N) "REG_UEMPTYMATCH"
- set infonames(P) "REG_UNONPOSIX"
- set infonames(Q) "REG_UBOUNDS"
- set infonames(R) "REG_UBACKREF"
- set infonames(S) "REG_UUNSPEC"
- set infonames(T) "REG_USHORTEST"
- set infonames(U) "REG_UPBOTCH"
- set infonameorder "RHQBAUEPSMLNIT" ;# must match bit order, lsb first
- # set major test number and description
- proc doing {major desc} {
- global prefix description testbypassed
- if {$testbypassed != 0} {
- puts stdout "!!! bypassed $testbypassed tests in
- $prefix, `$description'"
- }
- set prefix reg-$major
- set description "reg $desc"
- set testbypassed 0
- }
- # build test number (internal)
- proc tno {testid} {
- return [join $testid .]
- }
- # build description, with possible modifiers (internal)
- proc desc {testid} {
- global description
- set d $description
- if {[llength $testid] > 1} {
- set d "([lreplace $testid 0 0]) $d"
- }
- return $d
- }
- # build trailing options and flags argument from a flags string (internal)
- proc flags {fl} {
- global xflags
- set args [list]
- set flags ""
- foreach f [split $fl ""] {
- switch -exact -- $f {
- "i" { lappend args "-nocase" }
- "x" { lappend args "-expanded" }
- "n" { lappend args "-line" }
- "p" { lappend args "-linestop" }
- "w" { lappend args "-lineanchor" }
- "-" { }
- default { append flags $f }
- }
- }
- if {[string compare $flags ""] != 0} {
- lappend args -$xflags $flags
- }
- return $args
- }
- # build info-flags list from a flags string (internal)
- proc infoflags {fl} {
- global infonames infonameorder
- set ret [list]
- foreach f [split $infonameorder ""] {
- if {[string first $f $fl] >= 0} {
- lappend ret $infonames($f)
- }
- }
- return $ret
- }
- # compilation error expected
- proc e {testid flags re err} {
- global prefix ask errorCode
- # Tcl locale stuff doesn't do the ch/xy test fakery yet
- if {[string first "+" $flags] >= 0} {
- # This will register as a skipped test
- test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
- return
- }
- # if &, test as both ARE and BRE
- set amp [string first "&" $flags]
- if {$amp >= 0} {
- set f [string range $flags 0 [expr $amp - 1]]
- append f [string range $flags [expr $amp + 1] end]
- e [linsert $testid end ARE] ${f} $re $err
- e [linsert $testid end BRE] ${f}b $re $err
- return
- }
- set cmd [concat [list testregexp -$ask] [flags $flags] [list $re]]
- set run "list [catch {$cmd}] [lindex $errorCode 1]"
- test $prefix.[tno $testid] [desc $testid]
- {testregexp} $run [list 1 REG_$err]
- }
- # match failure expected
- proc f {testid flags re target args} {
- global prefix description ask
- # Tcl locale stuff doesn't do the ch/xy test fakery yet
- if {[string first "+" $flags] >= 0} {
- # This will register as a skipped test
- test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
- return
- }
- # if &, test as both ARE and BRE
- set amp [string first "&" $flags]
- if {$amp >= 0} {
- set f [string range $flags 0 [expr $amp - 1]]
- append f [string range $flags [expr $amp + 1] end]
- eval [linsert $args 0 f [linsert $testid end ARE] ${f} $re
- $target]
- eval [linsert $args 0 f [linsert $testid end BRE] ${f}b $re
- $target]
- return
- }
- set f [flags $flags]
- set infoflags [infoflags $flags]
- set ccmd [concat [list testregexp -$ask] $f [list $re]]
- set nsub [expr [llength $args] - 1]
- if {$nsub == -1} {
- # didn't tell us number of subexps
- set ccmd "lreplace [$ccmd] 0 0"
- set info [list $infoflags]
- } else {
- set info [list $nsub $infoflags]
- }
- lappend testid "compile"
- test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
- set testid [lreplace $testid end end "execute"]
- set ecmd [concat [list testregexp] $f [list $re $target]]
- test $prefix.[tno $testid] [desc $testid] {testregexp} $ecmd 0
- }
- # match expected, internal routine that does the work
- # parameters like the "real" routines except they don't have "opts",
- # which is a possibly-empty list of switches for the regexp match attempt
- # The ! flag is used to indicate expected match failure (for REG_EXPECT,
- # which wants argument testing even in the event of failure).
- proc matchexpected {opts testid flags re target args} {
- global prefix description ask regBug
- if {[info exists regBug] && $regBug} {
- # This will register as a skipped test
- test $prefix.[tno $testid] [desc $testid] knownBug {format 0} {1}
- return
- }
- # Tcl locale stuff doesn't do the ch/xy test fakery yet
- if {[string first "+" $flags] >= 0} {
- # This will register as a skipped test
- test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
- return
- }
- # if &, test as both BRE and ARE
- set amp [string first "&" $flags]
- if {$amp >= 0} {
- set f [string range $flags 0 [expr $amp - 1]]
- append f [string range $flags [expr $amp + 1] end]
- eval [concat [list matchexpected $opts
- [linsert $testid end ARE] ${f} $re $target] $args]
- eval [concat [list matchexpected $opts
- [linsert $testid end BRE] ${f}b $re $target] $args]
- return
- }
- set f [flags $flags]
- set infoflags [infoflags $flags]
- set ccmd [concat [list testregexp -$ask] $f [list $re]]
- set ecmd [concat [list testregexp] $opts $f [list $re $target]]
- set nsub [expr [llength $args] - 1]
- set names [list]
- set refs ""
- for {set i 0} {$i <= $nsub} {incr i} {
- if {$i == 0} {
- set name match
- } else {
- set name sub$i
- }
- lappend names $name
- append refs " $$name"
- set $name ""
- }
- if {[string first "o" $flags] >= 0} { ;# REG_NOSUB kludge
- set nsub 0 ;# unsigned value cannot be -1
- }
- if {[string first "t" $flags] >= 0} { ;# REG_EXPECT
- incr nsub -1 ;# the extra does not count
- }
- set ecmd [concat $ecmd $names]
- set erun "list [$ecmd] $refs"
- set retcode [list 1]
- if {[string first "!" $flags] >= 0} {
- set retcode [list 0]
- }
- set result [concat $retcode $args]
- set info [list $nsub $infoflags]
- lappend testid "compile"
- test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
- set testid [lreplace $testid end end "execute"]
- test $prefix.[tno $testid] [desc $testid] {testregexp} $erun $result
- }
- # match expected (no missing, empty, or ambiguous submatches)
- # m testno flags re target mat submat ...
- proc m {args} {
- eval matchexpected [linsert $args 0 [list]]
- }
- # match expected (full fanciness)
- # i testno flags re target mat submat ...
- proc i {args} {
- eval matchexpected [linsert $args 0 [list "-indices"]]
- }
- # partial match expected
- # p testno flags re target mat "" ...
- # Quirk: number of ""s must be one more than number of subREs.
- proc p {args} {
- set f [lindex $args 1] ;# add ! flag
- set args [lreplace $args 1 1 "!$f"]
- eval matchexpected [linsert $args 0 [list "-indices"]]
- }
- # test is a knownBug
- proc knownBug {args} {
- set ::regBug 1
- uplevel #0 $args
- set ::regBug 0
- }
- # the tests themselves
- # support functions and preliminary misc.
- # This is sensitive to changes in message wording, but we really have to
- # test the code->message expansion at least once.
- test reg-0.1 "regexp error reporting" {
- list [catch {regexp (*) ign} msg] $msg
- } {1 {couldn't compile regular expression pattern: quantifier operand invalid}}
- doing 1 "basic sanity checks"
- m 1 & abc abc abc
- f 2 & abc def
- m 3 & abc xyabxabce abc
- doing 2 "invalid option combinations"
- e 1 qe a INVARG
- e 2 qa a INVARG
- e 3 qx a INVARG
- e 4 qn a INVARG
- e 5 ba a INVARG
- doing 3 "basic syntax"
- i 1 &NS "" a {0 -1}
- m 2 NS a| a a
- m 3 - a|b a a
- m 4 - a|b b b
- m 5 NS a||b b b
- m 6 & ab ab ab
- doing 4 "parentheses"
- m 1 - (a)e ae ae a
- m 2 o (a)e ae
- m 3 b {(a)b} ab ab a
- m 4 - a((b)c) abc abc bc b
- m 5 - a(b)(c) abc abc b c
- e 6 - a(b EPAREN
- e 7 b {a(b} EPAREN
- # sigh, we blew it on the specs here... someday this will be fixed in POSIX,
- # but meanwhile, it's fixed in AREs
- m 8 eU a)b a)b a)b
- e 9 - a)b EPAREN
- e 10 b {a)b} EPAREN
- m 11 P a(?:b)c abc abc
- e 12 e a(?:b)c BADRPT
- i 13 S a()b ab {0 1} {1 0}
- m 14 SP a(?:)b ab ab
- i 15 S a(|b)c ac {0 1} {1 0}
- m 16 S a(b|)c abc abc b
- doing 5 "simple one-char matching"
- # general case of brackets done later
- m 1 & a.b axb axb
- f 2 &n "a.b" "anb"
- m 3 & {a[bc]d} abd abd
- m 4 & {a[bc]d} acd acd
- f 5 & {a[bc]d} aed
- f 6 & {a[^bc]d} abd
- m 7 & {a[^bc]d} aed aed
- f 8 &p "a[^bc]d" "and"
- doing 6 "context-dependent syntax"
- # plus odds and ends
- e 1 - * BADRPT
- m 2 b * * *
- m 3 b {(*)} * * *
- e 4 - (*) BADRPT
- m 5 b ^* * *
- e 6 - ^* BADRPT
- f 7 & ^b ^b
- m 8 b x^ x^ x^
- f 9 I x^ x
- m 10 n "n^" "xnb" "n"
- f 11 bS {(^b)} ^b
- m 12 - (^b) b b b
- m 13 & {x$} x x
- m 14 bS {(x$)} x x x
- m 15 - {(x$)} x x x
- m 16 b {x$y} "x$y" "x$y"
- f 17 I {x$y} xy
- m 18 n "x$n" "xn" "xn"
- e 19 - + BADRPT
- e 20 - ? BADRPT
- doing 7 "simple quantifiers"
- m 1 &N a* aa aa
- i 2 &N a* b {0 -1}
- m 3 - a+ aa aa
- m 4 - a?b ab ab
- m 5 - a?b b b
- e 6 - ** BADRPT
- m 7 bN ** *** ***
- e 8 & a** BADRPT
- e 9 & a**b BADRPT
- e 10 & *** BADRPT
- e 11 - a++ BADRPT
- e 12 - a?+ BADRPT
- e 13 - a?* BADRPT
- e 14 - a+* BADRPT
- e 15 - a*+ BADRPT
- doing 8 "braces"
- m 1 NQ "a{0,1}" "" ""
- m 2 NQ "a{0,1}" ac a
- e 3 - "a{1,0}" BADBR
- e 4 - "a{1,2,3}" BADBR
- e 5 - "a{257}" BADBR
- e 6 - "a{1000}" BADBR
- e 7 - "a{1" EBRACE
- e 8 - "a{1n}" BADBR
- m 9 BS "a{b" "a{b" "a{b"
- m 10 BS "a{" "a{" "a{"
- m 11 bQ "a\{0,1\}b" cb b
- e 12 b "a\{0,1" EBRACE
- e 13 - "a{0,1\" BADBR
- m 14 Q "a{0}b" ab b
- m 15 Q "a{0,0}b" ab b
- m 16 Q "a{0,1}b" ab ab
- m 17 Q "a{0,2}b" b b
- m 18 Q "a{0,2}b" aab aab
- m 19 Q "a{0,}b" aab aab
- m 20 Q "a{1,1}b" aab ab
- m 21 Q "a{1,3}b" aaaab aaab
- f 22 Q "a{1,3}b" b
- m 23 Q "a{1,}b" aab aab
- f 24 Q "a{2,3}b" ab
- m 25 Q "a{2,3}b" aaaab aaab
- f 26 Q "a{2,}b" ab
- m 27 Q "a{2,}b" aaaab aaaab
- doing 9 "brackets"
- m 1 & {a[bc]} ac ac
- m 2 & {a[-]} a- a-
- m 3 & {a[[.-.]]} a- a-
- m 4 &L {a[[.zero.]]} a0 a0
- m 5 &LM {a[[.zero.]-9]} a2 a2
- m 6 &M {a[0-[.9.]]} a2 a2
- m 7 &+L {a[[=x=]]} ax ax
- m 8 &+L {a[[=x=]]} ay ay
- f 9 &+L {a[[=x=]]} az
- e 10 & {a[0-[=x=]]} ERANGE
- m 11 &L {a[[:digit:]]} a0 a0
- e 12 & {a[[:woopsie:]]} ECTYPE
- f 13 &L {a[[:digit:]]} ab
- e 14 & {a[0-[:digit:]]} ERANGE
- m 15 &LP {[[:<:]]a} a a
- m 16 &LP {a[[:>:]]} a a
- e 17 & {a[[..]]b} ECOLLATE
- e 18 & {a[[==]]b} ECOLLATE
- e 19 & {a[[::]]b} ECTYPE
- e 20 & {a[[.a} EBRACK
- e 21 & {a[[=a} EBRACK
- e 22 & {a[[:a} EBRACK
- e 23 & {a[} EBRACK
- e 24 & {a[b} EBRACK
- e 25 & {a[b-} EBRACK
- e 26 & {a[b-c} EBRACK
- m 27 &M {a[b-c]} ab ab
- m 28 & {a[b-b]} ab ab
- m 29 &M {a[1-2]} a2 a2
- e 30 & {a[c-b]} ERANGE
- e 31 & {a[a-b-c]} ERANGE
- m 32 &M {a[--?]b} a?b a?b
- m 33 & {a[---]b} a-b a-b
- m 34 & {a[]b]c} a]c a]c
- m 35 EP {a[]]b} a]b a]b
- f 36 bE {a[]]b} a]b
- m 37 bE {a[]]b} "a\]b" "a\]b"
- m 38 eE {a[]]b} "a\]b" "a\]b"
- m 39 EP {a[\]b} "a\b" "a\b"
- m 40 eE {a[\]b} "a\b" "a\b"
- m 41 bE {a[\]b} "a\b" "a\b"
- e 42 - {a[Z]b} EESCAPE
- m 43 & {a[[b]c} "a[c" "a[c"
- m 44 EMP* {a[u00fe-u0507][u00ff-u0300]b}
- "au0102u02ffb" "au0102u02ffb"
- doing 10 "anchors and newlines"
- m 1 & ^a a a
- f 2 &^ ^a a
- i 3 &N ^ a {0 -1}
- i 4 & {a$} aba {2 2}
- f 5 {&$} {a$} a
- i 6 &N {$} ab {2 1}
- m 7 &n ^a a a
- m 8 &n "^a" "bna" "a"
- i 9 &w "^a" "ana" {0 0}
- i 10 &n^ "^a" "ana" {2 2}
- m 11 &n {a$} a a
- m 12 &n "a$" "anb" "a"
- i 13 &n "a$" "ana" {0 0}
- i 14 N ^^ a {0 -1}
- m 15 b ^^ ^ ^
- i 16 N {$$} a {1 0}
- m 17 b {$$} "$" "$"
- m 18 &N {^$} "" ""
- f 19 &N {^$} a
- i 20 &nN "^$" "annb" {2 1}
- m 21 N {$^} "" ""
- m 22 b {$^} "$^" "$^"
- m 23 P {Aa} a a
- m 24 ^P {Aa} a a
- f 25 ^nP {Aa} "bna"
- m 26 P {aZ} a a
- m 27 {$P} {aZ} a a
- f 28 {$nP} {aZ} "anb"
- e 29 - ^* BADRPT
- e 30 - {$*} BADRPT
- e 31 - {A*} BADRPT
- e 32 - {Z*} BADRPT
- doing 11 "boundary constraints"
- m 1 &LP {[[:<:]]a} a a
- m 2 &LP {[[:<:]]a} -a a
- f 3 &LP {[[:<:]]a} ba
- m 4 &LP {a[[:>:]]} a a
- m 5 &LP {a[[:>:]]} a- a
- f 6 &LP {a[[:>:]]} ab
- m 7 bLP {<a} a a
- f 8 bLP {<a} ba
- m 9 bLP {a>} a a
- f 10 bLP {a>} ab
- m 11 LP {ya} a a
- f 12 LP {ya} ba
- m 13 LP {ay} a a
- f 14 LP {ay} ab
- m 15 LP {aY} ab a
- f 16 LP {aY} a-
- f 17 LP {aY} a
- f 18 LP {-Y} -a
- m 19 LP {-Y} -% -
- f 20 LP {Y-} a-
- e 21 - {[[:<:]]*} BADRPT
- e 22 - {[[:>:]]*} BADRPT
- e 23 b {<*} BADRPT
- e 24 b {>*} BADRPT
- e 25 - {y*} BADRPT
- e 26 - {Y*} BADRPT
- m 27 LP {ma} a a
- f 28 LP {ma} ba
- m 29 LP {aM} a a
- f 30 LP {aM} ab
- f 31 ILP {Ma} a
- f 32 ILP {am} a
- doing 12 "character classes"
- m 1 LP {adb} a0b a0b
- f 2 LP {adb} axb
- f 3 LP {aDb} a0b
- m 4 LP {aDb} axb axb
- m 5 LP "a\sb" "a b" "a b"
- m 6 LP "a\sb" "atb" "atb"
- m 7 LP "a\sb" "anb" "anb"
- f 8 LP {asb} axb
- m 9 LP {aSb} axb axb
- f 10 LP "a\Sb" "a b"
- m 11 LP {awb} axb axb
- f 12 LP {awb} a-b
- f 13 LP {aWb} axb
- m 14 LP {aWb} a-b a-b
- m 15 LP {yw+zy} adze-guz guz
- m 16 LPE {a[d]b} a1b a1b
- m 17 LPE "a[\s]b" "a b" "a b"
- m 18 LPE {a[w]b} axb axb
- doing 13 "escapes"
- e 1 & "a\" EESCAPE
- m 2 - {a<b} a<b a<b
- m 3 e {a<b} a<b a<b
- m 4 bAS {awb} awb awb
- m 5 eAS {awb} awb awb
- m 6 PL "a\ab" "a 07b" "a 07b"
- m 7 P "a\bb" "abb" "abb"
- m 8 P {aBb} "a\b" "a\b"
- m 9 MP "a\chb" "abb" "abb"
- m 10 MP "a\cHb" "abb" "abb"
- m 11 LMP "a\e" "a 33" "a 33"
- m 12 P "a\fb" "afb" "afb"
- m 13 P "a\nb" "anb" "anb"
- m 14 P "a\rb" "arb" "arb"
- m 15 P "a\tb" "atb" "atb"
- m 16 P "a\u0008x" "abx" "abx"
- e 17 - {au008x} EESCAPE
- m 18 P "a\u00088x" "ab8x" "ab8x"
- m 19 P "a\U00000008x" "abx" "abx"
- e 20 - {aU0000008x} EESCAPE
- m 21 P "a\vb" "avb" "avb"
- m 22 MP "a\x08x" "abx" "abx"
- e 23 - {axq} EESCAPE
- m 24 MP "a\x0008x" "abx" "abx"
- e 25 - {az} EESCAPE
- m 26 MP "a\010b" "abb" "abb"
- doing 14 "back references"
- # ugh
- m 1 RP {a(b*)c1} abbcbb abbcbb bb
- m 2 RP {a(b*)c1} ac ac ""
- f 3 RP {a(b*)c1} abbcb
- m 4 RP {a(b*)1} abbcbb abb b
- m 5 RP {a(b|bb)1} abbcbb abb b
- m 6 RP {a([bc])1} abb abb b
- f 7 RP {a([bc])1} abc
- m 8 RP {a([bc])1} abcabb abb b
- f 9 RP {a([bc])*1} abc
- f 10 RP {a([bc])1} abB
- m 11 iRP {a([bc])1} abB abB b
- m 12 RP {a([bc])1+} abbb abbb b
- m 13 QRP "a([bc])\1{3,4}" abbbb abbbb b
- f 14 QRP "a([bc])\1{3,4}" abbb
- m 15 RP {a([bc])1*} abbb abbb b
- m 16 RP {a([bc])1*} ab ab b
- m 17 RP {a([bc])(1*)} ab ab b ""
- e 18 - {a((b)1)} ESUBREG
- e 19 - {a(b)c2} ESUBREG
- m 20 bR {a(b*)c1} abbcbb abbcbb bb
- doing 15 "octal escapes vs back references"
- # initial zero is always octal
- m 1 MP "a\010b" "abb" "abb"
- m 2 MP "a\0070b" "a 070b" "a 070b"
- m 3 MP "a\07b" "a 07b" "a 07b"
- m 4 MP "a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\07c" "abbbbbbbbbb 07c"
- "abbbbbbbbbb 07c" "b" "b" "b" "b" "b" "b"
- "b" "b" "b" "b"
- # a single digit is always a backref
- e 5 - {a7b} ESUBREG
- # otherwise it's a backref only if within range (barf!)
- m 6 MP "a\10b" "abb" "abb"
- m 7 MP {a101b} aAb aAb
- m 8 RP {a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)10c} abbbbbbbbbbbc
- abbbbbbbbbbbc b b b b b b b
- b b b
- # but we're fussy about border cases -- guys who want octal should use the zero
- e 9 - {a((((((((((b10))))))))))c} ESUBREG
- # BREs don't have octal, EREs don't have backrefs
- m 10 MP "a\12b" "anb" "anb"
- e 11 b {a12b} ESUBREG
- m 12 eAS {a12b} a12b a12b
- doing 16 "expanded syntax"
- m 1 xP "a b c" "abc" "abc"
- m 2 xP "a b #oopsnctd" "abcd" "abcd"
- m 3 x "a\ b\tc" "a btc" "a btc"
- m 4 xP "a b\#c" "ab#c" "ab#c"
- m 5 xP "a b[c d]e" "ab e" "ab e"
- m 6 xP "a b[c#d]e" "ab#e" "ab#e"
- m 7 xP "a b[c#d]e" "abde" "abde"
- m 8 xSPB "ab{ d" "ab{d" "ab{d"
- m 9 xPQ "ab{ 1 , 2 }c" "abc" "abc"
- doing 17 "misc syntax"
- m 1 P a(?#comment)b ab ab
- doing 18 "unmatchable REs"
- f 1 I a^b ab
- doing 19 "case independence"
- m 1 &i ab Ab Ab
- m 2 &i {a[bc]} aC aC
- f 3 &i {a[^bc]} aB
- m 4 &iM {a[b-d]} aC aC
- f 5 &iM {a[^b-d]} aC
- doing 20 "directors and embedded options"
- e 1 & ***? BADPAT
- m 2 q ***? ***? ***?
- m 3 &P ***=a*b a*b a*b
- m 4 q ***=a*b ***=a*b ***=a*b
- m 5 bLP {***:w+} ab ab
- m 6 eLP {***:w+} ab ab
- e 7 & ***:***=a*b BADRPT
- m 8 &P ***:(?b)a+b a+b a+b
- m 9 P (?b)a+b a+b a+b
- e 10 e {(?b)w+} BADRPT
- m 11 bAS {(?b)w+} (?b)w+ (?b)w+
- m 12 iP (?c)a a a
- f 13 iP (?c)a A
- m 14 APS {(?e)W+} WW WW
- m 15 P (?i)a+ Aa Aa
- f 16 P "(?m)a.b" "anb"
- m 17 P "(?m)^b" "anb" "b"
- f 18 P "(?n)a.b" "anb"
- m 19 P "(?n)^b" "anb" "b"
- f 20 P "(?p)a.b" "anb"
- f 21 P "(?p)^b" "anb"
- m 22 P (?q)a+b a+b a+b
- m 23 nP "(?s)a.b" "anb" "anb"
- m 24 xP "(?t)a b" "a b" "a b"
- m 25 P "(?w)a.b" "anb" "anb"
- m 26 P "(?w)^b" "anb" "b"
- m 27 P "(?x)a b" "ab" "ab"
- e 28 - (?z)ab BADOPT
- m 29 P (?ici)a+ Aa Aa
- e 30 P (?i)(?q)a+ BADRPT
- m 31 P (?q)(?i)a+ (?i)a+ (?i)a+
- m 32 P (?qe)a+ a a
- m 33 xP "(?q)a b" "a b" "a b"
- m 34 P "(?qx)a b" "a b" "a b"
- m 35 P (?qi)ab Ab Ab
- doing 21 "capturing"
- m 1 - a(b)c abc abc b
- m 2 P a(?:b)c xabc abc
- m 3 - a((b))c xabcy abc b b
- m 4 P a(?:(b))c abcy abc b
- m 5 P a((?:b))c abc abc b
- m 6 P a(?:(?:b))c abc abc
- i 7 Q "a(b){0}c" ac {0 1} {-1 -1}
- m 8 - a(b)c(d)e abcde abcde b d
- m 9 - (b)c(d)e bcde bcde b d
- m 10 - a(b)(d)e abde abde b d
- m 11 - a(b)c(d) abcd abcd b d
- m 12 - (ab)(cd) xabcdy abcd ab cd
- m 13 - a(b)?c xabcy abc b
- i 14 - a(b)?c xacy {1 2} {-1 -1}
- m 15 - a(b)?c(d)?e xabcdey abcde b d
- i 16 - a(b)?c(d)?e xacdey {1 4} {-1 -1} {3 3}
- i 17 - a(b)?c(d)?e xabcey {1 4} {2 2} {-1 -1}
- i 18 - a(b)?c(d)?e xacey {1 3} {-1 -1} {-1 -1}
- m 19 - a(b)*c xabcy abc b
- i 20 - a(b)*c xabbbcy {1 5} {4 4}
- i 21 - a(b)*c xacy {1 2} {-1 -1}
- m 22 - a(b*)c xabbbcy abbbc bbb
- m 23 - a(b*)c xacy ac ""
- f 24 - a(b)+c xacy
- m 25 - a(b)+c xabcy abc b
- i 26 - a(b)+c xabbbcy {1 5} {4 4}
- m 27 - a(b+)c xabbbcy abbbc bbb
- i 28 Q "a(b){2,3}c" xabbbcy {1 5} {4 4}
- i 29 Q "a(b){2,3}c" xabbcy {1 4} {3 3}
- f 30 Q "a(b){2,3}c" xabcy
- m 31 LP "\y(\w+)\y" "-- abc-" "abc" "abc"
- m 32 - a((b|c)d+)+ abacdbd acdbd bd b
- m 33 N (.*).* abc abc abc
- m 34 N (a*)* bc "" ""
- doing 22 "multicharacter collating elements"
- # again ugh
- m 1 &+L {a[c]e} ace ace
- f 2 &+IL {a[c]h} ach
- m 3 &+L {a[[.ch.]]} ach ach
- f 4 &+L {a[[.ch.]]} ace
- m 5 &+L {a[c[.ch.]]} ac ac
- m 6 &+L {a[c[.ch.]]} ace ac
- m 7 &+L {a[c[.ch.]]} ache ach
- f 8 &+L {a[^c]e} ace
- m 9 &+L {a[^c]e} abe abe
- m 10 &+L {a[^c]e} ache ache
- f 11 &+L {a[^[.ch.]]} ach
- m 12 &+L {a[^[.ch.]]} ace ac
- m 13 &+L {a[^[.ch.]]} ac ac
- m 14 &+L {a[^[.ch.]]} abe ab
- f 15 &+L {a[^c[.ch.]]} ach
- f 16 &+L {a[^c[.ch.]]} ace
- f 17 &+L {a[^c[.ch.]]} ac
- m 18 &+L {a[^c[.ch.]]} abe ab
- m 19 &+L {a[^b]} ac ac
- m 20 &+L {a[^b]} ace ac
- m 21 &+L {a[^b]} ach ach
- f 22 &+L {a[^b]} abe
- doing 23 "lookahead constraints"
- m 1 HP a(?=b)b* ab ab
- f 2 HP a(?=b)b* a
- m 3 HP a(?=b)b*(?=c)c* abc abc
- f 4 HP a(?=b)b*(?=c)c* ab
- f 5 HP a(?!b)b* ab
- m 6 HP a(?!b)b* a a
- m 7 HP (?=b)b b b
- f 8 HP (?=b)b a
- doing 24 "non-greedy quantifiers"
- m 1 PT ab+? abb ab
- m 2 PT ab+?c abbc abbc
- m 3 PT ab*? abb a
- m 4 PT ab*?c abbc abbc
- m 5 PT ab?? ab a
- m 6 PT ab??c abc abc
- m 7 PQT "ab{2,4}?" abbbb abb
- m 8 PQT "ab{2,4}?c" abbbbc abbbbc
- m 9 - 3z* 123zzzz456 3zzzz
- m 10 PT 3z*? 123zzzz456 3
- m 11 - z*4 123zzzz456 zzzz4
- m 12 PT z*?4 123zzzz456 zzzz4
- doing 25 "mixed quantifiers"
- # this is very incomplete as yet
- # should include |
- m 1 PNT {^(.*?)(a*)$} xyza xyza xyz a
- m 2 PNT {^(.*?)(a*)$} xyzaa xyzaa xyz aa
- m 3 PNT {^(.*?)(a*)$} xyz xyz xyz ""
- doing 26 "tricky cases"
- # attempts to trick the matcher into accepting a short match
- m 1 - (week|wee)(night|knights) weeknights weeknights
- wee knights
- m 2 RP {a(bc*).*1} abccbccb abccbccb b
- m 3 - {a(b.[bc]*)+} abcbd abcbd bd
- doing 27 "implementation misc."
- # duplicate arcs are suppressed
- m 1 P a(?:b|b)c abc abc
- # make color/subcolor relationship go back and forth
- m 2 & {[ab][ab][ab]} aba aba
- m 3 & {[ab][ab][ab][ab][ab][ab][ab]} abababa abababa
- doing 28 "boundary busters etc."
- # color-descriptor allocation changes at 10
- m 1 & abcdefghijkl abcdefghijkl abcdefghijkl
- # so does arc allocation
- m 2 P a(?:b|c|d|e|f|g|h|i|j|k|l|m)n agn agn
- # subexpression tracking also at 10
- m 3 - a(((((((((((((b)))))))))))))c abc abc b b b b b b b b b b b b b
- # state-set handling changes slightly at unsigned size (might be 64...)
- # (also stresses arc allocation)
- m 4 Q "ab{1,100}c" abbc abbc
- m 5 Q "ab{1,100}c" abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
- abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
- m 6 Q "ab{1,100}c"
- abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
- abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
- # force small cache and bust it, several ways
- m 7 LP {w+abcdefgh} xyzabcdefgh xyzabcdefgh
- m 8 %LP {w+abcdefgh} xyzabcdefgh xyzabcdefgh
- m 9 %LP {w+abcdefghijklmnopqrst} xyzabcdefghijklmnopqrst
- xyzabcdefghijklmnopqrst
- i 10 %LP {w+(abcdefgh)?} xyz {0 2} {-1 -1}
- i 11 %LP {w+(abcdefgh)?} xyzabcdefg {0 9} {-1 -1}
- i 12 %LP {w+(abcdefghijklmnopqrst)?} xyzabcdefghijklmnopqrs
- {0 21} {-1 -1}
- doing 29 "incomplete matches"
- p 1 t def abc {3 2} ""
- p 2 t bcd abc {1 2} ""
- p 3 t abc abab {0 3} ""
- p 4 t abc abdab {3 4} ""
- i 5 t abc abc {0 2} {0 2}
- i 6 t abc xyabc {2 4} {2 4}
- p 7 t abc+ xyab {2 3} ""
- i 8 t abc+ xyabc {2 4} {2 4}
- knownBug i 9 t abc+ xyabcd {2 4} {6 5}
- i 10 t abc+ xyabcdd {2 4} {7 6}
- p 11 tPT abc+? xyab {2 3} ""
- # the retain numbers in these two may look wrong, but they aren't
- i 12 tPT abc+? xyabc {2 4} {5 4}
- i 13 tPT abc+? xyabcc {2 4} {6 5}
- i 14 tPT abc+? xyabcd {2 4} {6 5}
- i 15 tPT abc+? xyabcdd {2 4} {7 6}
- i 16 t abcd|bc xyabc {3 4} {2 4}
- p 17 tn .*k "xxnyyy" {3 5} ""
- doing 30 "misc. oddities and old bugs"
- e 1 & *** BADRPT
- m 2 N a?b* abb abb
- m 3 N a?b* bb bb
- m 4 & a*b aab aab
- m 5 & ^a*b aaaab aaaab
- m 6 &M {[0-6][1-2][0-3][0-6][1-6][0-6]} 010010 010010
- # temporary REG_BOSONLY kludge
- m 7 s abc abcd abc
- f 8 s abc xabcd
- # back to normal stuff
- m 9 HLP {(?n)^(?![t#])S+} "tknn#n#nit0" it0
- # flush any leftover complaints
- doing 0 "flush"
- # Tests resulting from bugs reported by users
- test reg-31.1 {[[:xdigit:]] behaves correctly when followed by [[:space:]]} {
- set str {2:::DebugWin32}
- set re {([[:xdigit:]])([[:space:]]*)}
- list [regexp $re $str match xdigit spaces] $match $xdigit $spaces
- # Code used to produce {1 2:::DebugWin32 2 :::DebugWin32} !!!
- } {1 2 2 {}}
- test reg-32.1 {canmatch functionality -- at end} testregexp {
- set pat {blah}
- set line "asd asd"
- # can match at the final d, if '%' follows
- set res [testregexp -xflags -- c $pat $line resvar]
- lappend res $resvar
- } {0 7}
- test reg-32.2 {canmatch functionality -- at end} testregexp {
- set pat {s%$}
- set line "asd asd"
- # can only match after the end of the string
- set res [testregexp -xflags -- c $pat $line resvar]
- lappend res $resvar
- } {0 7}
- test reg-32.3 {canmatch functionality -- not last char} testregexp {
- set pat {[^d]%$}
- set line "asd asd"
- # can only match after the end of the string
- set res [testregexp -xflags -- c $pat $line resvar]
- lappend res $resvar
- } {0 7}
- test reg-32.3.1 {canmatch functionality -- no match} testregexp {
- set pat {Zx}
- set line "asd asd"
- # can match the last char, if followed by x
- set res [testregexp -xflags -- c $pat $line resvar]
- lappend res $resvar
- } {0 -1}
- test reg-32.4 {canmatch functionality -- last char} {knownBug} {
- set pat {.x}
- set line "asd asd"
- # can match the last char, if followed by x
- set res [testregexp -xflags -- c $pat $line resvar]
- lappend res $resvar
- } {0 6}
- test reg-32.4.1 {canmatch functionality -- last char} {knownBug} {
- set pat {.x$}
- set line "asd asd"
- # can match the last char, if followed by x
- set res [testregexp -xflags -- c $pat $line resvar]
- lappend res $resvar
- } {0 6}
- test reg-32.5 {canmatch functionality -- last char} {knownBug} {
- set pat {.[^d]x$}
- set line "asd asd"
- # can match the last char, if followed by not-d and x.
- set res [testregexp -xflags -- c $pat $line resvar]
- lappend res $resvar
- } {0 6}
- test reg-32.6 {canmatch functionality -- last char} {knownBug} {
- set pat {[^a]%[^rn]*$}
- set line "asd asd"
- # can match at the final d, if '%' follows
- set res [testregexp -xflags -- c $pat $line resvar]
- lappend res $resvar
- } {0 6}
- test reg-32.7 {canmatch functionality -- last char} {knownBug} {
- set pat {[^a]%$}
- set line "asd asd"
- # can match at the final d, if '%' follows
- set res [testregexp -xflags -- c $pat $line resvar]
- lappend res $resvar
- } {0 6}
- test reg-32.8 {canmatch functionality -- last char} {knownBug} {
- set pat {[^x]%$}
- set line "asd asd"
- # can match at the final d, if '%' follows
- set res [testregexp -xflags -- c $pat $line resvar]
- lappend res $resvar
- } {0 6}
- test reg-32.9 {canmatch functionality -- more complex case} {knownBug} {
- set pat {((BB|Bh+line)[ t]*|[^B]%[^rn]*)$}
- set line "asd asd"
- # can match at the final d, if '%' follows
- set res [testregexp -xflags -- c $pat $line resvar]
- lappend res $resvar
- } {0 6}
- # Tests reg-33.*: Checks for bug fixes
- test reg-33.1 {Bug 230589} {
- regexp {[ ]*(^|[^%])%V} "*%V2" m s
- } 1
- test reg-33.2 {Bug 504785} {
- regexp -inline {([^_.]*)([^.]*).(..)(.).*} bbcos_001_c01.q1la
- } {bbcos_001_c01.q1la bbcos _001_c01 q1 l}
- test reg-33.3 {Bug 505048} {
- regexp {As*[^<]*s*<([^>]+)>} a<a>
- } 1
- test reg-33.4 {Bug 505048} {
- regexp {As*([^b]*)b} ab
- } 1
- test reg-33.5 {Bug 505048} {
- regexp {As*[^b]*(b)} ab
- } 1
- test reg-33.6 {Bug 505048} {
- regexp {A(s*)[^b]*(b)} ab
- } 1
- test reg-33.7 {Bug 505048} {
- regexp {As*[^b]*b} ab
- } 1
- test reg-33.8 {Bug 505048} {
- regexp -inline {As*[^b]*b} ab
- } ab
- test reg-33.9 {Bug 505048} {
- regexp -indices -inline {As*[^b]*b} ab
- } {{0 1}}
- test reg-33.10 {Bug 840258} {
- regsub {(^|n)+.*b} n.b {} tmp
- } 1
- test reg-33.11 {Bug 840258} {
- regsub {(^|[nr]+).*?<.*?(n|r)+}
- "TQrn.?<5000267>Test already stoppedrn" {} tmp
- } 1
- test reg-33.12 {Bug 1810264 - bad read} {
- regexp {3161573148} {3161573148}
- } 0
- test reg-33.13 {Bug 1810264 - infinite loop} {
- regexp {($|^)*} {x}
- } 1
- test reg-33.14 {Bug 1810264 - super-expensive expression} {
- set start [clock seconds]
- regexp {(x{200}){200}$y} {x}
- set time [expr {[clock seconds] - $start}]
- expr {$time < 5 ? "ok" : "Complex RE took $time seconds - bad!"}
- } ok
- # cleanup
- ::tcltest::cleanupTests
- return