utf.test
上传用户:rrhhcc
上传日期:2015-12-11
资源大小:54129k
文件大小:9k
- # This file contains a collection of tests for tclUtf.c
- # Sourcing this file into Tcl runs the tests and generates output for
- # errors. No output means no errors were found.
- #
- # Copyright (c) 1997 Sun Microsystems, Inc.
- # Copyright (c) 1998-1999 by Scriptics Corporation.
- #
- # See the file "license.terms" for information on usage and redistribution
- # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- #
- # RCS: @(#) $Id: utf.test,v 1.8.14.5 2005/09/07 14:35:56 dgp Exp $
- if {[lsearch [namespace children] ::tcltest] == -1} {
- package require tcltest 2
- namespace import -force ::tcltest::*
- }
- catch {unset x}
- test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} {
- set x x01
- } [bytestring "x01"]
- test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} {
- set x "x00"
- } [bytestring "xc0x80"]
- test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} {
- set x "xe0"
- } [bytestring "xc3xa0"]
- test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} {
- set x "u4e4e"
- } [bytestring "xe4xb9x8e"]
- test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} {
- string length [format %c -1]
- } 1
- test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
- string length "abc"
- } {3}
- test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} {
- string length [bytestring "x82x83x84"]
- } {3}
- test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} {
- string length [bytestring "xC2"]
- } {1}
- test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} {
- string length [bytestring "xC2xa2"]
- } {1}
- test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} {
- string length [bytestring "xE2"]
- } {1}
- test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
- string length [bytestring "xE2xA2"]
- } {2}
- test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
- string length [bytestring "xE4xb9x8e"]
- } {1}
- test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} {
- string length [bytestring "xF4xA2xA2xA2"]
- } {4}
- test utf-3.1 {Tcl_UtfCharComplete} {
- } {}
- testConstraint testnumutfchars [llength [info commands testnumutfchars]]
- test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
- testnumutfchars ""
- } {0}
- test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars {
- testnumutfchars [bytestring "xC2xA2"]
- } {1}
- test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars {
- testnumutfchars [bytestring "abcxC2xA2xe4xb9x8euA2u4e4e"]
- } {7}
- test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars {
- testnumutfchars [bytestring "xC0x80"]
- } {1}
- test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
- testnumutfchars "" 1
- } {0}
- test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars {
- testnumutfchars [bytestring "xC2xA2"] 1
- } {1}
- test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars {
- testnumutfchars [bytestring "abcxC2xA2xe4xb9x8euA2u4e4e"] 1
- } {7}
- test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars {
- testnumutfchars [bytestring "xC0x80"] 1
- } {1}
- test utf-5.1 {Tcl_UtfFindFirsts} {
- } {}
- test utf-6.1 {Tcl_UtfNext} {
- } {}
- test utf-7.1 {Tcl_UtfPrev} {
- } {}
- test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
- string index abcd 0
- } {a}
- test utf-8.2 {Tcl_UniCharAtIndex: index = 0} {
- string index u4e4eu25a 0
- } "u4e4e"
- test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
- string index abcd 2
- } {c}
- test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
- string index u4e4eu25axffu543 2
- } "uff"
- test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
- string range abcd 0 2
- } {abc}
- test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
- string range u4e4eu25axffu543klmnop 1 5
- } "u25axffu543kl"
- test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
- set x n
- } {
- }
- test utf-10.2 {Tcl_UtfBackslash: u subst} {
- set x ua2
- } [bytestring "xc2xa2"]
- test utf-10.3 {Tcl_UtfBackslash: longer u subst} {
- set x u4e21
- } [bytestring "xe4xb8xa1"]
- test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} {
- set x u4e2k
- } "[bytestring xd3xa2]k"
- test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} {
- set x u4e216
- } "[bytestring xe4xb8xa1]6"
- proc bsCheck {char num} {
- global errNum
- test utf-10.$errNum {backslash substitution} {
- scan $char %c value
- set value
- } $num
- incr errNum
- }
- set errNum 6
- bsCheck b 8
- bsCheck e 101
- bsCheck f 12
- bsCheck n 10
- bsCheck r 13
- bsCheck t 9
- bsCheck v 11
- bsCheck { 123
- bsCheck } 125
- bsCheck [ 91
- bsCheck ] 93
- bsCheck $ 36
- bsCheck 32
- bsCheck ; 59
- bsCheck \ 92
- bsCheck Ca 67
- bsCheck Ma 77
- bsCheck CMa 67
- # prior to 8.3, this returned 8, as 8 as accepted as an
- # octal value - but it isn't! [Bug: 3975]
- bsCheck 8a 56
- bsCheck 14 12
- bsCheck 141 97
- bsCheck b 98
- bsCheck x 120
- bsCheck xa 10
- bsCheck xA 10
- bsCheck x41 65
- bsCheck x541 65
- bsCheck u 117
- bsCheck uk 117
- bsCheck u41 65
- bsCheck ua 10
- bsCheck uA 10
- bsCheck 340 224
- bsCheck ua1 161
- bsCheck u4e21 20001
- test utf-11.1 {Tcl_UtfToUpper} {
- string toupper {}
- } {}
- test utf-11.2 {Tcl_UtfToUpper} {
- string toupper abc
- } ABC
- test utf-11.3 {Tcl_UtfToUpper} {
- string toupper u00e3ab
- } u00c3AB
- test utf-11.4 {Tcl_UtfToUpper} {
- string toupper u01e3ab
- } u01e2AB
- test utf-12.1 {Tcl_UtfToLower} {
- string tolower {}
- } {}
- test utf-12.2 {Tcl_UtfToLower} {
- string tolower ABC
- } abc
- test utf-12.3 {Tcl_UtfToLower} {
- string tolower u00c3AB
- } u00e3ab
- test utf-12.4 {Tcl_UtfToLower} {
- string tolower u01e2AB
- } u01e3ab
- test utf-13.1 {Tcl_UtfToTitle} {
- string totitle {}
- } {}
- test utf-13.2 {Tcl_UtfToTitle} {
- string totitle abc
- } Abc
- test utf-13.3 {Tcl_UtfToTitle} {
- string totitle u00e3ab
- } u00c3ab
- test utf-13.4 {Tcl_UtfToTitle} {
- string totitle u01f3ab
- } u01f2ab
- test utf-14.1 {Tcl_UtfNcasecmp} {
- string compare -nocase a b
- } -1
- test utf-14.2 {Tcl_UtfNcasecmp} {
- string compare -nocase b a
- } 1
- test utf-14.3 {Tcl_UtfNcasecmp} {
- string compare -nocase B a
- } 1
- test utf-14.4 {Tcl_UtfNcasecmp} {
- string compare -nocase aBcB abca
- } 1
- test utf-15.1 {Tcl_UniCharToUpper, negative delta} {
- string toupper aA
- } AA
- test utf-15.2 {Tcl_UniCharToUpper, positive delta} {
- string toupper u0178u00ff
- } u0178u0178
- test utf-15.3 {Tcl_UniCharToUpper, no delta} {
- string toupper !
- } !
- test utf-16.1 {Tcl_UniCharToLower, negative delta} {
- string tolower aA
- } aa
- test utf-16.2 {Tcl_UniCharToLower, positive delta} {
- string tolower u0178u00ff
- } u00ffu00ff
- test utf-17.1 {Tcl_UniCharToLower, no delta} {
- string tolower !
- } !
- test utf-18.1 {Tcl_UniCharToTitle, add one for title} {
- string totitle u01c4
- } u01c5
- test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} {
- string totitle u01c6
- } u01c5
- test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} {
- string totitle u017f
- } u0053
- test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} {
- string totitle u00ff
- } u0178
- test utf-18.5 {Tcl_UniCharToTitle, no delta} {
- string totitle !
- } !
- test utf-19.1 {TclUniCharLen} {
- list [regexp \d abc456def foo] $foo
- } {1 4}
- test utf-20.1 {TclUniCharNcmp} {
- } {}
- test utf-21.1 {TclUniCharIsAlnum} {
- # this returns 1 with Unicode 3 compliance
- string is alnum u1040u021f
- } {1}
- test utf-21.2 {unicode alnum char in regc_locale.c} {
- # this returns 1 with Unicode 3 compliance
- list [regexp {^[[:alnum:]]+$} u1040u021f] [regexp {^w+$} u1040u021f]
- } {1 1}
- test utf-22.1 {TclUniCharIsWordChar} {
- string wordend "xyz123_bar fg" 0
- } 10
- test utf-22.2 {TclUniCharIsWordChar} {
- string wordend "xu5080z123_baru203c fg" 0
- } 10
- test utf-23.1 {TclUniCharIsAlpha} {
- # this returns 1 with Unicode 3 compliance
- string is alpha u021f
- } {1}
- test utf-23.2 {unicode alpha char in regc_locale.c} {
- # this returns 1 with Unicode 3 compliance
- regexp {^[[:alpha:]]+$} u021f
- } {1}
- test utf-24.1 {TclUniCharIsDigit} {
- # this returns 1 with Unicode 3 compliance
- string is digit u1040
- } {1}
- test utf-24.2 {unicode digit char in regc_locale.c} {
- # this returns 1 with Unicode 3 compliance
- list [regexp {^[[:digit:]]+$} u1040] [regexp {^d+$} u1040]
- } {1 1}
- test utf-24.3 {TclUniCharIsSpace} {
- # this returns 1 with Unicode 3 compliance
- string is space u1680
- } {1}
- test utf-24.4 {unicode space char in regc_locale.c} {
- # this returns 1 with Unicode 3 compliance
- list [regexp {^[[:space:]]+$} u1680] [regexp {^s+$} u1680]
- } {1 1}
- testConstraint teststringobj [llength [info commands teststringobj]]
- test utf-25.1 {Tcl_UniCharNcasecmp} teststringobj {
- testobj freeallvars
- teststringobj set 1 a
- teststringobj set 2 b
- teststringobj getunicode 1
- teststringobj getunicode 2
- string compare -nocase [teststringobj get 1] [teststringobj get 2]
- } -1
- test utf-25.2 {Tcl_UniCharNcasecmp} teststringobj {
- testobj freeallvars
- teststringobj set 1 b
- teststringobj set 2 a
- teststringobj getunicode 1
- teststringobj getunicode 2
- string compare -nocase [teststringobj get 1] [teststringobj get 2]
- } 1
- test utf-25.3 {Tcl_UniCharNcasecmp} teststringobj {
- testobj freeallvars
- teststringobj set 1 B
- teststringobj set 2 a
- teststringobj getunicode 1
- teststringobj getunicode 2
- string compare -nocase [teststringobj get 1] [teststringobj get 2]
- } 1
- test utf-25.4 {Tcl_UniCharNcasecmp} teststringobj {
- testobj freeallvars
- teststringobj set 1 aBcB
- teststringobj set 2 abca
- teststringobj getunicode 1
- teststringobj getunicode 2
- string compare -nocase [teststringobj get 1] [teststringobj get 2]
- } 1
- # cleanup
- ::tcltest::cleanupTests
- return