encoding.test
上传用户:rrhhcc
上传日期:2015-12-11
资源大小:54129k
文件大小:18k
- # This file contains a collection of tests for tclEncoding.c
- # Sourcing this file into Tcl runs the tests and generates output for
- # errors. No output means no errors were found.
- #
- # Copyright (c) 1997 Sun Microsystems, Inc.
- # Copyright (c) 1998-1999 by Scriptics Corporation.
- #
- # See the file "license.terms" for information on usage and redistribution
- # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- #
- # RCS: @(#) $Id: encoding.test,v 1.16.2.3 2006/10/05 21:24:56 hobbs Exp $
- package require tcltest 2
- namespace import -force ::tcltest::*
- proc toutf {args} {
- global x
- lappend x "toutf $args"
- }
- proc fromutf {args} {
- global x
- lappend x "fromutf $args"
- }
- # Some tests require the testencoding command
- testConstraint testencoding [llength [info commands testencoding]]
- testConstraint exec [llength [info commands exec]]
- # TclInitEncodingSubsystem is tested by the rest of this file
- # TclFinalizeEncodingSubsystem is not currently tested
- test encoding-1.1 {Tcl_GetEncoding: system encoding} {testencoding} {
- testencoding create foo toutf fromutf
- set old [encoding system]
- encoding system foo
- set x {}
- encoding convertto abcd
- encoding system $old
- testencoding delete foo
- set x
- } {{fromutf }}
- test encoding-1.2 {Tcl_GetEncoding: existing encoding} {testencoding} {
- testencoding create foo toutf fromutf
- set x {}
- encoding convertto foo abcd
- testencoding delete foo
- set x
- } {{fromutf }}
- test encoding-1.3 {Tcl_GetEncoding: load encoding} {
- list [encoding convertto jis0208 u4e4e]
- [encoding convertfrom jis0208 8C]
- } "8C u4e4e"
- test encoding-2.1 {Tcl_FreeEncoding: refcount == 0} {
- encoding convertto jis0208 u4e4e
- } {8C}
- test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} {testencoding} {
- set system [encoding system]
- set path [testencoding path]
- encoding system shiftjis ;# incr ref count
- testencoding path [list [pwd]]
- set x [encoding convertto shiftjis u4e4e] ;# old one found
- encoding system identity
- lappend x [catch {encoding convertto shiftjis u4e4e} msg] $msg
- encoding system identity
- testencoding path $path
- encoding system $system
- set x
- } "u008cu00c1 1 {unknown encoding "shiftjis"}"
- test encoding-3.1 {Tcl_GetEncodingName, NULL} {
- set old [encoding system]
- encoding system shiftjis
- set x [encoding system]
- encoding system $old
- set x
- } {shiftjis}
- test encoding-3.2 {Tcl_GetEncodingName, non-null} {
- set old [fconfigure stdout -encoding]
- fconfigure stdout -encoding jis0208
- set x [fconfigure stdout -encoding]
- fconfigure stdout -encoding $old
- set x
- } {jis0208}
- test encoding-4.1 {Tcl_GetEncodingNames} {testencoding} {
- cd [makeDirectory tmp]
- makeDirectory [file join tmp encoding]
- makeFile {} [file join tmp encoding junk.enc]
- makeFile {} [file join tmp encoding junk2.enc]
- set path [testencoding path]
- testencoding path {}
- catch {unset encodings}
- catch {unset x}
- foreach encoding [encoding names] {
- set encodings($encoding) 1
- }
- testencoding path [list [pwd]]
- foreach encoding [encoding names] {
- if {![info exists encodings($encoding)]} {
- lappend x $encoding
- }
- }
- testencoding path $path
- cd [workingDirectory]
- removeFile [file join tmp encoding junk2.enc]
- removeFile [file join tmp encoding junk.enc]
- removeDirectory [file join tmp encoding]
- removeDirectory tmp
- lsort $x
- } {junk junk2}
- test encoding-5.1 {Tcl_SetSystemEncoding} {
- set old [encoding system]
- encoding system jis0208
- set x [encoding convertto u4e4e]
- encoding system identity
- encoding system $old
- set x
- } {8C}
- test encoding-5.2 {Tcl_SetSystemEncoding: test ref count} {
- set old [encoding system]
- encoding system $old
- string compare $old [encoding system]
- } {0}
- test encoding-6.1 {Tcl_CreateEncoding: new} {testencoding} {
- testencoding create foo {toutf 1} {fromutf 2}
- set x {}
- encoding convertfrom foo abcd
- encoding convertto foo abcd
- testencoding delete foo
- set x
- } {{toutf 1} {fromutf 2}}
- test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} {
- testencoding create foo {toutf a} {fromutf b}
- set x {}
- encoding convertfrom foo abcd
- encoding convertto foo abcd
- testencoding delete foo
- set x
- } {{toutf a} {fromutf b}}
- test encoding-7.1 {Tcl_ExternalToUtfDString: small buffer} {
- encoding convertfrom jis0208 8c8c8c8c
- } "u543eu543eu543eu543e"
- test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} {
- set a 8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C
- append a $a
- append a $a
- append a $a
- append a $a
- set x [encoding convertfrom jis0208 $a]
- list [string length $x] [string index $x 0]
- } "512 u4e4e"
- test encoding-8.1 {Tcl_ExternalToUtf} {
- set f [open [file join [temporaryDirectory] dummy] w]
- fconfigure $f -translation binary -encoding iso8859-1
- puts -nonewline $f "abx8cxc1g"
- close $f
- set f [open [file join [temporaryDirectory] dummy] r]
- fconfigure $f -translation binary -encoding shiftjis
- set x [read $f]
- close $f
- file delete [file join [temporaryDirectory] dummy]
- set x
- } "abu4e4eg"
- test encoding-9.1 {Tcl_UtfToExternalDString: small buffer} {
- encoding convertto jis0208 "u543eu543eu543eu543e"
- } {8c8c8c8c}
- test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} {
- set a u4e4eu4e4eu4e4eu4e4eu4e4eu4e4eu4e4eu4e4e
- append a $a
- append a $a
- append a $a
- append a $a
- append a $a
- append a $a
- set x [encoding convertto jis0208 $a]
- list [string length $x] [string range $x 0 1]
- } "1024 8C"
- test encoding-10.1 {Tcl_UtfToExternal} {
- set f [open [file join [temporaryDirectory] dummy] w]
- fconfigure $f -translation binary -encoding shiftjis
- puts -nonewline $f "abu4e4eg"
- close $f
- set f [open [file join [temporaryDirectory] dummy] r]
- fconfigure $f -translation binary -encoding iso8859-1
- set x [read $f]
- close $f
- file delete [file join [temporaryDirectory] dummy]
- set x
- } "abx8cxc1g"
- proc viewable {str} {
- set res ""
- foreach c [split $str {}] {
- if {[string is print $c] && [string is ascii $c]} {
- append res $c
- } else {
- append res "\u[format %4.4x [scan $c %c]]"
- }
- }
- return "$str ($res)"
- }
- test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} {
- set system [encoding system]
- set path [testencoding path]
- encoding system iso8859-1
- testencoding path {}
- set x [list [catch {encoding convertto jis0208 u4e4e} msg] $msg]
- testencoding path $path
- encoding system $system
- lappend x [encoding convertto jis0208 u4e4e]
- } {1 {unknown encoding "jis0208"} 8C}
- test encoding-11.2 {LoadEncodingFile: single-byte} {
- encoding convertfrom jis0201 xa1
- } "uff61"
- test encoding-11.3 {LoadEncodingFile: double-byte} {
- encoding convertfrom jis0208 8C
- } "u4e4e"
- test encoding-11.4 {LoadEncodingFile: multi-byte} {
- encoding convertfrom shiftjis x8cxc1
- } "u4e4e"
- test encoding-11.5 {LoadEncodingFile: escape file} {
- viewable [encoding convertto iso2022 u4e4e]
- } [viewable "x1b$B8Cx1b(B"]
- test encoding-11.5.1 {LoadEncodingFile: escape file} {
- viewable [encoding convertto iso2022-jp u4e4e]
- } [viewable "x1b$B8Cx1b(B"]
- test encoding-11.6 {LoadEncodingFile: invalid file} {testencoding} {
- set system [encoding system]
- set path [testencoding path]
- encoding system identity
- cd [temporaryDirectory]
- testencoding path tmp
- makeDirectory tmp
- makeDirectory [file join tmp encoding]
- set f [open [file join tmp encoding splat.enc] w]
- fconfigure $f -translation binary
- puts $f "abcdefghijklmnop"
- close $f
- set x [list [catch {encoding convertto splat u4e4e} msg] $msg]
- file delete [file join [temporaryDirectory] tmp encoding splat.enc]
- removeDirectory [file join tmp encoding]
- removeDirectory tmp
- cd [workingDirectory]
- testencoding path $path
- encoding system $system
- set x
- } {1 {invalid encoding file "splat"}}
- # OpenEncodingFile is fully tested by the rest of the tests in this file.
- test encoding-12.1 {LoadTableEncoding: normal encoding} {
- set x [encoding convertto iso8859-3 u120]
- append x [encoding convertto iso8859-3 ud5]
- append x [encoding convertfrom iso8859-3 xd5]
- } "xd5?u120"
- test encoding-12.2 {LoadTableEncoding: single-byte encoding} {
- set x [encoding convertto iso8859-3 abu0120g]
- append x [encoding convertfrom iso8859-3 abxd5g]
- } "abxd5gabu120g"
- test encoding-12.3 {LoadTableEncoding: multi-byte encoding} {
- set x [encoding convertto shiftjis abu4e4eg]
- append x [encoding convertfrom shiftjis abx8cxc1g]
- } "abx8cxc1gabu4e4eg"
- test encoding-12.4 {LoadTableEncoding: double-byte encoding} {
- set x [encoding convertto jis0208 u4e4eu3b1]
- append x [encoding convertfrom jis0208 8C&A]
- } "8C&Au4e4eu3b1"
- test encoding-12.5 {LoadTableEncoding: symbol encoding} {
- set x [encoding convertto symbol u3b3]
- append x [encoding convertto symbol u67]
- append x [encoding convertfrom symbol x67]
- } "x67x67u3b3"
- test encoding-13.1 {LoadEscapeTable} {
- viewable [set x [encoding convertto iso2022 abu4e4eu68d9g]]
- } [viewable "abx1b$B8Cx1b$(DD%x1b(Bg"]
- test encoding-14.1 {BinaryProc} {
- encoding convertto identity x12x34x56xffx69
- } "x12x34x56xc3xbfx69"
- test encoding-15.1 {UtfToUtfProc} {
- encoding convertto utf-8 xa3
- } "xc2xa3"
- test encoding-15.2 {UtfToUtfProc null character output} {
- set x u0000
- set y [encoding convertto utf-8 u0000]
- set y [encoding convertfrom identity $y]
- binary scan $y H* z
- list [string bytelength $x] [string bytelength $y] $z
- } {2 1 00}
- test encoding-15.3 {UtfToUtfProc null character input} {
- set x [encoding convertfrom identity x00]
- set y [encoding convertfrom utf-8 $x]
- binary scan [encoding convertto identity $y] H* z
- list [string bytelength $x] [string bytelength $y] $z
- } {1 2 c080}
- test encoding-16.1 {UnicodeToUtfProc} {
- set val [encoding convertfrom unicode NN]
- list $val [format %x [scan $val %c]]
- } "u4e4e 4e4e"
- test encoding-17.1 {UtfToUnicodeProc} {
- } {}
- test encoding-18.1 {TableToUtfProc} {
- } {}
- test encoding-19.1 {TableFromUtfProc} {
- } {}
- test encoding-20.1 {TableFreefProc} {
- } {}
- test encoding-21.1 {EscapeToUtfProc} {
- } {}
- test encoding-22.1 {EscapeFromUtfProc} {
- } {}
- set ::iso2022encData "u001b$B;d$I$b$G$O!"%A%C%W$49XF~;~$K$4EPO?$$$?$@$$$?$4=;=j$r%-%c%C%7%e%"%&%H$N:]$Nu001b(B
- u001b$B>.@Z<jAwIU@h$H$7$F;HMQ$7$F$*$j$^$9!#62$lF~$j$^$9$,!"@5$7$$=;=j$r$4EPO?$7$J$*u001b(B
- u001b$B$*4j$$$$$?$7$^$9!#$^$?!"BgJQ62=L$G$9$,!"=;=jJQ99$N$"$H!"F|K\8l%5!<%S%9It!Ju001b(B
- casino_japanese@___.com u001b$B!K$^$G$4=;=jJQ99:Q$NO"Mm$r$$$?$@$1$J$$$Gu001b(B
- u001b$B$7$g$&$+!)u001b(B"
- set ::iso2022uniData [encoding convertfrom iso2022-jp $::iso2022encData]
- set ::iso2022uniData2 "u79c1u3069u3082u3067u306fu3001u30c1u30c3u30d7u3054u8cfcu5165u6642u306bu3054u767bu9332u3044u305fu3060u3044u305fu3054u4f4fu6240u3092u30adu30e3u30c3u30b7u30e5u30a2u30a6u30c8u306eu969bu306e
- u5c0fu5207u624bu9001u4ed8u5148u3068u3057u3066u4f7fu7528u3057u3066u304au308au307eu3059u3002u6050u308cu5165u308au307eu3059u304cu3001u6b63u3057u3044u4f4fu6240u3092u3054u767bu9332u3057u306au304a
- u304au9858u3044u3044u305fu3057u307eu3059u3002u307eu305fu3001u5927u5909u6050u7e2eu3067u3059u304cu3001u4f4fu6240u5909u66f4u306eu3042u3068u3001u65e5u672cu8a9eu30b5u30fcu30d3u30b9u90e8uff08
- u0063u0061u0073u0069u006eu006fu005fu006au0061u0070u0061u006eu0065u0073u0065u0040u005fu005fu005fu002eu0063u006fu006du0020uff09u307eu3067u3054u4f4fu6240u5909u66f4u6e08u306eu9023u7d61u3092u3044u305fu3060u3051u306au3044u3067
- u3057u3087u3046u304buff1f"
- cd [temporaryDirectory]
- set fid [open iso2022.txt w]
- fconfigure $fid -encoding binary
- puts -nonewline $fid $::iso2022encData
- close $fid
- test encoding-23.1 {iso2022-jp escape encoding test} {
- string equal $::iso2022uniData $::iso2022uniData2
- } 1
- test encoding-23.2 {iso2022-jp escape encoding test} {
- # This checks that 'gets' isn't resetting the encoding inappropriately.
- # [Bug #523988]
- set fid [open iso2022.txt r]
- fconfigure $fid -encoding iso2022-jp
- set out ""
- set count 0
- while {[set num [gets $fid line]] >= 0} {
- if {$count} {
- incr count 1 ; # account for newline
- append out n
- }
- append out $line
- incr count $num
- }
- close $fid
- if {[string compare $::iso2022uniData $out]} {
- return -code error "iso2022-jp read in doesn't match original"
- }
- list $count $out
- } [list [string length $::iso2022uniData] $::iso2022uniData]
- test encoding-23.3 {iso2022-jp escape encoding test} {
- # read $fis <size> reads size in chars, not raw bytes.
- set fid [open iso2022.txt r]
- fconfigure $fid -encoding iso2022-jp
- set data [read $fid 50]
- close $fid
- set data
- } [string range $::iso2022uniData 0 49] ; # 0 .. 49 inclusive == 50
- cd [workingDirectory]
- test encoding-24.1 {EscapeFreeProc on open channels} -constraints {
- exec
- } -setup {
- # Bug #524674 input
- set file [makeFile {
- set f [open [file join [file dirname [info script]] iso2022.txt]]
- fconfigure $f -encoding iso2022-jp
- gets $f
- } iso2022.tcl]
- } -body {
- exec [interpreter] $file
- } -cleanup {
- removeFile iso2022.tcl
- } -result {}
- test encoding-24.2 {EscapeFreeProc on open channels} -constraints {
- exec
- } -setup {
- # Bug #524674 output
- set file [makeFile {
- fconfigure stdout -encoding iso2022-jp
- puts abu4e4eu68d9g
- exit
- } iso2022.tcl]
- } -body {
- viewable [exec [interpreter] $file]
- } -cleanup {
- removeFile iso2022.tcl
- } -result "abx1b$B8Cx1b$(DD%x1b(Bg (ab\u001b$B8C\u001b$(DD%\u001b(Bg)"
- test encoding-24.3 {EscapeFreeProc on open channels} {stdio} {
- # Bug #219314 - if we don't free escape encodings correctly on
- # channel closure, we go boom
- set file [makeFile {
- encoding system iso2022-jp
- set a "u4e4eu4e5eu4e5f"; # 3 Japanese Kanji letters
- puts $a
- } iso2022.tcl]
- set f [open "|[list [interpreter] $file]"]
- fconfigure $f -encoding iso2022-jp
- set count [gets $f line]
- close $f
- removeFile iso2022.tcl
- list $count [viewable $line]
- } [list 3 "u4e4eu4e5eu4e5f (\u4e4e\u4e5e\u4e5f)"]
- file delete [file join [temporaryDirectory] iso2022.txt]
- #
- # Begin jajp encoding round-trip conformity tests
- #
- proc foreach-jisx0208 {varName command} {
- upvar 1 $varName code
- foreach range {
- {2121 217E}
- {2221 222E}
- {223A 2241}
- {224A 2250}
- {225C 226A}
- {2272 2279}
- {227E 227E}
- {2330 2339}
- {2421 2473}
- {2521 2576}
- {2821 2821}
- {282C 282C}
- {2837 2837}
- {30 21 4E 7E}
- {4F21 4F53}
- {50 21 73 7E}
- {7421 7426}
- } {
- if {[llength $range] == 2} {
- # for adhoc range. simple {first last}. inclusive.
- set first [scan [lindex $range 0] %x]
- set last [scan [lindex $range 1] %x]
- for {set i $first} {$i <= $last} {incr i} {
- set code $i
- uplevel 1 $command
- }
- } elseif {[llength $range] == 4} {
- # for uniform range.
- set h0 [scan [lindex $range 0] %x]
- set l0 [scan [lindex $range 1] %x]
- set hend [scan [lindex $range 2] %x]
- set lend [scan [lindex $range 3] %x]
- for {set hi $h0} {$hi <= $hend} {incr hi} {
- for {set lo $l0} {$lo <= $lend} {incr lo} {
- set code [expr {$hi << 8 | ($lo & 0xff)}]
- uplevel 1 $command
- }
- }
- } else {
- error "really?"
- }
- }
- }
- proc gen-jisx0208-euc-jp {code} {
- binary format cc
- [expr {($code >> 8) | 0x80}] [expr {($code & 0xff) | 0x80}]
- }
- proc gen-jisx0208-iso2022-jp {code} {
- binary format a3cca3
- "x1b$B" [expr {$code >> 8}] [expr {$code & 0xff}] "x1b(B"
- }
- proc gen-jisx0208-cp932 {code} {
- set c1 [expr {($code >> 8) | 0x80}]
- set c2 [expr {($code & 0xff)| 0x80}]
- if {$c1 % 2} {
- set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x31 : 0x71)}]
- incr c2 [expr {- (0x60 + ($c2 < 0xe0))}]
- } else {
- set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x30 : 0x70)}]
- incr c2 -2
- }
- binary format cc $c1 $c2
- }
- proc channel-diff {fa fb} {
- set diff {}
- while {[gets $fa la] >= 0 && [gets $fb lb] >= 0} {
- if {[string compare $la $lb] == 0} continue
- # lappend diff $la $lb
- # For more readable (easy to analyze) output.
- set code [lindex $la 0]
- binary scan [lindex $la 1] H* expected
- binary scan [lindex $lb 1] H* got
- lappend diff [list $code $expected $got]
- }
- set diff
- }
- # Create char tables.
- cd [temporaryDirectory]
- foreach enc {cp932 euc-jp iso2022-jp} {
- set f [open $enc.chars w]
- fconfigure $f -encoding binary
- foreach-jisx0208 code {
- puts $f [format "%04X %s" $code [gen-jisx0208-$enc $code]]
- }
- close $f
- }
- # shiftjis == cp932 for jisx0208.
- file copy -force cp932.chars shiftjis.chars
- set NUM 0
- foreach from {cp932 shiftjis euc-jp iso2022-jp} {
- foreach to {cp932 shiftjis euc-jp iso2022-jp} {
- test encoding-25.[incr NUM] "jisx0208 $from => $to" {
- cd [temporaryDirectory]
- set f [open $from.chars]
- fconfigure $f -encoding $from
- set out [open $from.$to.out w]
- fconfigure $out -encoding $to
- puts -nonewline $out [read $f]
- close $out
- close $f
-
- # then compare $to.chars <=> $from.to.out as binary.
- set fa [open $to.chars]
- fconfigure $fa -encoding binary
- set fb [open $from.$to.out]
- fconfigure $fb -encoding binary
- set diff [channel-diff $fa $fb]
- close $fa
- close $fb
-
- # Difference should be empty.
- set diff
- } {}
- }
- }
- eval [list file delete] [glob -directory [temporaryDirectory] *.chars *.out]
- # ===> Cut here <===
- # EscapeFreeProc, GetTableEncoding, unilen
- # are fully tested by the rest of this file
- # cleanup
- ::tcltest::cleanupTests
- return