utf8map.pl
资源名称:gateway-1.2.1 [点击查看]
上传用户:gzpyjq
上传日期:2013-01-31
资源大小:1852k
文件大小:2k
源码类别:
手机WAP编程
开发平台:
WINDOWS
#!/usr/bin/perl -wn
#
# utf8map.pl - remap ascii to utf8
#
# This program builds a conversion table from an 8-bit "ascii" code page
# into utf8. The first half of the table needs no changes (utf8 encodes
# [0-127] exactly like ascii). To control the encoding of the second half
# we have to specify the unicode code of every character in the [128-255]
# interval. The program takes the unicode codes of those 128 characters
# on input (in hex format with a leading 0x) and generates the conversion
# table. Every utf8 code is padded with 0x00 up to 4 bytes.
# The output is suitable for use in 'C' programs.
#
# For example,
# in the windows-1257 table character 169 '(c)' has code 0x00A9 in unicode.
# The program will generate the following row (fields are separated by tabs):
#
#   0xC2, 0xA9, 0x00, 0x00, /* 169 0x00a9 */
#   \_____________________/    \_/ \____/
#   utf8 code (2 bytes        ascii unicode
#   with padding)
#
# USAGE:
#   perl utf8map.pl ascii_128-255_unicode_table.txt
#
# NOTE: the -n switch on the first line wraps this whole file in an
# implicit "while (<>) { ... }" loop, so the top-level code below runs
# once per input line with the current line in $_.
#
# Andrejs Dubovskis
#
use strict ;

# Index of the next table entry. It is a package variable initialised in a
# BEGIN block so that it is set exactly once and survives across the
# per-line iterations of the implicit -n loop.
our $N ;
BEGIN {
    # we are going to prepare the table for characters in the 128-255 interval
    $N = 128 ;
}

# utf8_bytes($hex) - encode one unicode code as utf8.
#   $hex    : code in hex notation with leading 0x, e.g. '0x20AC'
#   returns : list of exactly 4 byte values; the utf8 sequence followed
#             by 0 padding (code 0 yields four zero bytes)
#   dies    : if the code is above 0xFFFF, or if it lies in 0x01-0x7F
#             (only zero is accepted below 0x80)
sub utf8_bytes {
    my ($hex) = @_ ;
    my $num = hex($hex) ;
    my @out = () ;

    if ($num > 0xffff) {
        die "too large number: $hex" ;
    } elsif ($num > 0x07ff) {
        # result is three bytes long: 1110xxxx 10xxxxxx 10xxxxxx
        @out = (
            (($num >> 12) & 0xf) | 0xe0,
            (($num >> 6) & 0x3f) | 0x80,
            ($num & 0x3f) | 0x80
        ) ;
    } elsif ($num > 0x7f) {
        # result is two bytes long: 110xxxxx 10xxxxxx
        @out = (
            (($num >> 6) & 0x1f) | 0xc0,
            ($num & 0x3f) | 0x80
        ) ;
    } else {
        # only zero is legal here
        die "wrong input data: $hex" if $num ;
    }

    # pad by '0' so that every entry occupies 4 bytes
    push(@out, 0) while @out < 4 ;

    return @out ;
}

# table_row($index, $hex) - format one row of the C table.
#   $index  : position of the character in the 8-bit table (128..255)
#   $hex    : unicode code exactly as it appeared in the input
#   returns : tab separated row: four utf8 bytes, then a C comment holding
#             the index and the original hex code, then a newline
sub table_row {
    my ($index, $hex) = @_ ;

    return sprintf("0x%02X,\t0x%02X,\t0x%02X,\t0x%02X,\t", utf8_bytes($hex))
        . "/*\t$index\t$hex\t*/\n" ;
}

# look for hex numbers (unicode codes) in the current input line
for my $hex (/0x[0-9a-f]+/ig) {
    print table_row($N, $hex) ;

    # characters in [128-255] interval only
    exit if ++$N > 255 ;
}