| #!../perl |
| |
| BEGIN { |
| if ($ENV{'PERL_CORE'}){ |
| chdir 't'; |
| unshift @INC, '../lib'; |
| } |
| require Config; import Config; |
| if ($Config{'extensions'} !~ /\bEncode\b/) { |
| print "1..0 # Skip: Encode was not built\n"; |
| exit 0; |
| } |
| } |
| |
| use strict; |
| use Encode; |
| use Encode::Alias; |
| my %a2c; |
| my @override_tests; |
| my $ON_EBCDIC; |
| |
| sub init_a2c{ |
| %a2c = ( |
| 'US-ascii' => 'ascii', |
| 'ISO-646-US' => 'ascii', |
| 'UTF-8' => 'utf-8-strict', |
| 'en_US.UTF-8' => 'utf-8-strict', |
| 'UCS-2' => 'UCS-2BE', |
| 'UCS2' => 'UCS-2BE', |
| 'iso-10646-1' => 'UCS-2BE', |
| 'ucs2-le' => 'UCS-2LE', |
| 'ucs2-be' => 'UCS-2BE', |
| 'utf16' => 'UTF-16', |
| 'utf32' => 'UTF-32', |
| 'utf16-be' => 'UTF-16BE', |
| 'utf32-be' => 'UTF-32BE', |
| 'utf16-le' => 'UTF-16LE', |
| 'utf32-le' => 'UTF-32LE', |
| 'UCS4-BE' => 'UTF-32BE', |
| 'UCS-4-LE' => 'UTF-32LE', |
| 'cyrillic' => 'iso-8859-5', |
| 'arabic' => 'iso-8859-6', |
| 'greek' => 'iso-8859-7', |
| 'hebrew' => 'iso-8859-8', |
| 'thai' => 'iso-8859-11', |
| 'tis620' => 'iso-8859-11', |
| 'tis-620' => 'iso-8859-11', |
| 'WinLatin1' => 'cp1252', |
| 'WinLatin2' => 'cp1250', |
| 'WinCyrillic' => 'cp1251', |
| 'WinGreek' => 'cp1253', |
| 'WinTurkish' => 'cp1254', |
| 'WinHebrew' => 'cp1255', |
| 'WinArabic' => 'cp1256', |
| 'WinBaltic' => 'cp1257', |
| 'WinVietnamese' => 'cp1258', |
| 'Macintosh' => 'MacRoman', |
| 'koi8r' => 'koi8-r', |
| 'koi8u' => 'koi8-u', |
| 'ja_JP.euc' => $ON_EBCDIC ? '' : 'euc-jp', |
| 'x-euc-jp' => $ON_EBCDIC ? '' : 'euc-jp', |
| 'zh_CN.euc' => $ON_EBCDIC ? '' : 'euc-cn', |
| 'x-euc-cn' => $ON_EBCDIC ? '' : 'euc-cn', |
| 'ko_KR.euc' => $ON_EBCDIC ? '' : 'euc-kr', |
| 'x-euc-kr' => $ON_EBCDIC ? '' : 'euc-kr', |
| 'ujis' => $ON_EBCDIC ? '' : 'euc-jp', |
| 'Shift_JIS' => $ON_EBCDIC ? '' : 'shiftjis', |
| 'x-sjis' => $ON_EBCDIC ? '' : 'shiftjis', |
| 'jis' => $ON_EBCDIC ? '' : '7bit-jis', |
| 'big-5' => $ON_EBCDIC ? '' : 'big5-eten', |
| 'zh_TW.Big5' => $ON_EBCDIC ? '' : 'big5-eten', |
| 'tca-big5' => $ON_EBCDIC ? '' : 'big5-eten', |
| 'big5-hk' => $ON_EBCDIC ? '' : 'big5-hkscs', |
| 'hkscs-big5' => $ON_EBCDIC ? '' : 'big5-hkscs', |
| 'GB_2312-80' => $ON_EBCDIC ? '' : 'euc-cn', |
| 'KS_C_5601-1987' => $ON_EBCDIC ? '' : 'cp949', |
| # |
| 'gb12345-raw' => $ON_EBCDIC ? '' : 'gb12345-raw', |
| 'gb2312-raw' => $ON_EBCDIC ? '' : 'gb2312-raw', |
| 'jis0201-raw' => $ON_EBCDIC ? '' : 'jis0201-raw', |
| 'jis0208-raw' => $ON_EBCDIC ? '' : 'jis0208-raw', |
| 'jis0212-raw' => $ON_EBCDIC ? '' : 'jis0212-raw', |
| 'ksc5601-raw' => $ON_EBCDIC ? '' : 'ksc5601-raw', |
| ); |
| |
| for my $i (1..11,13..16){ |
| $a2c{"ISO 8859 $i"} = "iso-8859-$i"; |
| } |
| for my $i (1..10){ |
| $a2c{"ISO Latin $i"} = "iso-8859-$Encode::Alias::Latin2iso[$i]"; |
| } |
| for my $k (keys %Encode::Alias::Winlatin2cp){ |
| my $v = $Encode::Alias::Winlatin2cp{$k}; |
| $a2c{"Win" . ucfirst($k)} = "cp" . $v; |
| $a2c{"IBM-$v"} = $a2c{"MS-$v"} = "cp" . $v; |
| $a2c{"cp-" . $v} = "cp" . $v; |
| } |
| my @a2c = keys %a2c; |
| for my $k (@a2c){ |
| $a2c{uc($k)} = $a2c{$k}; |
| $a2c{lc($k)} = $a2c{$k}; |
| $a2c{lcfirst($k)} = $a2c{$k}; |
| $a2c{ucfirst($k)} = $a2c{$k}; |
| } |
| } |
| |
| BEGIN{ |
| $ON_EBCDIC = ord("A") == 193; |
| @ARGV and $ON_EBCDIC = $ARGV[0] eq 'EBCDIC'; |
| $Encode::ON_EBCDIC = $ON_EBCDIC; |
| init_a2c(); |
| @override_tests = qw( |
| myascii:cp1252 |
| mygreek:cp1253 |
| myhebrew:iso-8859-2 |
| myarabic:cp1256 |
| ueightsomething:utf-8-strict |
| unknown: |
| ); |
| } |
| |
| if ($ON_EBCDIC){ |
| delete @Encode::ExtModule{ |
| qw(euc-cn gb2312 gb12345 gbk cp936 iso-ir-165 MacChineseSimp |
| euc-jp iso-2022-jp 7bit-jis shiftjis MacJapanese cp932 |
| euc-kr ksc5601 cp949 MacKorean |
| big5 big5-hkscs cp950 MacChineseTrad |
| gb18030 big5plus euc-tw) |
| }; |
| } |
| |
| use Test::More tests => (scalar keys %a2c) * 3 + @override_tests; |
| |
| print "# alias test; \$ON_EBCDIC == $ON_EBCDIC\n"; |
| |
| foreach my $a (keys %a2c){ |
| print "# $a => $a2c{$a}\n"; |
| my $e = Encode::find_encoding($a); |
| is((defined($e) and $e->name), $a2c{$a},$a) |
| or warn "alias was $a";; |
| } |
| |
| # now we override some of the aliases and see if it works fine |
| |
| define_alias( |
| qr/ascii/i => '"WinLatin1"', |
| qr/cyrillic/i => '"WinCyrillic"', |
| qr/arabic/i => '"WinArabic"', |
| qr/greek/i => '"WinGreek"', |
| qr/hebrew/i => '"WinHebrew"' |
| ); |
| |
| Encode::find_encoding("myhebrew"); # polute alias cache |
| |
| define_alias( sub { |
| my $enc = shift; |
| return "iso-8859-2" if $enc =~ /hebrew/i; |
| return "does-not-exist" if $enc =~ /arabic/i; # should then use other override alias |
| return "utf-8" if $enc =~ /eight/i; |
| return; |
| }); |
| |
| print "# alias test with alias overrides\n"; |
| |
| for my $test (@override_tests) { |
| my($a, $c) = split /:/, $test; |
| my $e = Encode::find_encoding($a); |
| is((defined($e) and $e->name), $c, $a); |
| } |
| |
| print "# alias undef test\n"; |
| |
| Encode::Alias->undef_aliases; |
| foreach my $a (keys %a2c){ |
| my $e = Encode::find_encoding($a); |
| ok(!defined($e) || $e->name =~ /-raw$/o,"Undef $a") |
| or warn "alias was $a"; |
| } |
| |
| print "# alias reinit test\n"; |
| |
| Encode::Alias->init_aliases; |
| init_a2c(); |
| foreach my $a (keys %a2c){ |
| my $e = Encode::find_encoding($a); |
| is((defined($e) and $e->name), $a2c{$a}, "Reinit $a") |
| or warn "alias was $a"; |
| } |
| __END__ |
| for my $k (keys %a2c){ |
| $k =~ /[A-Z]/ and next; |
| print "$k => $a2c{$k}\n"; |
| } |
| |
| |
| |