| Index: source/data/brkitr/word.txt |
| =================================================================== |
| --- source/data/brkitr/word.txt (revision 264859) |
| +++ source/data/brkitr/word.txt (working copy) |
| @@ -56,15 +56,13 @@ |
| # 5.0 or later as the definition of Complex_Context was corrected to include all |
| # characters requiring dictionary break. |
| |
| -$Control = [\p{Grapheme_Cluster_Break = Control}]; |
| +$Control = [\p{Grapheme_Cluster_Break = Control}]; |
| $HangulSyllable = [\uac00-\ud7a3]; |
| $ComplexContext = [:LineBreak = Complex_Context:]; |
| $KanaKanji = [$Han $Hiragana $Katakana]; |
| -$dictionaryCJK = [$KanaKanji $HangulSyllable]; |
| -$dictionary = [$ComplexContext $dictionaryCJK]; |
| +$dictionary = [$ComplexContext]; |
| |
| -# leave CJK scripts out of ALetterPlus |
| -$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; |
| +$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]]; |
| |
| |
| # |
| @@ -166,11 +164,6 @@ |
| |
| $Regional_IndicatorEx $Regional_IndicatorEx; |
| |
| -# special handling for CJK characters: chain for later dictionary segmentation |
| -$HangulSyllable $HangulSyllable {200}; |
| -$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found |
| - |
| - |
| ## ------------------------------------------------- |
| |
| !!reverse; |
| @@ -237,10 +230,6 @@ |
| |
| $BackRegional_IndicatorEx $BackRegional_IndicatorEx; |
| |
| -# special handling for CJK characters: chain for later dictionary segmentation |
| -$HangulSyllable $HangulSyllable; |
| -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found |
| - |
| ## ------------------------------------------------- |
| |
| !!safe_reverse; |
| Index: source/data/brkitr/brklocal.mk |
| =================================================================== |
| --- source/data/brkitr/brklocal.mk (revision 264859) |
| +++ source/data/brkitr/brklocal.mk (working copy) |
| @@ -34,13 +34,13 @@ |
| |
| |
| # List of dictionary files (dict). |
| -BRK_DICT_SOURCE = cjdict.txt khmerdict.txt laodict.txt thaidict.txt |
| +BRK_DICT_SOURCE = khmerdict.txt laodict.txt thaidict.txt |
| |
| |
| # List of break iterator files (brk). |
| -BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.txt |
| +BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.txt word_ja.txt |
| |
| |
| # Ordinary resources |
| -BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt |
| +BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt ja.txt |
| |
| Index: source/data/brkitr/root.txt |
| =================================================================== |
| --- source/data/brkitr/root.txt (revision 264859) |
| +++ source/data/brkitr/root.txt (working copy) |
| @@ -16,9 +16,6 @@ |
| word:process(dependency){"word.brk"} |
| } |
| dictionaries{ |
| - Hani:process(dependency){"cjdict.dict"} |
| - Hira:process(dependency){"cjdict.dict"} |
| - Kata:process(dependency){"cjdict.dict"} |
| Khmr:process(dependency){"khmerdict.dict"} |
| Laoo:process(dependency){"laodict.dict"} |
| Thai:process(dependency){"thaidict.dict"} |
| Index: source/data/brkitr/ja.txt |
| =================================================================== |
| --- source/data/brkitr/ja.txt (revision 264859) |
| +++ source/data/brkitr/ja.txt (working copy) |
| @@ -9,6 +9,6 @@ |
| ja{ |
| Version{"1.1"} |
| boundaries{ |
| - line:process(dependency){"line_ja.brk"} |
| + word:process(dependency){"word_ja.brk"} |
| } |
| } |