blob: 0f4017e4748a0c0c28d0b59b7ca70abb80b5cbf4 [file] [log] [blame]
Index: source/data/brkitr/word.txt
===================================================================
--- source/data/brkitr/word.txt (revision 264859)
+++ source/data/brkitr/word.txt (working copy)
@@ -56,15 +56,13 @@
# 5.0 or later as the definition of Complex_Context was corrected to include all
# characters requiring dictionary break.
-$Control = [\p{Grapheme_Cluster_Break = Control}];
+$Control = [\p{Grapheme_Cluster_Break = Control}];
$HangulSyllable = [\uac00-\ud7a3];
$ComplexContext = [:LineBreak = Complex_Context:];
$KanaKanji = [$Han $Hiragana $Katakana];
-$dictionaryCJK = [$KanaKanji $HangulSyllable];
-$dictionary = [$ComplexContext $dictionaryCJK];
+$dictionary = [$ComplexContext];
-# leave CJK scripts out of ALetterPlus
-$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
+$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]];
#
@@ -166,11 +164,6 @@
$Regional_IndicatorEx $Regional_IndicatorEx;
-# special handling for CJK characters: chain for later dictionary segmentation
-$HangulSyllable $HangulSyllable {200};
-$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found
-
-
## -------------------------------------------------
!!reverse;
@@ -237,10 +230,6 @@
$BackRegional_IndicatorEx $BackRegional_IndicatorEx;
-# special handling for CJK characters: chain for later dictionary segmentation
-$HangulSyllable $HangulSyllable;
-$KanaKanji $KanaKanji; #different rule status if both kanji and kana found
-
## -------------------------------------------------
!!safe_reverse;
Index: source/data/brkitr/brklocal.mk
===================================================================
--- source/data/brkitr/brklocal.mk (revision 264859)
+++ source/data/brkitr/brklocal.mk (working copy)
@@ -34,13 +34,13 @@
# List of dictionary files (dict).
-BRK_DICT_SOURCE = cjdict.txt khmerdict.txt laodict.txt thaidict.txt
+BRK_DICT_SOURCE = khmerdict.txt laodict.txt thaidict.txt
# List of break iterator files (brk).
-BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.txt
+BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.txt word_ja.txt
# Ordinary resources
-BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt
+BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt ja.txt
Index: source/data/brkitr/root.txt
===================================================================
--- source/data/brkitr/root.txt (revision 264859)
+++ source/data/brkitr/root.txt (working copy)
@@ -16,9 +16,6 @@
word:process(dependency){"word.brk"}
}
dictionaries{
- Hani:process(dependency){"cjdict.dict"}
- Hira:process(dependency){"cjdict.dict"}
- Kata:process(dependency){"cjdict.dict"}
Khmr:process(dependency){"khmerdict.dict"}
Laoo:process(dependency){"laodict.dict"}
Thai:process(dependency){"thaidict.dict"}
Index: source/data/brkitr/ja.txt
===================================================================
--- source/data/brkitr/ja.txt (revision 264859)
+++ source/data/brkitr/ja.txt (working copy)
@@ -9,6 +9,6 @@
ja{
Version{"1.1"}
boundaries{
- line:process(dependency){"line_ja.brk"}
+ word:process(dependency){"word_ja.brk"}
}
}