<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
<version number="$Revision$"/>
<transforms>
<transform source="ThaiLogical" target="Latin" direction="both" visibility="internal">
<tRule><![CDATA[
# Thai-Latin
# This set of rules follows ISO 11940
# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
# except that ISO 11940 does not mention an implicit vowel, so we use ọ
#
# The transcription is fairly ugly, so we ought to also do the UNGEGN version
# see: http://www.eki.ee/wgrs/rom1_th.pdf
# and probably make that the main variant.
#
# Note: this is an internal file. The NFD/NFC is handled externally, in the index
# The insertion of spaces between words, the reversal of the vowels
# and the conversion of space to semicolon are done *outside* of these rules.
# So as far as these rules are concerned, the vowels are in logical order!
# insert implicit vowel (and remove it going the other way)
# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
#$consonant = [ก-ฮ];
#$vowel = [ะ-ฺเ-ไ็];
#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
#\uE000 → ọ ;
# ← ọ ;
# Helper sets used by the rules below.
# $notAbove: any character whose canonical combining class (ccc) is neither 0
# nor "above". The "backward case" rules use ($notAbove*) to step over such
# characters when they sit between a base letter and the accent being matched.
$notAbove = [^\p{ccc=0}\p{ccc=above}] ;
# $notBelow: the same construction for the "below" class. Among the rules in
# this file it appears only inside the commented-out formula further down.
$notBelow = [^\p{ccc=0}\p{ccc=below}] ;
# Consonants
# Warning: the 'h's need to be handled carefully!
# What we really want to say is the following, but we can't
# $notHAccent = !($notAbove* ̄ | $notBelow* ̣) ;
# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
$freeStandingBelow = [̥ ];
# $hAccent: the two accents named in the formula above.
$hAccent = [ ̄ ̣];
# $notHAccent0: a single character that is neither a free-standing accent below nor an 'h' accent.
$notHAccent0 = [^$freeStandingBelow$hAccent];
# $notHAccent1: a free-standing accent below, followed by a character that is not an 'h' accent.
$notHAccent1 = $freeStandingBelow [^$hAccent];
# Consonant rules.
# Notation:
#   ↔ applies in both directions, → only Thai-to-Latin, ← only Latin-to-Thai.
#   "x } ctx" converts x only when it is followed by ctx (ctx is not consumed).
#   "|" sets where processing resumes after a rule fires. In the "backward case"
#   rules the marks captured by ($notAbove*) are written back as $1 after the
#   cursor, so they are converted by the other rules afterwards.
# Digraphs ending in 'h' are listed before the corresponding single letter.
ห → h̄ ; # THAI CHARACTER HO HIP
ห | $1 ← h ($notAbove*) ̄; # backward case, account for reordering
ฮ ↔ ḥ ; # THAI CHARACTER HO NOKHUK
ข ↔ k̄h ; # THAI CHARACTER KHO KHAI
ฃ ↔ ḳ̄h ; # THAI CHARACTER KHO KHUAT
ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON
ฆ ↔ ḳh ; # THAI CHARACTER KHO RAKHANG
# Latin-to-Thai, plain "kh" is only taken as one letter when the text after it
# matches $notHAccent1 or $notHAccent0, i.e. when the 'h' is not itself followed
# by an 'h' accent. The same pair of rules is repeated for ph, ch and th.
ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
ก ↔ k ; # THAI CHARACTER KO KAI
ภ ↔ p̣h ; # THAI CHARACTER PHO SAMPHAO
ผ ↔ p̄h ; # THAI CHARACTER PHO PHUNG
พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
ป ↔ p ; # THAI CHARACTER PO PLA
ฉ ↔ c̄h ; # THAI CHARACTER CHO CHING
ฌ ↔ c̣h ; # THAI CHARACTER CHO CHOE
ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
จ ↔ c ; # THAI CHARACTER CHO CHAN
ฐ ↔ ṭ̄h ; # THAI CHARACTER THO THAN
ฑ ↔ ṯh ; # THAI CHARACTER THO NANGMONTHO
ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO
ถ ↔ t̄h ; # THAI CHARACTER THO THUNG
ธ ↔ ṭh ; # THAI CHARACTER THO THONG
ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
# Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses a vertical tick below.
ฏ ↔ t̩ ; # THAI CHARACTER TO PATAK
ต ↔ t ; # THAI CHARACTER TO TAO
# since there is no singleton g (generated), don't worry about that.
ง ↔ ng ; # THAI CHARACTER NGO NGU
ณ ↔ ṇ ; # THAI CHARACTER NO NEN
น ↔ n ; # THAI CHARACTER NO NU
ญ ↔ ỵ ; # THAI CHARACTER YO YING
ฎ ↔ ḍ ; # THAI CHARACTER DO CHADA
ด ↔ d ; # THAI CHARACTER DO DEK
บ ↔ b ; # THAI CHARACTER BO BAIMAI
ฝ ↔ f̄ ; # THAI CHARACTER FO FA
ฝ | $1 ← f ($notAbove*) ̄; # backward case, account for reordering
ม ↔ m ; # THAI CHARACTER MO MA
ย ↔ y ; # THAI CHARACTER YO YAK
ร ↔ r ; # THAI CHARACTER RO RUA
ฤ ↔ v ; # THAI CHARACTER RU
ฦ ↔ ł ; # THAI CHARACTER LU
ว ↔ w ; # THAI CHARACTER WO WAEN
ศ ↔ ṣ̄ ; # THAI CHARACTER SO SALA***
ศ | $1 ← s ̣ ($notAbove*) ̄; # backward case, account for reordering
ษ ↔ s̄ʹ ; # THAI CHARACTER SO RUSI
ส → s̄ ; # THAI CHARACTER SO SUA***
ส | $1 ← s ($notAbove*) ̄; # backward case, account for reordering
ฬ ↔ ḷ ; # THAI CHARACTER LO CHULA
ล ↔ l ; # THAI CHARACTER LO LING
ฟ ↔ f ; # THAI CHARACTER FO FAN
อ ↔ x ; # THAI CHARACTER O ANG
ซ ↔ s ; # THAI CHARACTER SO SO
# vowels
# As with the consonants, each "backward case" rule handles Latin-to-Thai input
# in which other marks (captured by ($notAbove*) and written back as $1) stand
# between the base letter and the accent that identifies the Thai vowel.
ั ↔ ạ ; # THAI CHARACTER MAI HAN-AKAT
า → ā ; # THAI CHARACTER SARA AA
า | $1 ← a ($notAbove*) ̄; # backward case, account for reordering
# We deviate from ISO for SARA AM for disambiguation
ำ → a ̉; # THAI CHARACTER SARA AM
ำ | $1 ← a ($notAbove*) ̉ ; # backward case, account for reordering
ะ ↔ a ; # THAI CHARACTER SARA A
ี ↔ ī ; # THAI CHARACTER SARA II
ี | $1 ← i ($notAbove*) ̄ ; # backward case, account for reordering
ื ↔ ụ̄ ; # THAI CHARACTER SARA UEE
ื | $1 ← u ̣ ($notAbove*) ̄ ; # backward case, account for reordering
ึ ↔ ụ ; # THAI CHARACTER SARA UE
ู ↔ ū ; # THAI CHARACTER SARA UU
ู | $1 ← u ($notAbove*) ̄ ; # backward case, account for reordering
ุ ↔ u ; # THAI CHARACTER SARA U
ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI
# The BAHT mapping is disabled; XXX is a placeholder, no Latin target has been chosen.
# ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT
 เ ↔ e ; # THAI CHARACTER SARA E
แ ↔ æ ; # THAI CHARACTER SARA AE
โ ↔ o ; # THAI CHARACTER SARA O
ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN
ไ ↔ ị ; # THAI CHARACTER SARA AI MAIMALAI
ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO
# The following Thai signs map to Latin combining marks, or to quoted literal symbols.
็ ↔ ̆ ; # THAI CHARACTER MAITAIKHU
่ ↔ ̀ ; # THAI CHARACTER MAI EK
้ ↔ ̂ ; # THAI CHARACTER MAI THO
๊ ↔ ́ ; # THAI CHARACTER MAI TRI
๋ ↔ ̌ ; # THAI CHARACTER MAI CHATTAWA
์ ↔ ̒ ; # THAI CHARACTER THANTHAKHAT
๎ ↔ '~' ; # THAI CHARACTER YAMAKKAN
# We deviate from ISO for disambiguation
ํ ↔ ̊ ; # THAI CHARACTER NIKHAHIT
๏ ↔ '§' ; # THAI CHARACTER FONGMAN
# digits
๐ ↔ 0 ; # THAI DIGIT ZERO
๑ ↔ 1 ; # THAI DIGIT ONE
๒ ↔ 2 ; # THAI DIGIT TWO
๓ ↔ 3 ; # THAI DIGIT THREE
๔ ↔ 4 ; # THAI DIGIT FOUR
๕ ↔ 5 ; # THAI DIGIT FIVE
๖ ↔ 6 ; # THAI DIGIT SIX
๗ ↔ 7 ; # THAI DIGIT SEVEN
๘ ↔ 8 ; # THAI DIGIT EIGHT
๙ ↔ 9 ; # THAI DIGIT NINE
๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU
๛ ↔ » ; # THAI CHARACTER KHOMUT
ๆ ↔ « ; # THAI CHARACTER MAIYAMOK
# moved down to make shorter first
# NOTE(review): presumably these two rules were placed after the rules above on
# purpose (rule order matters when several Latin sequences share a prefix);
# confirm the intended ordering before moving them.
#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
ฺ ↔ ˌ ; # THAI CHARACTER PHINTHU
ิ ↔ i ; # THAI CHARACTER SARA I
# fallbacks
# Latin-to-Thai only. Each rule rewrites a Latin letter to another Latin letter
# that does have a rule above. The leading "|" leaves the cursor in front of the
# replacement, so the substituted letter is then converted by the regular rules.
| k ← g ;
| k ← h ;
| c ← j ;
| k ← q ;
| s ← z ;
# The parentheses make this transform apply only in the Latin-to-Thai direction:
# the Latin input is lowercased. Nothing is done in the Thai-to-Latin direction.
:: (lower);
]]></tRule>
</transform>
</transforms>
</supplementalData>