blob: 8628c681af0f64855dbfff7f37ea77f3e2f9a681 [file] [log] [blame]
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
<version number="$Revision$"/>
<transforms>
<transform source="ar" target="ar_Latn" variant="BGN" direction="forward" draft="contributed" alias="Arabic-Latin/BGN ar-Latn-t-ar-m0-bgn">
<tRule>
#
########################################################################
# BGN/PCGN 1956 System
#
# This system was adopted by the BGN in 1946 and by the PCGN
# in 1956 and has been applied in the systematic romanization
# of geographic names in Bahrain, Egypt, Iraq, Jordan,
# Kuwait, Lebanon, Libya, Oman, Qatar, Saudi Arabia, Sudan,
# Syria, Tunisia, the United Arab Emirates, and Yemen, all
# of which have been covered by published BGN gazetteers.
#
# Originally prepared by Michael Everson &lt;everson@evertype.com&gt;
########################################################################
#
# MINIMAL FILTER: Arabic-Latin
#
# Global filter: only characters in this set are passed to the rules below.
# The set combines the Arabic script property, the ARABIC block, and an
# explicit list of letters, vowel marks, and Arabic-Indic digits.
:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىيًٌٍَُِّْ٠١٢٣٤٥٦٧٨٩ٱ]] ;
# Normalize to NFKD before the rules run (NFC is named for the inverse direction).
:: NFKD (NFC) ;
#
#
########################################################################
#
########################################################################
#
# Define All Transformation Variables
#
########################################################################
#
# Output used for hamza: referenced by the HAMZA, ALEF WASLA, and
# ALEF WITH MADDA ABOVE rules.
$alef = ’;
# Output used for ain: referenced by the AIN and AIN + SHADDA rules.
$ayin = ‘;
# Combining mark appended to an output that would otherwise collide with
# another rule's output (KEHEH vs. KAF, extended vs. plain digits, bare
# numeric separators).
$disambig = ̱ ;
#
#
# Use this $wordBoundary until bug 2034 is fixed in ICU:
# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest
#
# Matches any single character that is not a letter, mark, or number.
$wordBoundary = [^[:L:][:M:][:N:]] ;
#
#
########################################################################
# non-letters
# Punctuation and digits. Every rule in this group uses the two-way arrow.
# The first two rules apply only when the separator sits between two decimal
# digits; they must stay ahead of the bare-separator rules that follow, which
# append $disambig to the output.
[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
، ↔ ',' ; # ARABIC COMMA
؛ ↔ ';' ; # ARABIC SEMICOLON
؟ ↔ '?' ; # ARABIC QUESTION MARK
٪ ↔ '%' ; # ARABIC PERCENT SIGN
# Extended Arabic-Indic digits carry $disambig so they stay distinct from the
# plain Arabic-Indic digits below, which map to the same Latin digits.
۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE
#
########################################################################
#
# Rules moved to front to avoid masking
#
########################################################################
#
########################################################################
#
# BGN Page 8 Rule 5
#
# The character sequences ته , كه , ده , and سه may be romanized t·h, k·h,
# d·h, and s·h in order to differentiate those romanizations from the
# digraphs th, kh, dh, and sh.
#
########################################################################
#
# Each rule consumes a consonant together with the HEH that follows it and
# writes a middle dot between the two Latin letters. They are placed ahead of
# the single-letter rules so the pair is matched as a unit.
ته → t·h ; # ARABIC LETTER TEH + HEH
كه → k·h ; # ARABIC LETTER KAF + HEH
ده → d·h ; # ARABIC LETTER DAL + HEH
سه → s·h ; # ARABIC LETTER SEEN + HEH
#
#
########################################################################
#
# End Rule 5
#
########################################################################
########################################################################
#
#
# BGN Page 8 Rule 9
#
# Doubled consonant sounds are represented in Arabic script by placing
# a shaddah ( ّ ) over a consonant character. In romanization the letter
# should be doubled. [The remainder of this rule deals with the definite
# article and is lexical.]
#
########################################################################
#
# Consonant + SHADDA: the romanization of the consonant is written twice.
# These rules sit ahead of the single-letter rules so that the letter and its
# shadda are consumed together.
#
# The doubled forms of HAH, SAD, DAD, TAH, and ZAH use the same cedilla
# letters (ḩ ş ḑ ţ z̧) as the single-letter rules for those consonants, so a
# letter is romanized with the same diacritic whether or not it is doubled.
بّ → bb ; # ARABIC LETTER BEH + SHADDA
تّ → tt ; # ARABIC LETTER TEH + SHADDA
ثّ → thth ; # ARABIC LETTER THEH + SHADDA
جّ → jj ; # ARABIC LETTER JEEM + SHADDA
حّ → ḩḩ ; # ARABIC LETTER HAH + SHADDA
خّ → khkh ; # ARABIC LETTER KHAH + SHADDA
دّ → dd ; # ARABIC LETTER DAL + SHADDA
ذّ → dhdh ; # ARABIC LETTER THAL + SHADDA
رّ → rr ; # ARABIC LETTER REH + SHADDA
زّ → zz ; # ARABIC LETTER ZAIN + SHADDA
سّ → ss ; # ARABIC LETTER SEEN + SHADDA
شّ → shsh ; # ARABIC LETTER SHEEN + SHADDA
صّ → şş ; # ARABIC LETTER SAD + SHADDA
ضّ → ḑḑ ; # ARABIC LETTER DAD + SHADDA
طّ → ţţ ; # ARABIC LETTER TAH + SHADDA
ظّ → z̧z̧ ; # ARABIC LETTER ZAH + SHADDA
عّ → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA
غّ → ghgh ; # ARABIC LETTER GHAIN + SHADDA
فّ → ff ; # ARABIC LETTER FEH + SHADDA
قّ → qq ; # ARABIC LETTER QAF + SHADDA
كّ → kk ; # ARABIC LETTER KAF + SHADDA
لّ → ll ; # ARABIC LETTER LAM + SHADDA
مّ → mm ; # ARABIC LETTER MEEM + SHADDA
نّ → nn ; # ARABIC LETTER NOON + SHADDA
هّ → hh ; # ARABIC LETTER HEH + SHADDA
وّ → ww ; # ARABIC LETTER WAW + SHADDA
يّ → yy ; # ARABIC LETTER YEH + SHADDA
# Kept for compatibility: this rule was previously labelled YEH but is keyed
# on ALEF MAKSURA.
ىّ → yy ; # ARABIC LETTER ALEF MAKSURA + SHADDA
#
#
########################################################################
#
# End Rule 9
#
########################################################################
#
########################################################################
#
# Start of Transformations
#
########################################################################
#
# Single-letter consonant rules.
#
# A rule written as  $wordBoundary{X  applies only when X follows a character
# that is not a letter, mark, or number; it is listed before the unconditional
# rule for the same letter so the contextual form is tried first.
$wordBoundary{ء → ; # ARABIC LETTER HAMZA (dropped after a non-word character)
ء → $alef ; # ARABIC LETTER HAMZA
$wordBoundary{ا → ; # ARABIC LETTER ALEF (dropped after a non-word character)
ٱ → $alef ; # ARABIC LETTER ALEF WASLA
$wordBoundary{آ → ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE
آ → $alef ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE
ب → b ; # ARABIC LETTER BEH
ت → t ; # ARABIC LETTER TEH
ة → h ; # ARABIC LETTER TEH MARBUTA
ث → th ; # ARABIC LETTER THEH
ج → j ; # ARABIC LETTER JEEM
ح → ḩ ; # ARABIC LETTER HAH
خ → kh ; # ARABIC LETTER KHAH
د → d ; # ARABIC LETTER DAL
ذ → dh ; # ARABIC LETTER THAL
ر → r ; # ARABIC LETTER REH
ز → z ; # ARABIC LETTER ZAIN
س → s ; # ARABIC LETTER SEEN
ش → sh ; # ARABIC LETTER SHEEN
ص → ş ; # ARABIC LETTER SAD
ض → ḑ ; # ARABIC LETTER DAD
ط → ţ ; # ARABIC LETTER TAH
ظ → z̧ ; # ARABIC LETTER ZAH
ع → $ayin ; # ARABIC LETTER AIN
غ → gh ; # ARABIC LETTER GHAIN
ف → f ; # ARABIC LETTER FEH
ق → q ; # ARABIC LETTER QAF
# KEHEH carries $disambig so it stays distinct from KAF, which also maps to k.
ک ↔ k $disambig ; # ARABIC LETTER KEHEH
ك ↔ k ; # ARABIC LETTER KAF
ل → l ; # ARABIC LETTER LAM
م → m ; # ARABIC LETTER MEEM
ن → n ; # ARABIC LETTER NOON
ه → h ; # ARABIC LETTER HEH
و → w ; # ARABIC LETTER WAW
# YEH proper. Without this rule a yeh that is not part of a kasra + yeh or
# fatha + yeh + sukun sequence is left untransliterated.
ي → y ; # ARABIC LETTER YEH
# Kept for compatibility: this rule was previously labelled YEH but is keyed
# on ALEF MAKSURA. Fatha + alef maksura has its own rule that produces á.
ى → y ; # ARABIC LETTER ALEF MAKSURA
# Vowel marks. Each multi-character sequence is listed before the bare mark
# it begins with, so the longer sequence is tried first.
َا → ā ; # ARABIC FATHA + ALEF
َى → á ; # ARABIC FATHA + ALEF MAKSURA
َيْ → ay ; # ARABIC FATHA + YEH + SUKUN
َوْ → aw ; # ARABIC FATHA + WAW + SUKUN
َ → a ; # ARABIC FATHA
ِي → ī ; # ARABIC KASRA + YEH
ِ → i ; # ARABIC KASRA
ُو → ū ; # ARABIC DAMMA + WAW
ُ → u ; # ARABIC DAMMA
# Sukun produces no output.
ْ → ; # ARABIC SUKUN
ً → aⁿ ; # ARABIC FATHATAN
ٍ → iⁿ ; # ARABIC KASRATAN
ٌ → uⁿ ; # ARABIC DAMMATAN
# Normalize the result to NFC (NFD is named for the inverse direction).
::NFC (NFD) ;
#
#
########################################################################
</tRule>
</transform>
</transforms>
</supplementalData>