blob: ecb4ff280d073ede9f9d61f3611dfe79f822ac3c [file] [log] [blame]
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
<version number="$Revision$"/>
<transforms>
<transform source="Grek" target="Latn" variant="UNGEGN" direction="both" alias="Greek-Latin/UNGEGN und-Latn-t-und-grek-m0-ungegn" backwardAlias="Latin-Greek/UNGEGN und-Grek-t-und-latn-m0-ungegn">
<tRule><![CDATA[
# For modern Greek, based on UNGEGN rules.
# Rules are predicated on running NFD first, and NFC afterwards
# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
# WARNING: need to add accents to both filters ###
# :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ;
:: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ;
::NFD (NFC) ;
# Useful variables
$lower = [[:latin:][:greek:] & [:Ll:]] ;
$upper = [[:latin:][:greek:] & [:Lu:]] ;
$accent = [[:Mn:][:Me:]] ;
$macron = ̄ ;
$ddot = ̈ ;
$lcgvowel = [αεηιουω] ;
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
$gvowel = [$lcgvowel $ucgvowel] ;
$lcgvowelC = [$lcgvowel $accent] ;
$evowel = [aeiouyAEIOUY];
$vowel = [ $evowel $gvowel] ;
$beforeLower = $accent * $lower ;
$gammaLike = [ΓΚΞΧγκξχϰ] ;
$egammaLike = [GKXCgkxc] ;
$smooth = ̓ ;
$rough = ̔ ;
$iotasub = ͅ ;
$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;
$under = ̱;
$caron = ̌;
$afterLetter = [:L:] [\'$accent]* ;
$beforeLetter = [\'$accent]* [:L:] ;
# Fix punctuation
# preserve orginal
\: ↔ \: $under ;
\? ↔ \? $under ;
\; ↔ \? ;
· ↔ \: ;
# Fix any ancient characters that creep in
͂ → ́ ;
̂ → ́ ;
̀ → ́ ;
$smooth → ;
$rough → ;
$iotasub → ;
ͺ → ;
# need to have these up here so the rules don't mask
η ↔ i $under ;
Η ↔ I $under ;
Ψ } $beforeLower ↔ Ps ;
Ψ ↔ PS ;
ψ ↔ ps ;
ω ↔ o $under ;
Ω ↔ O $under;
# at begining or end of word, convert mp to b
[^[:L:]$accent] { μπ → b ;
μπ } [^[:L:]$accent] → b ;
[^[:L:]$accent] { [Μμ][Ππ] → B ;
[Μμ][Ππ] } [^[:L:]$accent] → B ;
μπ ← b ;
Μπ ← B } $beforeLower ;
ΜΠ ← B ;
# handle diphthongs ending with upsilon
ου ↔ ou ;
ΟΥ ↔ OU ;
Ου ↔ Ou ;
οΥ ↔ oU ;
$fmaker = [aeiAEI] $under ? ;
$shiftForwardVowels = [[:Mn:]-[̈]]; # note: a diaeresis keeps the items separate
$fmaker { υ ( $shiftForwardVowels )* } $softener → $1 v $under ;
υ $1 ← ( $shiftForwardVowels )* v $under ;
$fmaker { υ ( $shiftForwardVowels )* } → $1 f $under;
υ $1 ← ( $shiftForwardVowels )* f $under ;
$fmaker { Υ } $softener ↔ V $under ;
$fmaker { Υ ↔ U $under ;
υ ↔ y ;
Υ ↔ Y ;
# NORMAL
α ↔ a ;
Α ↔ A ;
β ↔ v ;
Β ↔ V ;
γ } $gammaLike ↔ n } $egammaLike ;
γ ↔ g ;
Γ } $gammaLike ↔ N } $egammaLike ;
Γ ↔ G ;
δ ↔ d ;
Δ ↔ D ;
ε ↔ e ;
Ε ↔ E ;
ζ ↔ z ;
Ζ ↔ Z ;
θ ↔ th ;
Θ } $beforeLower ↔ Th ;
Θ ↔ TH ;
ι ↔ i ;
Ι ↔ I ;
κ ↔ k ;
Κ ↔ K ;
λ ↔ l ;
Λ ↔ L ;
μ ↔ m ;
Μ ↔ M ;
ν } $gammaLike → n\' ;
ν ↔ n ;
Ν } $gammaLike ↔ N\' ;
Ν ↔ N ;
ξ ↔ x ;
Ξ ↔ X ;
ο ↔ o ;
Ο ↔ O ;
π ↔ p ;
Π ↔ P ;
ρ ↔ r ;
Ρ ↔ R ;
# insert separator before things that turn into s
[Pp] { } [ςσΣϷϸϺϻ] → \' ;
# special S variants
Ϸ ↔ Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
ϸ ↔ š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
Ϻ ↔ Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
ϻ ↔ ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
# Caron means exception
# before a letter, initial
ς } $beforeLetter ↔ s $under } $beforeLetter;
σ } $beforeLetter ↔ s } $beforeLetter;
# otherwise, after a letter = final
$afterLetter { σ ↔ $afterLetter { s $under;
$afterLetter { ς ↔ $afterLetter { s ;
# otherwise (isolated) = initial
ς ↔ s $under;
σ ↔ s ;
# [Pp] { Σ ↔ \'S ;
Σ ↔ S ;
τ ↔ t ;
Τ ↔ T ;
φ ↔ f ;
Φ ↔ F ;
χ ↔ ch ;
Χ } $beforeLower ↔ Ch ;
Χ ↔ CH ;
# Completeness for ASCII
# $ignore = [[:Mark:]''] * ;
| ch ← h ;
| k ← c ;
| i ← j ;
| k ← q ;
| b ← u } $vowel ;
| b ← w } $vowel ;
| y ← u ;
| y ← w ;
| Ch ← H ;
| K ← C ;
| I ← J ;
| K ← Q ;
| B ← W } $vowel ;
| B ← U } $vowel ;
| Y ← W ;
| Y ← U ;
# Completeness for Greek
ϐ → | β ;
ϑ → | θ ;
ϒ → | Υ ;
ϕ → | φ ;
ϖ → | π ;
ϰ → | κ ;
ϱ → | ρ ;
ϲ → | σ ;
Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
ϳ → j ;
ϴ → | Θ ;
ϵ → | ε ;
µ → | μ ;
# delete any trailing ' marks used for roundtripping
← [Ππ] { \' } [Ss] ;
← [Νν] { \' } $egammaLike ;
::NFC (NFD) ;
# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;
]]></tRule>
</transform>
</transforms>
</supplementalData>