common/transforms/Han-Spacedhan.xml - platform/external/cldr - Git at Google

 <?xml version="1.0" encoding="UTF-8" ?>
 <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
 <!--
 Copyright © 1991-2013 Unicode, Inc.
 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
 For terms of use, see http://www.unicode.org/copyright.html
 -->
 <supplementalData>
 	<version number="$Revision: 12137 $"/>
 	<transforms>
 		<transform source="Han" target="Spacedhan" direction="both" visibility="internal">
 			<tRule>
 # Only intended for internal use.
 # Forward rules (→) insert a space at the boundary between Han text or punctuation and adjacent letters.
 # Reverse rules (←) delete a single space that sits directly between an ideograph and a letter.
 #
 # Make sure Han are normalized, including characters that contain them.
 # The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:]
 # Where XXX is the resolved [:ideographic:][:sc=han:]. It needs updating with each Unicode release!
 # The leading [...] is a filter: nfkc is applied only to characters inside that set.
 :: [[㆒-㆟㈠-㉇㊀-㊰㋀-㋋㍘-㍰㍻-㍿㏠-㏾ 🈐-🈒🈔-🈺🉀-🉈🉐🉑][:ideographic:][:sc=han:]] nfkc;
 # Run the fullwidth-halfwidth transform, then rewrite ｡ as an ASCII full stop.
 :: fullwidth-halfwidth;
 ｡ → '.';
 # Punctuation classes used as context below. Each variable holds ONE set, so it can be
 # spliced into an enclosing [...] set (as the rules below do) or used on its own.
 $terminalPunct = [\.\,\:\;\?\!．，：？！｡、；[:Pe:][:Pf:]];
 $initialPunct = [[:Ps:][:Pi:]];
 # add space between any Han or terminal punctuation and letters, and
 # between letters and Han or initial punct
 # The empty {} matches the boundary itself, so a space is inserted and no existing text is replaced.
 [[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;
 # [:Mark:]* lets the boundary follow any marks that come after the letter.
 [:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] → ' ' ;
 # remove spacing between ideographs and other letters
 # Only [:Ideographic:] and [:Letter:] appear as context here, so spaces next to punctuation are kept.
 ← [:Ideographic:] { ' ' } [:Letter:] ;
 ← [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;
 			</tRule>
 		</transform>
 	</transforms>
 </supplementalData>
	<?xml version="1.0" encoding="UTF-8" ?>
	<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
	<!--
	Copyright © 1991-2013 Unicode, Inc.
	CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
	For terms of use, see http://www.unicode.org/copyright.html
	-->
	<supplementalData>
	<version number="$Revision: 12137 $"/>
	<transforms>
	<transform source="Han" target="Spacedhan" direction="both" visibility="internal">
	<tRule>
	# Only intended for internal use.
	# Forward rules (→) insert a space at the boundary between Han text or punctuation and adjacent letters.
	# Reverse rules (←) delete a single space that sits directly between an ideograph and a letter.
	#
	# Make sure Han are normalized, including characters that contain them.
	# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:]
	# Where XXX is the resolved [:ideographic:][:sc=han:]. It needs updating with each Unicode release!
	# The leading [...] is a filter: nfkc is applied only to characters inside that set.
	:: [[㆒-㆟㈠-㉇㊀-㊰㋀-㋋㍘-㍰㍻-㍿㏠-㏾ 🈐-🈒🈔-🈺🉀-🉈🉐🉑][:ideographic:][:sc=han:]] nfkc;
	# Run the fullwidth-halfwidth transform, then rewrite ｡ as an ASCII full stop.
	:: fullwidth-halfwidth;
	｡ → '.';
	# Punctuation classes used as context below. Each variable holds ONE set, so it can be
	# spliced into an enclosing [...] set (as the rules below do) or used on its own.
	$terminalPunct = [\.\,\:\;\?\!．，：？！｡、；[:Pe:][:Pf:]];
	$initialPunct = [[:Ps:][:Pi:]];
	# add space between any Han or terminal punctuation and letters, and
	# between letters and Han or initial punct
	# The empty {} matches the boundary itself, so a space is inserted and no existing text is replaced.
	[[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;
	# [:Mark:]* lets the boundary follow any marks that come after the letter.
	[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] → ' ' ;
	# remove spacing between ideographs and other letters
	# Only [:Ideographic:] and [:Letter:] appear as context here, so spaces next to punctuation are kept.
	← [:Ideographic:] { ' ' } [:Letter:] ;
	← [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;
	</tRule>
	</transform>
	</transforms>
	</supplementalData>