tools/java/org/unicode/cldr/draft/UnicodeSetBuilderTests.txt - platform/external/cldr - Git at Google

 # These are tests of the UnicodeSetBuilder.
 # Each test line has the form:
 # <builder_format> ; <old_UnicodeSet_format_or_error>
 # The second field is left empty when the old UnicodeSet format is identical
 # to the builder format; if an error is expected, it holds the error code instead.
 #
 # The new (builder) format differs from the old UnicodeSet format as follows:
 #   && and -- are defined as in ICU4C Regex
 #   ~~ is defined as symmetric difference, as in UTS #18
 #   Various quirks exist for compatibility with Java Regex

 @showerror
 \p{Nd} ;
 @hide

 # Check - and --; & and &&

 [[a-z] ; doSetNoCloseError
 [[a-z]-[b-m][s]] ;
 [[a-z]--[b-m][s]] ; [[a-z]-[[b-m][s]]]
 [a-z--b-ms] ; [[a-z]-[[b-m][s]]]
 [[a-z]&[b-m][s]] ;
 [[a-z]&&[b-m][s]] ; [[a-z]&[[b-m][s]]]
 [a-z&&b-ms] ; [[a-z]&[[b-m][s]]]

 # Basic

 [ab[^a-z]e] ;
 [b\x{AC00}cd-mf] ;
 [^abc] ;

 # Properties

 [\p{Nd}] ;
 [\P{Nd}] ;
 [\p{gc=Nd}] ;
 [\P{gc=Nd}] ;
 [\p{gc≠Nd}] ; [\P{gc=Nd}]
 [\P{gc≠Nd}] ; [\p{gc=Nd}]

 \p{Nd} ;
 \P{Nd} ;
 \p{gc=Nd} ;
 \P{gc=Nd} ;
 \p{gc≠Nd} ; \P{gc=Nd}
 \P{gc≠Nd} ; \p{gc=Nd}

 [[:Nd:]] ;
 [[:^Nd:]] ;
 [[:gc=Nd:]] ;
 [[:^gc=Nd:]] ;
 [[:gc≠Nd:]] ; [[:^gc=Nd:]]
 [[:^gc≠Nd:]] ; [[:gc=Nd:]]

 [:Nd:] ;
 [:^Nd:] ;
 [:gc=Nd:] ;
 [:^gc=Nd:] ;
 [:gc≠Nd:] ; [:^gc=Nd:]
 [:^gc≠Nd:] ; [:gc=Nd:]

 # more set-like objects

 \s ; [:white_space:]
 \S ; [:^white-space:]
 \w ; [[:alphabetic:][:Nd:]]
 \W ; [^[:alphabetic:][:Nd:]]
 \d ; [:Nd:]
 \D ; [:^Nd:]

 [\s] ; [:white_space:]
 [\S] ; [:^white-space:]
 [\w] ; [[:alphabetic:][:Nd:]]
 [\W] ; [^[:alphabetic:][:Nd:]]
 [\d] ; [:Nd:]
 [\D] ; [:^Nd:]

 # characters
 \N{FULL STOP} ; [.]
 [\N{FULL STOP}]
 \x{61} ; [a]
 [\x{61}] ; [a]
 [\a] ; [a]
 [\[]

 # quirks
 [:abc] ; [abc\:]
 [abc:] ; [abc\:]
 [-a] ; [a\-]
 [~a] ; [a\~]
 [&a] ; [a\&]
 [--a] ; error
 [&&a] ; error
 [a&b] ; [ab\&]
 [a-[b]] ; [ab\-]
 [[a]-b] ; [ab\-]
 [a&[b]] ; [ab\&]
 [[a]&b] ; [ab\&]
 [a&&] ; error
 [a--] ; error
 [a-b-z] ; [a-b\-z]
 [a-b&z] ; [a-b\&z]

 # Logically, \N{...}, \x{...}, etc. should act like literals,
 # and \s, \w, \p{...}, etc. should act like sets.
 [\N{FULL STOP}-a] ; [.-a]
 [a-\N{FULL STOP}] ; [.-a]
 [[ab]&\S] ; [[ab]&[:^whitespace:]]
 [\S&[ab]] ; [[ab]&[:^whitespace:]]
 [\s-z] ; [[:whitespace:]\-z]
 [\s&z] ; [[:whitespace:]\&z]

 [abc-\p{xx}] ; error
	# These are tests of the UnicodeSetBuilder.
	# Each test line has the form:
	# <builder_format> ; <old_UnicodeSet_format_or_error>
	# The second field is left empty when the old UnicodeSet format is identical
	# to the builder format; if an error is expected, it holds the error code instead.
	#
	# The new (builder) format differs from the old UnicodeSet format as follows:
	#   && and -- are defined as in ICU4C Regex
	#   ~~ is defined as symmetric difference, as in UTS #18
	#   Various quirks exist for compatibility with Java Regex

	@showerror
	\p{Nd} ;
	@hide

	# Check - and --; & and &&

	[[a-z] ; doSetNoCloseError
	[[a-z]-[b-m][s]] ;
	[[a-z]--[b-m][s]] ; [[a-z]-[[b-m][s]]]
	[a-z--b-ms] ; [[a-z]-[[b-m][s]]]
	[[a-z]&[b-m][s]] ;
	[[a-z]&&[b-m][s]] ; [[a-z]&[[b-m][s]]]
	[a-z&&b-ms] ; [[a-z]&[[b-m][s]]]

	# Basic

	[ab[^a-z]e] ;
	[b\x{AC00}cd-mf] ;
	[^abc] ;

	# Properties

	[\p{Nd}] ;
	[\P{Nd}] ;
	[\p{gc=Nd}] ;
	[\P{gc=Nd}] ;
	[\p{gc≠Nd}] ; [\P{gc=Nd}]
	[\P{gc≠Nd}] ; [\p{gc=Nd}]

	\p{Nd} ;
	\P{Nd} ;
	\p{gc=Nd} ;
	\P{gc=Nd} ;
	\p{gc≠Nd} ; \P{gc=Nd}
	\P{gc≠Nd} ; \p{gc=Nd}

	[[:Nd:]] ;
	[[:^Nd:]] ;
	[[:gc=Nd:]] ;
	[[:^gc=Nd:]] ;
	[[:gc≠Nd:]] ; [[:^gc=Nd:]]
	[[:^gc≠Nd:]] ; [[:gc=Nd:]]

	[:Nd:] ;
	[:^Nd:] ;
	[:gc=Nd:] ;
	[:^gc=Nd:] ;
	[:gc≠Nd:] ; [:^gc=Nd:]
	[:^gc≠Nd:] ; [:gc=Nd:]

	# more set-like objects

	\s ; [:white_space:]
	\S ; [:^white-space:]
	\w ; [[:alphabetic:][:Nd:]]
	\W ; [^[:alphabetic:][:Nd:]]
	\d ; [:Nd:]
	\D ; [:^Nd:]

	[\s] ; [:white_space:]
	[\S] ; [:^white-space:]
	[\w] ; [[:alphabetic:][:Nd:]]
	[\W] ; [^[:alphabetic:][:Nd:]]
	[\d] ; [:Nd:]
	[\D] ; [:^Nd:]

	# characters
	\N{FULL STOP} ; [.]
	[\N{FULL STOP}]
	\x{61} ; [a]
	[\x{61}] ; [a]
	[\a] ; [a]
	[\[]

	# quirks
	[:abc] ; [abc\:]
	[abc:] ; [abc\:]
	[-a] ; [a\-]
	[~a] ; [a\~]
	[&a] ; [a\&]
	[--a] ; error
	[&&a] ; error
	[a&b] ; [ab\&]
	[a-[b]] ; [ab\-]
	[[a]-b] ; [ab\-]
	[a&[b]] ; [ab\&]
	[[a]&b] ; [ab\&]
	[a&&] ; error
	[a--] ; error
	[a-b-z] ; [a-b\-z]
	[a-b&z] ; [a-b\&z]

	# Logically, \N{...}, \x{...}, etc. should act like literals,
	# and \s, \w, \p{...}, etc. should act like sets.
	[\N{FULL STOP}-a] ; [.-a]
	[a-\N{FULL STOP}] ; [.-a]
	[[ab]&\S] ; [[ab]&[:^whitespace:]]
	[\S&[ab]] ; [[ab]&[:^whitespace:]]
	[\s-z] ; [[:whitespace:]\-z]
	[\s&z] ; [[:whitespace:]\&z]

	[abc-\p{xx}] ; error