testsuite/SPENCER.tests - toolchain/sed - Git at Google

 # regular expression test set
 # Lines are at least three fields, separated by one or more tabs.  "" stands
 # for an empty field.  First field is an RE.  Second field is flags.  If
 # C flag given, regcomp() is expected to fail, and the third field is the
 # error name (minus the leading REG_).
 #
 # Otherwise it is expected to succeed, and the third field is the string to
 # try matching it against.  If there is no fourth field, the match is
 # expected to fail.  If there is a fourth field, it is the substring that
 # the RE is expected to match.  If there is a fifth field, it is a comma-
 # separated list of what the subexpressions should match, with - indicating
 # no match for that one.  In both the fourth and fifth fields, a (sub)field
 # starting with @ indicates that the (sub)expression is expected to match
 # a null string followed by the stuff after the @; this provides a way to
 # test where null strings match.  The character `N' in REs and strings
 # is newline, `S' is space, `T' is tab, `Z' is NUL.
 #
 # The full list of flags:
 #	-	placeholder, does nothing
 #	b	RE is a BRE, not an ERE
 #	&	try it as both an ERE and a BRE
 #	C	regcomp() error expected, third field is error name
 #	i	REG_ICASE
 #	m	("mundane") REG_NOSPEC
 #	s	REG_NOSUB (not really testable)
 #	n	REG_NEWLINE
 #	^	REG_NOTBOL
 #	$	REG_NOTEOL
 #	#	REG_STARTEND (see below)
 #	p	REG_PEND
 #
 # For REG_STARTEND, the start/end offsets are those of the substring
 # enclosed in ().

 # basics
 a		&	a	a
 abc		&	abc	abc
 abc|de		-	abc	abc
 a|b|c		-	abc	a

 # parentheses and perversions thereof
 a(b)c		-	abc	abc
 a\(b\)c		b	abc	abc
 a(		C	EPAREN
 a(		b	a(	a(
 a\(		-	a(	a(
 a\(		bC	EPAREN
 a\(b		bC	EPAREN
 a(b		C	EPAREN
 a(b		b	a(b	a(b
 # gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)
 a)		-	a)	a)
 )		-	)	)
 # end gagging (in a just world, those *should* give EPAREN)
 a)		b	a)	a)
 a\)		bC	EPAREN
 \)		bC	EPAREN
 a()b		-	ab	ab
 a\(\)b		b	ab	ab

 # anchoring and REG_NEWLINE
 ^abc$		&	abc	abc
 a^b		-	a^b
 a^b		b	a^b	a^b
 a$b		-	a$b
 a$b		b	a$b	a$b
 ^		&	abc	@abc
 $		&	abc	@
 ^$		&	""	@
 $^		-	""	@
 \($\)\(^\)	b	""	@
 # stop retching, those are legitimate (although disgusting)
 ^^		-	""	@
 $$		-	""	@
 b$		&	abNc
 b$		&n	abNc	b
 ^b$		&	aNbNc
 ^b$		&n	aNbNc	b
 ^$		&n	aNNb	@Nb
 ^$		n	abc
 ^$		n	abcN	@
 $^		n	aNNb	@Nb
 \($\)\(^\)	bn	aNNb	@Nb
 ^^		n^	aNNb	@Nb
 $$		n	aNNb	@NN
 ^a		^	a
 a$		$	a
 ^a		^n	aNb
 ^b		^n	aNb	b
 a$		$n	bNa
 b$		$n	bNa	b
 a*(^b$)c*	-	b	b
 a*\(^b$\)c*	b	b	b

 # certain syntax errors and non-errors
 |		C	EMPTY
 |		b	|	|
 *		C	BADRPT
 *		b	*	*
 +		C	BADRPT
 ?		C	BADRPT
 ""		&C	EMPTY
 ()		-	abc	@abc
 \(\)		b	abc	@abc
 a||b		C	EMPTY
 |ab		C	EMPTY
 ab|		C	EMPTY
 (|a)b		C	EMPTY
 (a|)b		C	EMPTY
 (*a)		C	BADRPT
 (+a)		C	BADRPT
 (?a)		C	BADRPT
 ({1}a)		C	BADRPT
 \(\{1\}a\)	bC	BADRPT
 (a|*b)		C	BADRPT
 (a|+b)		C	BADRPT
 (a|?b)		C	BADRPT
 (a|{1}b)	C	BADRPT
 ^*		C	BADRPT
 ^*		b	*	*
 ^+		C	BADRPT
 ^?		C	BADRPT
 ^{1}		C	BADRPT
 ^\{1\}		bC	BADRPT

 # metacharacters, backslashes
 a.c		&	abc	abc
 a[bc]d		&	abd	abd
 a\*c		&	a*c	a*c
 a\\b		&	a\b	a\b
 a\\\*b		&	a\*b	a\*b
 # The following test is wrong.  Using \b in a BRE or ERE is undefined.
 # a\bc		&	abc	abc
 a\		&C	EESCAPE
 a\\bc		&	a\bc	a\bc
 \{		bC	BADRPT
 a\[b		&	a[b	a[b
 a[b		&C	EBRACK
 # trailing $ is a peculiar special case for the BRE code
 a$		&	a	a
 a$		&	a$
 a\$		&	a
 a\$		&	a$	a$
 a\\$		&	a
 a\\$		&	a$
 a\\$		&	a\$
 a\\$		&	a\	a\

 # back references, ugh
 a\(b\)\2c	bC	ESUBREG
 a\(b\1\)c	bC	ESUBREG
 a\(b*\)c\1d	b	abbcbbd	abbcbbd	bb
 a\(b*\)c\1d	b	abbcbd
 a\(b*\)c\1d	b	abbcbbbd
 ^\(.\)\1	b	abc
 a\([bc]\)\1d	b	abcdabbd	abbd	b
 a\(\([bc]\)\2\)*d	b	abbccd	abbccd
 a\(\([bc]\)\2\)*d	b	abbcbd
 # actually, this next one probably ought to fail, but the spec is unclear
 a\(\(b\)*\2\)*d		b	abbbd	abbbd
 # here is a case that no NFA implementation does right
 \(ab*\)[ab]*\1	b	ababaaa	ababaaa	a
 # check out normal matching in the presence of back refs
 \(a\)\1bcd	b	aabcd	aabcd
 \(a\)\1bc*d	b	aabcd	aabcd
 \(a\)\1bc*d	b	aabd	aabd
 \(a\)\1bc*d	b	aabcccd	aabcccd
 \(a\)\1bc*[ce]d	b	aabcccd	aabcccd
 ^\(a\)\1b\(c\)*cd$	b	aabcccd	aabcccd

 # ordinary repetitions
 ab*c		&	abc	abc
 ab+c		-	abc	abc
 ab?c		-	abc	abc
 a\(*\)b		b	a*b	a*b
 a\(**\)b	b	ab	ab
 a\(***\)b	bC	BADRPT
 *a		b	*a	*a
 **a		b	a	a
 ***a		bC	BADRPT

 # the dreaded bounded repetitions
 # The following two tests are not correct:
 #{		&	{	{
 #{abc		&	{abc	{abc
 # '{' is always a special char outside bracket expressions.  So test only BRE:
 {		b	{	{
 {abc		b	{abc	{abc
 {1		C	BADRPT
 {1}		C	BADRPT
 # Same reason as for the two tests above:
 #a{b		&	a{b	a{b
 a{b		b	a{b	a{b
 a{1}b		-	ab	ab
 a\{1\}b		b	ab	ab
 a{1,}b		-	ab	ab
 a\{1,\}b	b	ab	ab
 a{1,2}b		-	aab	aab
 a\{1,2\}b	b	aab	aab
 a{1		C	EBRACE
 a\{1		bC	EBRACE
 a{1a		C	EBRACE
 a\{1a		bC	EBRACE
 a{1a}		C	BADBR
 a\{1a\}		bC	BADBR
 # These four tests check for undefined behavior.  Our implementation does
 # something different.
 #a{,2}		-	a{,2}	a{,2}
 #a\{,2\}		bC	BADBR
 #a{,}		-	a{,}	a{,}
 #a\{,\}		bC	BADBR
 a{1,x}		C	BADBR
 a\{1,x\}	bC	BADBR
 a{1,x		C	EBRACE
 a\{1,x		bC	EBRACE
 # These two tests probably fail due to an arbitrary limit on the number of
 # repetitions in the other implementation.
 #a{300}		C	BADBR
 #a\{300\}	bC	BADBR
 a{1,0}		C	BADBR
 a\{1,0\}	bC	BADBR
 ab{0,0}c	-	abcac	ac
 ab\{0,0\}c	b	abcac	ac
 ab{0,1}c	-	abcac	abc
 ab\{0,1\}c	b	abcac	abc
 ab{0,3}c	-	abbcac	abbc
 ab\{0,3\}c	b	abbcac	abbc
 ab{1,1}c	-	acabc	abc
 ab\{1,1\}c	b	acabc	abc
 ab{1,3}c	-	acabc	abc
 ab\{1,3\}c	b	acabc	abc
 ab{2,2}c	-	abcabbc	abbc
 ab\{2,2\}c	b	abcabbc	abbc
 ab{2,4}c	-	abcabbc	abbc
 ab\{2,4\}c	b	abcabbc	abbc
 ((a{1,10}){1,10}){1,10}	-	a	a	a,a

 # multiple repetitions
 # Wow, there is a serious disconnect here.  The ERE grammar is like this:
 # ERE_expression : one_char_or_coll_elem_ERE
 #                | '^'
 #                | '$'
 #                | '(' extended_reg_exp ')'
 #                | ERE_expression ERE_dupl_symbol
 #                ;
 # where ERE_dupl_symbol is any of the repetition methods.  It is clear from
 # this that consecutive repetition is OK.  On top of this, the one test not
 # marked as failing must fail.  For BREs the situation is different, so we
 # use the four tests.
 #a**		&C	BADRPT
 a**		bC	BADRPT
 #a++		C	BADRPT
 #a??		C	BADRPT
 #a*+		C	BADRPT
 #a*?		C	BADRPT
 #a+*		C	BADRPT
 #a+?		C	BADRPT
 #a?*		C	BADRPT
 #a?+		C	BADRPT
 #a{1}{1}		C	BADRPT
 #a*{1}		C	BADRPT
 #a+{1}		C	BADRPT
 #a?{1}		C	BADRPT
 #a{1}*		C	BADRPT
 #a{1}+		C	BADRPT
 #a{1}?		C	BADRPT
 #a*{b}		-	a{b}	a{b}
 a\{1\}\{1\}	bC	BADRPT
 a*\{1\}		bC	BADRPT
 a\{1\}*		bC	BADRPT

 # brackets, and numerous perversions thereof
 a[b]c		&	abc	abc
 a[ab]c		&	abc	abc
 a[^ab]c		&	adc	adc
 a[]b]c		&	a]c	a]c
 a[[b]c		&	a[c	a[c
 a[-b]c		&	a-c	a-c
 a[^]b]c		&	adc	adc
 a[^-b]c		&	adc	adc
 a[b-]c		&	a-c	a-c
 a[b		&C	EBRACK
 a[]		&C	EBRACK
 a[1-3]c		&	a2c	a2c
 a[3-1]c		&C	ERANGE
 a[1-3-5]c	&C	ERANGE
 a[[.-.]--]c	&	a-c	a-c
 # I don't think the error value should be ERANGE since a[1-] would be
 # valid, too.  Expect EBRACK.
 #a[1-		&C	ERANGE
 a[1-		&C	EBRACK
 a[[.		&C	EBRACK
 a[[.x		&C	EBRACK
 a[[.x.		&C	EBRACK
 a[[.x.]		&C	EBRACK
 a[[.x.]]	&	ax	ax
 a[[.x,.]]	&C	ECOLLATE
 # This test is invalid.  "one" is no collating symbol in any standardized
 # locale.
 # a[[.one.]]b	&	a1b	a1b
 a[[.notdef.]]b	&C	ECOLLATE
 a[[.].]]b	&	a]b	a]b
 a[[:alpha:]]c	&	abc	abc
 a[[:notdef:]]c	&C	ECTYPE
 a[[:		&C	EBRACK
 a[[:alpha	&C	EBRACK
 a[[:alpha:]	&C	EBRACK
 a[[:alpha,:]	&C	ECTYPE
 a[[:]:]]b	&C	ECTYPE
 a[[:-:]]b	&C	ECTYPE
 a[[:alph:]]	&C	ECTYPE
 a[[:alphabet:]]	&C	ECTYPE
 [[:alnum:]]+	-	-%@a0X-	a0X
 [[:alpha:]]+	-	-%@aX0-	aX
 [[:blank:]]+	-	aSSTb	SST
 [[:cntrl:]]+	-	aNTb	NT
 [[:digit:]]+	-	a019b	019
 [[:graph:]]+	-	Sa%bS	a%b
 [[:lower:]]+	-	AabC	ab
 [[:print:]]+	-	NaSbN	aSb
 [[:punct:]]+	-	S%-&T	%-&
 [[:space:]]+	-	aSNTb	SNT
 [[:upper:]]+	-	aBCd	BC
 [[:xdigit:]]+	-	p0f3Cq	0f3C
 a[[=b=]]c	&	abc	abc
 a[[=		&C	EBRACK
 a[[=b		&C	EBRACK
 a[[=b=		&C	EBRACK
 a[[=b=]		&C	EBRACK
 a[[=b,=]]	&C	ECOLLATE
 # This test is invalid.  "one" is no collating symbol in any standardized
 # locale.
 #a[[=one=]]b	&	a1b	a1b

 # complexities
 a(((b)))c	-	abc	abc
 a(b|(c))d	-	abd	abd
 a(b*|c)d	-	abbd	abbd
 # just gotta have one DFA-buster, of course
 a[ab]{20}	-	aaaaabaaaabaaaabaaaab	aaaaabaaaabaaaabaaaab
 # and an inline expansion in case somebody gets tricky
 a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]	-	aaaaabaaaabaaaabaaaab	aaaaabaaaabaaaabaaaab
 # and in case somebody just slips in an NFA...
 a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)	-	aaaaabaaaabaaaabaaaabweeknights	aaaaabaaaabaaaabaaaabweeknights
 # fish for anomalies as the number of states passes 32
 12345678901234567890123456789	-	a12345678901234567890123456789b	12345678901234567890123456789
 123456789012345678901234567890	-	a123456789012345678901234567890b	123456789012345678901234567890
 1234567890123456789012345678901	-	a1234567890123456789012345678901b	1234567890123456789012345678901
 12345678901234567890123456789012	-	a12345678901234567890123456789012b	12345678901234567890123456789012
 123456789012345678901234567890123	-	a123456789012345678901234567890123b	123456789012345678901234567890123
 # and one really big one, beyond any plausible word width
 1234567890123456789012345678901234567890123456789012345678901234567890	-	a1234567890123456789012345678901234567890123456789012345678901234567890b	1234567890123456789012345678901234567890123456789012345678901234567890
 # fish for problems as brackets go past 8
 [ab][cd][ef][gh][ij][kl][mn]	-	xacegikmoq	acegikm
 [ab][cd][ef][gh][ij][kl][mn][op]	-	xacegikmoq	acegikmo
 [ab][cd][ef][gh][ij][kl][mn][op][qr]	-	xacegikmoqy	acegikmoq
 [ab][cd][ef][gh][ij][kl][mn][op][q]	-	xacegikmoqy	acegikmoq

 # subtleties of matching
 abc		&	xabcy	abc
 a\(b\)?c\1d	b	acd
 aBc		i	Abc	Abc
 a[Bc]*d		i	abBCcd	abBCcd
 0[[:upper:]]1	&i	0a1	0a1
 0[[:lower:]]1	&i	0A1	0A1
 a[^b]c		&i	abc
 a[^b]c		&i	aBc
 a[^b]c		&i	adc	adc
 [a]b[c]		-	abc	abc
 [a]b[a]		-	aba	aba
 [abc]b[abc]	-	abc	abc
 [abc]b[abd]	-	abd	abd
 a(b?c)+d	-	accd	accd
 (wee|week)(knights|night)	-	weeknights	weeknights
 (we|wee|week|frob)(knights|night|day)	-	weeknights	weeknights
 a[bc]d		-	xyzaaabcaababdacd	abd
 a[ab]c		-	aaabc	abc
 abc		s	abc	abc
 ()		s	abc	@abc
 a*		&	b	@b

 # Let's have some fun -- try to match a C comment.
 # first the obvious, which looks okay at first glance...
 /\*.*\*/	-	/*x*/	/*x*/
 # but...
 /\*.*\*/	-	/*x*/y/*z*/	/*x*/y/*z*/
 # okay, we must not match */ inside; try to do that...
 /\*([^*]|\*[^/])*\*/	-	/*x*/	/*x*/
 /\*([^*]|\*[^/])*\*/	-	/*x*/y/*z*/	/*x*/
 # but...
 /\*([^*]|\*[^/])*\*/	-	/*x**/y/*z*/	/*x**/y/*z*/
 # and a still fancier version, which does it right (I think)...
 /\*([^*]|\*+[^*/])*\*+/	-	/*x*/	/*x*/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x*/y/*z*/	/*x*/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x**/y/*z*/	/*x**/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x****/y/*z*/	/*x****/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x**x*/y/*z*/	/*x**x*/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x***x/y/*z*/	/*x***x/y/*z*/

 # subexpressions
 .*		-	abc	abc	-
 a(b)(c)d	-	abcd	abcd	b,c
 a(((b)))c	-	abc	abc	b,b,b
 a(b|(c))d	-	abd	abd	b,-
 a(b*|c|e)d	-	abbd	abbd	bb
 a(b*|c|e)d	-	acd	acd	c
 a(b*|c|e)d	-	ad	ad	@d
 a(b?)c		-	abc	abc	b
 a(b?)c		-	ac	ac	@c
 a(b+)c		-	abc	abc	b
 a(b+)c		-	abbbc	abbbc	bbb
 a(b*)c		-	ac	ac	@c
 (a|ab)(bc([de]+)f|cde)	-	abcdef	abcdef	a,bcdef,de
 # the regression tester only asks for 9 subexpressions
 a(b)(c)(d)(e)(f)(g)(h)(i)(j)k	-	abcdefghijk	abcdefghijk	b,c,d,e,f,g,h,i,j
 a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l	-	abcdefghijkl	abcdefghijkl	b,c,d,e,f,g,h,i,j,k
 a([bc]?)c	-	abc	abc	b
 a([bc]?)c	-	ac	ac	@c
 a([bc]+)c	-	abc	abc	b
 a([bc]+)c	-	abcc	abcc	bc
 a([bc]+)bc	-	abcbc	abcbc	bc
 a(bb+|b)b	-	abb	abb	b
 a(bbb+|bb+|b)b	-	abb	abb	b
 a(bbb+|bb+|b)b	-	abbb	abbb	bb
 a(bbb+|bb+|b)bb	-	abbb	abbb	b
 (.*).*		-	abcdef	abcdef	abcdef
 (a*)*		-	bc	@b	@b

 # do we get the right subexpression when it is used more than once?
 a(b|c)*d	-	ad	ad	-
 a(b|c)*d	-	abcd	abcd	c
 a(b|c)+d	-	abd	abd	b
 a(b|c)+d	-	abcd	abcd	c
 a(b|c?)+d	-	ad	ad	@d
 a(b|c?)+d	-	abcd	abcd	c
 a(b|c){0,0}d	-	ad	ad	-
 a(b|c){0,1}d	-	ad	ad	-
 a(b|c){0,1}d	-	abd	abd	b
 a(b|c){0,2}d	-	ad	ad	-
 a(b|c){0,2}d	-	abcd	abcd	c
 a(b|c){0,}d	-	ad	ad	-
 a(b|c){0,}d	-	abcd	abcd	c
 a(b|c){1,1}d	-	abd	abd	b
 a(b|c){1,1}d	-	acd	acd	c
 a(b|c){1,2}d	-	abd	abd	b
 a(b|c){1,2}d	-	abcd	abcd	c
 a(b|c){1,}d	-	abd	abd	b
 a(b|c){1,}d	-	abcd	abcd	c
 a(b|c){2,2}d	-	acbd	acbd	b
 a(b|c){2,2}d	-	abcd	abcd	c
 a(b|c){2,4}d	-	abcd	abcd	c
 a(b|c){2,4}d	-	abcbd	abcbd	b
 a(b|c){2,4}d	-	abcbcd	abcbcd	c
 a(b|c){2,}d	-	abcd	abcd	c
 a(b|c){2,}d	-	abcbd	abcbd	b
 a(b+|((c)*))+d	-	abd	abd	b,-,-
 a(b+|((c)*))+d	-	abcd	abcd	c,c,c

 # check out the STARTEND option
 [abc]		&#	a(b)c	b
 [abc]		&#	a(d)c
 [abc]		&#	a(bc)d	b
 [abc]		&#	a(dc)d	c
 .		&#	a()c
 b.*c		&#	b(bc)c	bc
 b.*		&#	b(bc)c	bc
 .*c		&#	b(bc)c	bc

 # plain strings, with the NOSPEC flag
 abc		m	abc	abc
 abc		m	xabcy	abc
 abc		m	xyz
 a*b		m	aba*b	a*b
 a*b		m	ab
 ""		mC	EMPTY

 # cases involving NULs
 aZb		&	a	a
 aZb		&p	a
 aZb		&p#	(aZb)	aZb
 aZ*b		&p#	(ab)	ab
 a.b		&#	(aZb)	aZb
 a.*		&#	(aZb)c	aZb

 # word boundaries (ick)
 [[:<:]]a	&	a	a
 [[:<:]]a	&	ba
 [[:<:]]a	&	-a	a
 a[[:>:]]	&	a	a
 a[[:>:]]	&	ab
 a[[:>:]]	&	a-	a
 [[:<:]]a.c[[:>:]]	&	axcd-dayc-dazce-abc	abc
 [[:<:]]a.c[[:>:]]	&	axcd-dayc-dazce-abc-q	abc
 [[:<:]]a.c[[:>:]]	&	axc-dayc-dazce-abc	axc
 [[:<:]]b.c[[:>:]]	&	a_bxc-byc_d-bzc-q	bzc
 [[:<:]].x..[[:>:]]	&	y_xa_-_xb_y-_xc_-axdc	_xc_
 [[:<:]]a_b[[:>:]]	&	x_a_b

 # past problems, and suspected problems
 (A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A])	-	A1	A1
 abcdefghijklmnop	i	abcdefghijklmnop	abcdefghijklmnop
 abcdefghijklmnopqrstuv	i	abcdefghijklmnopqrstuv	abcdefghijklmnopqrstuv
 (ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN])	-	CC11	CC11
 CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a	-	CC11	CC11
 Char \([a-z0-9_]*\)\[.*	b	Char xyz[k	Char xyz[k	xyz
 a?b	-	ab	ab
 -\{0,1\}[0-9]*$	b	-5	-5
 a*a*a*a*a*a*a*	&	aaaaaa	aaaaaa
 (\b){0}	-	x	@x	-
 \(\b\)\{0,0\}	b	abc	@abc	-
 a(\b){0}c	-	ac	ac	-
 a(.*)b(\1){0}c	-	abc	abc	@bc,-
 a(.*)b(\1){0}c	-	axbc	axbc	x,-

 a\(\(b*\)\)c\1d	b	abbcbbd	abbcbbd	bb,bb
 a\(\([bc]\)\)\2d	b	abcdabbd	abbd	b,b
 a\(\(\(\([bc]\)\)\3\)\)*d	b	abbccd	abbccd	cc,cc,c,c
 a(b)(c)d	-	abcd	abcd	b,c
 a(((b)))c	-	abc	abc	b,b,b
 a(((b|(((c))))))d	-	abd	abd	b,b,b,-,-,-
 a(((b*|c|e)))d	-	abbd	abbd	bb,bb,bb
 a((b|c)){0,0}d	-	ad	ad	-,-
 a((b|c)){0,1}d	-	abd	abd	b,b
 a((b|c)){0,2}d	-	abcd	abcd	c,c
 a((b+|((c)*)))+d	-	abd	abd	b,b,-,-
 a((b+|((c)*)))+d	-	abcd	abcd	c,c,c,c
 (((\b))){0}	-	x	@x	-,-,-
 a(((.*)))b((\2)){0}c	-	abc	abc	@bc,@bc,@bc,-,-
 a(((.*)))b((\1)){0}c	-	axbc	axbc	x,x,x,-,-

 \b	&	SaT	@aT
 \b	&	aT	@aT
 a.*\b	&	abT	ab
 \b	&	STSS
 \B	&	abc	@bc
 \B	&	aSbTc
 \B	&	SaT	@SaT
 \B	&	aSTSb	@TSb

 o$($|.)        -       oN
 o$($|.)        -       op
 o$($|.)        -       o       o
	# regular expression test set
	# Lines are at least three fields, separated by one or more tabs. "" stands
	# for an empty field. First field is an RE. Second field is flags. If
	# C flag given, regcomp() is expected to fail, and the third field is the
	# error name (minus the leading REG_).
	#
	# Otherwise it is expected to succeed, and the third field is the string to
	# try matching it against. If there is no fourth field, the match is
	# expected to fail. If there is a fourth field, it is the substring that
	# the RE is expected to match. If there is a fifth field, it is a comma-
	# separated list of what the subexpressions should match, with - indicating
	# no match for that one. In both the fourth and fifth fields, a (sub)field
	# starting with @ indicates that the (sub)expression is expected to match
	# a null string followed by the stuff after the @; this provides a way to
	# test where null strings match. The character `N' in REs and strings
	# is newline, `S' is space, `T' is tab, `Z' is NUL.
	#
	# The full list of flags:
	# - placeholder, does nothing
	# b RE is a BRE, not an ERE
	# & try it as both an ERE and a BRE
	# C regcomp() error expected, third field is error name
	# i REG_ICASE
	# m ("mundane") REG_NOSPEC
	# s REG_NOSUB (not really testable)
	# n REG_NEWLINE
	# ^ REG_NOTBOL
	# $ REG_NOTEOL
	# # REG_STARTEND (see below)
	# p REG_PEND
	#
	# For REG_STARTEND, the start/end offsets are those of the substring
	# enclosed in ().

	# basics
	a & a a
	abc & abc abc
	abc|de - abc abc
	a|b|c - abc a

	# parentheses and perversions thereof
	a(b)c - abc abc
	a\(b\)c b abc abc
	a( C EPAREN
	a( b a( a(
	a\( - a( a(
	a\( bC EPAREN
	a\(b bC EPAREN
	a(b C EPAREN
	a(b b a(b a(b
	# gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)
	a) - a) a)
	) - ) )
	# end gagging (in a just world, those *should* give EPAREN)
	a) b a) a)
	a\) bC EPAREN
	\) bC EPAREN
	a()b - ab ab
	a\(\)b b ab ab

	# anchoring and REG_NEWLINE
	^abc$ & abc abc
	a^b - a^b
	a^b b a^b a^b
	a$b - a$b
	a$b b a$b a$b
	^ & abc @abc
	$ & abc @
	^$ & "" @
	$^ - "" @
	\($\)\(^\) b "" @
	# stop retching, those are legitimate (although disgusting)
	^^ - "" @
	$$ - "" @
	b$ & abNc
	b$ &n abNc b
	^b$ & aNbNc
	^b$ &n aNbNc b
	^$ &n aNNb @Nb
	^$ n abc
	^$ n abcN @
	$^ n aNNb @Nb
	\($\)\(^\) bn aNNb @Nb
	^^ n^ aNNb @Nb
	$$ n aNNb @NN
	^a ^ a
	a$ $ a
	^a ^n aNb
	^b ^n aNb b
	a$ $n bNa
	b$ $n bNa b
	a*(^b$)c* - b b
	a*\(^b$\)c* b b b

	# certain syntax errors and non-errors
	| C EMPTY
	| b | |
	* C BADRPT
	* b * *
	+ C BADRPT
	? C BADRPT
	"" &C EMPTY
	() - abc @abc
	\(\) b abc @abc
	a||b C EMPTY
	|ab C EMPTY
	ab| C EMPTY
	(|a)b C EMPTY
	(a|)b C EMPTY
	(*a) C BADRPT
	(+a) C BADRPT
	(?a) C BADRPT
	({1}a) C BADRPT
	\(\{1\}a\) bC BADRPT
	(a|*b) C BADRPT
	(a|+b) C BADRPT
	(a|?b) C BADRPT
	(a|{1}b) C BADRPT
	^* C BADRPT
	^* b * *
	^+ C BADRPT
	^? C BADRPT
	^{1} C BADRPT
	^\{1\} bC BADRPT

	# metacharacters, backslashes
	a.c & abc abc
	a[bc]d & abd abd
	a\*c & a*c a*c
	a\\b & a\b a\b
	a\\\*b & a\*b a\*b
	# The following test is wrong. Using \b in a BRE or ERE is undefined.
	# a\bc & abc abc
	a\ &C EESCAPE
	a\\bc & a\bc a\bc
	\{ bC BADRPT
	a\[b & a[b a[b
	a[b &C EBRACK
	# trailing $ is a peculiar special case for the BRE code
	a$ & a a
	a$ & a$
	a\$ & a
	a\$ & a$ a$
	a\\$ & a
	a\\$ & a$
	a\\$ & a\$
	a\\$ & a\ a\

	# back references, ugh
	a\(b\)\2c bC ESUBREG
	a\(b\1\)c bC ESUBREG
	a\(b*\)c\1d b abbcbbd abbcbbd bb
	a\(b*\)c\1d b abbcbd
	a\(b*\)c\1d b abbcbbbd
	^\(.\)\1 b abc
	a\([bc]\)\1d b abcdabbd abbd b
	a\(\([bc]\)\2\)*d b abbccd abbccd
	a\(\([bc]\)\2\)*d b abbcbd
	# actually, this next one probably ought to fail, but the spec is unclear
	a\(\(b\)*\2\)*d b abbbd abbbd
	# here is a case that no NFA implementation does right
	\(ab*\)[ab]*\1 b ababaaa ababaaa a
	# check out normal matching in the presence of back refs
	\(a\)\1bcd b aabcd aabcd
	\(a\)\1bc*d b aabcd aabcd
	\(a\)\1bc*d b aabd aabd
	\(a\)\1bc*d b aabcccd aabcccd
	\(a\)\1bc*[ce]d b aabcccd aabcccd
	^\(a\)\1b\(c\)*cd$ b aabcccd aabcccd

	# ordinary repetitions
	ab*c & abc abc
	ab+c - abc abc
	ab?c - abc abc
	a\(*\)b b a*b a*b
	a\(**\)b b ab ab
	a\(***\)b bC BADRPT
	*a b *a *a
	**a b a a
	***a bC BADRPT

	# the dreaded bounded repetitions
	# The following two tests are not correct:
	#{ & { {
	#{abc & {abc {abc
	# '{' is always a special char outside bracket expressions. So test only BRE:
	{ b { {
	{abc b {abc {abc
	{1 C BADRPT
	{1} C BADRPT
	# Same reason as for the two tests above:
	#a{b & a{b a{b
	a{b b a{b a{b
	a{1}b - ab ab
	a\{1\}b b ab ab
	a{1,}b - ab ab
	a\{1,\}b b ab ab
	a{1,2}b - aab aab
	a\{1,2\}b b aab aab
	a{1 C EBRACE
	a\{1 bC EBRACE
	a{1a C EBRACE
	a\{1a bC EBRACE
	a{1a} C BADBR
	a\{1a\} bC BADBR
	# These four tests check for undefined behavior. Our implementation does
	# something different.
	#a{,2} - a{,2} a{,2}
	#a\{,2\} bC BADBR
	#a{,} - a{,} a{,}
	#a\{,\} bC BADBR
	a{1,x} C BADBR
	a\{1,x\} bC BADBR
	a{1,x C EBRACE
	a\{1,x bC EBRACE
	# These two tests probably fail due to an arbitrary limit on the number of
	# repetitions in the other implementation.
	#a{300} C BADBR
	#a\{300\} bC BADBR
	a{1,0} C BADBR
	a\{1,0\} bC BADBR
	ab{0,0}c - abcac ac
	ab\{0,0\}c b abcac ac
	ab{0,1}c - abcac abc
	ab\{0,1\}c b abcac abc
	ab{0,3}c - abbcac abbc
	ab\{0,3\}c b abbcac abbc
	ab{1,1}c - acabc abc
	ab\{1,1\}c b acabc abc
	ab{1,3}c - acabc abc
	ab\{1,3\}c b acabc abc
	ab{2,2}c - abcabbc abbc
	ab\{2,2\}c b abcabbc abbc
	ab{2,4}c - abcabbc abbc
	ab\{2,4\}c b abcabbc abbc
	((a{1,10}){1,10}){1,10} - a a a,a

	# multiple repetitions
	# Wow, there is a serious disconnect here. The ERE grammar is like this:
	# ERE_expression : one_char_or_coll_elem_ERE
	#                | '^'
	#                | '$'
	#                | '(' extended_reg_exp ')'
	#                | ERE_expression ERE_dupl_symbol
	#                ;
	# where ERE_dupl_symbol is any of the repetition methods. It is clear from
	# this that consecutive repetition is OK. On top of this, the one test not
	# marked as failing must fail. For BREs the situation is different, so we
	# use the four tests.
	#a** &C BADRPT
	a** bC BADRPT
	#a++ C BADRPT
	#a?? C BADRPT
	#a*+ C BADRPT
	#a*? C BADRPT
	#a+* C BADRPT
	#a+? C BADRPT
	#a?* C BADRPT
	#a?+ C BADRPT
	#a{1}{1} C BADRPT
	#a*{1} C BADRPT
	#a+{1} C BADRPT
	#a?{1} C BADRPT
	#a{1}* C BADRPT
	#a{1}+ C BADRPT
	#a{1}? C BADRPT
	#a*{b} - a{b} a{b}
	a\{1\}\{1\} bC BADRPT
	a*\{1\} bC BADRPT
	a\{1\}* bC BADRPT

	# brackets, and numerous perversions thereof
	a[b]c & abc abc
	a[ab]c & abc abc
	a[^ab]c & adc adc
	a[]b]c & a]c a]c
	a[[b]c & a[c a[c
	a[-b]c & a-c a-c
	a[^]b]c & adc adc
	a[^-b]c & adc adc
	a[b-]c & a-c a-c
	a[b &C EBRACK
	a[] &C EBRACK
	a[1-3]c & a2c a2c
	a[3-1]c &C ERANGE
	a[1-3-5]c &C ERANGE
	a[[.-.]--]c & a-c a-c
	# I don't think the error value should be ERANGE since a[1-] would be
	# valid, too. Expect EBRACK.
	#a[1- &C ERANGE
	a[1- &C EBRACK
	a[[. &C EBRACK
	a[[.x &C EBRACK
	a[[.x. &C EBRACK
	a[[.x.] &C EBRACK
	a[[.x.]] & ax ax
	a[[.x,.]] &C ECOLLATE
	# This test is invalid. "one" is no collating symbol in any standardized
	# locale.
	# a[[.one.]]b & a1b a1b
	a[[.notdef.]]b &C ECOLLATE
	a[[.].]]b & a]b a]b
	a[[:alpha:]]c & abc abc
	a[[:notdef:]]c &C ECTYPE
	a[[: &C EBRACK
	a[[:alpha &C EBRACK
	a[[:alpha:] &C EBRACK
	a[[:alpha,:] &C ECTYPE
	a[[:]:]]b &C ECTYPE
	a[[:-:]]b &C ECTYPE
	a[[:alph:]] &C ECTYPE
	a[[:alphabet:]] &C ECTYPE
	[[:alnum:]]+ - -%@a0X- a0X
	[[:alpha:]]+ - -%@aX0- aX
	[[:blank:]]+ - aSSTb SST
	[[:cntrl:]]+ - aNTb NT
	[[:digit:]]+ - a019b 019
	[[:graph:]]+ - Sa%bS a%b
	[[:lower:]]+ - AabC ab
	[[:print:]]+ - NaSbN aSb
	[[:punct:]]+ - S%-&T %-&
	[[:space:]]+ - aSNTb SNT
	[[:upper:]]+ - aBCd BC
	[[:xdigit:]]+ - p0f3Cq 0f3C
	a[[=b=]]c & abc abc
	a[[= &C EBRACK
	a[[=b &C EBRACK
	a[[=b= &C EBRACK
	a[[=b=] &C EBRACK
	a[[=b,=]] &C ECOLLATE
	# This test is invalid. "one" is no collating symbol in any standardized
	# locale.
	#a[[=one=]]b & a1b a1b

	# complexities
	a(((b)))c - abc abc
	a(b|(c))d - abd abd
	a(b*|c)d - abbd abbd
	# just gotta have one DFA-buster, of course
	a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab
	# and an inline expansion in case somebody gets tricky
	a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab
	# and in case somebody just slips in an NFA...
	a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights
	# fish for anomalies as the number of states passes 32
	12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789
	123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890
	1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901
	12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012
	123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123
	# and one really big one, beyond any plausible word width
	1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890
	# fish for problems as brackets go past 8
	[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm
	[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo
	[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq
	[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq

	# subtleties of matching
	abc & xabcy abc
	a\(b\)?c\1d b acd
	aBc i Abc Abc
	a[Bc]*d i abBCcd abBCcd
	0[[:upper:]]1 &i 0a1 0a1
	0[[:lower:]]1 &i 0A1 0A1
	a[^b]c &i abc
	a[^b]c &i aBc
	a[^b]c &i adc adc
	[a]b[c] - abc abc
	[a]b[a] - aba aba
	[abc]b[abc] - abc abc
	[abc]b[abd] - abd abd
	a(b?c)+d - accd accd
	(wee|week)(knights|night) - weeknights weeknights
	(we|wee|week|frob)(knights|night|day) - weeknights weeknights
	a[bc]d - xyzaaabcaababdacd abd
	a[ab]c - aaabc abc
	abc s abc abc
	() s abc @abc
	a* & b @b

	# Let's have some fun -- try to match a C comment.
	# first the obvious, which looks okay at first glance...
	/\*.*\*/ - /*x*/ /*x*/
	# but...
	/\*.*\*/ - /*x*/y/*z*/ /*x*/y/*z*/
	# okay, we must not match */ inside; try to do that...
	/\*([^*]|\*[^/])*\*/ - /*x*/ /*x*/
	/\*([^*]|\*[^/])*\*/ - /*x*/y/*z*/ /*x*/
	# but...
	/\*([^*]|\*[^/])*\*/ - /*x**/y/*z*/ /*x**/y/*z*/
	# and a still fancier version, which does it right (I think)...
	/\*([^*]|\*+[^*/])*\*+/ - /*x*/ /*x*/
	/\*([^*]|\*+[^*/])*\*+/ - /*x*/y/*z*/ /*x*/
	/\*([^*]|\*+[^*/])*\*+/ - /*x**/y/*z*/ /*x**/
	/\*([^*]|\*+[^*/])*\*+/ - /*x****/y/*z*/ /*x****/
	/\*([^*]|\*+[^*/])*\*+/ - /*x**x*/y/*z*/ /*x**x*/
	/\*([^*]|\*+[^*/])*\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/

	# subexpressions
	.* - abc abc -
	a(b)(c)d - abcd abcd b,c
	a(((b)))c - abc abc b,b,b
	a(b|(c))d - abd abd b,-
	a(b*|c|e)d - abbd abbd bb
	a(b*|c|e)d - acd acd c
	a(b*|c|e)d - ad ad @d
	a(b?)c - abc abc b
	a(b?)c - ac ac @c
	a(b+)c - abc abc b
	a(b+)c - abbbc abbbc bbb
	a(b*)c - ac ac @c
	(a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de
	# the regression tester only asks for 9 subexpressions
	a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j
	a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l - abcdefghijkl abcdefghijkl b,c,d,e,f,g,h,i,j,k
	a([bc]?)c - abc abc b
	a([bc]?)c - ac ac @c
	a([bc]+)c - abc abc b
	a([bc]+)c - abcc abcc bc
	a([bc]+)bc - abcbc abcbc bc
	a(bb+|b)b - abb abb b
	a(bbb+|bb+|b)b - abb abb b
	a(bbb+|bb+|b)b - abbb abbb bb
	a(bbb+|bb+|b)bb - abbb abbb b
	(.*).* - abcdef abcdef abcdef
	(a*)* - bc @b @b

	# do we get the right subexpression when it is used more than once?
	a(b|c)*d - ad ad -
	a(b|c)*d - abcd abcd c
	a(b|c)+d - abd abd b
	a(b|c)+d - abcd abcd c
	a(b|c?)+d - ad ad @d
	a(b|c?)+d - abcd abcd c
	a(b|c){0,0}d - ad ad -
	a(b|c){0,1}d - ad ad -
	a(b|c){0,1}d - abd abd b
	a(b|c){0,2}d - ad ad -
	a(b|c){0,2}d - abcd abcd c
	a(b|c){0,}d - ad ad -
	a(b|c){0,}d - abcd abcd c
	a(b|c){1,1}d - abd abd b
	a(b|c){1,1}d - acd acd c
	a(b|c){1,2}d - abd abd b
	a(b|c){1,2}d - abcd abcd c
	a(b|c){1,}d - abd abd b
	a(b|c){1,}d - abcd abcd c
	a(b|c){2,2}d - acbd acbd b
	a(b|c){2,2}d - abcd abcd c
	a(b|c){2,4}d - abcd abcd c
	a(b|c){2,4}d - abcbd abcbd b
	a(b|c){2,4}d - abcbcd abcbcd c
	a(b|c){2,}d - abcd abcd c
	a(b|c){2,}d - abcbd abcbd b
	a(b+|((c)*))+d - abd abd b,-,-
	a(b+|((c)*))+d - abcd abcd c,c,c

	# check out the STARTEND option
	[abc] &# a(b)c b
	[abc] &# a(d)c
	[abc] &# a(bc)d b
	[abc] &# a(dc)d c
	. &# a()c
	b.*c &# b(bc)c bc
	b.* &# b(bc)c bc
	.*c &# b(bc)c bc

	# plain strings, with the NOSPEC flag
	abc m abc abc
	abc m xabcy abc
	abc m xyz
	a*b m aba*b a*b
	a*b m ab
	"" mC EMPTY

	# cases involving NULs
	aZb & a a
	aZb &p a
	aZb &p# (aZb) aZb
	aZ*b &p# (ab) ab
	a.b &# (aZb) aZb
	a.* &# (aZb)c aZb

	# word boundaries (ick)
	[[:<:]]a & a a
	[[:<:]]a & ba
	[[:<:]]a & -a a
	a[[:>:]] & a a
	a[[:>:]] & ab
	a[[:>:]] & a- a
	[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc
	[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc
	[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc
	[[:<:]]b.c[[:>:]] & a_bxc-byc_d-bzc-q bzc
	[[:<:]].x..[[:>:]] & y_xa_-_xb_y-_xc_-axdc _xc_
	[[:<:]]a_b[[:>:]] & x_a_b

	# past problems, and suspected problems
	(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1
	abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop
	abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv
	(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11
	CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11
	Char \([a-z0-9_]*\)\[.* b Char xyz[k Char xyz[k xyz
	a?b - ab ab
	-\{0,1\}[0-9]*$ b -5 -5
	a*a*a*a*a*a*a* & aaaaaa aaaaaa
	(\b){0} - x @x -
	\(\b\)\{0,0\} b abc @abc -
	a(\b){0}c - ac ac -
	a(.*)b(\1){0}c - abc abc @bc,-
	a(.*)b(\1){0}c - axbc axbc x,-

	a\(\(b*\)\)c\1d b abbcbbd abbcbbd bb,bb
	a\(\([bc]\)\)\2d b abcdabbd abbd b,b
	a\(\(\(\([bc]\)\)\3\)\)*d b abbccd abbccd cc,cc,c,c
	a(b)(c)d - abcd abcd b,c
	a(((b)))c - abc abc b,b,b
	a(((b|(((c))))))d - abd abd b,b,b,-,-,-
	a(((b*|c|e)))d - abbd abbd bb,bb,bb
	a((b|c)){0,0}d - ad ad -,-
	a((b|c)){0,1}d - abd abd b,b
	a((b|c)){0,2}d - abcd abcd c,c
	a((b+|((c)*)))+d - abd abd b,b,-,-
	a((b+|((c)*)))+d - abcd abcd c,c,c,c
	(((\b))){0} - x @x -,-,-
	a(((.*)))b((\2)){0}c - abc abc @bc,@bc,@bc,-,-
	a(((.*)))b((\1)){0}c - axbc axbc x,x,x,-,-

	\b & SaT @aT
	\b & aT @aT
	a.*\b & abT ab
	\b & STSS
	\B & abc @bc
	\B & aSbTc
	\B & SaT @SaT
	\B & aSTSb @TSb

	o$($|.) - oN
	o$($|.) - op
	o$($|.) - o o