import cl @42098
diff --git a/i18n/regexcmp.cpp b/i18n/regexcmp.cpp
index 2c84e3d..e857d26 100644
--- a/i18n/regexcmp.cpp
+++ b/i18n/regexcmp.cpp
@@ -1400,6 +1400,7 @@
case 0x64: /* 'd' */ bit = UREGEX_UNIX_LINES; break;
case 0x6d: /* 'm' */ bit = UREGEX_MULTILINE; break;
case 0x73: /* 's' */ bit = UREGEX_DOTALL; break;
+ case 0x75: /* 'u' */ bit = 0; /* Unicode casing */ break;
case 0x77: /* 'w' */ bit = UREGEX_UWORD; break;
case 0x78: /* 'x' */ bit = UREGEX_COMMENTS; break;
case 0x2d: /* '-' */ fSetModeFlag = FALSE; break;
diff --git a/i18n/regexcst.h b/i18n/regexcst.h
index 8c75310..ab43137 100644
--- a/i18n/regexcst.h
+++ b/i18n/regexcst.h
@@ -133,20 +133,20 @@
, {doPatStart, 255, 2,0, FALSE} // 1 start
, {doLiteralChar, 254, 14,0, TRUE} // 2 term
, {doLiteralChar, 129, 14,0, TRUE} // 3
- , {doSetBegin, 91 /* [ */, 102, 180, TRUE} // 4
+ , {doSetBegin, 91 /* [ */, 104, 182, TRUE} // 4
, {doNOP, 40 /* ( */, 27,0, TRUE} // 5
, {doDotAny, 46 /* . */, 14,0, TRUE} // 6
, {doCaret, 94 /* ^ */, 14,0, TRUE} // 7
, {doDollar, 36 /* $ */, 14,0, TRUE} // 8
- , {doNOP, 92 /* \ */, 82,0, TRUE} // 9
+ , {doNOP, 92 /* \ */, 84,0, TRUE} // 9
, {doOrOperator, 124 /* | */, 2,0, TRUE} // 10
, {doCloseParen, 41 /* ) */, 255,0, TRUE} // 11
, {doPatFinish, 253, 2,0, FALSE} // 12
- , {doRuleError, 255, 181,0, FALSE} // 13
- , {doNOP, 42 /* * */, 61,0, TRUE} // 14 expr-quant
- , {doNOP, 43 /* + */, 64,0, TRUE} // 15
- , {doNOP, 63 /* ? */, 67,0, TRUE} // 16
- , {doIntervalInit, 123 /* { */, 70,0, TRUE} // 17
+ , {doRuleError, 255, 183,0, FALSE} // 13
+ , {doNOP, 42 /* * */, 63,0, TRUE} // 14 expr-quant
+ , {doNOP, 43 /* + */, 66,0, TRUE} // 15
+ , {doNOP, 63 /* ? */, 69,0, TRUE} // 16
+ , {doIntervalInit, 123 /* { */, 72,0, TRUE} // 17
, {doNOP, 40 /* ( */, 23,0, TRUE} // 18
, {doNOP, 255, 20,0, FALSE} // 19
, {doOrOperator, 124 /* | */, 2,0, TRUE} // 20 expr-cont
@@ -154,7 +154,7 @@
, {doNOP, 255, 2,0, FALSE} // 22
, {doSuppressComments, 63 /* ? */, 25,0, TRUE} // 23 open-paren-quant
, {doNOP, 255, 27,0, FALSE} // 24
- , {doNOP, 35 /* # */, 48, 14, TRUE} // 25 open-paren-quant2
+ , {doNOP, 35 /* # */, 49, 14, TRUE} // 25 open-paren-quant2
, {doNOP, 255, 29,0, FALSE} // 26
, {doSuppressComments, 63 /* ? */, 29,0, TRUE} // 27 open-paren
, {doOpenCaptureParen, 255, 2, 14, FALSE} // 28
@@ -162,155 +162,157 @@
, {doOpenAtomicParen, 62 /* > */, 2, 14, TRUE} // 30
, {doOpenLookAhead, 61 /* = */, 2, 20, TRUE} // 31
, {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE} // 32
- , {doNOP, 60 /* < */, 45,0, TRUE} // 33
- , {doNOP, 35 /* # */, 48, 2, TRUE} // 34
- , {doBeginMatchMode, 105 /* i */, 51,0, FALSE} // 35
- , {doBeginMatchMode, 100 /* d */, 51,0, FALSE} // 36
- , {doBeginMatchMode, 109 /* m */, 51,0, FALSE} // 37
- , {doBeginMatchMode, 115 /* s */, 51,0, FALSE} // 38
- , {doBeginMatchMode, 119 /* w */, 51,0, FALSE} // 39
- , {doBeginMatchMode, 120 /* x */, 51,0, FALSE} // 40
- , {doBeginMatchMode, 45 /* - */, 51,0, FALSE} // 41
- , {doConditionalExpr, 40 /* ( */, 181,0, TRUE} // 42
- , {doPerlInline, 123 /* { */, 181,0, TRUE} // 43
- , {doBadOpenParenType, 255, 181,0, FALSE} // 44
- , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE} // 45 open-paren-lookbehind
- , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE} // 46
- , {doBadOpenParenType, 255, 181,0, FALSE} // 47
- , {doNOP, 41 /* ) */, 255,0, TRUE} // 48 paren-comment
- , {doMismatchedParenErr, 253, 181,0, FALSE} // 49
- , {doNOP, 255, 48,0, TRUE} // 50
- , {doMatchMode, 105 /* i */, 51,0, TRUE} // 51 paren-flag
- , {doMatchMode, 100 /* d */, 51,0, TRUE} // 52
- , {doMatchMode, 109 /* m */, 51,0, TRUE} // 53
- , {doMatchMode, 115 /* s */, 51,0, TRUE} // 54
- , {doMatchMode, 119 /* w */, 51,0, TRUE} // 55
- , {doMatchMode, 120 /* x */, 51,0, TRUE} // 56
- , {doMatchMode, 45 /* - */, 51,0, TRUE} // 57
- , {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 58
- , {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 59
- , {doBadModeFlag, 255, 181,0, FALSE} // 60
- , {doNGStar, 63 /* ? */, 20,0, TRUE} // 61 quant-star
- , {doPossessiveStar, 43 /* + */, 20,0, TRUE} // 62
- , {doStar, 255, 20,0, FALSE} // 63
- , {doNGPlus, 63 /* ? */, 20,0, TRUE} // 64 quant-plus
- , {doPossessivePlus, 43 /* + */, 20,0, TRUE} // 65
- , {doPlus, 255, 20,0, FALSE} // 66
- , {doNGOpt, 63 /* ? */, 20,0, TRUE} // 67 quant-opt
- , {doPossessiveOpt, 43 /* + */, 20,0, TRUE} // 68
- , {doOpt, 255, 20,0, FALSE} // 69
- , {doNOP, 128, 72,0, FALSE} // 70 interval-open
- , {doIntervalError, 255, 181,0, FALSE} // 71
- , {doIntevalLowerDigit, 128, 72,0, TRUE} // 72 interval-lower
- , {doNOP, 44 /* , */, 76,0, TRUE} // 73
- , {doIntervalSame, 125 /* } */, 79,0, TRUE} // 74
- , {doIntervalError, 255, 181,0, FALSE} // 75
- , {doIntervalUpperDigit, 128, 76,0, TRUE} // 76 interval-upper
- , {doNOP, 125 /* } */, 79,0, TRUE} // 77
- , {doIntervalError, 255, 181,0, FALSE} // 78
- , {doNGInterval, 63 /* ? */, 20,0, TRUE} // 79 interval-type
- , {doPossessiveInterval, 43 /* + */, 20,0, TRUE} // 80
- , {doInterval, 255, 20,0, FALSE} // 81
- , {doBackslashA, 65 /* A */, 2,0, TRUE} // 82 backslash
- , {doBackslashB, 66 /* B */, 2,0, TRUE} // 83
- , {doBackslashb, 98 /* b */, 2,0, TRUE} // 84
- , {doBackslashd, 100 /* d */, 14,0, TRUE} // 85
- , {doBackslashD, 68 /* D */, 14,0, TRUE} // 86
- , {doBackslashG, 71 /* G */, 2,0, TRUE} // 87
- , {doNamedChar, 78 /* N */, 14,0, FALSE} // 88
- , {doProperty, 112 /* p */, 14,0, FALSE} // 89
- , {doProperty, 80 /* P */, 14,0, FALSE} // 90
- , {doEnterQuoteMode, 81 /* Q */, 2,0, TRUE} // 91
- , {doBackslashS, 83 /* S */, 14,0, TRUE} // 92
- , {doBackslashs, 115 /* s */, 14,0, TRUE} // 93
- , {doBackslashW, 87 /* W */, 14,0, TRUE} // 94
- , {doBackslashw, 119 /* w */, 14,0, TRUE} // 95
- , {doBackslashX, 88 /* X */, 14,0, TRUE} // 96
- , {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 97
- , {doBackslashz, 122 /* z */, 2,0, TRUE} // 98
- , {doBackRef, 128, 14,0, TRUE} // 99
- , {doEscapeError, 253, 181,0, FALSE} // 100
- , {doEscapedLiteralChar, 255, 14,0, TRUE} // 101
- , {doSetNegate, 94 /* ^ */, 105,0, TRUE} // 102 set-open
- , {doSetPosixProp, 58 /* : */, 107,0, FALSE} // 103
- , {doNOP, 255, 105,0, FALSE} // 104
- , {doSetLiteral, 93 /* ] */, 120,0, TRUE} // 105 set-open2
- , {doNOP, 255, 110,0, FALSE} // 106
- , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 107 set-posix
- , {doNOP, 58 /* : */, 110,0, FALSE} // 108
- , {doRuleError, 255, 181,0, FALSE} // 109
- , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 110 set-start
- , {doSetBeginUnion, 91 /* [ */, 102, 127, TRUE} // 111
- , {doNOP, 92 /* \ */, 170,0, TRUE} // 112
- , {doNOP, 45 /* - */, 116,0, TRUE} // 113
- , {doNOP, 38 /* & */, 118,0, TRUE} // 114
- , {doSetLiteral, 255, 120,0, TRUE} // 115
- , {doRuleError, 45 /* - */, 181,0, FALSE} // 116 set-start-dash
- , {doSetAddDash, 255, 120,0, FALSE} // 117
- , {doRuleError, 38 /* & */, 181,0, FALSE} // 118 set-start-amp
- , {doSetAddAmp, 255, 120,0, FALSE} // 119
- , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 120 set-after-lit
- , {doSetBeginUnion, 91 /* [ */, 102, 127, TRUE} // 121
- , {doNOP, 45 /* - */, 157,0, TRUE} // 122
- , {doNOP, 38 /* & */, 148,0, TRUE} // 123
- , {doNOP, 92 /* \ */, 170,0, TRUE} // 124
- , {doSetNoCloseError, 253, 181,0, FALSE} // 125
- , {doSetLiteral, 255, 120,0, TRUE} // 126
- , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 127 set-after-set
- , {doSetBeginUnion, 91 /* [ */, 102, 127, TRUE} // 128
- , {doNOP, 45 /* - */, 150,0, TRUE} // 129
- , {doNOP, 38 /* & */, 145,0, TRUE} // 130
- , {doNOP, 92 /* \ */, 170,0, TRUE} // 131
- , {doSetNoCloseError, 253, 181,0, FALSE} // 132
- , {doSetLiteral, 255, 120,0, TRUE} // 133
- , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 134 set-after-range
- , {doSetBeginUnion, 91 /* [ */, 102, 127, TRUE} // 135
- , {doNOP, 45 /* - */, 153,0, TRUE} // 136
- , {doNOP, 38 /* & */, 155,0, TRUE} // 137
- , {doNOP, 92 /* \ */, 170,0, TRUE} // 138
- , {doSetNoCloseError, 253, 181,0, FALSE} // 139
- , {doSetLiteral, 255, 120,0, TRUE} // 140
- , {doSetBeginUnion, 91 /* [ */, 102, 127, TRUE} // 141 set-after-op
- , {doSetOpError, 93 /* ] */, 181,0, FALSE} // 142
- , {doNOP, 92 /* \ */, 170,0, TRUE} // 143
- , {doSetLiteral, 255, 120,0, TRUE} // 144
- , {doSetBeginIntersection1, 91 /* [ */, 102, 127, TRUE} // 145 set-set-amp
- , {doSetIntersection2, 38 /* & */, 141,0, TRUE} // 146
- , {doSetAddAmp, 255, 120,0, FALSE} // 147
- , {doSetIntersection2, 38 /* & */, 141,0, TRUE} // 148 set-lit-amp
- , {doSetAddAmp, 255, 120,0, FALSE} // 149
- , {doSetBeginDifference1, 91 /* [ */, 102, 127, TRUE} // 150 set-set-dash
- , {doSetDifference2, 45 /* - */, 141,0, TRUE} // 151
- , {doSetAddDash, 255, 120,0, FALSE} // 152
- , {doSetDifference2, 45 /* - */, 141,0, TRUE} // 153 set-range-dash
- , {doSetAddDash, 255, 120,0, FALSE} // 154
- , {doSetIntersection2, 38 /* & */, 141,0, TRUE} // 155 set-range-amp
- , {doSetAddAmp, 255, 120,0, FALSE} // 156
- , {doSetDifference2, 45 /* - */, 141,0, TRUE} // 157 set-lit-dash
- , {doSetAddDash, 91 /* [ */, 120,0, FALSE} // 158
- , {doSetAddDash, 93 /* ] */, 120,0, FALSE} // 159
- , {doNOP, 92 /* \ */, 162,0, TRUE} // 160
- , {doSetRange, 255, 134,0, TRUE} // 161
- , {doSetOpError, 115 /* s */, 181,0, FALSE} // 162 set-lit-dash-escape
- , {doSetOpError, 83 /* S */, 181,0, FALSE} // 163
- , {doSetOpError, 119 /* w */, 181,0, FALSE} // 164
- , {doSetOpError, 87 /* W */, 181,0, FALSE} // 165
- , {doSetOpError, 100 /* d */, 181,0, FALSE} // 166
- , {doSetOpError, 68 /* D */, 181,0, FALSE} // 167
- , {doSetNamedRange, 78 /* N */, 134,0, FALSE} // 168
- , {doSetRange, 255, 134,0, TRUE} // 169
- , {doSetProp, 112 /* p */, 127,0, FALSE} // 170 set-escape
- , {doSetProp, 80 /* P */, 127,0, FALSE} // 171
- , {doSetNamedChar, 78 /* N */, 120,0, FALSE} // 172
- , {doSetBackslash_s, 115 /* s */, 134,0, TRUE} // 173
- , {doSetBackslash_S, 83 /* S */, 134,0, TRUE} // 174
- , {doSetBackslash_w, 119 /* w */, 134,0, TRUE} // 175
- , {doSetBackslash_W, 87 /* W */, 134,0, TRUE} // 176
- , {doSetBackslash_d, 100 /* d */, 134,0, TRUE} // 177
- , {doSetBackslash_D, 68 /* D */, 134,0, TRUE} // 178
- , {doSetLiteralEscaped, 255, 120,0, TRUE} // 179
- , {doSetFinish, 255, 14,0, FALSE} // 180 set-finish
- , {doExit, 255, 181,0, TRUE} // 181 errorDeath
+ , {doNOP, 60 /* < */, 46,0, TRUE} // 33
+ , {doNOP, 35 /* # */, 49, 2, TRUE} // 34
+ , {doBeginMatchMode, 105 /* i */, 52,0, FALSE} // 35
+ , {doBeginMatchMode, 100 /* d */, 52,0, FALSE} // 36
+ , {doBeginMatchMode, 109 /* m */, 52,0, FALSE} // 37
+ , {doBeginMatchMode, 115 /* s */, 52,0, FALSE} // 38
+ , {doBeginMatchMode, 117 /* u */, 52,0, FALSE} // 39
+ , {doBeginMatchMode, 119 /* w */, 52,0, FALSE} // 40
+ , {doBeginMatchMode, 120 /* x */, 52,0, FALSE} // 41
+ , {doBeginMatchMode, 45 /* - */, 52,0, FALSE} // 42
+ , {doConditionalExpr, 40 /* ( */, 183,0, TRUE} // 43
+ , {doPerlInline, 123 /* { */, 183,0, TRUE} // 44
+ , {doBadOpenParenType, 255, 183,0, FALSE} // 45
+ , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE} // 46 open-paren-lookbehind
+ , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE} // 47
+ , {doBadOpenParenType, 255, 183,0, FALSE} // 48
+ , {doNOP, 41 /* ) */, 255,0, TRUE} // 49 paren-comment
+ , {doMismatchedParenErr, 253, 183,0, FALSE} // 50
+ , {doNOP, 255, 49,0, TRUE} // 51
+ , {doMatchMode, 105 /* i */, 52,0, TRUE} // 52 paren-flag
+ , {doMatchMode, 100 /* d */, 52,0, TRUE} // 53
+ , {doMatchMode, 109 /* m */, 52,0, TRUE} // 54
+ , {doMatchMode, 115 /* s */, 52,0, TRUE} // 55
+ , {doMatchMode, 117 /* u */, 52,0, TRUE} // 56
+ , {doMatchMode, 119 /* w */, 52,0, TRUE} // 57
+ , {doMatchMode, 120 /* x */, 52,0, TRUE} // 58
+ , {doMatchMode, 45 /* - */, 52,0, TRUE} // 59
+ , {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 60
+ , {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 61
+ , {doBadModeFlag, 255, 183,0, FALSE} // 62
+ , {doNGStar, 63 /* ? */, 20,0, TRUE} // 63 quant-star
+ , {doPossessiveStar, 43 /* + */, 20,0, TRUE} // 64
+ , {doStar, 255, 20,0, FALSE} // 65
+ , {doNGPlus, 63 /* ? */, 20,0, TRUE} // 66 quant-plus
+ , {doPossessivePlus, 43 /* + */, 20,0, TRUE} // 67
+ , {doPlus, 255, 20,0, FALSE} // 68
+ , {doNGOpt, 63 /* ? */, 20,0, TRUE} // 69 quant-opt
+ , {doPossessiveOpt, 43 /* + */, 20,0, TRUE} // 70
+ , {doOpt, 255, 20,0, FALSE} // 71
+ , {doNOP, 128, 74,0, FALSE} // 72 interval-open
+ , {doIntervalError, 255, 183,0, FALSE} // 73
+ , {doIntevalLowerDigit, 128, 74,0, TRUE} // 74 interval-lower
+ , {doNOP, 44 /* , */, 78,0, TRUE} // 75
+ , {doIntervalSame, 125 /* } */, 81,0, TRUE} // 76
+ , {doIntervalError, 255, 183,0, FALSE} // 77
+ , {doIntervalUpperDigit, 128, 78,0, TRUE} // 78 interval-upper
+ , {doNOP, 125 /* } */, 81,0, TRUE} // 79
+ , {doIntervalError, 255, 183,0, FALSE} // 80
+ , {doNGInterval, 63 /* ? */, 20,0, TRUE} // 81 interval-type
+ , {doPossessiveInterval, 43 /* + */, 20,0, TRUE} // 82
+ , {doInterval, 255, 20,0, FALSE} // 83
+ , {doBackslashA, 65 /* A */, 2,0, TRUE} // 84 backslash
+ , {doBackslashB, 66 /* B */, 2,0, TRUE} // 85
+ , {doBackslashb, 98 /* b */, 2,0, TRUE} // 86
+ , {doBackslashd, 100 /* d */, 14,0, TRUE} // 87
+ , {doBackslashD, 68 /* D */, 14,0, TRUE} // 88
+ , {doBackslashG, 71 /* G */, 2,0, TRUE} // 89
+ , {doNamedChar, 78 /* N */, 14,0, FALSE} // 90
+ , {doProperty, 112 /* p */, 14,0, FALSE} // 91
+ , {doProperty, 80 /* P */, 14,0, FALSE} // 92
+ , {doEnterQuoteMode, 81 /* Q */, 2,0, TRUE} // 93
+ , {doBackslashS, 83 /* S */, 14,0, TRUE} // 94
+ , {doBackslashs, 115 /* s */, 14,0, TRUE} // 95
+ , {doBackslashW, 87 /* W */, 14,0, TRUE} // 96
+ , {doBackslashw, 119 /* w */, 14,0, TRUE} // 97
+ , {doBackslashX, 88 /* X */, 14,0, TRUE} // 98
+ , {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 99
+ , {doBackslashz, 122 /* z */, 2,0, TRUE} // 100
+ , {doBackRef, 128, 14,0, TRUE} // 101
+ , {doEscapeError, 253, 183,0, FALSE} // 102
+ , {doEscapedLiteralChar, 255, 14,0, TRUE} // 103
+ , {doSetNegate, 94 /* ^ */, 107,0, TRUE} // 104 set-open
+ , {doSetPosixProp, 58 /* : */, 109,0, FALSE} // 105
+ , {doNOP, 255, 107,0, FALSE} // 106
+ , {doSetLiteral, 93 /* ] */, 122,0, TRUE} // 107 set-open2
+ , {doNOP, 255, 112,0, FALSE} // 108
+ , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 109 set-posix
+ , {doNOP, 58 /* : */, 112,0, FALSE} // 110
+ , {doRuleError, 255, 183,0, FALSE} // 111
+ , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 112 set-start
+ , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 113
+ , {doNOP, 92 /* \ */, 172,0, TRUE} // 114
+ , {doNOP, 45 /* - */, 118,0, TRUE} // 115
+ , {doNOP, 38 /* & */, 120,0, TRUE} // 116
+ , {doSetLiteral, 255, 122,0, TRUE} // 117
+ , {doRuleError, 45 /* - */, 183,0, FALSE} // 118 set-start-dash
+ , {doSetAddDash, 255, 122,0, FALSE} // 119
+ , {doRuleError, 38 /* & */, 183,0, FALSE} // 120 set-start-amp
+ , {doSetAddAmp, 255, 122,0, FALSE} // 121
+ , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 122 set-after-lit
+ , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 123
+ , {doNOP, 45 /* - */, 159,0, TRUE} // 124
+ , {doNOP, 38 /* & */, 150,0, TRUE} // 125
+ , {doNOP, 92 /* \ */, 172,0, TRUE} // 126
+ , {doSetNoCloseError, 253, 183,0, FALSE} // 127
+ , {doSetLiteral, 255, 122,0, TRUE} // 128
+ , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 129 set-after-set
+ , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 130
+ , {doNOP, 45 /* - */, 152,0, TRUE} // 131
+ , {doNOP, 38 /* & */, 147,0, TRUE} // 132
+ , {doNOP, 92 /* \ */, 172,0, TRUE} // 133
+ , {doSetNoCloseError, 253, 183,0, FALSE} // 134
+ , {doSetLiteral, 255, 122,0, TRUE} // 135
+ , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 136 set-after-range
+ , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 137
+ , {doNOP, 45 /* - */, 155,0, TRUE} // 138
+ , {doNOP, 38 /* & */, 157,0, TRUE} // 139
+ , {doNOP, 92 /* \ */, 172,0, TRUE} // 140
+ , {doSetNoCloseError, 253, 183,0, FALSE} // 141
+ , {doSetLiteral, 255, 122,0, TRUE} // 142
+ , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 143 set-after-op
+ , {doSetOpError, 93 /* ] */, 183,0, FALSE} // 144
+ , {doNOP, 92 /* \ */, 172,0, TRUE} // 145
+ , {doSetLiteral, 255, 122,0, TRUE} // 146
+ , {doSetBeginIntersection1, 91 /* [ */, 104, 129, TRUE} // 147 set-set-amp
+ , {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 148
+ , {doSetAddAmp, 255, 122,0, FALSE} // 149
+ , {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 150 set-lit-amp
+ , {doSetAddAmp, 255, 122,0, FALSE} // 151
+ , {doSetBeginDifference1, 91 /* [ */, 104, 129, TRUE} // 152 set-set-dash
+ , {doSetDifference2, 45 /* - */, 143,0, TRUE} // 153
+ , {doSetAddDash, 255, 122,0, FALSE} // 154
+ , {doSetDifference2, 45 /* - */, 143,0, TRUE} // 155 set-range-dash
+ , {doSetAddDash, 255, 122,0, FALSE} // 156
+ , {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 157 set-range-amp
+ , {doSetAddAmp, 255, 122,0, FALSE} // 158
+ , {doSetDifference2, 45 /* - */, 143,0, TRUE} // 159 set-lit-dash
+ , {doSetAddDash, 91 /* [ */, 122,0, FALSE} // 160
+ , {doSetAddDash, 93 /* ] */, 122,0, FALSE} // 161
+ , {doNOP, 92 /* \ */, 164,0, TRUE} // 162
+ , {doSetRange, 255, 136,0, TRUE} // 163
+ , {doSetOpError, 115 /* s */, 183,0, FALSE} // 164 set-lit-dash-escape
+ , {doSetOpError, 83 /* S */, 183,0, FALSE} // 165
+ , {doSetOpError, 119 /* w */, 183,0, FALSE} // 166
+ , {doSetOpError, 87 /* W */, 183,0, FALSE} // 167
+ , {doSetOpError, 100 /* d */, 183,0, FALSE} // 168
+ , {doSetOpError, 68 /* D */, 183,0, FALSE} // 169
+ , {doSetNamedRange, 78 /* N */, 136,0, FALSE} // 170
+ , {doSetRange, 255, 136,0, TRUE} // 171
+ , {doSetProp, 112 /* p */, 129,0, FALSE} // 172 set-escape
+ , {doSetProp, 80 /* P */, 129,0, FALSE} // 173
+ , {doSetNamedChar, 78 /* N */, 122,0, FALSE} // 174
+ , {doSetBackslash_s, 115 /* s */, 136,0, TRUE} // 175
+ , {doSetBackslash_S, 83 /* S */, 136,0, TRUE} // 176
+ , {doSetBackslash_w, 119 /* w */, 136,0, TRUE} // 177
+ , {doSetBackslash_W, 87 /* W */, 136,0, TRUE} // 178
+ , {doSetBackslash_d, 100 /* d */, 136,0, TRUE} // 179
+ , {doSetBackslash_D, 68 /* D */, 136,0, TRUE} // 180
+ , {doSetLiteralEscaped, 255, 122,0, TRUE} // 181
+ , {doSetFinish, 255, 14,0, FALSE} // 182 set-finish
+ , {doExit, 255, 183,0, TRUE} // 183 errorDeath
};
static const char * const RegexStateNames[] = { 0,
"start",
@@ -357,6 +359,7 @@
0,
0,
0,
+ 0,
"open-paren-lookbehind",
0,
0,
@@ -373,6 +376,7 @@
0,
0,
0,
+ 0,
"quant-star",
0,
0,
diff --git a/i18n/regexcst.txt b/i18n/regexcst.txt
index 304ac57..8ddfa99 100644
--- a/i18n/regexcst.txt
+++ b/i18n/regexcst.txt
@@ -136,6 +136,7 @@
'd' paren-flag doBeginMatchMode
'm' paren-flag doBeginMatchMode
's' paren-flag doBeginMatchMode
+ 'u' paren-flag doBeginMatchMode
'w' paren-flag doBeginMatchMode
'x' paren-flag doBeginMatchMode
'-' paren-flag doBeginMatchMode
@@ -165,6 +166,7 @@
'd' n paren-flag doMatchMode
'm' n paren-flag doMatchMode
's' n paren-flag doMatchMode
+ 'u' n paren-flag doMatchMode
'w' n paren-flag doMatchMode
'x' n paren-flag doMatchMode
'-' n paren-flag doMatchMode
diff --git a/i18n/rematch.cpp b/i18n/rematch.cpp
index 9439e8a..c3d7132 100644
--- a/i18n/rematch.cpp
+++ b/i18n/rematch.cpp
@@ -1712,8 +1712,10 @@
case URX_BACKSLASH_Z: // Test for end of Input
if (fp->fInputIdx < fAnchorLimit) {
- fHitEnd = TRUE;
fp = (REStackFrame *)fStack->popFrame(frameSize);
+ } else {
+ fHitEnd = TRUE;
+ fRequireEnd = TRUE;
}
break;
diff --git a/test/testdata/regextst.txt b/test/testdata/regextst.txt
index 01a867e..8da1091 100644
--- a/test/testdata/regextst.txt
+++ b/test/testdata/regextst.txt
@@ -42,7 +42,7 @@
# a <r>region</r> has been specified in the string.
# z|Z hitEnd was expected(z) or not expected (Z).
# With neither, hitEnd is not checked.
-# y|Y Require End expeted(y) or not expected (Y).
+# y|Y Require End expected(y) or not expected (Y).
#
# White space must be present between the flags and the match string.
#
@@ -62,6 +62,16 @@
".(?!\p{L})" "abc<0>d</0> " # Negated look-ahead
".(?!(\p{L}))" "abc<0>d</0> " # Negated look-ahead, no capture
# visible outside of look-ahead
+"and(?=roid)" L "<0>and</0>roid"
+"and(?=roid)" M "<r>and</r>roid"
+"and(?=roid)" bM "<r><0>and</0></r>roid"
+
+"and(?!roid)" L "<0>and</0>roix"
+"and(?!roid)" L "android"
+
+"and(?!roid)" M "<r><0>and</0></r>roid" # Opaque bounds
+"and(?!roid)" bM "<r>and</r>roid"
+"and(?!roid)" bM "<r><0>and</0></r>roix"
#
# Negated Lookahead, various regions and region transparency
@@ -255,20 +265,20 @@
".*^(Hello)" " Hello Hello Hello Hello Goodbye"# No Match
# $ matches only at end of line, or before a newline preceding the end of line
-".*?(Goodbye)$" "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>"
-".*?(Goodbye)" "<0>Hello <1>Goodbye</1></0> Goodbye Goodbye"
-".*?(Goodbye)$" "Hello Goodbye> Goodbye Goodbye "# No Match
+".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>"
+".*?(Goodbye)" ZY "<0>Hello <1>Goodbye</1></0> Goodbye Goodbye"
+".*?(Goodbye)$" z "Hello Goodbye> Goodbye Goodbye "# No Match
-".*?(Goodbye)$" "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\n"
-".*?(Goodbye)$" "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\n"
-".*?(Goodbye)$" "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\r\n"
-".*?(Goodbye)$" "Hello Goodbye Goodbye Goodbye\n\n"# No Match
+".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\n"
+".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\n"
+".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\r\n"
+".*?(Goodbye)$" z "Hello Goodbye Goodbye Goodbye\n\n"# No Match
# \Z matches at end of input, like $ with default flags.
-".*?(Goodbye)\Z" "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>"
-".*?(Goodbye)" "<0>Hello <1>Goodbye</1></0> Goodbye Goodbye"
-".*?(Goodbye)\Z" "Hello Goodbye> Goodbye Goodbye "# No Match
-"here$" "here\nthe end"# No Match
+".*?(Goodbye)\Z" zy "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>"
+".*?(Goodbye)" ZY "<0>Hello <1>Goodbye</1></0> Goodbye Goodbye"
+".*?(Goodbye)\Z" z "Hello Goodbye> Goodbye Goodbye "# No Match
+"here$" z "here\nthe end"# No Match
".*?(Goodbye)\Z" "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\n"
".*?(Goodbye)\Z" "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\n"
@@ -278,12 +288,13 @@
# \z matches only at the end of string.
# no special treatment of new lines.
# no dependencies on flag settings.
-".*?(Goodbye)\z" "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>"
-".*?(Goodbye)\z" "Hello Goodbye Goodbye Goodbye "# No Match
-"here$" "here\nthe end"# No Match
+".*?(Goodbye)\z" zy "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>"
+".*?(Goodbye)\z" z "Hello Goodbye Goodbye Goodbye "# No Match
+"here$" z "here\nthe end"# No Match
-".*?(Goodbye)\z" "Hello Goodbye Goodbye Goodbye\n"# No Match
-".*?(Goodbye)\n\z" "<0>Hello Goodbye Goodbye <1>Goodbye</1>\n</0>"
+".*?(Goodbye)\z" z "Hello Goodbye Goodbye Goodbye\n"# No Match
+".*?(Goodbye)\n\z" zy "<0>Hello Goodbye Goodbye <1>Goodbye</1>\n</0>"
+"abc\z|def" ZY "abc<0>def</0>"
# (?# comment) doesn't muck up pattern
"Hello (?# this is a comment) world" " <0>Hello world</0>..."
@@ -447,6 +458,12 @@
"ab c" x "ab c "
"ab\ c" x "<0>ab c</0> "
+#
+# Pattern Flags
+#
+"(?u)abc" "<0>abc</0>"
+"(?-u)abc" "<0>abc</0>"
+
#Multi-line mode
'b\s^' m "a\nb\n"
@@ -556,6 +573,7 @@
"\0776" "<0>\u003f\u0036</0>" # overflow, the 6 is literal.
"\0376xyz" "<0>\u00fexyz</0>"
"\08" E "<0>\u00008</0>"
+"\0" E "x"
#
# \u Surrogate Pairs
@@ -573,6 +591,18 @@
"abc" 3z "aa>abc abcab"
#
+# Bug xxxx
+#
+"(?:\-|(\-?\d+\d\d\d))?(?:\-|\-(\d\d))?(?:\-|\-(\d\d))?(T)?(?:(\d\d):(\d\d):(\d\d)(\.\d+)?)?(?:(?:((?:\+|\-)\d\d):(\d\d))|(Z))?" MG "<0>-1234-21-31T41:51:61.789+71:81</0>"
+
+
+#
+# A random, complex, meaningless pattern that should at least compile.
+#
+"(?![^\<C\f\0146\0270\}&&[|\02-\x3E\}|X-\|]]{7,}+)[|\\\x98\<\?\u4FCFr\,\0025\}\004|\0025-\0521]|(?<![|\01-\u829E])|(?<!\p{Alpha})|^|(?-s:[^\x15\\\x24F\a\,\a\u97D8[\x38\a[\0224-\0306[^\0020-\u6A57]]]]??)(?xix:[^|\{\[\0367\t\e\x8C\{\[\074c\]V[|b\fu\r\0175\<\07f\066s[^D-\x5D]]])(?xx:^{5,}+)(?d)(?=^\D)|(?!\G)(?>\G)(?![^|\]\070\ne\{\t\[\053\?\\\x51\a\075\0023-\[&&[|\022-\xEA\00-\u41C2&&[^|a-\xCC&&[^\037\uECB3\u3D9A\x31\|\<b\0206\uF2EC\01m\,\ak\a\03&&\p{Punct}]]]])(?-dxs:[|\06-\07|\e-\x63&&[|Tp\u18A3\00\|\xE4\05\061\015\0116C|\r\{\}\006\xEA\0367\xC4\01\0042\0267\xBB\01T\}\0100\?[|\[-\u459B|\x23\x91\rF\0376[|\?-\x94\0113-\\\s]]]]{6}?)(?<=[^\t-\x42H\04\f\03\0172\?i\u97B6\e\f\uDAC2])(?=\B)(?>[^\016\r\{\,\uA29D\034\02[\02-\[|\t\056\uF599\x62\e\<\032\uF0AC\0026\0205Q\|\\\06\0164[|\057-\u7A98&&[\061-g|\|\0276\n\042\011\e\xE8\x64B\04\u6D0EDW^\p{Lower}]]]]?)(?<=[^\n\\\t\u8E13\,\0114\u656E\xA5\]&&[\03-\026|\uF39D\01\{i\u3BC2\u14FE]])(?<=[^|\uAE62\054H\|\}&&^\p{Space}])(?sxx)(?<=[\f\006\a\r\xB4]{1,5})|(?x-xd:^{5}+)()" "<0></0>abc"
+
+
+#
# Bug 3225
"1|9" "<0>1</0>"