| Index: source/i18n/regexcmp.h |
| =================================================================== |
| --- source/i18n/regexcmp.h (revision 292476) |
| +++ source/i18n/regexcmp.h (working copy) |
| @@ -182,7 +182,9 @@ |
| int32_t fMatchOpenParen; // The position in the compiled pattern |
| // of the slot reserved for a state save |
| // at the start of the most recently processed |
| - // parenthesized block. |
| + // parenthesized block. Updated when processing a |
| + // close paren to the location of the corresponding open paren. |
| + |
| int32_t fMatchCloseParen; // The position in the pattern of the first |
| // location after the most recently processed |
| // parenthesized block. |
| Index: source/i18n/regexcmp.cpp |
| =================================================================== |
| --- source/i18n/regexcmp.cpp (revision 292476) |
| +++ source/i18n/regexcmp.cpp (working copy) |
| @@ -2133,6 +2133,10 @@ |
| int32_t patEnd = fRXPat->fCompiledPat->size() - 1; |
| int32_t minML = minMatchLength(fMatchOpenParen, patEnd); |
| int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd); |
| + if (URX_TYPE(maxML) != 0) { |
| + error(U_REGEX_LOOK_BEHIND_LIMIT); |
| + break; |
| + } |
| if (maxML == INT32_MAX) { |
| error(U_REGEX_LOOK_BEHIND_LIMIT); |
| break; |
| @@ -2166,6 +2170,10 @@ |
| int32_t patEnd = fRXPat->fCompiledPat->size() - 1; |
| int32_t minML = minMatchLength(fMatchOpenParen, patEnd); |
| int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd); |
| + if (URX_TYPE(maxML) != 0) { |
| + error(U_REGEX_LOOK_BEHIND_LIMIT); |
| + break; |
| + } |
| if (maxML == INT32_MAX) { |
| error(U_REGEX_LOOK_BEHIND_LIMIT); |
| break; |
| @@ -2329,7 +2337,15 @@ |
| int32_t topOfBlock = blockTopLoc(FALSE); |
| if (fIntervalUpper == 0) { |
| // Pathological case. Attempt no matches, as if the block doesn't exist. |
| + // Discard the generated code for the block. |
| + // If the block included parens, discard the info pertaining to them as well. |
| fRXPat->fCompiledPat->setSize(topOfBlock); |
| + if (fMatchOpenParen >= topOfBlock) { |
| + fMatchOpenParen = -1; |
| + } |
| + if (fMatchCloseParen >= topOfBlock) { |
| + fMatchCloseParen = -1; |
| + } |
| return TRUE; |
| } |
| |
| Index: source/test/testdata/regextst.txt |
| =================================================================== |
| --- source/test/testdata/regextst.txt (revision 292476) |
| +++ source/test/testdata/regextst.txt (working copy) |
| @@ -1173,6 +1173,24 @@ |
| "(?<=(?:){11})bc" "<0>bc</0>" # Empty (?:) expression. |
| |
| |
| +# Bug 11369 |
| +# Incorrect optimization of patterns with a zero length quantifier {0} |
| + |
| +"(.|b)(|b){0}\$(?#xxx){3}(?>\D*)" "AAAAABBBBBCCCCCDDDDEEEEE" |
| +"(|b)ab(c)" "<0><1></1>ab<2>c</2></0>" |
| +"(|b){0}a{3}(D*)" "<0>aaa<2></2></0>" |
| +"(|b){0,1}a{3}(D*)" "<0><1></1>aaa<2></2></0>" |
| +"((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>" |
| + |
| +# Bug 11370 |
| +# Max match length computation of look-behind expression gives result that is too big to fit in the |
| +# 24 bit operand portion of the compiled code. Expressions should fail to compile. |
| +# (Look-behind match length must be bounded. This case is treated as unbounded, an error.) |
| + |
| +"(?<!(0123456789a){10000000})x" E "no match" |
| +"(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match" |
| + |
| + |
| # Random debugging, Temporary |
| # |
| #"^(?:a?b?)*$" "a--" |