import cl @41939

commit: a733874a9800264e7b41d880049ac4bf9d7f405f [log] [tgz]
author: Jean-Baptiste Queru <jbq@google.com> Fri Jul 17 17:41:21 2009 -0700
committer: Jean-Baptiste Queru <jbq@google.com> Fri Jul 17 17:41:21 2009 -0700
tree: 441afd6147fa76fab98f166726c0f4f192081ca8
parent: a09443ea25f9d6e6cfae4b8e438c7a6ced388a06 [diff]
diff --git a/i18n/regexcmp.cpp b/i18n/regexcmp.cpp
index 09da39c..2c84e3d 100644
--- a/i18n/regexcmp.cpp
+++ b/i18n/regexcmp.cpp

@@ -313,7 +313,6 @@
     // Optimization passes
     //
     matchStartType();
-    OptDotStar();
     stripNOPs();
 
     //
@@ -515,14 +514,29 @@
 
     case doOpenLookAhead:
         // Positive Look-ahead   (?=  stuff  )
+        //
+        //   Note:   Addition of transparent input regions, with the need to
+        //           restore the original regions when failing out of a lookahead
+        //           block, complicated this sequence.  Some combined opcodes
+        //           might make sense - or might not; lookaheads aren't that common.
+        //
+        //      Caution:  min match length optimization knows about this
+        //               sequence; don't change without making updates there too.
+        //
         // Compiles to
-        //    1    START_LA     dataLoc
-        //    2.   NOP              reserved for use by quantifiers on the block.
+        //    1    START_LA     dataLoc     Saves SP, Input Pos
+        //    2.   STATE_SAVE   4            on failure of lookahead, goto 4
+        //    3    JMP          6           continue ...
+        //
+        //    4.   LA_END                   Look Ahead failed.  Restore regions.
+        //    5.   BACKTRACK                and back track again.
+        //
+        //    6.   NOP              reserved for use by quantifiers on the block.
         //                          Look-ahead can't have quantifiers, but paren stack
         //                             compile time conventions require the slot anyhow.
-        //    3.   NOP              may be replaced if there is are '|' ops in the block.
-        //    4.     code for parenthesized stuff.
-        //    5.   ENDLA
+        //    7.   NOP              may be replaced if there are '|' ops in the block.
+        //    8.     code for parenthesized stuff.
+        //    9.   LA_END
         //
         //  Two data slots are reserved, for saving the stack ptr and the input position.
         {
@@ -531,6 +545,18 @@
             int32_t op = URX_BUILD(URX_LA_START, dataLoc);
             fRXPat->fCompiledPat->addElement(op, *fStatus);
 
+            op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+ 2);
+            fRXPat->fCompiledPat->addElement(op, *fStatus);
+
+            op = URX_BUILD(URX_JMP, fRXPat->fCompiledPat->size()+ 3);
+            fRXPat->fCompiledPat->addElement(op, *fStatus);
+            
+            op = URX_BUILD(URX_LA_END, dataLoc);
+            fRXPat->fCompiledPat->addElement(op, *fStatus);
+
+            op = URX_BUILD(URX_BACKTRACK, 0);
+            fRXPat->fCompiledPat->addElement(op, *fStatus);
+            
             op = URX_BUILD(URX_NOP, 0);
             fRXPat->fCompiledPat->addElement(op, *fStatus);
             fRXPat->fCompiledPat->addElement(op, *fStatus);
@@ -554,7 +580,8 @@
         //    4.       code for parenthesized stuff.
         //    5.    END_LA                // Cut back stack, remove saved state from step 2.
         //    6.    FAIL                  // code in block succeeded, so neg. lookahead fails.
-        //    7.    ...
+        //    7.    END_LA                // Restore match region, in case look-ahead was using
+        //                                        an alternate (transparent) region.
         {
             int32_t dataLoc = fRXPat->fDataSize;
             fRXPat->fDataSize += 2;
@@ -570,7 +597,7 @@
             // On the Parentheses stack, start a new frame and add the postions
             //   of the StateSave and NOP.
             fParenStack.push(fModeFlags, *fStatus);                       // Match mode state
-            fParenStack.push( negLookAhead, *fStatus);                    // Frame type
+            fParenStack.push(negLookAhead, *fStatus);                    // Frame type
             fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus);   // The STATE_SAVE location
             fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);   // The second NOP location
 
@@ -750,13 +777,17 @@
                 }
 
                 if (URX_TYPE(repeatedOp) == URX_DOTANY ||
-                    URX_TYPE(repeatedOp) == URX_DOTANY_ALL) {
+                    URX_TYPE(repeatedOp) == URX_DOTANY_ALL ||
+                    URX_TYPE(repeatedOp) == URX_DOTANY_UNIX) {
                     // Emit Optimized code for .+ operations.
                     int32_t loopOpI = URX_BUILD(URX_LOOP_DOT_I, 0);
                     if (URX_TYPE(repeatedOp) == URX_DOTANY_ALL) {
-                        // URX_LOOP_DOT_I operand is a flag indicating . matches any mode.
+                        // URX_LOOP_DOT_I operand is a flag indicating ". matches any" mode.
                         loopOpI |= 1;
                     }
+                    if (fModeFlags & UREGEX_UNIX_LINES) {
+                        loopOpI |= 2;
+                    }
                     fRXPat->fCompiledPat->addElement(loopOpI, *fStatus);
                     frameLoc = fRXPat->fFrameSize;
                     fRXPat->fFrameSize++;
@@ -889,13 +920,17 @@
                 }
 
                 if (URX_TYPE(repeatedOp) == URX_DOTANY ||
-                    URX_TYPE(repeatedOp) == URX_DOTANY_ALL) {
+                    URX_TYPE(repeatedOp) == URX_DOTANY_ALL ||
+                    URX_TYPE(repeatedOp) == URX_DOTANY_UNIX) {
                     // Emit Optimized code for .* operations.
                     int32_t loopOpI = URX_BUILD(URX_LOOP_DOT_I, 0);
                     if (URX_TYPE(repeatedOp) == URX_DOTANY_ALL) {
                         // URX_LOOP_DOT_I operand is a flag indicating . matches any mode.
                         loopOpI |= 1;
                     }
+                    if ((fModeFlags & UREGEX_UNIX_LINES) != 0) {
+                        loopOpI |= 2;
+                    }
                     fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc);
                     dataLoc = fRXPat->fFrameSize;
                     fRXPat->fFrameSize++;
@@ -1068,6 +1103,8 @@
             int32_t   op;
             if (fModeFlags & UREGEX_DOTALL) {
                 op = URX_BUILD(URX_DOTANY_ALL, 0);
+            } else if (fModeFlags & UREGEX_UNIX_LINES) {
+                op = URX_BUILD(URX_DOTANY_UNIX, 0);
             } else {
                 op = URX_BUILD(URX_DOTANY, 0);
             }
@@ -1077,15 +1114,35 @@
 
     case doCaret:
         {
-            int32_t op = (fModeFlags & UREGEX_MULTILINE)? URX_CARET_M : URX_CARET;
+            int32_t op;
+            if (       (fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
+                op = URX_CARET;
+            } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
+                op = URX_CARET_M;
+            } else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
+                op = URX_CARET;   // Only testing true start of input. 
+            } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
+                op = URX_CARET_M_UNIX;
+            }
+            if (fModeFlags & UREGEX_MULTILINE) {
+                op = (fModeFlags & UREGEX_UNIX_LINES)? URX_CARET_M_UNIX : URX_CARET_M;
+            }
             fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
         }
         break;
 
-
     case doDollar:
         {
-            int32_t op = (fModeFlags & UREGEX_MULTILINE)? URX_DOLLAR_M : URX_DOLLAR;
+            int32_t op;
+            if (       (fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
+                op = URX_DOLLAR;
+            } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
+                op = URX_DOLLAR_M;
+            } else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
+                op = URX_DOLLAR_D;
+            } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
+                op = URX_DOLLAR_MD;
+            }
             fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
         }
         break;
@@ -1340,6 +1397,7 @@
             int32_t  bit = 0;
             switch (fC.fChar) {
             case 0x69: /* 'i' */   bit = UREGEX_CASE_INSENSITIVE; break;
+            case 0x64: /* 'd' */   bit = UREGEX_UNIX_LINES;       break;
             case 0x6d: /* 'm' */   bit = UREGEX_MULTILINE;        break;
             case 0x73: /* 's' */   bit = UREGEX_DOTALL;           break;
             case 0x77: /* 'w' */   bit = UREGEX_UWORD;            break;
@@ -2062,7 +2120,7 @@
 
     case lookAhead:
         {
-            int32_t  startOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen-1);
+            int32_t  startOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen-5);
             U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
             int32_t dataLoc  = URX_VAL(startOp);
             int32_t op       = URX_BUILD(URX_LA_END, dataLoc);
@@ -2078,13 +2136,16 @@
             int32_t dataLoc  = URX_VAL(startOp);
             int32_t op       = URX_BUILD(URX_LA_END, dataLoc);
             fRXPat->fCompiledPat->addElement(op, *fStatus);
-             op              = URX_BUILD(URX_FAIL, 0);
+            op               = URX_BUILD(URX_BACKTRACK, 0);
+            fRXPat->fCompiledPat->addElement(op, *fStatus);
+            op               = URX_BUILD(URX_LA_END, 0);
             fRXPat->fCompiledPat->addElement(op, *fStatus);
 
             // Patch the URX_SAVE near the top of the block.
+            // The destination of the SAVE is the final LA_END that was just added.
             int32_t saveOp   = fRXPat->fCompiledPat->elementAti(fMatchOpenParen);
             U_ASSERT(URX_TYPE(saveOp) == URX_STATE_SAVE);
-            int32_t dest     = fRXPat->fCompiledPat->size();
+            int32_t dest     = fRXPat->fCompiledPat->size()-1;
             saveOp           = URX_BUILD(URX_STATE_SAVE, dest);
             fRXPat->fCompiledPat->setElementAt(saveOp, fMatchOpenParen);
         }
@@ -2392,7 +2453,6 @@
         // If the op we are now at was the destination of a branch in the pattern,
         // and that path has a shorter minimum length than the current accumulated value,
         // replace the current accumulated value.
-        U_ASSERT(currentLen>=0 && currentLen < INT32_MAX);
         if (forwardedLength.elementAti(loc) < currentLen) {
             currentLen = forwardedLength.elementAti(loc);
             U_ASSERT(currentLen>=0 && currentLen < INT32_MAX);
@@ -2402,6 +2462,7 @@
             // Ops that don't change the total length matched
         case URX_RESERVED_OP:
         case URX_END:
+        case URX_FAIL:
         case URX_STRING_LEN:
         case URX_NOP:
         case URX_START_CAPTURE:
@@ -2411,10 +2472,11 @@
         case URX_BACKSLASH_G:
         case URX_BACKSLASH_Z:
         case URX_DOLLAR:
+        case URX_DOLLAR_M:
+        case URX_DOLLAR_D:
+        case URX_DOLLAR_MD:
         case URX_RELOC_OPRND:
         case URX_STO_INP_LOC:
-        case URX_DOLLAR_M:
-        case URX_BACKTRACK:
         case URX_BACKREF:         // BackRef.  Must assume that it might be a zero length match
         case URX_BACKREF_I:
 
@@ -2429,6 +2491,7 @@
             break;
 
         case URX_CARET_M:
+        case URX_CARET_M_UNIX:
             if (atStart) {
                 fRXPat->fStartType = START_LINE;
             }
@@ -2553,8 +2616,7 @@
         case URX_BACKSLASH_X:   // Grahpeme Cluster.  Minimum is 1, max unbounded.
         case URX_DOTANY_ALL:    // . matches one or two.
         case URX_DOTANY:
-        case URX_DOTANY_ALL_PL:
-        case URX_DOTANY_PL:
+        case URX_DOTANY_UNIX:
             if (currentLen == 0) {
                 // These constructs are all bad news when they appear at the start
                 //   of a match.  Any character can begin the match.
@@ -2595,7 +2657,7 @@
             atStart = FALSE;
             break;
 
-        case URX_FAIL:
+        case URX_BACKTRACK:
             // Fails are kind of like a branch, except that the min length was
             //   propagated already, by the state save.
             currentLen = forwardedLength.elementAti(loc+1);
@@ -2718,18 +2780,25 @@
             {
                 // Look-around.  Scan forward until the matching look-ahead end,
                 //   without processing the look-around block.  This is overly pessimistic.
-                int32_t  depth = 0;
+                
+                // Keep track of the nesting depth of look-around blocks.  Boilerplate code for
+                //   lookahead contains two LA_END instructions, so count goes up by two
+                //   for each LA_START.
+                int32_t  depth = (opType == URX_LA_START? 2: 1);
                 for (;;) {
                     loc++;
                     op = fRXPat->fCompiledPat->elementAti(loc);
-                    if (URX_TYPE(op) == URX_LA_START || URX_TYPE(op) == URX_LB_START) {
+                    if (URX_TYPE(op) == URX_LA_START) {
+                        depth+=2;
+                    }
+                    if (URX_TYPE(op) == URX_LB_START) {
                         depth++;
                     }
                     if (URX_TYPE(op) == URX_LA_END || URX_TYPE(op)==URX_LBN_END) {
+                        depth--;
                         if (depth == 0) {
                             break;
                         }
-                        depth--;
                     }
                     if (URX_TYPE(op) == URX_STATE_SAVE) {
                         // Need this because neg lookahead blocks will FAIL to outside
@@ -2863,7 +2932,8 @@
         // If the op we are now at was the destination of a branch in the pattern,
         // and that path has a shorter minimum length than the current accumulated value,
         // replace the current accumulated value.
-        U_ASSERT(currentLen>=0 && currentLen < INT32_MAX);
+        // U_ASSERT(currentLen>=0 && currentLen < INT32_MAX);  // MinLength == INT32_MAX for some
+                                                               //   no-match-possible cases.
         if (forwardedLength.elementAti(loc) < currentLen) {
             currentLen = forwardedLength.elementAti(loc);
             U_ASSERT(currentLen>=0 && currentLen < INT32_MAX);
@@ -2883,11 +2953,13 @@
         case URX_BACKSLASH_Z:
         case URX_CARET:
         case URX_DOLLAR:
+        case URX_DOLLAR_M:
+        case URX_DOLLAR_D:
+        case URX_DOLLAR_MD:
         case URX_RELOC_OPRND:
         case URX_STO_INP_LOC:
-        case URX_DOLLAR_M:
         case URX_CARET_M:
-        case URX_BACKTRACK:
+        case URX_CARET_M_UNIX:
         case URX_BACKREF:         // BackRef.  Must assume that it might be a zero length match
         case URX_BACKREF_I:
 
@@ -2910,8 +2982,7 @@
         case URX_BACKSLASH_X:   // Grahpeme Cluster.  Minimum is 1, max unbounded.
         case URX_DOTANY_ALL:    // . matches one or two.
         case URX_DOTANY:
-        case URX_DOTANY_PL:
-        case URX_DOTANY_ALL_PL:
+        case URX_DOTANY_UNIX:
             currentLen++;
             break;
 
@@ -2936,12 +3007,11 @@
             }
             break;
 
-        case URX_FAIL:
+        case URX_BACKTRACK:
             {
-                // Fails are kind of like a branch, except that the min length was
+                // Back-tracks are kind of like a branch, except that the min length was
                 //   propagated already, by the state save.
                 currentLen = forwardedLength.elementAti(loc+1);
-                U_ASSERT(currentLen>=0 && currentLen < INT32_MAX);
             }
             break;
 
@@ -3008,21 +3078,33 @@
         case URX_LB_START:
             {
                 // Look-around.  Scan forward until the matching look-ahead end,
-                //   without processing the look-around block.  This is overly pessimistic.
+                //   without processing the look-around block.  This is overly pessimistic for look-ahead,
+                //   it assumes that the look-ahead match might be zero-length.
                 //   TODO:  Positive lookahead could recursively do the block, then continue
                 //          with the longer of the block or the value coming in.
-                int32_t  depth = 0;
+                int32_t  depth = (opType == URX_LA_START? 2: 1);;
                 for (;;) {
                     loc++;
                     op = fRXPat->fCompiledPat->elementAti(loc);
-                    if (URX_TYPE(op) == URX_LA_START || URX_TYPE(op) == URX_LB_START) {
+                    if (URX_TYPE(op) == URX_LA_START) {
+                        // The boilerplate for look-ahead includes two LA_END instructions.
+                        //    Depth will be decremented by each one when it is seen.
+                        depth += 2;
+                    }
+                    if (URX_TYPE(op) == URX_LB_START) {
                         depth++;
                     }
-                    if (URX_TYPE(op) == URX_LA_END || URX_TYPE(op)==URX_LBN_END) {
+                    if (URX_TYPE(op) == URX_LA_END) {
+                        depth--;
                         if (depth == 0) {
                             break;
                         }
+                    }
+                    if (URX_TYPE(op)==URX_LBN_END) {
                         depth--;
+                        if (depth == 0) {
+                            break;
+                        }
                     }
                     if (URX_TYPE(op) == URX_STATE_SAVE) {
                         // Need this because neg lookahead blocks will FAIL to outside
@@ -3034,7 +3116,9 @@
                             }
                         }
                     }
-
+                    if (loc > end) {
+                        RegexPatternDump(fRXPat);
+                    }
                     U_ASSERT(loc <= end);
                 }
             }
@@ -3123,11 +3207,13 @@
         case URX_BACKSLASH_Z:
         case URX_CARET:
         case URX_DOLLAR:
+        case URX_DOLLAR_M:
+        case URX_DOLLAR_D:
+        case URX_DOLLAR_MD:
         case URX_RELOC_OPRND:
         case URX_STO_INP_LOC:
-        case URX_DOLLAR_M:
         case URX_CARET_M:
-        case URX_BACKTRACK:
+        case URX_CARET_M_UNIX:
 
         case URX_STO_SP:          // Setup for atomic or possessive blocks.  Doesn't change what can match.
         case URX_LD_SP:
@@ -3145,8 +3231,6 @@
         case URX_BACKREF:         // BackRef.  Must assume that it might be a zero length match
         case URX_BACKREF_I:
         case URX_BACKSLASH_X:   // Grahpeme Cluster.  Minimum is 1, max unbounded.
-        case URX_DOTANY_PL:
-        case URX_DOTANY_ALL_PL:
             currentLen = INT32_MAX;
             break;
 
@@ -3160,6 +3244,7 @@
         case URX_ONECHAR_I:
         case URX_DOTANY_ALL:
         case URX_DOTANY:
+        case URX_DOTANY_UNIX:
             currentLen+=2;
             break;
 
@@ -3193,8 +3278,8 @@
             }
             break;
 
-        case URX_FAIL:
-            // Fails are kind of like a branch, except that the max length was
+        case URX_BACKTRACK:
+            // back-tracks are kind of like a branch, except that the max length was
             //   propagated already, by the state save.
             currentLen = forwardedLength.elementAti(loc+1);
             break;
@@ -3377,13 +3462,12 @@
         case URX_BACKSLASH_X:
         case URX_BACKSLASH_Z:
         case URX_DOTANY_ALL:
-        case URX_DOTANY_ALL_PL:
-        case URX_DOTANY_PL:
         case URX_BACKSLASH_D:
         case URX_CARET:
         case URX_DOLLAR:
         case URX_CTR_INIT:
         case URX_CTR_INIT_NG:
+        case URX_DOTANY_UNIX:
         case URX_STO_SP:
         case URX_LD_SP:
         case URX_BACKREF:
@@ -3395,6 +3479,7 @@
         case URX_BACKREF_I:
         case URX_DOLLAR_M:
         case URX_CARET_M:
+        case URX_CARET_M_UNIX:
         case URX_LB_START:
         case URX_LB_CONT:
         case URX_LB_END:
@@ -3403,6 +3488,8 @@
         case URX_LOOP_SR_I:
         case URX_LOOP_DOT_I:
         case URX_LOOP_C:
+        case URX_DOLLAR_D:
+        case URX_DOLLAR_MD:
             // These instructions are unaltered by the relocation.
             fRXPat->fCompiledPat->setElementAt(op, dst);
             dst++;
@@ -3423,83 +3510,6 @@
 
 //------------------------------------------------------------------------------
 //
-//   OptDotStar       Optimize patterns that end with a '.*' or '.+' to
-//                    just advance the input to the end.
-//
-//         Transform this compiled sequence
-//            [DOT_ANY | DOT_ANY_ALL]
-//            JMP_SAV  to previous instruction
-//            [NOP | END_CAPTURE | DOLLAR | BACKSLASH_Z]*
-//            END
-//
-//         To
-//            NOP
-//            [DOT_ANY_PL | DOT_ANY_ALL_PL]
-//            [NOP | END_CAPTURE | DOLLAR | BACKSLASH_Z]*
-//            END
-//
-//------------------------------------------------------------------------------
-void RegexCompile::OptDotStar() {
-    // Scan backwards in the pattern, looking for a JMP_SAV near the end.
-    int32_t  jmpLoc;
-    int32_t  op = 0;
-    int32_t  opType;
-    for (jmpLoc=fRXPat->fCompiledPat->size(); jmpLoc--;) {
-        U_ASSERT(jmpLoc>0);
-        op     = fRXPat->fCompiledPat->elementAti(jmpLoc);
-        opType = URX_TYPE(op);
-        switch(opType) {
-
-
-        case URX_END:
-        case URX_NOP:
-        case URX_END_CAPTURE:
-        case URX_DOLLAR_M:
-        case URX_DOLLAR:
-        case URX_BACKSLASH_Z:
-            // These ops may follow the JMP_SAV without preventing us from
-            //   doing this optimization.
-            continue;
-
-        case URX_JMP_SAV:
-            // Got a trailing JMP_SAV that's a candidate for optimization.
-            break;
-
-        default:
-            // This optimization not possible.
-            return;
-        }
-        break;   // from the for loop.
-    }
-
-    // We found in URX_JMP_SAV near the end that is a candidate for optimizing.
-    // Is the target address the previous instruction?
-    // Is the previous instruction a flavor of URX_DOTANY
-    int32_t  loopTopLoc = URX_VAL(op);
-    if (loopTopLoc != jmpLoc-1) {
-        return;
-    }
-    int32_t newOp;
-    int32_t oldOp     = fRXPat->fCompiledPat->elementAti(loopTopLoc);
-    int32_t oldOpType = opType = URX_TYPE(oldOp);
-    if (oldOpType == URX_DOTANY) {
-        newOp = URX_BUILD(URX_DOTANY_PL, 0);
-    }
-    else if (oldOpType == URX_DOTANY_ALL) {
-        newOp = URX_BUILD(URX_DOTANY_ALL_PL, 0);
-    } else {
-        return;    // Sequence we were looking for isn't there.
-    }
-
-    // Substitute the new instructions into the pattern.
-    // The NOP will be removed in a later optimization step.
-    fRXPat->fCompiledPat->setElementAt(URX_BUILD(URX_NOP, 0), loopTopLoc);
-    fRXPat->fCompiledPat->setElementAt(newOp, jmpLoc);
-}
-
-
-//------------------------------------------------------------------------------
-//
 //  Error         Report a rule parse error.
 //                Only report it if no previous error has been recorded.
 //
@@ -3701,6 +3711,10 @@
                 for (index=0; index<3; index++) {
                     int32_t ch = peekCharLL();
                     if (ch<chDigit0 || ch>chDigit7) {
+                        if (index==0) {
+                           // \0 is not followed by any octal digits.
+                           error(U_REGEX_BAD_ESCAPE_SEQUENCE);
+                        }
                         break;
                     }
                     c.fChar <<= 3;
@@ -3975,13 +3989,27 @@
     
     //
     //  The property as it was didn't work.
-    //    Do an emergency fixe -  
+    //    Do emergency fixes -
     //       InGreek -> InGreek or Coptic, that being the official Unicode name for that block.
+    //       InCombiningMarksforSymbols -> InCombiningDiacriticalMarksforSymbols.
+    //
+    //       Note on Spaces:  either "InCombiningMarksForSymbols" or "InCombining Marks for Symbols"
+    //                        is accepted by Java.  The property part of the name is compared 
+    //                        case-insensitively.  The spaces must be exactly as shown, either
+    //                        all there, or all omitted, with exactly one at each position
+    //                        if they are present.  From checking against JDK 1.6
+    //
+    //       This code should be removed once ICU properties support the Java compatibility names
+    //          (ICU 4.0?)
     //
     UnicodeString mPropName = propName;
     if (mPropName.caseCompare(UnicodeString("InGreek", -1, UnicodeString::kInvariant), 0) == 0) {
         mPropName = UnicodeString("InGreek and Coptic", -1 ,UnicodeString::kInvariant);
     }
+    if (mPropName.caseCompare(UnicodeString("InCombining Marks for Symbols", -1, UnicodeString::kInvariant), 0) == 0 ||
+        mPropName.caseCompare(UnicodeString("InCombiningMarksforSymbols", -1, UnicodeString::kInvariant), 0) == 0) {
+        mPropName = UnicodeString("InCombining Diacritical Marks for Symbols", -1 ,UnicodeString::kInvariant);
+    }
     else if (mPropName.compare(UnicodeString("all", -1, UnicodeString::kInvariant)) == 0) {
         mPropName = UnicodeString("javaValidCodePoint", -1 ,UnicodeString::kInvariant);
     }

diff --git a/i18n/regexcmp.h b/i18n/regexcmp.h
index a0248a3..00d932b 100644
--- a/i18n/regexcmp.h
+++ b/i18n/regexcmp.h

@@ -109,7 +109,6 @@
                                int32_t end);
     void        matchStartType();
     void        stripNOPs();
-    void        OptDotStar();
 
     void        setEval(int32_t op);
     void        setPushOp(int32_t op);

diff --git a/i18n/regexcst.h b/i18n/regexcst.h
index 5d31937..8c75310 100644
--- a/i18n/regexcst.h
+++ b/i18n/regexcst.h

@@ -133,20 +133,20 @@
     , {doPatStart, 255, 2,0,  FALSE}     //  1      start
     , {doLiteralChar, 254, 14,0,  TRUE}     //  2      term
     , {doLiteralChar, 129, 14,0,  TRUE}     //  3 
-    , {doSetBegin, 91 /* [ */, 100, 178, TRUE}     //  4 
+    , {doSetBegin, 91 /* [ */, 102, 180, TRUE}     //  4 
     , {doNOP, 40 /* ( */, 27,0,  TRUE}     //  5 
     , {doDotAny, 46 /* . */, 14,0,  TRUE}     //  6 
     , {doCaret, 94 /* ^ */, 14,0,  TRUE}     //  7 
     , {doDollar, 36 /* $ */, 14,0,  TRUE}     //  8 
-    , {doNOP, 92 /* \ */, 80,0,  TRUE}     //  9 
+    , {doNOP, 92 /* \ */, 82,0,  TRUE}     //  9 
     , {doOrOperator, 124 /* | */, 2,0,  TRUE}     //  10 
     , {doCloseParen, 41 /* ) */, 255,0,  TRUE}     //  11 
     , {doPatFinish, 253, 2,0,  FALSE}     //  12 
-    , {doRuleError, 255, 179,0,  FALSE}     //  13 
-    , {doNOP, 42 /* * */, 59,0,  TRUE}     //  14      expr-quant
-    , {doNOP, 43 /* + */, 62,0,  TRUE}     //  15 
-    , {doNOP, 63 /* ? */, 65,0,  TRUE}     //  16 
-    , {doIntervalInit, 123 /* { */, 68,0,  TRUE}     //  17 
+    , {doRuleError, 255, 181,0,  FALSE}     //  13 
+    , {doNOP, 42 /* * */, 61,0,  TRUE}     //  14      expr-quant
+    , {doNOP, 43 /* + */, 64,0,  TRUE}     //  15 
+    , {doNOP, 63 /* ? */, 67,0,  TRUE}     //  16 
+    , {doIntervalInit, 123 /* { */, 70,0,  TRUE}     //  17 
     , {doNOP, 40 /* ( */, 23,0,  TRUE}     //  18 
     , {doNOP, 255, 20,0,  FALSE}     //  19 
     , {doOrOperator, 124 /* | */, 2,0,  TRUE}     //  20      expr-cont
@@ -154,7 +154,7 @@
     , {doNOP, 255, 2,0,  FALSE}     //  22 
     , {doSuppressComments, 63 /* ? */, 25,0,  TRUE}     //  23      open-paren-quant
     , {doNOP, 255, 27,0,  FALSE}     //  24 
-    , {doNOP, 35 /* # */, 47, 14, TRUE}     //  25      open-paren-quant2
+    , {doNOP, 35 /* # */, 48, 14, TRUE}     //  25      open-paren-quant2
     , {doNOP, 255, 29,0,  FALSE}     //  26 
     , {doSuppressComments, 63 /* ? */, 29,0,  TRUE}     //  27      open-paren
     , {doOpenCaptureParen, 255, 2, 14, FALSE}     //  28 
@@ -162,153 +162,155 @@
     , {doOpenAtomicParen, 62 /* > */, 2, 14, TRUE}     //  30 
     , {doOpenLookAhead, 61 /* = */, 2, 20, TRUE}     //  31 
     , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE}     //  32 
-    , {doNOP, 60 /* < */, 44,0,  TRUE}     //  33 
-    , {doNOP, 35 /* # */, 47, 2, TRUE}     //  34 
-    , {doBeginMatchMode, 105 /* i */, 50,0,  FALSE}     //  35 
-    , {doBeginMatchMode, 109 /* m */, 50,0,  FALSE}     //  36 
-    , {doBeginMatchMode, 115 /* s */, 50,0,  FALSE}     //  37 
-    , {doBeginMatchMode, 119 /* w */, 50,0,  FALSE}     //  38 
-    , {doBeginMatchMode, 120 /* x */, 50,0,  FALSE}     //  39 
-    , {doBeginMatchMode, 45 /* - */, 50,0,  FALSE}     //  40 
-    , {doConditionalExpr, 40 /* ( */, 179,0,  TRUE}     //  41 
-    , {doPerlInline, 123 /* { */, 179,0,  TRUE}     //  42 
-    , {doBadOpenParenType, 255, 179,0,  FALSE}     //  43 
-    , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE}     //  44      open-paren-lookbehind
-    , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE}     //  45 
-    , {doBadOpenParenType, 255, 179,0,  FALSE}     //  46 
-    , {doNOP, 41 /* ) */, 255,0,  TRUE}     //  47      paren-comment
-    , {doMismatchedParenErr, 253, 179,0,  FALSE}     //  48 
-    , {doNOP, 255, 47,0,  TRUE}     //  49 
-    , {doMatchMode, 105 /* i */, 50,0,  TRUE}     //  50      paren-flag
-    , {doMatchMode, 109 /* m */, 50,0,  TRUE}     //  51 
-    , {doMatchMode, 115 /* s */, 50,0,  TRUE}     //  52 
-    , {doMatchMode, 119 /* w */, 50,0,  TRUE}     //  53 
-    , {doMatchMode, 120 /* x */, 50,0,  TRUE}     //  54 
-    , {doMatchMode, 45 /* - */, 50,0,  TRUE}     //  55 
-    , {doSetMatchMode, 41 /* ) */, 2,0,  TRUE}     //  56 
-    , {doMatchModeParen, 58 /* : */, 2, 14, TRUE}     //  57 
-    , {doBadModeFlag, 255, 179,0,  FALSE}     //  58 
-    , {doNGStar, 63 /* ? */, 20,0,  TRUE}     //  59      quant-star
-    , {doPossessiveStar, 43 /* + */, 20,0,  TRUE}     //  60 
-    , {doStar, 255, 20,0,  FALSE}     //  61 
-    , {doNGPlus, 63 /* ? */, 20,0,  TRUE}     //  62      quant-plus
-    , {doPossessivePlus, 43 /* + */, 20,0,  TRUE}     //  63 
-    , {doPlus, 255, 20,0,  FALSE}     //  64 
-    , {doNGOpt, 63 /* ? */, 20,0,  TRUE}     //  65      quant-opt
-    , {doPossessiveOpt, 43 /* + */, 20,0,  TRUE}     //  66 
-    , {doOpt, 255, 20,0,  FALSE}     //  67 
-    , {doNOP, 128, 70,0,  FALSE}     //  68      interval-open
-    , {doIntervalError, 255, 179,0,  FALSE}     //  69 
-    , {doIntevalLowerDigit, 128, 70,0,  TRUE}     //  70      interval-lower
-    , {doNOP, 44 /* , */, 74,0,  TRUE}     //  71 
-    , {doIntervalSame, 125 /* } */, 77,0,  TRUE}     //  72 
-    , {doIntervalError, 255, 179,0,  FALSE}     //  73 
-    , {doIntervalUpperDigit, 128, 74,0,  TRUE}     //  74      interval-upper
-    , {doNOP, 125 /* } */, 77,0,  TRUE}     //  75 
-    , {doIntervalError, 255, 179,0,  FALSE}     //  76 
-    , {doNGInterval, 63 /* ? */, 20,0,  TRUE}     //  77      interval-type
-    , {doPossessiveInterval, 43 /* + */, 20,0,  TRUE}     //  78 
-    , {doInterval, 255, 20,0,  FALSE}     //  79 
-    , {doBackslashA, 65 /* A */, 2,0,  TRUE}     //  80      backslash
-    , {doBackslashB, 66 /* B */, 2,0,  TRUE}     //  81 
-    , {doBackslashb, 98 /* b */, 2,0,  TRUE}     //  82 
-    , {doBackslashd, 100 /* d */, 14,0,  TRUE}     //  83 
-    , {doBackslashD, 68 /* D */, 14,0,  TRUE}     //  84 
-    , {doBackslashG, 71 /* G */, 2,0,  TRUE}     //  85 
-    , {doNamedChar, 78 /* N */, 14,0,  FALSE}     //  86 
-    , {doProperty, 112 /* p */, 14,0,  FALSE}     //  87 
-    , {doProperty, 80 /* P */, 14,0,  FALSE}     //  88 
-    , {doEnterQuoteMode, 81 /* Q */, 2,0,  TRUE}     //  89 
-    , {doBackslashS, 83 /* S */, 14,0,  TRUE}     //  90 
-    , {doBackslashs, 115 /* s */, 14,0,  TRUE}     //  91 
-    , {doBackslashW, 87 /* W */, 14,0,  TRUE}     //  92 
-    , {doBackslashw, 119 /* w */, 14,0,  TRUE}     //  93 
-    , {doBackslashX, 88 /* X */, 14,0,  TRUE}     //  94 
-    , {doBackslashZ, 90 /* Z */, 2,0,  TRUE}     //  95 
-    , {doBackslashz, 122 /* z */, 2,0,  TRUE}     //  96 
-    , {doBackRef, 128, 14,0,  TRUE}     //  97 
-    , {doEscapeError, 253, 179,0,  FALSE}     //  98 
-    , {doEscapedLiteralChar, 255, 14,0,  TRUE}     //  99 
-    , {doSetNegate, 94 /* ^ */, 103,0,  TRUE}     //  100      set-open
-    , {doSetPosixProp, 58 /* : */, 105,0,  FALSE}     //  101 
-    , {doNOP, 255, 103,0,  FALSE}     //  102 
-    , {doSetLiteral, 93 /* ] */, 118,0,  TRUE}     //  103      set-open2
-    , {doNOP, 255, 108,0,  FALSE}     //  104 
-    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  105      set-posix
-    , {doNOP, 58 /* : */, 108,0,  FALSE}     //  106 
-    , {doRuleError, 255, 179,0,  FALSE}     //  107 
-    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  108      set-start
-    , {doSetBeginUnion, 91 /* [ */, 100, 125, TRUE}     //  109 
-    , {doNOP, 92 /* \ */, 168,0,  TRUE}     //  110 
-    , {doNOP, 45 /* - */, 114,0,  TRUE}     //  111 
-    , {doNOP, 38 /* & */, 116,0,  TRUE}     //  112 
-    , {doSetLiteral, 255, 118,0,  TRUE}     //  113 
-    , {doRuleError, 45 /* - */, 179,0,  FALSE}     //  114      set-start-dash
-    , {doSetAddDash, 255, 118,0,  FALSE}     //  115 
-    , {doRuleError, 38 /* & */, 179,0,  FALSE}     //  116      set-start-amp
-    , {doSetAddAmp, 255, 118,0,  FALSE}     //  117 
-    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  118      set-after-lit
-    , {doSetBeginUnion, 91 /* [ */, 100, 125, TRUE}     //  119 
-    , {doNOP, 45 /* - */, 155,0,  TRUE}     //  120 
-    , {doNOP, 38 /* & */, 146,0,  TRUE}     //  121 
-    , {doNOP, 92 /* \ */, 168,0,  TRUE}     //  122 
-    , {doSetNoCloseError, 253, 179,0,  FALSE}     //  123 
-    , {doSetLiteral, 255, 118,0,  TRUE}     //  124 
-    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  125      set-after-set
-    , {doSetBeginUnion, 91 /* [ */, 100, 125, TRUE}     //  126 
-    , {doNOP, 45 /* - */, 148,0,  TRUE}     //  127 
-    , {doNOP, 38 /* & */, 143,0,  TRUE}     //  128 
-    , {doNOP, 92 /* \ */, 168,0,  TRUE}     //  129 
-    , {doSetNoCloseError, 253, 179,0,  FALSE}     //  130 
-    , {doSetLiteral, 255, 118,0,  TRUE}     //  131 
-    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  132      set-after-range
-    , {doSetBeginUnion, 91 /* [ */, 100, 125, TRUE}     //  133 
-    , {doNOP, 45 /* - */, 151,0,  TRUE}     //  134 
-    , {doNOP, 38 /* & */, 153,0,  TRUE}     //  135 
-    , {doNOP, 92 /* \ */, 168,0,  TRUE}     //  136 
-    , {doSetNoCloseError, 253, 179,0,  FALSE}     //  137 
-    , {doSetLiteral, 255, 118,0,  TRUE}     //  138 
-    , {doSetBeginUnion, 91 /* [ */, 100, 125, TRUE}     //  139      set-after-op
-    , {doSetOpError, 93 /* ] */, 179,0,  FALSE}     //  140 
-    , {doNOP, 92 /* \ */, 168,0,  TRUE}     //  141 
-    , {doSetLiteral, 255, 118,0,  TRUE}     //  142 
-    , {doSetBeginIntersection1, 91 /* [ */, 100, 125, TRUE}     //  143      set-set-amp
-    , {doSetIntersection2, 38 /* & */, 139,0,  TRUE}     //  144 
-    , {doSetAddAmp, 255, 118,0,  FALSE}     //  145 
-    , {doSetIntersection2, 38 /* & */, 139,0,  TRUE}     //  146      set-lit-amp
-    , {doSetAddAmp, 255, 118,0,  FALSE}     //  147 
-    , {doSetBeginDifference1, 91 /* [ */, 100, 125, TRUE}     //  148      set-set-dash
-    , {doSetDifference2, 45 /* - */, 139,0,  TRUE}     //  149 
-    , {doSetAddDash, 255, 118,0,  FALSE}     //  150 
-    , {doSetDifference2, 45 /* - */, 139,0,  TRUE}     //  151      set-range-dash
-    , {doSetAddDash, 255, 118,0,  FALSE}     //  152 
-    , {doSetIntersection2, 38 /* & */, 139,0,  TRUE}     //  153      set-range-amp
-    , {doSetAddAmp, 255, 118,0,  FALSE}     //  154 
-    , {doSetDifference2, 45 /* - */, 139,0,  TRUE}     //  155      set-lit-dash
-    , {doSetAddDash, 91 /* [ */, 118,0,  FALSE}     //  156 
-    , {doSetAddDash, 93 /* ] */, 118,0,  FALSE}     //  157 
-    , {doNOP, 92 /* \ */, 160,0,  TRUE}     //  158 
-    , {doSetRange, 255, 132,0,  TRUE}     //  159 
-    , {doSetAddDash, 115 /* s */, 168,0,  FALSE}     //  160      set-lit-dash-escape
-    , {doSetAddDash, 83 /* S */, 168,0,  FALSE}     //  161 
-    , {doSetAddDash, 119 /* w */, 168,0,  FALSE}     //  162 
-    , {doSetAddDash, 87 /* W */, 168,0,  FALSE}     //  163 
-    , {doSetAddDash, 100 /* d */, 168,0,  FALSE}     //  164 
-    , {doSetAddDash, 68 /* D */, 168,0,  FALSE}     //  165 
-    , {doSetNamedRange, 78 /* N */, 132,0,  FALSE}     //  166 
-    , {doSetRange, 255, 132,0,  TRUE}     //  167 
-    , {doSetProp, 112 /* p */, 125,0,  FALSE}     //  168      set-escape
-    , {doSetProp, 80 /* P */, 125,0,  FALSE}     //  169 
-    , {doSetNamedChar, 78 /* N */, 118,0,  FALSE}     //  170 
-    , {doSetBackslash_s, 115 /* s */, 132,0,  TRUE}     //  171 
-    , {doSetBackslash_S, 83 /* S */, 132,0,  TRUE}     //  172 
-    , {doSetBackslash_w, 119 /* w */, 132,0,  TRUE}     //  173 
-    , {doSetBackslash_W, 87 /* W */, 132,0,  TRUE}     //  174 
-    , {doSetBackslash_d, 100 /* d */, 132,0,  TRUE}     //  175 
-    , {doSetBackslash_D, 68 /* D */, 132,0,  TRUE}     //  176 
-    , {doSetLiteralEscaped, 255, 118,0,  TRUE}     //  177 
-    , {doSetFinish, 255, 14,0,  FALSE}     //  178      set-finish
-    , {doExit, 255, 179,0,  TRUE}     //  179      errorDeath
+    , {doNOP, 60 /* < */, 45,0,  TRUE}     //  33 
+    , {doNOP, 35 /* # */, 48, 2, TRUE}     //  34 
+    , {doBeginMatchMode, 105 /* i */, 51,0,  FALSE}     //  35 
+    , {doBeginMatchMode, 100 /* d */, 51,0,  FALSE}     //  36 
+    , {doBeginMatchMode, 109 /* m */, 51,0,  FALSE}     //  37 
+    , {doBeginMatchMode, 115 /* s */, 51,0,  FALSE}     //  38 
+    , {doBeginMatchMode, 119 /* w */, 51,0,  FALSE}     //  39 
+    , {doBeginMatchMode, 120 /* x */, 51,0,  FALSE}     //  40 
+    , {doBeginMatchMode, 45 /* - */, 51,0,  FALSE}     //  41 
+    , {doConditionalExpr, 40 /* ( */, 181,0,  TRUE}     //  42 
+    , {doPerlInline, 123 /* { */, 181,0,  TRUE}     //  43 
+    , {doBadOpenParenType, 255, 181,0,  FALSE}     //  44 
+    , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE}     //  45      open-paren-lookbehind
+    , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE}     //  46 
+    , {doBadOpenParenType, 255, 181,0,  FALSE}     //  47 
+    , {doNOP, 41 /* ) */, 255,0,  TRUE}     //  48      paren-comment
+    , {doMismatchedParenErr, 253, 181,0,  FALSE}     //  49 
+    , {doNOP, 255, 48,0,  TRUE}     //  50 
+    , {doMatchMode, 105 /* i */, 51,0,  TRUE}     //  51      paren-flag
+    , {doMatchMode, 100 /* d */, 51,0,  TRUE}     //  52 
+    , {doMatchMode, 109 /* m */, 51,0,  TRUE}     //  53 
+    , {doMatchMode, 115 /* s */, 51,0,  TRUE}     //  54 
+    , {doMatchMode, 119 /* w */, 51,0,  TRUE}     //  55 
+    , {doMatchMode, 120 /* x */, 51,0,  TRUE}     //  56 
+    , {doMatchMode, 45 /* - */, 51,0,  TRUE}     //  57 
+    , {doSetMatchMode, 41 /* ) */, 2,0,  TRUE}     //  58 
+    , {doMatchModeParen, 58 /* : */, 2, 14, TRUE}     //  59 
+    , {doBadModeFlag, 255, 181,0,  FALSE}     //  60 
+    , {doNGStar, 63 /* ? */, 20,0,  TRUE}     //  61      quant-star
+    , {doPossessiveStar, 43 /* + */, 20,0,  TRUE}     //  62 
+    , {doStar, 255, 20,0,  FALSE}     //  63 
+    , {doNGPlus, 63 /* ? */, 20,0,  TRUE}     //  64      quant-plus
+    , {doPossessivePlus, 43 /* + */, 20,0,  TRUE}     //  65 
+    , {doPlus, 255, 20,0,  FALSE}     //  66 
+    , {doNGOpt, 63 /* ? */, 20,0,  TRUE}     //  67      quant-opt
+    , {doPossessiveOpt, 43 /* + */, 20,0,  TRUE}     //  68 
+    , {doOpt, 255, 20,0,  FALSE}     //  69 
+    , {doNOP, 128, 72,0,  FALSE}     //  70      interval-open
+    , {doIntervalError, 255, 181,0,  FALSE}     //  71 
+    , {doIntevalLowerDigit, 128, 72,0,  TRUE}     //  72      interval-lower
+    , {doNOP, 44 /* , */, 76,0,  TRUE}     //  73 
+    , {doIntervalSame, 125 /* } */, 79,0,  TRUE}     //  74 
+    , {doIntervalError, 255, 181,0,  FALSE}     //  75 
+    , {doIntervalUpperDigit, 128, 76,0,  TRUE}     //  76      interval-upper
+    , {doNOP, 125 /* } */, 79,0,  TRUE}     //  77 
+    , {doIntervalError, 255, 181,0,  FALSE}     //  78 
+    , {doNGInterval, 63 /* ? */, 20,0,  TRUE}     //  79      interval-type
+    , {doPossessiveInterval, 43 /* + */, 20,0,  TRUE}     //  80 
+    , {doInterval, 255, 20,0,  FALSE}     //  81 
+    , {doBackslashA, 65 /* A */, 2,0,  TRUE}     //  82      backslash
+    , {doBackslashB, 66 /* B */, 2,0,  TRUE}     //  83 
+    , {doBackslashb, 98 /* b */, 2,0,  TRUE}     //  84 
+    , {doBackslashd, 100 /* d */, 14,0,  TRUE}     //  85 
+    , {doBackslashD, 68 /* D */, 14,0,  TRUE}     //  86 
+    , {doBackslashG, 71 /* G */, 2,0,  TRUE}     //  87 
+    , {doNamedChar, 78 /* N */, 14,0,  FALSE}     //  88 
+    , {doProperty, 112 /* p */, 14,0,  FALSE}     //  89 
+    , {doProperty, 80 /* P */, 14,0,  FALSE}     //  90 
+    , {doEnterQuoteMode, 81 /* Q */, 2,0,  TRUE}     //  91 
+    , {doBackslashS, 83 /* S */, 14,0,  TRUE}     //  92 
+    , {doBackslashs, 115 /* s */, 14,0,  TRUE}     //  93 
+    , {doBackslashW, 87 /* W */, 14,0,  TRUE}     //  94 
+    , {doBackslashw, 119 /* w */, 14,0,  TRUE}     //  95 
+    , {doBackslashX, 88 /* X */, 14,0,  TRUE}     //  96 
+    , {doBackslashZ, 90 /* Z */, 2,0,  TRUE}     //  97 
+    , {doBackslashz, 122 /* z */, 2,0,  TRUE}     //  98 
+    , {doBackRef, 128, 14,0,  TRUE}     //  99 
+    , {doEscapeError, 253, 181,0,  FALSE}     //  100 
+    , {doEscapedLiteralChar, 255, 14,0,  TRUE}     //  101 
+    , {doSetNegate, 94 /* ^ */, 105,0,  TRUE}     //  102      set-open
+    , {doSetPosixProp, 58 /* : */, 107,0,  FALSE}     //  103 
+    , {doNOP, 255, 105,0,  FALSE}     //  104 
+    , {doSetLiteral, 93 /* ] */, 120,0,  TRUE}     //  105      set-open2
+    , {doNOP, 255, 110,0,  FALSE}     //  106 
+    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  107      set-posix
+    , {doNOP, 58 /* : */, 110,0,  FALSE}     //  108 
+    , {doRuleError, 255, 181,0,  FALSE}     //  109 
+    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  110      set-start
+    , {doSetBeginUnion, 91 /* [ */, 102, 127, TRUE}     //  111 
+    , {doNOP, 92 /* \ */, 170,0,  TRUE}     //  112 
+    , {doNOP, 45 /* - */, 116,0,  TRUE}     //  113 
+    , {doNOP, 38 /* & */, 118,0,  TRUE}     //  114 
+    , {doSetLiteral, 255, 120,0,  TRUE}     //  115 
+    , {doRuleError, 45 /* - */, 181,0,  FALSE}     //  116      set-start-dash
+    , {doSetAddDash, 255, 120,0,  FALSE}     //  117 
+    , {doRuleError, 38 /* & */, 181,0,  FALSE}     //  118      set-start-amp
+    , {doSetAddAmp, 255, 120,0,  FALSE}     //  119 
+    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  120      set-after-lit
+    , {doSetBeginUnion, 91 /* [ */, 102, 127, TRUE}     //  121 
+    , {doNOP, 45 /* - */, 157,0,  TRUE}     //  122 
+    , {doNOP, 38 /* & */, 148,0,  TRUE}     //  123 
+    , {doNOP, 92 /* \ */, 170,0,  TRUE}     //  124 
+    , {doSetNoCloseError, 253, 181,0,  FALSE}     //  125 
+    , {doSetLiteral, 255, 120,0,  TRUE}     //  126 
+    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  127      set-after-set
+    , {doSetBeginUnion, 91 /* [ */, 102, 127, TRUE}     //  128 
+    , {doNOP, 45 /* - */, 150,0,  TRUE}     //  129 
+    , {doNOP, 38 /* & */, 145,0,  TRUE}     //  130 
+    , {doNOP, 92 /* \ */, 170,0,  TRUE}     //  131 
+    , {doSetNoCloseError, 253, 181,0,  FALSE}     //  132 
+    , {doSetLiteral, 255, 120,0,  TRUE}     //  133 
+    , {doSetEnd, 93 /* ] */, 255,0,  TRUE}     //  134      set-after-range
+    , {doSetBeginUnion, 91 /* [ */, 102, 127, TRUE}     //  135 
+    , {doNOP, 45 /* - */, 153,0,  TRUE}     //  136 
+    , {doNOP, 38 /* & */, 155,0,  TRUE}     //  137 
+    , {doNOP, 92 /* \ */, 170,0,  TRUE}     //  138 
+    , {doSetNoCloseError, 253, 181,0,  FALSE}     //  139 
+    , {doSetLiteral, 255, 120,0,  TRUE}     //  140 
+    , {doSetBeginUnion, 91 /* [ */, 102, 127, TRUE}     //  141      set-after-op
+    , {doSetOpError, 93 /* ] */, 181,0,  FALSE}     //  142 
+    , {doNOP, 92 /* \ */, 170,0,  TRUE}     //  143 
+    , {doSetLiteral, 255, 120,0,  TRUE}     //  144 
+    , {doSetBeginIntersection1, 91 /* [ */, 102, 127, TRUE}     //  145      set-set-amp
+    , {doSetIntersection2, 38 /* & */, 141,0,  TRUE}     //  146 
+    , {doSetAddAmp, 255, 120,0,  FALSE}     //  147 
+    , {doSetIntersection2, 38 /* & */, 141,0,  TRUE}     //  148      set-lit-amp
+    , {doSetAddAmp, 255, 120,0,  FALSE}     //  149 
+    , {doSetBeginDifference1, 91 /* [ */, 102, 127, TRUE}     //  150      set-set-dash
+    , {doSetDifference2, 45 /* - */, 141,0,  TRUE}     //  151 
+    , {doSetAddDash, 255, 120,0,  FALSE}     //  152 
+    , {doSetDifference2, 45 /* - */, 141,0,  TRUE}     //  153      set-range-dash
+    , {doSetAddDash, 255, 120,0,  FALSE}     //  154 
+    , {doSetIntersection2, 38 /* & */, 141,0,  TRUE}     //  155      set-range-amp
+    , {doSetAddAmp, 255, 120,0,  FALSE}     //  156 
+    , {doSetDifference2, 45 /* - */, 141,0,  TRUE}     //  157      set-lit-dash
+    , {doSetAddDash, 91 /* [ */, 120,0,  FALSE}     //  158 
+    , {doSetAddDash, 93 /* ] */, 120,0,  FALSE}     //  159 
+    , {doNOP, 92 /* \ */, 162,0,  TRUE}     //  160 
+    , {doSetRange, 255, 134,0,  TRUE}     //  161 
+    , {doSetOpError, 115 /* s */, 181,0,  FALSE}     //  162      set-lit-dash-escape
+    , {doSetOpError, 83 /* S */, 181,0,  FALSE}     //  163 
+    , {doSetOpError, 119 /* w */, 181,0,  FALSE}     //  164 
+    , {doSetOpError, 87 /* W */, 181,0,  FALSE}     //  165 
+    , {doSetOpError, 100 /* d */, 181,0,  FALSE}     //  166 
+    , {doSetOpError, 68 /* D */, 181,0,  FALSE}     //  167 
+    , {doSetNamedRange, 78 /* N */, 134,0,  FALSE}     //  168 
+    , {doSetRange, 255, 134,0,  TRUE}     //  169 
+    , {doSetProp, 112 /* p */, 127,0,  FALSE}     //  170      set-escape
+    , {doSetProp, 80 /* P */, 127,0,  FALSE}     //  171 
+    , {doSetNamedChar, 78 /* N */, 120,0,  FALSE}     //  172 
+    , {doSetBackslash_s, 115 /* s */, 134,0,  TRUE}     //  173 
+    , {doSetBackslash_S, 83 /* S */, 134,0,  TRUE}     //  174 
+    , {doSetBackslash_w, 119 /* w */, 134,0,  TRUE}     //  175 
+    , {doSetBackslash_W, 87 /* W */, 134,0,  TRUE}     //  176 
+    , {doSetBackslash_d, 100 /* d */, 134,0,  TRUE}     //  177 
+    , {doSetBackslash_D, 68 /* D */, 134,0,  TRUE}     //  178 
+    , {doSetLiteralEscaped, 255, 120,0,  TRUE}     //  179 
+    , {doSetFinish, 255, 14,0,  FALSE}     //  180      set-finish
+    , {doExit, 255, 181,0,  TRUE}     //  181      errorDeath
  };
 static const char * const RegexStateNames[] = {    0,
      "start",
@@ -354,6 +356,7 @@
     0,
     0,
     0,
+    0,
      "open-paren-lookbehind",
     0,
     0,
@@ -369,6 +372,7 @@
     0,
     0,
     0,
+    0,
      "quant-star",
     0,
     0,

diff --git a/i18n/regexcst.txt b/i18n/regexcst.txt
index 888a0c4..304ac57 100644
--- a/i18n/regexcst.txt
+++ b/i18n/regexcst.txt

@@ -133,6 +133,7 @@
     '<'                  n  open-paren-lookbehind
     '#'                  n  paren-comment   ^term
     'i'                     paren-flag                              doBeginMatchMode
+    'd'                     paren-flag                              doBeginMatchMode
     'm'                     paren-flag                              doBeginMatchMode
     's'                     paren-flag                              doBeginMatchMode
     'w'                     paren-flag                              doBeginMatchMode
@@ -161,6 +162,7 @@
 #
 paren-flag:
     'i'                  n  paren-flag                              doMatchMode
+    'd'                  n  paren-flag                              doMatchMode
     'm'                  n  paren-flag                              doMatchMode
     's'                  n  paren-flag                              doMatchMode
     'w'                  n  paren-flag                              doMatchMode
@@ -398,7 +400,7 @@
 #  set-lit-dash
 #     Have scanned "[literals-" Could be a range or a -- operator or a literal
 #     In [abc-[def]], the '-' is a literal (confirmed with a Java test)
-#        [abc-\p{xx}  the '-' is a literal
+#        [abc-\p{xx}  the '-' is an error
 #        [abc-]       the '-' is a literal
 #        [ab-xy]      the '-' is a range
 #
@@ -416,12 +418,12 @@
 #    Could be a literal '-', if the '\' introduces a set-like construct e.g. \s or \p{...}
 #
 set-lit-dash-escape:
-   's'                      set-escape                             doSetAddDash
-   'S'                      set-escape                             doSetAddDash
-   'w'                      set-escape                             doSetAddDash
-   'W'                      set-escape                             doSetAddDash
-   'd'                      set-escape                             doSetAddDash
-   'D'                      set-escape                             doSetAddDash
+   's'                      errorDeath                             doSetOpError
+   'S'                      errorDeath                             doSetOpError
+   'w'                      errorDeath                             doSetOpError
+   'W'                      errorDeath                             doSetOpError
+   'd'                      errorDeath                             doSetOpError
+   'D'                      errorDeath                             doSetOpError
    'N'                      set-after-range                        doSetNamedRange
    default               n  set-after-range                        doSetRange
 

diff --git a/i18n/regeximp.h b/i18n/regeximp.h
index 6944c08..225822d 100644
--- a/i18n/regeximp.h
+++ b/i18n/regeximp.h

@@ -1,5 +1,5 @@
 //
-//   Copyright (C) 2002-2005 International Business Machines Corporation
+//   Copyright (C) 2002-2007 International Business Machines Corporation
 //   and others. All rights reserved.
 //
 //   file:  regeximp.h
@@ -57,7 +57,7 @@
 enum {
      URX_RESERVED_OP   = 0,    // For multi-operand ops, most non-first words.
      URX_RESERVED_OP_N = 255,  // For multi-operand ops, negative operand values.
-     URX_BACKTRACK     = 1,
+     URX_BACKTRACK     = 1,    // Force a backtrack, as if a match test had failed.
      URX_END           = 2,
      URX_ONECHAR       = 3,    // Value field is the 21 bit unicode char to match
      URX_STRING        = 4,    // Value field is index of string start
@@ -96,13 +96,14 @@
                                //    3rd   Operand:  Minimum count.
                                //    4th   Operand:  Max count, -1 for unbounded.
 
-     URX_DOTANY_PL     = 27,   // .+, match rest of the line.  Fail already at end.
+     URX_DOTANY_UNIX   = 27,   // '.' operator in UNIX_LINES mode, only \n marks end of line.
 
      URX_CTR_LOOP      = 28,   // Loop Ops for {interval} loops.
      URX_CTR_LOOP_NG   = 29,   //   Also in three flavors.
                                //   Operand is loc of corresponding CTR_INIT.
 
-     URX_DOTANY_ALL_PL = 30,   // .+, match rest of the Input.  Fail if already at end
+     URX_CARET_M_UNIX  = 30,   // '^' operator, test for start of line in multi-line
+                               //      plus UNIX_LINES mode.
 
      URX_RELOC_OPRND   = 31,   // Operand value in multi-operand ops that refers
                                //   back into compiled pattern code, and thus must
@@ -166,10 +167,16 @@
                                //   Must always immediately follow  LOOP_x_I instruction.
      URX_LOOP_DOT_I    = 52,   // .*, initialization of the optimized loop.
                                //   Operand value:
-                               //      0:  Normal (. doesn't match new-line) mode.
-                               //      1:  . matches new-line mode.
-     URX_BACKSLASH_BU  = 53    // \b or \B in UREGEX_UWORD mode, using Unicode style
+                               //      bit 0:
+                               //         0:  Normal (. doesn't match new-line) mode.
+                               //         1:  . matches new-line mode.
+                               //      bit 1:  controls what new-lines are recognized by this operation.
+                               //         0:  All Unicode New-lines
+                               //         1:  UNIX_LINES, \u000a only.
+     URX_BACKSLASH_BU  = 53,   // \b or \B in UREGEX_UWORD mode, using Unicode style
                                //   word boundaries.
+     URX_DOLLAR_D      = 54,   // $ end of input test, in UNIX_LINES mode.
+     URX_DOLLAR_MD     = 55    // $ end of input test, in MULTI_LINE and UNIX_LINES mode.
 
 };
 
@@ -203,10 +210,10 @@
         "DOLLAR",              \
         "CTR_INIT",            \
         "CTR_INIT_NG",         \
-        "DOTANY_PL",           \
+        "DOTANY_UNIX",         \
         "CTR_LOOP",            \
         "CTR_LOOP_NG",         \
-        "DOTANY_ALL_PL",       \
+        "URX_CARET_M_UNIX",    \
         "RELOC_OPRND",         \
         "STO_SP",              \
         "LD_SP",               \
@@ -229,7 +236,9 @@
         "LOOP_SR_I",           \
         "LOOP_C",              \
         "LOOP_DOT_I",          \
-        "BACKSLASH_BU"
+        "BACKSLASH_BU",        \
+        "DOLLAR_D",            \
+        "DOLLAR_MD"
 
 
 //

diff --git a/i18n/rematch.cpp b/i18n/rematch.cpp
index 2fe46a9..9439e8a 100644
--- a/i18n/rematch.cpp
+++ b/i18n/rematch.cpp

@@ -324,7 +324,7 @@
     //          Watch for interactions with replace operations when fixing.
     int32_t startPos = fMatchEnd;
     if (startPos==0) {
-        startPos = fRegionStart;
+        startPos = fActiveStart;
     }
 
     if (fMatch) {
@@ -334,8 +334,9 @@
         if (fMatchStart == fMatchEnd) {
             // Previous match had zero length.  Move start position up one position
             //  to avoid sending find() into a loop on zero-length matches.
-            if (startPos >= fRegionLimit) {
+            if (startPos >= fActiveLimit) {
                 fMatch = FALSE;
+                fHitEnd = TRUE;
                 return FALSE;
             }
             startPos = fInput->moveIndex32(startPos, 1);
@@ -345,6 +346,7 @@
             // A previous find() failed to match.  Don't try again.
             //   (without this test, a pattern with a zero-length match
             //    could match again at the end of an input string.)
+            fHitEnd = TRUE;
             return FALSE;
         }
     }
@@ -352,9 +354,12 @@
 
     // Compute the position in the input string beyond which a match can not begin, because
     //   the minimum length match would extend past the end of the input.
-    int32_t testLen  = fRegionLimit - fPattern->fMinMatchLen;
+    //   Note:  some patterns that cannot match anything will have fMinMatchLen==Max Int.
+    //          Be aware of possible overflows if making changes here.
+    int32_t testLen  = fActiveLimit - fPattern->fMinMatchLen;
     if (startPos > testLen) {
         fMatch = FALSE;
+        fHitEnd = TRUE;
         return FALSE;
     }
 
@@ -378,7 +383,7 @@
                 fHitEnd = TRUE;
                 return FALSE;
             }
-            U16_FWD_1(inputBuf, startPos, fRegionLimit);
+            U16_FWD_1(inputBuf, startPos, fActiveLimit);
             // Note that it's perfectly OK for a pattern to have a zero-length
             //   match at the end of a string, so we must make sure that the loop
             //   runs with startPos == testLen the last time through.
@@ -388,7 +393,7 @@
     case START_START:
         // Matches are only possible at the start of the input string
         //   (pattern begins with ^ or \A)
-        if (startPos > fRegionStart) {
+        if (startPos > fActiveStart) {
             fMatch = FALSE;
             return FALSE;
         }
@@ -406,7 +411,7 @@
             U_ASSERT(fPattern->fMinMatchLen > 0);
             for (;;) {
                 int32_t pos = startPos;
-                U16_NEXT(inputBuf, startPos, fRegionLimit, c);  // like c = inputBuf[startPos++];
+                U16_NEXT(inputBuf, startPos, fActiveLimit, c);  // like c = inputBuf[startPos++];
                 if (c<256 && fPattern->fInitialChars8->contains(c) ||
                     c>=256 && fPattern->fInitialChars->contains(c)) {
                     MatchAt(pos, FALSE, fDeferredStatus);
@@ -419,6 +424,7 @@
                 }
                 if (pos >= testLen) {
                     fMatch = FALSE;
+                    fHitEnd = TRUE;
                     return FALSE;
                 }
             }
@@ -433,7 +439,7 @@
             UChar32 theChar = fPattern->fInitialChar;
             for (;;) {
                 int32_t pos = startPos;
-                U16_NEXT(inputBuf, startPos, fRegionLimit, c);  // like c = inputBuf[startPos++];
+                U16_NEXT(inputBuf, startPos, fActiveLimit, c);  // like c = inputBuf[startPos++];
                 if (c == theChar) {
                     MatchAt(pos, FALSE, fDeferredStatus);
                     if (U_FAILURE(fDeferredStatus)) {
@@ -445,6 +451,7 @@
                 }
                 if (pos >= testLen) {
                     fMatch = FALSE;
+                    fHitEnd = TRUE;
                     return FALSE;
                 }
             }
@@ -454,7 +461,7 @@
     case START_LINE:
         {
             UChar32  c;
-            if (startPos == 0) {
+            if (startPos == fAnchorStart) {
                 MatchAt(startPos, FALSE, fDeferredStatus);
                 if (U_FAILURE(fDeferredStatus)) {
                     return FALSE;
@@ -462,32 +469,57 @@
                 if (fMatch) {
                     return TRUE;
                 }
-                U16_NEXT(inputBuf, startPos, fRegionLimit, c);  // like c = inputBuf[startPos++];
+                U16_NEXT(inputBuf, startPos, fActiveLimit, c);  // like c = inputBuf[startPos++];
             }
 
-            for (;;) {
-                c = inputBuf[startPos-1];
-                if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
-                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029 )) {
-                        if (c == 0x0d && startPos < fRegionLimit && inputBuf[startPos] == 0x0a) {
-                            startPos++;
-                        }
-                        MatchAt(startPos, FALSE, fDeferredStatus);
-                        if (U_FAILURE(fDeferredStatus)) {
-                            return FALSE;
-                        }
-                        if (fMatch) {
-                            return TRUE;
-                        }
+            if (fPattern->fFlags & UREGEX_UNIX_LINES) {
+               for (;;) {
+                    c = inputBuf[startPos-1];
+                    if (c == 0x0a) {
+                            MatchAt(startPos, FALSE, fDeferredStatus);
+                            if (U_FAILURE(fDeferredStatus)) {
+                                return FALSE;
+                            }
+                            if (fMatch) {
+                                return TRUE;
+                            }
+                    }
+                    if (startPos >= testLen) {
+                        fMatch = FALSE;
+                        fHitEnd = TRUE;
+                        return FALSE;
+                    }
+                    U16_NEXT(inputBuf, startPos, fActiveLimit, c);  // like c = inputBuf[startPos++];
+                    // Note that it's perfectly OK for a pattern to have a zero-length
+                    //   match at the end of a string, so we must make sure that the loop
+                    //   runs with startPos == testLen the last time through.
                 }
-                if (startPos >= testLen) {
-                    fMatch = FALSE;
-                    return FALSE;
+            } else {
+                for (;;) {
+                    c = inputBuf[startPos-1];
+                    if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
+                        ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029 )) {
+                            if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) {
+                                startPos++;
+                            }
+                            MatchAt(startPos, FALSE, fDeferredStatus);
+                            if (U_FAILURE(fDeferredStatus)) {
+                                return FALSE;
+                            }
+                            if (fMatch) {
+                                return TRUE;
+                            }
+                    }
+                    if (startPos >= testLen) {
+                        fMatch = FALSE;
+                        fHitEnd = TRUE;
+                        return FALSE;
+                    }
+                    U16_NEXT(inputBuf, startPos, fActiveLimit, c);  // like c = inputBuf[startPos++];
+                    // Note that it's perfectly OK for a pattern to have a zero-length
+                    //   match at the end of a string, so we must make sure that the loop
+                    //   runs with startPos == testLen the last time through.
                 }
-                U16_NEXT(inputBuf, startPos, fRegionLimit, c);  // like c = inputBuf[startPos++];
-                // Note that it's perfectly OK for a pattern to have a zero-length
-                //   match at the end of a string, so we must make sure that the loop
-                //   runs with startPos == testLen the last time through.
             }
         }
 
@@ -511,7 +543,7 @@
     }
     this->reset();                        // Note:  Reset() is specified by Java Matcher documentation.
                                           //        This will reset the region to be the full input length.
-    if (start < fRegionStart || start > fRegionLimit) {
+    if (start < fActiveStart || start > fActiveLimit) {
         status = U_INDEX_OUTOFBOUNDS_ERROR;
         return FALSE;
     }
@@ -613,7 +645,7 @@
         return FALSE;
     }
     resetPreserveRegion();
-    MatchAt(fRegionStart, FALSE, status);
+    MatchAt(fActiveStart, FALSE, status);
     return fMatch;
 }
 
@@ -627,7 +659,7 @@
         return FALSE;
     }
     reset();
-    if (start < fRegionStart || start > fRegionLimit) {
+    if (start < fActiveStart || start > fActiveLimit) {
         status = U_INDEX_OUTOFBOUNDS_ERROR;
         return FALSE;
     }
@@ -651,7 +683,7 @@
         return FALSE;
     }
     resetPreserveRegion();
-    MatchAt(fRegionStart, TRUE, status);
+    MatchAt(fActiveStart, TRUE, status);
     return fMatch;
 }
 
@@ -665,7 +697,7 @@
         return FALSE;
     }
     reset();
-    if (start < fRegionStart || start > fRegionLimit) {
+    if (start < fActiveStart || start > fActiveLimit) {
         status = U_INDEX_OUTOFBOUNDS_ERROR;
         return FALSE;
     }
@@ -701,6 +733,8 @@
     this->reset();
     fRegionStart = start;
     fRegionLimit = limit;
+    fActiveStart = start;
+    fActiveLimit = limit;
     if (!fTransparentBounds) {
         fLookStart = start;
         fLookLimit = limit;
@@ -805,6 +839,8 @@
 RegexMatcher &RegexMatcher::reset() {
     fRegionStart    = 0;
     fRegionLimit    = fInput->length();
+    fActiveStart    = 0;
+    fActiveLimit    = fRegionLimit;
     fAnchorStart    = 0;
     fAnchorLimit    = fRegionLimit;
     fLookStart      = 0;
@@ -849,7 +885,7 @@
         return *this;
     }
     reset();       // Reset also resets the region to be the entire string.
-    if (position < 0 || position >= fRegionLimit) {
+    if (position < 0 || position >= fActiveLimit) {
         status = U_INDEX_OUTOFBOUNDS_ERROR;
         return *this;
     }
@@ -899,7 +935,7 @@
     //
     reset(input);
     int32_t   nextOutputStringStart = 0;
-    if (fRegionLimit == 0) {
+    if (fActiveLimit == 0) {
         return 0;
     }
 
@@ -917,7 +953,7 @@
             //    last capture group saved in favor of the unprocessed remainder of the
             //    input string.)
             i = destCapacity-1;
-            int32_t remainingLength = fRegionLimit-nextOutputStringStart;
+            int32_t remainingLength = fActiveLimit-nextOutputStringStart;
             if (remainingLength > 0) {
                 dest[i].setTo(input, nextOutputStringStart, remainingLength);
             }
@@ -941,7 +977,7 @@
                 dest[i] = group(groupNum, status);
             }
 
-            if (nextOutputStringStart == fRegionLimit) {
+            if (nextOutputStringStart == fActiveLimit) {
                 // The delimiter was at the end of the string.  We're done.
                 break;
             }
@@ -951,7 +987,7 @@
         {
             // We ran off the end of the input while looking for the next delimiter.
             // All the remaining text goes into the current output string.
-            dest[i].setTo(input, nextOutputStringStart, fRegionLimit-nextOutputStringStart);
+            dest[i].setTo(input, nextOutputStringStart, fActiveLimit-nextOutputStringStart);
             break;
         }
     }
@@ -1290,9 +1326,9 @@
 
 
         case URX_ONECHAR:
-            if (fp->fInputIdx < fRegionLimit) {
+            if (fp->fInputIdx < fActiveLimit) {
                 UChar32   c;
-                U16_NEXT(inputBuf, fp->fInputIdx, fRegionLimit, c);
+                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
                 if (c == opValue) {
                     break;
                 }
@@ -1318,7 +1354,7 @@
                 U_ASSERT(opType == URX_STRING_LEN);
                 U_ASSERT(stringLen >= 2);
 
-                if (fp->fInputIdx + stringLen > fRegionLimit) {
+                if (fp->fInputIdx + stringLen > fActiveLimit) {
                     // No match.  String is longer than the remaining input text.
                     //   TODO:  Should fHitEnd only be set if the string matches for whatever amount
                     //          of input is actually available?  Probably, although one could argue
@@ -1360,7 +1396,7 @@
         case URX_END:
             // The match loop will exit via this path on a successful match,
             //   when we reach the end of the pattern.
-            if (toEnd && fp->fInputIdx != fRegionLimit) {
+            if (toEnd && fp->fInputIdx != fActiveLimit) {
                 // The pattern matched, but not to the end of input.  Try some more.
                 fp = (REStackFrame *)fStack->popFrame(frameSize);
                 break;
@@ -1392,6 +1428,7 @@
                                            //     or for position before new line at end of input
             if (fp->fInputIdx < fAnchorLimit-2) {
                 // We are no where near the end of input.  Fail.
+                //   This is the common case.  Keep it first.
                 fp = (REStackFrame *)fStack->popFrame(frameSize);
                 break;
             }
@@ -1405,23 +1442,22 @@
             //   end of input, succeed.
             if (fp->fInputIdx == fAnchorLimit-1) {
                 UChar32 c = fInput->char32At(fp->fInputIdx);
-                if ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029) {
+                if ((c>=0x0a && c<=0x0d) || c==0x85 || c==0x2028 || c==0x2029) {
                     // If not in the middle of a CR/LF sequence
                     if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp->fInputIdx-1]==0x0d)) {
-                        break;
+                        // At new-line at end of input. Success
                         fHitEnd = TRUE;
                         fRequireEnd = TRUE;
-                        // At new-line at end of input. Success
+                        break;
                     }
                 }
             }
 
-            if (fp->fInputIdx == fAnchorLimit-2) {
-                if (fInput->char32At(fp->fInputIdx) == 0x0d && fInput->char32At(fp->fInputIdx+1) == 0x0a) {
+            if (fp->fInputIdx == fAnchorLimit-2 &&
+                 fInput->char32At(fp->fInputIdx) == 0x0d && fInput->char32At(fp->fInputIdx+1) == 0x0a) {
                     fHitEnd = TRUE;
                     fRequireEnd = TRUE;
                     break;                         // At CR/LF at end of input.  Success
-                }
             }
 
             fp = (REStackFrame *)fStack->popFrame(frameSize);
@@ -1429,6 +1465,29 @@
             break;
 
 
+         case URX_DOLLAR_D:                   //  $, test for End of Line, in UNIX_LINES mode (only 0x0a is tested).
+            if (fp->fInputIdx >= fAnchorLimit-1) {
+                // Either at the last character before the anchoring limit, or at/past the limit.
+                if (fp->fInputIdx == fAnchorLimit-1) {
+                    // At last char of input.  Success only if it is a new line (0x0a).
+                    if (fInput->char32At(fp->fInputIdx) == 0x0a) {
+                        fHitEnd = TRUE;
+                        fRequireEnd = TRUE;
+                        break;
+                    }
+                } else {
+                    // At or beyond the anchoring limit (end of input).  Success.
+                    fHitEnd = TRUE;
+                    fRequireEnd = TRUE;
+                    break;
+                }
+            }
+
+            // Not at end of input, and not at a final 0x0a either.  Back-track out.
+            fp = (REStackFrame *)fStack->popFrame(frameSize);
+            break;
+
+
          case URX_DOLLAR_M:                //  $, test for End of line in multi-line mode
              {
                  if (fp->fInputIdx >= fAnchorLimit) {
@@ -1440,7 +1499,7 @@
                  // If we are positioned just before a new-line, succeed.
                  // It makes no difference where the new-line is within the input.
                  UChar32 c = inputBuf[fp->fInputIdx];
-                 if ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029) {
+                 if ((c>=0x0a && c<=0x0d) || c==0x85 ||c==0x2028 || c==0x2029) {
                      // At a line end, except for the odd chance of  being in the middle of a CR/LF sequence
                      //  In multi-line mode, hitting a new-line just before the end of input does not
                      //   set the hitEnd or requireEnd flags
@@ -1454,6 +1513,23 @@
              break;
 
 
+         case URX_DOLLAR_MD:                //  $, test for End of line in multi-line and UNIX_LINES mode
+             {
+                 if (fp->fInputIdx >= fAnchorLimit) {
+                     // We really are at the end of input.  Success.
+                     fHitEnd = TRUE;
+                     fRequireEnd = TRUE;  // TODO:  should require end be set in multi-line mode?
+                     break;
+                 }
+                 // If we are not positioned just before a new-line, the test fails; backtrack out.
+                 // It makes no difference where the new-line is within the input.
+                 if (inputBuf[fp->fInputIdx] != 0x0a) {
+                     fp = (REStackFrame *)fStack->popFrame(frameSize);
+                 }
+             }
+             break;
+
+
        case URX_CARET:                    //  ^, test for start of line
             if (fp->fInputIdx != fAnchorStart) {
                 fp = (REStackFrame *)fStack->popFrame(frameSize);
@@ -1473,6 +1549,7 @@
                if ((fp->fInputIdx < fAnchorLimit) && 
                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
                    //  It's a new-line.  ^ is true.  Success.
+                   //  TODO:  what should be done with positions between a CR and LF?
                    break;
                }
                // Not at the start of a line.  Fail.
@@ -1481,6 +1558,23 @@
            break;
 
 
+       case URX_CARET_M_UNIX:       //  ^, test for start of line in multi-line + Unix-line mode
+           {
+               U_ASSERT(fp->fInputIdx >= fAnchorStart);
+               if (fp->fInputIdx <= fAnchorStart) {
+                   // We are at the start of input.  Success.
+                   break;
+               }
+               // Check whether the character just before the current pos is a new-line (only 0x0a is tested)
+               U_ASSERT(fp->fInputIdx <= fAnchorLimit);
+               UChar  c = inputBuf[fp->fInputIdx - 1]; 
+               if (c != 0x0a) {
+                   // Not at the start of a line.  Back-track out.
+                   fp = (REStackFrame *)fStack->popFrame(frameSize);
+               }
+           }
+           break;
+
         case URX_BACKSLASH_B:          // Test for word boundaries
             {
                 UBool success = isWordBoundary(fp->fInputIdx);
@@ -1505,7 +1599,7 @@
 
         case URX_BACKSLASH_D:            // Test for decimal digit
             {
-                if (fp->fInputIdx >= fRegionLimit) {
+                if (fp->fInputIdx >= fActiveLimit) {
                     fHitEnd = TRUE;
                     fp = (REStackFrame *)fStack->popFrame(frameSize);
                     break;
@@ -1525,7 +1619,7 @@
 
 
         case URX_BACKSLASH_G:          // Test for position at end of previous match
-            if (!((fMatch && fp->fInputIdx==fMatchEnd) || fMatch==FALSE && fp->fInputIdx==fRegionStart)) {
+            if (!((fMatch && fp->fInputIdx==fMatchEnd) || fMatch==FALSE && fp->fInputIdx==fActiveStart)) {
                 fp = (REStackFrame *)fStack->popFrame(frameSize);
             }
             break;
@@ -1538,7 +1632,7 @@
             {
 
                 // Fail if at end of input
-                if (fp->fInputIdx >= fRegionLimit) {
+                if (fp->fInputIdx >= fActiveLimit) {
                     fHitEnd = TRUE;
                     fp = (REStackFrame *)fStack->popFrame(frameSize);
                     break;
@@ -1547,7 +1641,7 @@
                 // Examine (and consume) the current char.
                 //   Dispatch into a little state machine, based on the char.
                 UChar32  c;
-                U16_NEXT(inputBuf, fp->fInputIdx, fRegionLimit, c);
+                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
                 UnicodeSet **sets = fPattern->fStaticSets;
                 if (sets[URX_GC_NORMAL]->contains(c))  goto GC_Extend;
                 if (sets[URX_GC_CONTROL]->contains(c)) goto GC_Control;
@@ -1561,8 +1655,8 @@
 
 
 GC_L:
-                if (fp->fInputIdx >= fRegionLimit)         goto GC_Done;
-                U16_NEXT(inputBuf, fp->fInputIdx, fRegionLimit, c);
+                if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
+                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
                 if (sets[URX_GC_L]->contains(c))       goto GC_L;
                 if (sets[URX_GC_LV]->contains(c))      goto GC_V;
                 if (sets[URX_GC_LVT]->contains(c))     goto GC_T;
@@ -1571,16 +1665,16 @@
                 goto GC_Extend;
 
 GC_V:
-                if (fp->fInputIdx >= fRegionLimit)         goto GC_Done;
-                U16_NEXT(inputBuf, fp->fInputIdx, fRegionLimit, c);
+                if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
+                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
                 if (sets[URX_GC_V]->contains(c))       goto GC_V;
                 if (sets[URX_GC_T]->contains(c))       goto GC_T;
                 U16_PREV(inputBuf, 0, fp->fInputIdx, c);
                 goto GC_Extend;
 
 GC_T:
-                if (fp->fInputIdx >= fRegionLimit)         goto GC_Done;
-                U16_NEXT(inputBuf, fp->fInputIdx, fRegionLimit, c);
+                if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
+                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
                 if (sets[URX_GC_T]->contains(c))       goto GC_T;
                 U16_PREV(inputBuf, 0, fp->fInputIdx, c);
                 goto GC_Extend;
@@ -1588,26 +1682,26 @@
 GC_Extend:
                 // Combining characters are consumed here
                 for (;;) {
-                    if (fp->fInputIdx >= fRegionLimit) {
+                    if (fp->fInputIdx >= fActiveLimit) {
                         break;
                     }
-                    U16_GET(inputBuf, 0, fp->fInputIdx, fRegionLimit, c);
+                    U16_GET(inputBuf, 0, fp->fInputIdx, fActiveLimit, c);
                     if (sets[URX_GC_EXTEND]->contains(c) == FALSE) {
                         break;
                     }
-                    U16_FWD_1(inputBuf, fp->fInputIdx, fRegionLimit);
+                    U16_FWD_1(inputBuf, fp->fInputIdx, fActiveLimit);
                 }
                 goto GC_Done;
 
 GC_Control:
                 // Most control chars stand alone (don't combine with combining chars),  
                 //   except for that CR/LF sequence is a single grapheme cluster.
-                if (c == 0x0d && fp->fInputIdx < fRegionLimit && inputBuf[fp->fInputIdx] == 0x0a) {
+                if (c == 0x0d && fp->fInputIdx < fActiveLimit && inputBuf[fp->fInputIdx] == 0x0a) {
                     fp->fInputIdx++;
                 }
 
 GC_Done:
-                if (fp->fInputIdx >= fRegionLimit) {
+                if (fp->fInputIdx >= fActiveLimit) {
                     fHitEnd = TRUE;
                 }
                 break;
@@ -1632,7 +1726,7 @@
                 // The high bit of the op value is a flag for the match polarity.
                 //    0:   success if input char is in set.
                 //    1:   success if input char is not in set.
-                if (fp->fInputIdx >= fRegionLimit) {
+                if (fp->fInputIdx >= fActiveLimit) {
                     fHitEnd = TRUE;
                     fp = (REStackFrame *)fStack->popFrame(frameSize);
                     break;
@@ -1642,7 +1736,7 @@
                 opValue &= ~URX_NEG_SET;
                 U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
                 UChar32  c;
-                U16_NEXT(inputBuf, fp->fInputIdx, fRegionLimit, c);
+                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
                 if (c < 256) {
                     Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
                     if (s8->contains(c)) {
@@ -1665,7 +1759,7 @@
             {
                 // Test input character for NOT being a member of  one of 
                 //    the predefined sets (Word Characters, for example)
-                if (fp->fInputIdx >= fRegionLimit) {
+                if (fp->fInputIdx >= fActiveLimit) {
                     fHitEnd = TRUE;
                     fp = (REStackFrame *)fStack->popFrame(frameSize);
                     break;
@@ -1673,7 +1767,7 @@
 
                 U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
                 UChar32  c;
-                U16_NEXT(inputBuf, fp->fInputIdx, fRegionLimit, c);
+                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
                 if (c < 256) {
                     Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
                     if (s8->contains(c) == FALSE) {
@@ -1692,14 +1786,14 @@
             
 
         case URX_SETREF:
-            if (fp->fInputIdx >= fRegionLimit) {
+            if (fp->fInputIdx >= fActiveLimit) {
                 fHitEnd = TRUE;
                 fp = (REStackFrame *)fStack->popFrame(frameSize);
                 break;
             }
             // There is input left.  Pick up one char and test it for set membership.
             UChar32   c;
-            U16_NEXT(inputBuf, fp->fInputIdx, fRegionLimit, c);
+            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
             U_ASSERT(opValue > 0 && opValue < sets->size());
             if (c<256) {
                 Regex8BitSet *s8 = &fPattern->fSets8[opValue];
@@ -1721,7 +1815,7 @@
         case URX_DOTANY:
             {
                 // . matches anything, but stops at end-of-line.
-                if (fp->fInputIdx >= fRegionLimit) {
+                if (fp->fInputIdx >= fActiveLimit) {
                     // At end of input.  Match failed.  Backtrack out.
                     fHitEnd = TRUE;
                     fp = (REStackFrame *)fStack->popFrame(frameSize);
@@ -1729,7 +1823,7 @@
                 }
                 // There is input left.  Advance over one char, unless we've hit end-of-line
                 UChar32 c;
-                U16_NEXT(inputBuf, fp->fInputIdx, fRegionLimit, c);
+                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
                 if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
                     ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
                     // End of line in normal mode.   . does not match.
@@ -1738,12 +1832,12 @@
                 }
             }
             break;
-            
-            
+
+
         case URX_DOTANY_ALL:
             {
                 // ., in dot-matches-all (including new lines) mode
-                if (fp->fInputIdx >= fRegionLimit) {
+                if (fp->fInputIdx >= fActiveLimit) {
                     // At end of input.  Match failed.  Backtrack out.
                     fHitEnd = TRUE;
                     fp = (REStackFrame *)fStack->popFrame(frameSize);
@@ -1752,8 +1846,8 @@
                 // There is input left.  Advance over one char, except if we are
                 //   at a cr/lf, advance over both of them.
                 UChar32 c; 
-                U16_NEXT(inputBuf, fp->fInputIdx, fRegionLimit, c);
-                if (c==0x0d && fp->fInputIdx < fRegionLimit) {
+                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
+                if (c==0x0d && fp->fInputIdx < fActiveLimit) {
                     // In the case of a CR/LF, we need to advance over both.
                     UChar nextc = inputBuf[fp->fInputIdx];
                     if (nextc == 0x0a) {
@@ -1763,51 +1857,24 @@
             }
             break;
 
-        case URX_DOTANY_PL:
-            // Match all up to and end-of-line or end-of-input.
+
+        case URX_DOTANY_UNIX:
             {
-                //  Fail if input already exhausted.
-                if (fp->fInputIdx >= fRegionLimit) {
+                // '.' operator, matches all, but stops at end-of-line.
+                //   UNIX_LINES mode, so 0x0a is the only recognized line ending.
+                if (fp->fInputIdx >= fActiveLimit) {
+                    // At end of input.  Match failed.  Backtrack out.
                     fHitEnd = TRUE;
                     fp = (REStackFrame *)fStack->popFrame(frameSize);
                     break;
                 }
-
-                // There is input left. Fail if we are  at the end of a line.
+                // There is input left.  Advance over one char, unless we've hit end-of-line
                 UChar32 c;
-                U16_NEXT(inputBuf, fp->fInputIdx, fRegionLimit, c);
-                if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
-                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
-                    // End of line in normal mode.   . does not match.
+                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
+                if (c == 0x0a) {
+                    // End of line in UNIX_LINES mode.   '.' does not match the \n
                     fp = (REStackFrame *)fStack->popFrame(frameSize);
-                    break;
                 }
-                
-                // There was input left.  Consume it until we hit the end of a line,
-                //   or until it's exhausted.
-                for (;;) {
-                    if (fp->fInputIdx >=  fRegionLimit) {
-                        fHitEnd = TRUE;
-                        break;
-                    }
-                    U16_NEXT(inputBuf, fp->fInputIdx, fRegionLimit, c);
-                    if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
-                        ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
-                        U16_BACK_1(inputBuf, 0, fp->fInputIdx)
-                            // Scan has reached a line-end.  We are done.
-                            break;
-                    }
-                }
-            }
-            break;
-
-        case URX_DOTANY_ALL_PL:
-            // Match up to end of input.  Fail if already at end of input.
-            fHitEnd = TRUE;
-            if (fp->fInputIdx >= fRegionLimit) {
-                fp = (REStackFrame *)fStack->popFrame(frameSize);
-            } else {
-                fp->fInputIdx = fRegionLimit;
             }
             break;
 
@@ -2006,7 +2073,7 @@
                     }
 
                 UBool  haveMatch = FALSE;
-                if (fp->fInputIdx + len <= fRegionLimit) {
+                if (fp->fInputIdx + len <= fActiveLimit) {
                     if (opType == URX_BACKREF) {
                         if (u_strncmp(inputBuf+groupStartIdx, inputBuf+fp->fInputIdx, len) == 0) {
                             haveMatch = TRUE;
@@ -2054,13 +2121,14 @@
             break;
 
         case URX_LA_START:
-            //  TODO:  setup for trnsaparent bounds, 
             {
                 // Entering a lookahead block.
                 // Save Stack Ptr, Input Pos.
                 U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
                 fData[opValue]   = fStack->size();
                 fData[opValue+1] = fp->fInputIdx;
+                fActiveStart     = fLookStart;          // Set the match region change for
+                fActiveLimit     = fLookLimit;          //   transparent bounds.
             }
             break;
 
@@ -2068,12 +2136,14 @@
             {
                 // Leaving a look-ahead block.
                 //  restore Stack Ptr, Input Pos to positions they had on entry to block.
-                //  TODO:  will need to restore Region bounds as well, for Transparent Bounds.
                 U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
                 int32_t stackSize = fStack->size();
                 int32_t newStackSize = fData[opValue];
                 U_ASSERT(stackSize >= newStackSize);
                 if (stackSize > newStackSize) {
+                    // Copy the current top frame back to the new (cut back) top frame.
+                    //   This makes the capture groups from within the look-ahead
+                    //   expression available.
                     int32_t *newFP = fStack->getBuffer() + newStackSize - frameSize;
                     int32_t i;
                     for (i=0; i<frameSize; i++) {
@@ -2083,13 +2153,18 @@
                     fStack->setSize(newStackSize);
                 }
                 fp->fInputIdx = fData[opValue+1];
+
+                // Restore the active region bounds in the input string; they may have
+                //    been changed because of transparent bounds on a Region.
+                fActiveStart = fRegionStart;        // TODO:  handle nested look-around blocks.
+                fActiveLimit = fRegionLimit;
             }
             break;
 
         case URX_ONECHAR_I:
-            if (fp->fInputIdx < fRegionLimit) {
+            if (fp->fInputIdx < fActiveLimit) {
                 UChar32   c;
-                U16_NEXT(inputBuf, fp->fInputIdx, fRegionLimit, c);
+                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
                 if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) {
                     break;
                 }
@@ -2115,7 +2190,7 @@
                 stringLen = opValue;
                 
                 int32_t stringEndIndex = fp->fInputIdx + stringLen;
-                if (stringEndIndex <= fRegionLimit) {
+                if (stringEndIndex <= fActiveLimit) {
                     if (u_strncasecmp(inputBuf+fp->fInputIdx, litText+stringStartIdx,
                         stringLen, U_FOLD_CASE_DEFAULT) == 0) {
                         // Success.  Advance the current input position.
@@ -2144,8 +2219,8 @@
                 fData[opValue+2] = -1;
                 // Save input string length, then reset to pin any matches to end at
                 //   the current position.
-                fData[opValue+3] = fRegionLimit;
-                fRegionLimit     = fp->fInputIdx;
+                fData[opValue+3] = fActiveLimit;
+                fActiveLimit     = fp->fInputIdx;
             }
             break;
 
@@ -2184,9 +2259,9 @@
                     //   Look Behind altogether.
                     fp = (REStackFrame *)fStack->popFrame(frameSize);
                     int32_t restoreInputLen = fData[opValue+3];
-                    U_ASSERT(restoreInputLen >= fRegionLimit);
+                    U_ASSERT(restoreInputLen >= fActiveLimit);
                     U_ASSERT(restoreInputLen <= fInput->length());
-                    fRegionLimit = restoreInputLen;
+                    fActiveLimit = restoreInputLen;
                     break;
                 }
 
@@ -2201,7 +2276,7 @@
             // End of a look-behind block, after a successful match.
             {
                 U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
-                if (fp->fInputIdx != fRegionLimit) {
+                if (fp->fInputIdx != fActiveLimit) {
                     //  The look-behind expression matched, but the match did not
                     //    extend all the way to the point that we are looking behind from.
                     //  FAIL out of here, which will take us back to the LB_CONT, which
@@ -2215,9 +2290,9 @@
                 //   which had been truncated to pin the end of the lookbehind match to the 
                 //   position being looked-behind.
                 int32_t originalInputLen = fData[opValue+3];
-                U_ASSERT(originalInputLen >= fRegionLimit);
+                U_ASSERT(originalInputLen >= fActiveLimit);
                 U_ASSERT(originalInputLen <= fInput->length());
-                fRegionLimit = originalInputLen;
+                fActiveLimit = originalInputLen;
             }
             break;
 
@@ -2257,9 +2332,9 @@
                     //  getting a match, which means that the negative lookbehind as
                     //  a whole has succeeded.  Jump forward to the continue location
                     int32_t restoreInputLen = fData[opValue+3];
-                    U_ASSERT(restoreInputLen >= fRegionLimit);
+                    U_ASSERT(restoreInputLen >= fActiveLimit);
                     U_ASSERT(restoreInputLen <= fInput->length());
-                    fRegionLimit = restoreInputLen;
+                    fActiveLimit = restoreInputLen;
                     fp->fPatIdx = continueLoc;
                     break;
                 }
@@ -2275,7 +2350,7 @@
             // End of a negative look-behind block, after a successful match.
             {
                 U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
-                if (fp->fInputIdx != fRegionLimit) {
+                if (fp->fInputIdx != fActiveLimit) {
                     //  The look-behind expression matched, but the match did not
                     //    extend all the way to the point that we are looking behind from.
                     //  FAIL out of here, which will take us back to the LB_CONT, which
@@ -2292,9 +2367,9 @@
                 //   inorder to pin the end of the lookbehind match  
                 //   to the position being looked-behind.
                 int32_t originalInputLen = fData[opValue+3];
-                U_ASSERT(originalInputLen >= fRegionLimit);
+                U_ASSERT(originalInputLen >= fActiveLimit);
                 U_ASSERT(originalInputLen <= fInput->length());
-                fRegionLimit = originalInputLen;
+                fActiveLimit = originalInputLen;
 
                 // Restore original stack position, discarding any state saved
                 //   by the successful pattern match.
@@ -2324,12 +2399,12 @@
                 //   we reach a character that is not a member of the set.
                 int32_t ix = fp->fInputIdx;
                 for (;;) {
-                    if (ix >= fRegionLimit) {
+                    if (ix >= fActiveLimit) {
                         fHitEnd = TRUE;
                         break;
                     }
                     UChar32   c;
-                    U16_NEXT(inputBuf, ix, fRegionLimit, c);
+                    U16_NEXT(inputBuf, ix, fActiveLimit, c);
                     if (c<256) {
                         if (s8->contains(c) == FALSE) {
                             U16_BACK_1(inputBuf, 0, ix);
@@ -2377,32 +2452,35 @@
                 // Loop through input until the input is exhausted (we reach an end-of-line)
                 // In multi-line mode, we can just go straight to the end of the input.
                 int32_t ix;
-                if (opValue == 1) {
+                if ((opValue & 1) == 1) {
                     // Multi-line mode.
-                    ix = fRegionLimit;
+                    ix = fActiveLimit;
                     fHitEnd = TRUE;
                 } else {
                     // NOT multi-line mode.  Line endings do not match '.'
                     // Scan forward until a line ending or end of input.
                     ix = fp->fInputIdx;
                     for (;;) {
-                        if (ix >= fRegionLimit) {
+                        if (ix >= fActiveLimit) {
                             fHitEnd = TRUE;
-                            ix = fRegionLimit;
+                            ix = fActiveLimit;
                             break;
                         }
                         UChar32   c;
-                        U16_NEXT(inputBuf, ix, fRegionLimit, c);   // c = inputBuf[ix++]
-                        if (((c & 0x7f) <= 0x29) &&     
-                            ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
-                            //  char is a line ending.  Put the input pos back to the
-                            //    line ending char, and exit the scanning loop.
-                            U16_BACK_1(inputBuf, 0, ix);
-                            break;
+                        U16_NEXT(inputBuf, ix, fActiveLimit, c);   // c = inputBuf[ix++]
+                        if ((c & 0x7f) <= 0x29) {        // Fast filter of non-new-line-s
+                            if ((c == 0x0a) ||            //  0x0a is newline in both modes.
+                               (((opValue & 2) == 0) &&   // Others only if not UNIX_LINES mode
+                                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029))) {
+                                //  char is a line ending.  Put the input pos back to the
+                                //    line ending char, and exit the scanning loop.
+                                U16_BACK_1(inputBuf, 0, ix);
+                                break;
+                            }
                         }
                     }
                 }
-                
+
                 // If there were no matching characters, skip over the loop altogether.
                 //   The loop doesn't run at all, a * op always succeeds.
                 if (ix == fp->fInputIdx) {
@@ -2412,7 +2490,7 @@
 
                 // Peek ahead in the compiled pattern, to the URX_LOOP_C that
                 //   must follow.  It's operand is the stack location
-                //   that holds the starting input index for the match of this [set]*
+                //   that holds the starting input index for the match of this .*
                 int32_t loopcOp = pat[fp->fPatIdx];
                 U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
                 int32_t stackLoc = URX_VAL(loopcOp);

diff --git a/i18n/repattrn.cpp b/i18n/repattrn.cpp
index 8cf55d7..17b721e 100644
--- a/i18n/repattrn.cpp
+++ b/i18n/repattrn.cpp

@@ -244,7 +244,8 @@
     }
 
     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
-            UREGEX_DOTALL   | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_ERROR_ON_UNKNOWN_ESCAPES;
+                              UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
+                              UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES;
 
     if ((flags & ~allFlags) != 0) {
         status = U_REGEX_INVALID_FLAG;
@@ -451,8 +452,6 @@
     case URX_NOP:
     case URX_DOTANY:
     case URX_DOTANY_ALL:
-    case URX_DOTANY_PL:
-    case URX_DOTANY_ALL_PL:
     case URX_FAIL:
     case URX_CARET:
     case URX_DOLLAR:

diff --git a/i18n/unicode/regex.h b/i18n/unicode/regex.h
index e2af308..cabf1a5 100644
--- a/i18n/unicode/regex.h
+++ b/i18n/unicode/regex.h

@@ -16,7 +16,7 @@
 #ifndef REGEX_H
 #define REGEX_H
 
-// #define REGEX_DEBUG
+#define REGEX_DEBUG
 
 /**
  * \file
@@ -1067,6 +1067,12 @@
     int32_t              fLookStart;       // Region bounds for look-ahead/behind and
     int32_t              fLookLimit;       //   and other boundary tests.  See
                                            //   useTransparentBounds
+
+    int32_t              fActiveStart;     // Currently active bounds for matching.
+    int32_t              fActiveLimit;     //   Usually is the same as region, but
+                                           //   is changed to fLookStart/Limit when
+                                           //   entering look around regions.
+
     UBool                fTransparentBounds;  // True if using transparent bounds.
     UBool                fAnchoringBounds; // True if using anchoring bounds.
 

diff --git a/i18n/unicode/uregex.h b/i18n/unicode/uregex.h
index 660076a..3e04f7a 100644
--- a/i18n/unicode/uregex.h
+++ b/i18n/unicode/uregex.h

@@ -71,7 +71,7 @@
       *         early (\u) we should still do.
       * @draft ICU 4.0
       */
-    UREGEG_LITERAL = 16,
+    UREGEX_LITERAL = 16,
 
     /**   Control behavior of "$" and "^"
       *    If set, recognize line terminators within string,
@@ -84,7 +84,7 @@
       *    in the behavior of ., ^, and $.
       *   @draft ICU 4.0
       */
-    URGEGX_UNIX_LINES = 1,
+    UREGEX_UNIX_LINES = 1,
 
     /**  Unicode word boundaries.
       *     If set, \b uses the Unicode TR 29 definition of word boundaries.
@@ -455,6 +455,7 @@
   *  uregex_start(), uregex_end() and uregex_group() to return an error 
   *  indicating that there is no match information available.  Clears any
   *  match region that may have been set.
+  *    TODO:  reset(-1) to preserve regions?
   *
   *    @param   regexp      The compiled regular expression.
   *    @param   index       The position in the text at which a

diff --git a/test/cintltst/reapits.c b/test/cintltst/reapits.c
index 955bdd2..367d555 100644
--- a/test/cintltst/reapits.c
+++ b/test/cintltst/reapits.c

@@ -34,6 +34,22 @@
 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
 log_err("Test Failure at file %s, line %d\n", __FILE__, __LINE__);}}
 
+#define TEST_SETUP(pattern, testString, flags) {  \
+    status = U_ZERO_ERROR; \
+    re = uregex_openC(pattern, flags, NULL, &status);  \
+    TEST_ASSERT_SUCCESS(status);   \
+    UChar   *srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
+    u_uastrncpy(srcString, testString,  strlen(testString)+1); \
+    uregex_setText(re, srcString, -1, &status); \
+    TEST_ASSERT_SUCCESS(status);
+    
+#define TEST_TEARDOWN  \
+    TEST_ASSERT_SUCCESS(status);  \
+    uregex_close(re);  \
+    free(srcString);   \
+    }
+
+
 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
      char     buf_inside_macro[120];
      int32_t  len = (int32_t)strlen(expected);
@@ -548,22 +564,6 @@
     /*
      *  Regions
      */
-     #define TEST_SETUP(pattern, testString, flags) {  \
-         status = U_ZERO_ERROR; \
-         re = uregex_openC(pattern, flags, NULL, &status);  \
-         TEST_ASSERT_SUCCESS(status);   \
-         UChar   *srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
-         u_uastrncpy(srcString, testString,  strlen(testString)+1); \
-         uregex_setText(re, srcString, -1, &status); \
-         TEST_ASSERT_SUCCESS(status);
-         
-     #define TEST_TEARDOWN  \
-         TEST_ASSERT_SUCCESS(status);  \
-         uregex_close(re);  \
-         free(srcString);   \
-         }
-         
-         
         
         
         // SetRegion(), getRegion() do something
@@ -656,14 +656,41 @@
         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
         TEST_TEARDOWN;
 
-        // requireEnd
         TEST_SETUP("abcd$", "abcd", 0);
         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
         TEST_TEARDOWN;
-
-         
         
+        // anchoringBounds
+        TEST_SETUP("abc$", "abcdef", 0);
+        TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
+        uregex_useAnchoringBounds(re, FALSE, &status);
+        TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
+        
+        TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
+        uregex_useAnchoringBounds(re, TRUE, &status);
+        uregex_setRegion(re, 0, 3, &status);
+        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
+        TEST_ASSERT(uregex_end(re, 0, &status) == 3);
+        TEST_TEARDOWN;
+        
+        // Transparent Bounds
+        TEST_SETUP("abc(?=def)", "abcdef", 0);
+        TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
+        uregex_useTransparentBounds(re, TRUE, &status);
+        TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
+        
+        uregex_useTransparentBounds(re, FALSE, &status);
+        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    // No Region
+        uregex_setRegion(re, 0, 3, &status);
+        TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   // with region, opaque bounds
+        uregex_useTransparentBounds(re, TRUE, &status);
+        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    // with region, transparent bounds
+        TEST_ASSERT(uregex_end(re, 0, &status) == 3);
+        TEST_TEARDOWN;
+        
+        
+
 #if 0
         status = U_ZERO_ERROR;
         uregex_reset(re, 0, &status);

diff --git a/test/intltest/regextst.cpp b/test/intltest/regextst.cpp
index e984c2a..6ab96db 100644
--- a/test/intltest/regextst.cpp
+++ b/test/intltest/regextst.cpp

@@ -1360,7 +1360,7 @@
 
     RegexMatcher    quotedStuffMat("\\s*([\\'\\\"/])(.*?)\\1", 0, status);
     RegexMatcher    commentMat    ("\\s*(#.*)?$", 0, status);
-    RegexMatcher    flagsMat      ("\\s*([ixsmdteEGMvatyYzZ2-9]*)([:letter:]*)", 0, status);
+    RegexMatcher    flagsMat      ("\\s*([ixsmdteDEGLMvabtyYzZ2-9]*)([:letter:]*)", 0, status);
 
     RegexMatcher    lineMat("(.*?)\\r?\\n", testString, 0, status);
     UnicodeString   testPattern;   // The pattern for test from the test file.
@@ -1506,8 +1506,9 @@
     int32_t             numFinds;
     int32_t             i;
     UBool               useMatchesFunc   = FALSE;
-    int32_t             regionStart    = -1;
-    int32_t             regionEnd      = -1;
+    UBool               useLookingAtFunc = FALSE;
+    int32_t             regionStart      = -1;
+    int32_t             regionEnd        = -1;
 
     //
     //  Compile the caller's pattern
@@ -1529,6 +1530,9 @@
     if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag
         bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES;
     }
+    if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag
+        bflags |= UREGEX_UNIX_LINES;
+    }
 
 
     callerPattern = RegexPattern::compile(pattern, bflags, pe, status);
@@ -1581,6 +1585,9 @@
     if (flags.indexOf((UChar)0x4d) >= 0) {
         useMatchesFunc = TRUE;
     }
+    if (flags.indexOf((UChar)0x4c) >= 0) {
+        useLookingAtFunc = TRUE;
+    }
 
     //
     //  Find the tags in the input data, remove them, and record the group boundary
@@ -1644,10 +1651,14 @@
 
     //
     // Do a find on the de-tagged input using the caller's pattern
+    //     TODO: error on count>1 and not find().
+    //           error on both matches() and lookingAt().
     //
     for (i=0; i<numFinds; i++) {
         if (useMatchesFunc) {
             isMatch = matcher->matches(status);
+        } else  if (useLookingAtFunc) {
+            isMatch = matcher->lookingAt(status);
         } else {
             isMatch = matcher->find();
         }
@@ -1702,22 +1713,22 @@
 
     if ((flags.indexOf((UChar)0x59) >= 0) &&   //  'Y' flag:  RequireEnd() == false
         matcher->requireEnd() == TRUE) {
-        errln("requireEnd() returned TRUE.  Expected FALSE");
+        errln("Error at line %d: requireEnd() returned TRUE.  Expected FALSE", line);
         failed = TRUE;
     }
     if ((flags.indexOf((UChar)0x79) >= 0) &&   //  'y' flag:  RequireEnd() == true
         matcher->requireEnd() == FALSE) {
-        errln("requireEnd() returned FALSE.  Expected TRUE");
+        errln("Error at line %d: requireEnd() returned FALSE.  Expected TRUE", line);
         failed = TRUE;
     }
     if ((flags.indexOf((UChar)0x5A) >= 0) &&   //  'Z' flag:  hitEnd() == false
         matcher->hitEnd() == TRUE) {
-        errln("hitEnd() returned TRUE.  Expected FALSE");
+        errln("Error at line %d: hitEnd() returned TRUE.  Expected FALSE", line);
         failed = TRUE;
     }
     if ((flags.indexOf((UChar)0x7A) >= 0) &&   //  'z' flag:  hitEnd() == true
         matcher->hitEnd() == FALSE) {
-        errln("hitEnd() returned FALSE.  Expected TRUE");
+        errln("Error at line %d: hitEnd() returned FALSE.  Expected TRUE", line);
         failed = TRUE;
     }
 

diff --git a/test/testdata/re_tests.txt b/test/testdata/re_tests.txt
index b863571..c18b638 100644
--- a/test/testdata/re_tests.txt
+++ b/test/testdata/re_tests.txt

@@ -822,7 +822,7 @@
 .[X](.+)+[X][X]	bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa	ni	-	-
 .[X][X](.+)+[X]	bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa	ni	-	-
 tt+$	xxxtt	y	-	-
-([a-\d]+)	za-9z	y	$1	a-9
+([a-\d]+)	za-9z	yi	$1	a-9
 ([\d-z]+)	a0-za	y	$1	0-z
 ([\d-\s]+)	a0- z	y	$1	0- 
 ([a-[:digit:]]+)	za-9z	y	$1	a-9

diff --git a/test/testdata/regextst.txt b/test/testdata/regextst.txt
index 3a51386..01a867e 100644
--- a/test/testdata/regextst.txt
+++ b/test/testdata/regextst.txt

@@ -22,6 +22,7 @@
 #                                   s      dot-matches-all mode
 #                                   m      multi-line mode.  
 #                                            ($ and ^ match at embedded new-lines)
+#                                   D      Unix Lines mode (only recognize 0x0a as new-line)
 #                                   v      If icu configured without break iteration, this
 #                                          regex test pattern should not compile.
 #                                   e      set the UREGEX_ERROR_ON_UNKNOWN_ESCAPES flag
@@ -32,6 +33,7 @@
 #                                          for the last find() in the sequence.
 #                                   G      Only check match / no match.  Do not check capture groups.
 #                                   E      Pattern compilation error expected
+#                                   L      Use LookingAt() rather than find()
 #                                   M      Use matches() rather than find().
 #
 #                                   a      Use non-Anchoring Bounds.
@@ -45,6 +47,114 @@
 #                                 White space must be present between the flags and the match string.
 #
 
+# Look-ahead expressions
+#
+"abc(?=def)"                   "<0>abc</0>def"
+"(.*)(?=c)"                    "<0><1>ab</1></0>cdef"
+
+"(?:.*)(?=c)"                  "<r>ab</r>cdef"
+"(?:.*)(?=c)"             b    "<r><0>ab</0></r>cdef"      # transparent bounds
+"(?:.*)(?=c)"             bM   "<r><0>ab</0></r>cdef"      # transparent bounds
+
+"(?:.*)(?=(c))"           b    "<0>ab</0><1>c</1>def"      # Capture in look-ahead
+"(?=(.)\1\1)\1"                "abcc<0><1>d</1></0>ddefg"  # Backrefs to look-ahead capture
+
+".(?!\p{L})"                   "abc<0>d</0> "              # Negated look-ahead
+".(?!(\p{L}))"                 "abc<0>d</0> "              # Negated look-ahead, no capture
+                                                           #   visible outside of look-ahead
+
+#
+# Negated Lookahead, various regions and region transparency
+#
+"abc(?!def)"                   "<0>abc</0>xyz"
+"abc(?!def)"                   "abcdef"
+"abc(?!def)"                   "<r><0>abc</0></r>def"
+"abc(?!def)"              b    "<r>abc</r>def"
+"abc(?!def)"              b    "<r><0>abc</0></r>xyz"
+
+#
+#  Anchoring Bounds
+#
+"^def$"                        "abc<r><0>def</0></r>ghi"           # anchoring (default) bounds
+"^def$"                  a     "abc<r>def</r>ghi"                  # non-anchoring bounds
+"^def"                   a     "<r><0>def</0></r>ghi"              # non-anchoring bounds
+"def$"                   a     "abc<r><0>def</0></r>"              # non-anchoring bounds
+
+"^.*$"                   m     "<0>line 1</0>\n line 2"
+"^.*$"                   m2    "line 1\n<0> line 2</0>"
+"^.*$"                   m3    "line 1\n line 2"
+"^.*$"                   m     "li<r><0>ne </0></r>1\n line 2"     # anchoring bounds
+"^.*$"                   m2    "li<r>ne </r>1\n line 2"            # anchoring bounds
+"^.*$"                  am     "li<r>ne </r>1\n line 2"            # non-anchoring bounds
+"^.*$"                  am     "li\n<r><0>ne </0></r>\n1\n line 2" # non-anchoring bounds
+
+#
+#  HitEnd and RequireEnd for new-lines just before end-of-input
+#
+"xyz$"                  yz     "<0>xyz</0>\n"
+"xyz$"                  yz     "<0>xyz</0>\x{d}\x{a}"
+
+"xyz$"                 myz     "<0>xyz</0>"                        # multi-line mode
+"xyz$"                 mYZ     "<0>xyz</0>\n" 
+"xyz$"                 mYZ     "<0>xyz</0>\r\n"
+"xyz$"                 mYZ     "<0>xyz</0>\x{85}abcd"
+
+"xyz$"                  Yz     "xyz\nx"
+"xyz$"                  Yz     "xyza"
+"xyz$"                  yz     "<0>xyz</0>"
+
+#
+#  All Unicode line endings recognized.
+#     0a, 0b, 0c, 0d, 0x85, 0x2028, 0x2029
+#     Multi-line and non-multiline mode take different paths, so repeated tests.
+#
+"^def$"                 mYZ    "abc\x{a}<0>def</0>\x{a}ghi"
+"^def$"                 mYZ    "abc\x{b}<0>def</0>\x{b}ghi"
+"^def$"                 mYZ    "abc\x{c}<0>def</0>\x{c}ghi"
+"^def$"                 mYZ    "abc\x{d}<0>def</0>\x{d}ghi"
+"^def$"                 mYZ    "abc\x{85}<0>def</0>\x{85}ghi"
+"^def$"                 mYZ    "abc\x{2028}<0>def</0>\x{2028}ghi"
+"^def$"                 mYZ    "abc\x{2029}<0>def</0>\x{2029}ghi"
+"^def$"                 mYZ    "abc\r\n<0>def</0>\r\nghi"
+
+"^def$"                 yz     "<0>def</0>\x{a}"
+"^def$"                 yz     "<0>def</0>\x{b}"
+"^def$"                 yz     "<0>def</0>\x{c}"
+"^def$"                 yz     "<0>def</0>\x{d}"
+"^def$"                 yz     "<0>def</0>\x{85}"
+"^def$"                 yz     "<0>def</0>\x{2028}"
+"^def$"                 yz     "<0>def</0>\x{2029}"
+"^def$"                 yz     "<0>def</0>\r\n"
+"^def$"                 yz     "<0>def</0>"
+
+
+"^def$"                       "<0>def</0>\x{2028"    #TODO: should be an error of some sort.
+
+#
+#  UNIX_LINES mode
+#
+"abc$"                 D      "<0>abc</0>\n"
+"abc$"                 D      "abc\r"
+"abc$"                 D      "abc\u0085"
+"a.b"                  D      "<0>a\rb</0>"
+"a.b"                  D      "a\nb"
+"(?d)abc$"                    "<0>abc</0>\n"
+"(?d)abc$"                    "abc\r"
+"abc$"                 mD     "<0>abc</0>\ndef"
+"abc$"                 mD     "abc\rdef"
+
+".*def"                L      "abc\r def xyz"          # Normal mode, LookingAt() stops at \r
+".*def"                DL     "<0>abc\r def</0> xyz"   # Unix Lines mode, \r not line end.
+".*def"                DL     "abc\n def xyz"   
+
+"(?d)a.b"                     "a\nb"
+"(?d)a.b"                     "<0>a\rb</0>"
+
+"^abc"                 m      "xyz\r<0>abc</0>"
+"^abc"                 Dm     "xyz\rabc"
+"^abc"                 Dm     "xyz\n<0>abc</0>"
+
+
 
 # Capturing parens
 ".(..)."                       "<0>a<1>bc</1>d</0>"
@@ -442,9 +552,10 @@
 
 #
 #  Octal Escaping.   This conforms to Java conventions, not Perl.
-"\0101\0\03\073\0154\01442"      "<0>A\u0000\u0003\u003b\u006c\u0064\u0032</0>"
+"\0101\00\03\073\0154\01442"      "<0>A\u0000\u0003\u003b\u006c\u0064\u0032</0>"
 "\0776"                          "<0>\u003f\u0036</0>"  # overflow, the 6 is literal.
 "\0376xyz"                       "<0>\u00fexyz</0>"
+"\08"                        E   "<0>\u00008</0>"
 
 #
 #  \u Surrogate Pairs
@@ -454,6 +565,12 @@
 "\ud800\ud800\udc00"              "<0>\ud800\U00010000</0>\U00010000\U00010000\U00010001"
 "(\ud800)(\udc00)"                "\U00010000"
 
+#
+# hitEnd with find()
+#
+"abc"                        Z    "aa<0>abc</0>  abcab"
+"abc"                       2Z    "aaabc  <0>abc</0>ab"
+"abc"                       3z    "aa>abc  abcab"
 
 #
 # Bug 3225
@@ -652,7 +769,7 @@
 "[abcd-[bc]]+"                    "<0>bad--dac</0>xyz"
 "[abcd-]+"                        "<0>bad--dac</0>xyz"
 
-"[abcd-\s]+"                      "xyz<0>abcd  --</0>xyz"      # set-lit-dash-esc
+"[abcd-\s]+"                 E    "xyz<0>abcd  --</0>xyz"      # set-lit-dash-esc
 "[abcd-\N{LATIN SMALL LETTER G}]+"  "xyz-<0>abcdefg</0>hij-"
 "[bcd-\{]+"                       "a<0>bcdefyz{</0>|}"
 
@@ -682,6 +799,8 @@
 "\p{InBasicLatin}+"                "ΓΔΕΖΗΘ<0>hello, world.</0>ニヌネノハバパ"
 "\P{InBasicLatin}+"                "<0>ΓΔΕΖΗΘ</0>hello, world.ニヌネノハバパ"
 "\p{InGreek}+"                     "<0>ΓΔΕΖΗΘ</0>hello, world.ニヌネノハバパ"
+"\p{InCombining Marks for Symbols}" "<0>\u20d0</0>"
+"\p{Incombiningmarksforsymbols}"    "<0>\u20d0</0>"
 
 
 "\p{javaDefined}+"                 "\uffff<0>abcd</0>\U00045678"
@@ -717,6 +836,11 @@
 "\Q\Y\E"                       e   "<0>\\Y</0>"
 
 #
+# Reported problem
+#
+"[a-\w]"                       E  "x"
+
+#
 # Bug 4045
 #
 "A*"                              "<0>AAAA</0>"
@@ -785,6 +909,7 @@
 # bug 5386  "^.*$" should match empty input
 #
 "^.*$"                            "<0></0>"
+"^.*$"                     m      "<0></0>"
 "^.*$"                            "<0></0>\n"
 "(?s)^.*$"                        "<0>\n</0>"
commit	a733874a9800264e7b41d880049ac4bf9d7f405f	[log] [tgz]
author	Jean-Baptiste Queru <jbq@google.com>	Fri Jul 17 17:41:21 2009 -0700
committer	Jean-Baptiste Queru <jbq@google.com>	Fri Jul 17 17:41:21 2009 -0700
tree	441afd6147fa76fab98f166726c0f4f192081ca8
parent	a09443ea25f9d6e6cfae4b8e438c7a6ced388a06 [diff]