Improve character range generation (#508)

This patch improve the 8 bit and \p{Any} handling of the code.

Co-authored-by: Zoltan Herczeg <hzmester@freemail.hu>
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index dd9b053..4c90a11 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -5276,17 +5276,15 @@
   start         start of range character
   end           end of range character
 
-Returns:        the number of < 256 characters added
-                the pointer to extra data is updated
+Returns:        cb->classbits is updated
 */
 
-static unsigned int
+static void
 add_to_class(uint32_t options, compile_block *cb, uint32_t start, uint32_t end)
 {
 uint8_t *classbits = cb->classbits;
 uint32_t c;
 uint32_t classbits_end = (end <= 0xff ? end : 0xff);
-unsigned int n8 = 0;
 
 /* If caseless matching is required, scan the range and process alternate
 cases. In Unicode, there are 8-bit characters that have alternate cases that
@@ -5300,22 +5298,14 @@
 #endif  /* SUPPORT_UNICODE */
     /* Not UTF mode */
     for (c = start; c <= classbits_end; c++)
-      {
       SETBIT(classbits, cb->fcc[c]);
-      n8++;
-      }
   }
 
 /* Use the bitmap for characters < 256. Otherwise use extra data.*/
 
 for (c = start; c <= classbits_end; c++)
-  {
   /* Regardless of start, c will always be <= 255. */
   SETBIT(classbits, c);
-  n8++;
-  }
-
-return n8;    /* Number of 8-bit characters */
 }
 
 
@@ -5335,24 +5325,21 @@
   cb            contains pointers to tables etc.
   p             points to row of 32-bit values, terminated by NOTACHAR
 
-Returns:        the number of < 256 characters added
-                the pointer to extra data is updated
+Returns:        cb->classbits is updated
 */
 
-static unsigned int
+static void
 add_list_to_class(uint32_t options, compile_block *cb, const uint32_t *p)
 {
-unsigned int n8 = 0;
 while (p[0] < 256)
   {
   unsigned int n = 0;
 
   while(p[n+1] == p[0] + n + 1) n++;
-  n8 += add_to_class(options, cb, p[0], p[n]);
+  add_to_class(options, cb, p[0], p[n]);
 
   p += n + 1;
   }
-return n8;
 }
 #endif
 
@@ -5372,24 +5359,20 @@
   cb            contains pointers to tables etc.
   p             points to row of 32-bit values, terminated by NOTACHAR
 
-Returns:        the number of < 256 characters added
-                the pointer to extra data is updated
+Returns:        cb->classbits is updated
 */
 
-static unsigned int
+static void
 add_not_list_to_class(uint32_t options, compile_block *cb, const uint32_t *p)
 {
-unsigned int n8 = 0;
 if (p[0] > 0)
-  n8 += add_to_class(options, cb, 0, p[0] - 1);
+  add_to_class(options, cb, 0, p[0] - 1);
 while (p[0] < 256)
   {
   while (p[1] == p[0] + 1) p++;
-  n8 += add_to_class(options, cb,
-    p[0] + 1, (p[1] > 255) ? 255 : p[1] - 1);
+  add_to_class(options, cb, p[0] + 1, (p[1] > 255) ? 255 : p[1] - 1);
   p++;
   }
-return n8;
 }
 #endif
 
@@ -5466,6 +5449,23 @@
 
 
 
+#ifdef SUPPORT_WIDE_CHARS
+
+/*************************************************
+*  Extended Class (xclass) related properties    *
+*************************************************/
+
+/* XClass needs to be generated. */
+#define XCLASS_REQUIRED 0x1
+/* XClass has 8 bit character. */
+#define XCLASS_HAS_8BIT_CHARS 0x2
+/* XClass has properties. */
+#define XCLASS_HAS_PROPS 0x4
+/* XClass matches to all >= 256 characters. */
+#define XCLASS_HIGH_ANY 0x8
+
+#endif
+
 /*************************************************
 *           Compile one branch                   *
 *************************************************/
@@ -5550,8 +5550,8 @@
 /* Helper variables for OP_XCLASS opcode (for characters > 255). */
 
 #ifdef SUPPORT_WIDE_CHARS
+uint32_t xclass_props;
 PCRE2_UCHAR *class_uchardata;
-BOOL xclass;
 class_ranges* cranges;
 #endif
 
@@ -5584,14 +5584,10 @@
 
 for (;; pptr++)
   {
-#ifdef SUPPORT_WIDE_CHARS
-  BOOL xclass_has_prop;
-#endif
   BOOL negate_class;
   BOOL should_flip_negation;
   BOOL possessive_quantifier;
   BOOL note_group_empty;
-  int class_has_8bitchar;
   uint32_t mclength;
   uint32_t skipunits;
   uint32_t subreqcu, subfirstcu;
@@ -5864,6 +5860,8 @@
     might match. */
 
 #ifdef SUPPORT_WIDE_CHARS
+    xclass_props = 0;
+
 #if PCRE2_CODE_UNIT_WIDTH == 8
     cranges = NULL;
 
@@ -5895,22 +5893,23 @@
         PCRE2_ASSERT(cranges != NULL);
         cb->cranges = cranges->next;
         }
+
+      if (cranges->range_list_size > 0)
+        {
+        uint32_t *ranges = (uint32_t*)(cranges + 1);
+
+        if (ranges[0] <= 255)
+          xclass_props |= XCLASS_HAS_8BIT_CHARS;
+
+        if (ranges[cranges->range_list_size - 1] == GET_MAX_CHAR_VALUE(utf) &&
+            ranges[cranges->range_list_size - 2] <= 256)
+          xclass_props |= XCLASS_HIGH_ANY;
+        }
       }
 
-    xclass = FALSE;
     class_uchardata = code + LINK_SIZE + 2;   /* For XCLASS items */
 #endif
 
-    /* For optimization purposes, we track some properties of the class:
-    class_has_8bitchar will be non-zero if the class contains at least one
-    character with a code point less than 256; xclass_has_prop will be TRUE if
-    Unicode property checks are present in the class. */
-
-    class_has_8bitchar = 0;
-#ifdef SUPPORT_WIDE_CHARS
-    xclass_has_prop = FALSE;
-#endif
-
     /* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map
     in a temporary bit of memory, in case the class contains fewer than two
     8-bit characters because in that case the compiled code doesn't use the bit
@@ -5956,17 +5955,28 @@
             case PC_PUNCT:
 
             if (lengthptr != NULL)
-              *lengthptr += 3;
+              {
+              if ((xclass_props & XCLASS_HIGH_ANY) == 0)
+                {
+                *lengthptr += 3;
+                xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
+                }
+              }
             else
               {
-              *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
-              *class_uchardata++ = (PCRE2_UCHAR)
-                ((posix_class == PC_GRAPH)? PT_PXGRAPH :
-                 (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT);
-              *class_uchardata++ = 0;
+              uint32_t ptype = ((posix_class == PC_GRAPH)? PT_PXGRAPH :
+                (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT);
+
+              PRIV(update_classbits)(ptype, 0, !local_negate, classbits);
+
+              if ((xclass_props & XCLASS_HIGH_ANY) == 0)
+                {
+                *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
+                *class_uchardata++ = (PCRE2_UCHAR)ptype;
+                *class_uchardata++ = 0;
+                xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
+                }
               }
-            xclass = TRUE;
-            xclass_has_prop = TRUE;
             continue;
 
             /* For the other POSIX classes (ex: ascii) we are going to
@@ -6032,9 +6042,10 @@
         else
           for (int i = 0; i < 32; i++) classbits[i] |= pbits[i];
 
+#ifdef SUPPORT_UNICODE
         /* Every class contains at least one < 256 character. */
-
-        class_has_8bitchar = 1;
+        xclass_props |= XCLASS_HAS_8BIT_CHARS;
+#endif
         continue;               /* End of POSIX handling */
         }
 
@@ -6062,10 +6073,6 @@
           }
         escape = META_DATA(meta);
 
-        /* Every class contains at least one < 256 character. */
-
-        class_has_8bitchar++;
-
         switch(escape)
           {
           case ESC_d:
@@ -6116,7 +6123,7 @@
 #ifdef SUPPORT_UNICODE
           if (cranges != NULL) break;
 #endif
-          (void)add_list_to_class(options & ~PCRE2_CASELESS,
+          add_list_to_class(options & ~PCRE2_CASELESS,
             cb, PRIV(hspace_list));
 #else
           PCRE2_ASSERT(cranges != NULL);
@@ -6128,7 +6135,7 @@
 #ifdef SUPPORT_UNICODE
           if (cranges != NULL) break;
 #endif
-          (void)add_not_list_to_class(options & ~PCRE2_CASELESS,
+          add_not_list_to_class(options & ~PCRE2_CASELESS,
             cb, PRIV(hspace_list));
 #else
           PCRE2_ASSERT(cranges != NULL);
@@ -6140,7 +6147,7 @@
 #ifdef SUPPORT_UNICODE
           if (cranges != NULL) break;
 #endif
-          (void)add_list_to_class(options & ~PCRE2_CASELESS,
+          add_list_to_class(options & ~PCRE2_CASELESS,
             cb, PRIV(vspace_list));
 #else
           PCRE2_ASSERT(cranges != NULL);
@@ -6152,7 +6159,7 @@
 #ifdef SUPPORT_UNICODE
           if (cranges != NULL) break;
 #endif
-          (void)add_not_list_to_class(options & ~PCRE2_CASELESS,
+          add_not_list_to_class(options & ~PCRE2_CASELESS,
             cb, PRIV(vspace_list));
 #else
           PCRE2_ASSERT(cranges != NULL);
@@ -6181,24 +6188,35 @@
               }
 
             if (lengthptr != NULL)
-              *lengthptr += 3;
+              {
+              if ((xclass_props & XCLASS_HIGH_ANY) == 0)
+                {
+                *lengthptr += 3;
+                xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
+                }
+              }
             else
               {
-              PRIV(update_classbits)(ptype, pdata, (escape == ESC_P),
-                classbits);
+              PRIV(update_classbits)(ptype, pdata,
+                (escape == ESC_P), classbits);
 
-              *class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP;
-              *class_uchardata++ = ptype;
-              *class_uchardata++ = pdata;
+              if ((xclass_props & XCLASS_HIGH_ANY) == 0)
+                {
+                *class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP;
+                *class_uchardata++ = ptype;
+                *class_uchardata++ = pdata;
+                xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
+                }
               }
-            xclass = TRUE;
-            xclass_has_prop = TRUE;
-            class_has_8bitchar--;                /* Undo! */
             }
-          break;
+          continue;
 #endif
           }
 
+#ifdef SUPPORT_WIDE_CHARS
+        /* Every non-property class contains at least one < 256 character. */
+        xclass_props |= XCLASS_HAS_8BIT_CHARS;
+#endif
         continue;
         }  /* End handling \d-type escapes */
 
@@ -6236,6 +6254,7 @@
 #if PCRE2_CODE_UNIT_WIDTH == 8
 #ifdef SUPPORT_UNICODE
           if (cranges != NULL) continue;
+          xclass_props |= XCLASS_HAS_8BIT_CHARS;
 #endif
 
           /* In an EBCDIC environment, Perl treats alphabetic ranges specially
@@ -6255,31 +6274,26 @@
 
             if (C <= CHAR_i)
               {
-              class_has_8bitchar +=
-                add_to_class(options, cb, C + uc,
-                  ((D < CHAR_i)? D : CHAR_i) + uc);
+              add_to_class(options, cb, C + uc,
+                ((D < CHAR_i)? D : CHAR_i) + uc);
               C = CHAR_j;
               }
 
             if (C <= D && C <= CHAR_r)
               {
-              class_has_8bitchar +=
-                add_to_class(options, cb, C + uc,
-                  ((D < CHAR_r)? D : CHAR_r) + uc);
+              add_to_class(options, cb, C + uc,
+                ((D < CHAR_r)? D : CHAR_r) + uc);
               C = CHAR_s;
               }
 
             if (C <= D)
-              {
-              class_has_8bitchar +=
-                add_to_class(options, cb, C + uc, D + uc);
-              }
+              add_to_class(options, cb, C + uc, D + uc);
             }
           else
 #endif
           /* Not an EBCDIC special range */
 
-          class_has_8bitchar += add_to_class(options, cb, c, d);
+          add_to_class(options, cb, c, d);
 #else
           PCRE2_ASSERT(cranges != NULL);
 #endif
@@ -6290,10 +6304,11 @@
 #if PCRE2_CODE_UNIT_WIDTH == 8
 #ifdef SUPPORT_UNICODE
         if (cranges != NULL) continue;
+        xclass_props |= XCLASS_HAS_8BIT_CHARS;
 #endif
         /* Handle a single character. */
 
-        class_has_8bitchar += add_to_class(options, cb, meta, meta);
+        add_to_class(options, cb, meta, meta);
 #else
         PCRE2_ASSERT(cranges != NULL);
         continue;
@@ -6301,26 +6316,49 @@
         }
       }   /* End of main class-processing loop */
 
+    /* If this class is the first thing in the branch, there can be no first
+    char setting, whatever the repeat count. Any reqcu setting must remain
+    unchanged after any kind of repeat. */
+
+    if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
+    zerofirstcu = firstcu;
+    zerofirstcuflags = firstcuflags;
+    zeroreqcu = reqcu;
+    zeroreqcuflags = reqcuflags;
+
 #ifdef SUPPORT_WIDE_CHARS
+    PCRE2_ASSERT((xclass_props & XCLASS_HAS_PROPS) == 0 ||
+                 (xclass_props & XCLASS_HIGH_ANY) == 0);
+
     if (cranges != NULL)
       {
       uint32_t *range = (uint32_t*)(cranges + 1);
       uint32_t *end = range + cranges->range_list_size;
 
+      if (!negate_class && (xclass_props & XCLASS_HIGH_ANY) != 0 &&
+          range + 2 == end && range[0] == 0)
+        {
+        *code++ = OP_ALLANY;
+
+        if (lengthptr == NULL)
+          cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
+        break;  /* End of class processing */
+        }
+
       while (range < end && range[0] < 256)
         {
+        PCRE2_ASSERT((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0);
         /* Add range to bitset. */
-        class_has_8bitchar +=
-          add_to_class(options, cb, range[0], range[1]);
+        add_to_class(options, cb, range[0], range[1]);
 
         if (range[1] > 255) break;
         range += 2;
         }
 
-      if (!xclass_has_prop && range < end && range[0] <= 256 &&
-          range[1] >= (utf ? MAX_UTF_CODE_POINT : MAX_UCHAR_VALUE))
+      if ((xclass_props & XCLASS_HIGH_ANY) != 0)
         {
-        PCRE2_ASSERT(range + 2 == end);
+        PCRE2_ASSERT(range + 2 == end && range[0] <= 256 &&
+          range[1] >= GET_MAX_CHAR_VALUE(utf));
         should_flip_negation = TRUE;
         range = end;
         }
@@ -6331,7 +6369,7 @@
         uint32_t range_end = range[1];
 
         range += 2;
-        xclass = TRUE;
+        xclass_props |= XCLASS_REQUIRED;
 
         if (range_start < 256) range_start = 256;
 
@@ -6390,16 +6428,6 @@
       }
 #endif
 
-    /* If this class is the first thing in the branch, there can be no first
-    char setting, whatever the repeat count. Any reqcu setting must remain
-    unchanged after any kind of repeat. */
-
-    if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
-    zerofirstcu = firstcu;
-    zerofirstcuflags = firstcuflags;
-    zeroreqcu = reqcu;
-    zeroreqcuflags = reqcuflags;
-
     /* If there are characters with values > 255, or Unicode property settings
     (\p or \P), we have to compile an extended class, with its own opcode,
     unless there were no property settings and there was a negated special such
@@ -6424,18 +6452,18 @@
     the bitmap in the actual compiled code. */
 
 #ifdef SUPPORT_WIDE_CHARS  /* Defined for 16/32 bits, or 8-bit with Unicode */
-    if (xclass)
+    if ((xclass_props & XCLASS_REQUIRED) != 0)
       {
       *class_uchardata++ = XCL_END;    /* Marks the end of extra data */
       *code++ = OP_XCLASS;
       code += LINK_SIZE;
       *code = negate_class? XCL_NOT:0;
-      if (xclass_has_prop) *code |= XCL_HASPROP;
+      if ((xclass_props & XCLASS_HAS_PROPS) != 0) *code |= XCL_HASPROP;
 
       /* If the map is required, move up the extra data to make room for it;
       otherwise just move the code pointer to the end of the extra data. */
 
-      if (class_has_8bitchar > 0)
+      if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0)
         {
         *code++ |= XCL_MAP;
         (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
@@ -6468,9 +6496,9 @@
       {
       if (negate_class)
         {
-       /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
-       for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
-       }
+        /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
+        for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
+        }
       memcpy(code, classbits, 32);
       }
     code += 32 / sizeof(PCRE2_UCHAR);
diff --git a/src/pcre2_compile.h b/src/pcre2_compile.h
index 36ba6eb..d760b59 100644
--- a/src/pcre2_compile.h
+++ b/src/pcre2_compile.h
@@ -183,6 +183,9 @@
 #define MAX_UCHAR_VALUE 0xffffffffu
 #endif
 
+#define GET_MAX_CHAR_VALUE(utf) \
+  ((utf) ? MAX_UTF_CODE_POINT : MAX_UCHAR_VALUE)
+
 /* Macro for setting individual bits in class bitmaps. */
 
 #define SETBIT(a,b) a[(b) >> 3] |= (uint8_t)(1u << ((b) & 0x7))
diff --git a/src/pcre2_compile_class.c b/src/pcre2_compile_class.c
index 27cd424..36fab67 100644
--- a/src/pcre2_compile_class.c
+++ b/src/pcre2_compile_class.c
@@ -250,7 +250,7 @@
 return MAX_UTF_CODE_POINT;
 #else
 #ifdef SUPPORT_UNICODE
-return (options & PARSE_CLASS_UTF) ? MAX_UTF_CODE_POINT : MAX_UCHAR_VALUE;
+return GET_MAX_CHAR_VALUE((options & PARSE_CLASS_UTF) != 0);
 #else
 return MAX_UCHAR_VALUE;
 #endif
@@ -356,6 +356,16 @@
         case ESC_p:
         case ESC_P:
         ptr++;
+        if (meta_arg == ESC_p && *ptr == PT_ANY)
+          {
+          if (buffer != NULL)
+            {
+            buffer[0] = 0;
+            buffer[1] = get_highest_char(options);
+            buffer += 2;
+            }
+          total_size += 2;
+          }
         break;
         }
       ptr++;
diff --git a/testdata/testinput11 b/testdata/testinput11
index e0ad891..a4957b3 100644
--- a/testdata/testinput11
+++ b/testdata/testinput11
@@ -383,4 +383,8 @@
 
 /[\x{100}-\x{200}\V\x{8000}-\x{9000}]/B
 
+/[\x00-\x{6000}\x{3000}-\x{ffff}]#[\x00-\x{6000}\x{3000}-\x{ffff}]{5,7}?/B
+
+/[\x00-\x{6000}\x{3000}-\x{ffffffff}]#[\x00-\x{6000}\x{3000}-\x{ffffffff}]{5,7}?/B
+
 # End of testinput11
diff --git a/testdata/testinput5 b/testdata/testinput5
index 0c9e167..7a7e347 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
Binary files differ
diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
index 297e36f..9c21a19 100644
--- a/testdata/testoutput11-16
+++ b/testdata/testoutput11-16
@@ -691,4 +691,18 @@
         End
 ------------------------------------------------------------------
 
+/[\x00-\x{6000}\x{3000}-\x{ffff}]#[\x00-\x{6000}\x{3000}-\x{ffff}]{5,7}?/B
+------------------------------------------------------------------
+        Bra
+        AllAny
+        #
+        AllAny{5}
+        AllAny{0,2}?
+        Ket
+        End
+------------------------------------------------------------------
+
+/[\x00-\x{6000}\x{3000}-\x{ffffffff}]#[\x00-\x{6000}\x{3000}-\x{ffffffff}]{5,7}?/B
+Failed: error 134 at offset 34: character code point value in \x{} or \o{} is too large
+
 # End of testinput11
diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
index 3a0b101..ad5c17f 100644
--- a/testdata/testoutput11-32
+++ b/testdata/testoutput11-32
@@ -697,4 +697,25 @@
         End
 ------------------------------------------------------------------
 
+/[\x00-\x{6000}\x{3000}-\x{ffff}]#[\x00-\x{6000}\x{3000}-\x{ffff}]{5,7}?/B
+------------------------------------------------------------------
+        Bra
+        [\x00-\xff\x{100}-\x{ffff}]
+        #
+        [\x00-\xff\x{100}-\x{ffff}]{5,7}?
+        Ket
+        End
+------------------------------------------------------------------
+
+/[\x00-\x{6000}\x{3000}-\x{ffffffff}]#[\x00-\x{6000}\x{3000}-\x{ffffffff}]{5,7}?/B
+------------------------------------------------------------------
+        Bra
+        AllAny
+        #
+        AllAny{5}
+        AllAny{0,2}?
+        Ket
+        End
+------------------------------------------------------------------
+
 # End of testinput11
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index 8027972..0257094 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1365,7 +1365,7 @@
 /[\W\p{Any}]/B
 ------------------------------------------------------------------
         Bra
-        [\x00-\xff\p{Any}\x{100}-\x{ffff}]
+        AllAny
         Ket
         End
 ------------------------------------------------------------------
@@ -1377,7 +1377,7 @@
 /[\W\pL]/B
 ------------------------------------------------------------------
         Bra
-        [\x00-/:-^`-\xff\p{L}\x{100}-\x{ffff}]
+        [\x00-/:-^`-\xff] (neg)
         Ket
         End
 ------------------------------------------------------------------
@@ -1394,7 +1394,7 @@
 /[\s[:^ascii:]]/B,ucp
 ------------------------------------------------------------------
         Bra
-        [\x09-\x0d \x80-\xff\p{Xsp}\x{100}-\x{ffff}]
+        [\x09-\x0d \x80-\xff] (neg)
         Ket
         End
 ------------------------------------------------------------------
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index 32cbd04..8e4a3f2 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1359,7 +1359,7 @@
 /[\W\p{Any}]/B
 ------------------------------------------------------------------
         Bra
-        [\x00-\xff\p{Any}\x{100}-\x{ffffffff}]
+        AllAny
         Ket
         End
 ------------------------------------------------------------------
@@ -1371,7 +1371,7 @@
 /[\W\pL]/B
 ------------------------------------------------------------------
         Bra
-        [\x00-/:-^`-\xff\p{L}\x{100}-\x{ffffffff}]
+        [\x00-/:-^`-\xff] (neg)
         Ket
         End
 ------------------------------------------------------------------
@@ -1388,7 +1388,7 @@
 /[\s[:^ascii:]]/B,ucp
 ------------------------------------------------------------------
         Bra
-        [\x09-\x0d \x80-\xff\p{Xsp}\x{100}-\x{ffffffff}]
+        [\x09-\x0d \x80-\xff] (neg)
         Ket
         End
 ------------------------------------------------------------------
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index c9a0993..0789ce0 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
Binary files differ