Implement vreverse in jit (#319)
Co-authored-by: Zoltan Herczeg <hzmester@freemail.hu>
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 538e7b0..00d33a0 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -338,6 +338,12 @@
BOOL inlined_pattern;
} recurse_backtrack;
+typedef struct vreverse_backtrack {
+ backtrack_common common;
+ /* Label in the matching path that the OP_VREVERSE backtracking code jumps back to when it retries. */
+ struct sljit_label *matchingpath;
+} vreverse_backtrack;
+
#define OP_THEN_TRAP OP_TABLE_LENGTH
typedef struct then_trap_backtrack {
@@ -858,6 +864,21 @@
return count;
}
+static BOOL find_vreverse(PCRE2_SPTR cc)
+{
+ SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT || *cc == OP_ASSERTBACK_NA);
+ /* Returns TRUE if any alternative of the lookbehind at cc starts with OP_VREVERSE, FALSE otherwise. */
+ do
+ {
+ if (cc[1 + LINK_SIZE] == OP_VREVERSE)
+ return TRUE;
+ cc += GET(cc, 1); /* Advance by the link value stored after the opcode; the loop goes on while this lands on OP_ALT. */
+ }
+ while (*cc == OP_ALT);
+
+ return FALSE;
+}
+
/* Functions whose might need modification for all new supported opcodes:
next_opcode
check_opcode_types
@@ -928,6 +949,7 @@
case OP_KETRMIN:
case OP_KETRPOS:
case OP_REVERSE:
+ case OP_VREVERSE:
case OP_ASSERT:
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
@@ -1067,9 +1089,6 @@
case OP_THEN_ARG:
return cc + 1 + 2 + cc[1];
- case OP_VREVERSE:
- return NULL;
-
default:
SLJIT_UNREACHABLE();
return NULL;
@@ -1803,7 +1822,6 @@
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ASSERT_NA:
- case OP_ASSERTBACK_NA:
case OP_ONCE:
case OP_SCRIPT_RUN:
case OP_BRAPOS:
@@ -1815,6 +1833,19 @@
bracketlen = 1 + LINK_SIZE;
break;
+ case OP_ASSERTBACK_NA:
+ common->private_data_ptrs[cc - common->start] = private_data_ptr;
+ private_data_ptr += sizeof(sljit_sw);
+
+ if (find_vreverse(cc))
+ {
+ common->private_data_ptrs[cc + 1 - common->start] = 1;
+ private_data_ptr += sizeof(sljit_sw);
+ }
+
+ bracketlen = 1 + LINK_SIZE;
+ break;
+
case OP_CBRAPOS:
case OP_SCBRAPOS:
common->private_data_ptrs[cc - common->start] = private_data_ptr;
@@ -4105,6 +4136,37 @@
#endif /* SUPPORT_UNICODE */
}
+static void skip_valid_char(compiler_common *common)
+{
+DEFINE_COMPILER;
+#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
+struct sljit_jump *jump;
+#endif
+/* Emits code that advances STR_PTR past one character. No validity or end-of-subject checks are emitted here; TMP1 is overwritten in UTF mode. */
+#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
+ if (common->utf)
+ {
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); /* First unit below 0xc0: nothing more to skip. */
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); /* TMP1 = utf8_table4[TMP1 - 0xc0]; presumably the number of trailing bytes - confirm against the table. */
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+ jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); /* First unit below 0xd800: nothing more to skip. */
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
+ OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); /* TMP1 = 1 when (unit & 0xfc00) == 0xd800, i.e. unit is in 0xd800..0xdbff; else 0. */
+ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); /* Scale 0/1 to 0/2 before adding it to STR_PTR. */
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
+ JUMPHERE(jump);
+ return;
+ }
+#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Non-UTF mode, or a build where the block above is compiled out: advance by exactly one code unit. */
+}
+
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
@@ -6685,16 +6747,17 @@
#ifdef SUPPORT_UNICODE
if (ucp)
{
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
- jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ucp_Mn);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ucp_Pc);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
- JUMPHERE(jump);
OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
}
else
@@ -6731,16 +6794,17 @@
if (ucp)
{
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
- jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ucp_Mn);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ucp_Pc);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
- JUMPHERE(jump);
}
else
#endif /* SUPPORT_UNICODE */
@@ -7637,6 +7701,7 @@
case PT_CLIST:
case PT_UCNC:
+ case PT_PXXDIGIT:
unicode_status |= XCLASS_SAVE_CHAR;
break;
@@ -8090,8 +8155,10 @@
break;
case PT_WORD:
- OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Mn - typeoffset);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Pc - typeoffset);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
/* Fall through. */
case PT_ALNUM:
@@ -8235,6 +8302,40 @@
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
+ case PT_PXXDIGIT:
+ SET_CHAR_OFFSET(CHAR_0);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
+
+ SET_CHAR_OFFSET(CHAR_A);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_F - CHAR_A);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+
+ SET_CHAR_OFFSET(CHAR_a);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_f - CHAR_a);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+
+ SET_CHAR_OFFSET(0xff10);
+ jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10);
+
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+
+ SET_CHAR_OFFSET(0xff21);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+
+ SET_CHAR_OFFSET(0xff41);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+
+ SET_CHAR_OFFSET(CHAR_a);
+
+ JUMPHERE(jump);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
+ jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
+ break;
+
default:
SLJIT_UNREACHABLE();
break;
@@ -8259,11 +8360,7 @@
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
-int length;
struct sljit_jump *jump[4];
-#ifdef SUPPORT_UNICODE
-struct sljit_label *label;
-#endif /* SUPPORT_UNICODE */
switch(type)
{
@@ -8498,36 +8595,6 @@
}
JUMPHERE(jump[0]);
return cc;
-
- case OP_REVERSE:
- length = GET(cc, 0);
- if (length == 0)
- return cc + LINK_SIZE;
- if (HAS_VIRTUAL_REGISTERS)
- {
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
- }
- else
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
-#ifdef SUPPORT_UNICODE
- if (common->utf)
- {
- OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
- label = LABEL();
- add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
- move_back(common, backtracks, FALSE);
- OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
- }
- else
-#endif
- {
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
- }
- check_start_used_ptr(common);
- return cc + LINK_SIZE;
}
SLJIT_UNREACHABLE();
return cc;
@@ -8826,35 +8893,14 @@
if (check_str_ptr)
detect_partial_match(common, backtracks);
#ifdef SUPPORT_UNICODE
- if (common->utf)
+ if (common->utf && common->invalid_utf)
{
- if (common->invalid_utf)
- {
- read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
- return cc;
- }
-
-#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-#if PCRE2_CODE_UNIT_WIDTH == 8
- jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
-#elif PCRE2_CODE_UNIT_WIDTH == 16
- jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
-#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
- JUMPHERE(jump[0]);
+ read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
return cc;
-#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
}
#endif /* SUPPORT_UNICODE */
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+ skip_valid_char(common);
return cc;
case OP_ANYBYTE:
@@ -9840,6 +9886,106 @@
#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET
+static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack = NULL;
+jump_list **reverse_failed;
+unsigned int lmin, lmax;
+#ifdef SUPPORT_UNICODE
+struct sljit_jump *jump;
+struct sljit_label *label;
+#endif
+/* Emits the move-back code for the OP_REVERSE (fixed length) or OP_VREVERSE (lmin..lmax characters) opcode at cc and returns the address that follows the opcode. */
+SLJIT_ASSERT(parent->top == NULL);
+
+if (*cc == OP_REVERSE)
+ {
+ reverse_failed = &parent->topbacktracks;
+ lmin = GET(cc, 1);
+ lmax = lmin;
+ cc += 1 + LINK_SIZE;
+
+ SLJIT_ASSERT(lmin > 0);
+ }
+else
+ {
+ SLJIT_ASSERT(*cc == OP_VREVERSE);
+ PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL);
+ /* A variable-length reverse reports its failures to the backtrack entry pushed above, not to the parent. */
+ reverse_failed = &backtrack->topbacktracks;
+ lmin = GET(cc, 1);
+ lmax = GET(cc, 1 + LINK_SIZE); /* Second operand: both are read with GET and skipped as 2 * LINK_SIZE below, so it sits LINK_SIZE after the first. */
+ cc += 1 + 2 * LINK_SIZE;
+
+ SLJIT_ASSERT(lmin < lmax);
+ }
+/* TMP2 = jit_arguments.begin; it is the lower bound for STR_PTR in every comparison below. */
+if (HAS_VIRTUAL_REGISTERS)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+ }
+else
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
+
+#ifdef SUPPORT_UNICODE
+if (common->utf)
+ {
+ if (lmin > 0)
+ {
+ OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmin); /* TMP3 counts the mandatory characters still to move back. */
+ label = LABEL();
+ add_jump(compiler, reverse_failed, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
+ move_back(common, reverse_failed, FALSE);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, label);
+ }
+
+ if (lmin < lmax)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0); /* STACK(3) = position after the mandatory lmin characters. */
+ /* NOTE(review): STACK(2) and STACK(3) are assumed to be reserved by the caller - confirm at each call site. */
+ OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmax - lmin);
+ label = LABEL();
+ jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); /* Reaching the subject start only ends the optional part; it is not a failure. */
+ move_back(common, reverse_failed, FALSE);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, label);
+
+ JUMPHERE(jump);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0); /* STACK(2) = furthest position reached, i.e. the first start position tried. */
+ }
+ }
+else
+#endif
+ {
+ if (lmin > 0)
+ {
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmin));
+ add_jump(compiler, reverse_failed, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
+ }
+
+ if (lmin < lmax)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0); /* Same slot use as in the UTF branch. */
+ /* Move back the optional part in one step; the compare + SELECT presumably clamps STR_PTR to TMP2 when it went below it. */
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmax - lmin));
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);
+ SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
+
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
+ }
+ }
+
+check_start_used_ptr(common);
+
+if (lmin < lmax)
+ BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL(); /* Re-entry point used by compile_vreverse_backtrackingpath. */
+
+return cc;
+}
+
static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
{
while (TRUE)
@@ -9879,6 +10025,8 @@
int extrasize;
BOOL local_quit_available = FALSE;
BOOL needs_control_head;
+BOOL end_block_size = 0;
+BOOL has_vreverse;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
@@ -9908,6 +10056,7 @@
bra = *cc;
cc++;
}
+
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
@@ -9927,12 +10076,17 @@
brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
}
+if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))
+ end_block_size = 3;
+
if (framesize < 0)
{
extrasize = 1;
if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
extrasize = 0;
+ extrasize += end_block_size;
+
if (needs_control_head)
extrasize++;
@@ -9950,18 +10104,19 @@
if (needs_control_head)
{
- SLJIT_ASSERT(extrasize == 2);
+ SLJIT_ASSERT(extrasize == end_block_size + 2);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
}
}
else
{
- extrasize = needs_control_head ? 3 : 2;
+ extrasize = (needs_control_head ? 3 : 2) + end_block_size;
+
+ OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
allocate_stack(common, framesize + extrasize);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
if (needs_control_head)
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
@@ -9969,16 +10124,22 @@
if (needs_control_head)
{
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
}
else
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
}
+if (end_block_size > 0)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);
+ OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
+ }
+
memset(&altbacktrack, 0, sizeof(backtrack_common));
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
{
@@ -10003,7 +10164,13 @@
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
altbacktrack.cc = ccbegin;
- compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
+ ccbegin += 1 + LINK_SIZE;
+
+ has_vreverse = (*ccbegin == OP_VREVERSE);
+ if (*ccbegin == OP_REVERSE || has_vreverse)
+ ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);
+
+ compile_matchingpath(common, ccbegin, cc, &altbacktrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
{
if (local_quit_available)
@@ -10019,6 +10186,13 @@
common->accept = save_accept;
return NULL;
}
+
+ if (has_vreverse)
+ {
+ SLJIT_ASSERT(altbacktrack.top != NULL);
+ add_jump(compiler, &altbacktrack.top->nextbacktracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
+ }
+
common->accept_label = LABEL();
if (common->accept != NULL)
set_jumps(common->accept, common->accept_label);
@@ -10031,6 +10205,9 @@
else if (extrasize > 0)
free_stack(common, extrasize);
+ if (end_block_size > 0)
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
+
if (needs_control_head)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
}
@@ -10040,12 +10217,20 @@
{
/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
+
+ if (end_block_size > 0)
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));
+
if (needs_control_head)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
}
else
{
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+
+ if (end_block_size > 0)
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));
+
if (needs_control_head)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
@@ -10131,8 +10316,11 @@
JUMPHERE(jump);
}
+if (end_block_size > 0)
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+
if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));
if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
{
@@ -10145,8 +10333,8 @@
/* The topmost item should be 0. */
if (bra == OP_BRAZERO)
{
- if (extrasize == 2)
- free_stack(common, 1);
+ if (extrasize >= 2)
+ free_stack(common, extrasize - 1);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
}
else if (extrasize > 0)
@@ -10180,8 +10368,9 @@
/* Keep the STR_PTR on the top of the stack. */
if (bra == OP_BRAZERO)
{
+ /* This allocation is always successful. */
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
- if (extrasize == 2)
+ if (extrasize >= 2)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
}
else if (bra == OP_BRAMINZERO)
@@ -10201,8 +10390,9 @@
else
{
/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
- OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
- if (extrasize == 2)
+ OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));
+
+ if (extrasize == 2 + end_block_size)
{
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
if (bra == OP_BRAMINZERO)
@@ -10210,7 +10400,7 @@
}
else
{
- SLJIT_ASSERT(extrasize == 3);
+ SLJIT_ASSERT(extrasize == 3 + end_block_size);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
}
@@ -10247,8 +10437,8 @@
if (bra != OP_BRA)
{
- if (extrasize == 2)
- free_stack(common, 1);
+ if (extrasize >= 2)
+ free_stack(common, extrasize - 1);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
}
else if (extrasize > 0)
@@ -10390,7 +10580,7 @@
#endif /* SUPPORT_UNICODE */
-static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
+static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
DEFINE_COMPILER;
@@ -10479,6 +10669,7 @@
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
+BOOL has_vreverse = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
struct sljit_label *rmax_label = NULL;
@@ -10728,6 +10919,21 @@
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
}
}
+else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
+ {
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+ allocate_stack(common, 4);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+ OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
+
+ has_vreverse = (*matchingpath == OP_VREVERSE);
+ if (*matchingpath == OP_REVERSE || has_vreverse)
+ matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
+ }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
{
/* Saving the previous value. */
@@ -10735,6 +10941,9 @@
allocate_stack(common, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+
+ if (*matchingpath == OP_REVERSE)
+ matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
}
else if (has_alternatives)
{
@@ -10854,14 +11063,28 @@
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return NULL;
-if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+switch (opcode)
+ {
+ case OP_ASSERTBACK_NA:
+ if (has_vreverse)
+ {
+ SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));
+ add_jump(compiler, &backtrack->top->nextbacktracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
+ }
-if (opcode == OP_ONCE)
- match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
-
-if (opcode == OP_SCRIPT_RUN)
- match_script_run_common(common, private_data_ptr, backtrack);
+ if (PRIVATE_DATA(ccbegin + 1))
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
+ break;
+ case OP_ASSERT_NA:
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+ break;
+ case OP_ONCE:
+ match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
+ break;
+ case OP_SCRIPT_RUN:
+ match_script_run_common(common, private_data_ptr, backtrack);
+ break;
+ }
stacksize = 0;
if (repeat_type == OP_MINUPTO)
@@ -12134,7 +12357,6 @@
case OP_DOLLM:
case OP_CIRC:
case OP_CIRCM:
- case OP_REVERSE:
case OP_NOT_UCP_WORD_BOUNDARY:
case OP_UCP_WORD_BOUNDARY:
cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
@@ -12692,6 +12914,7 @@
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
+BOOL has_vreverse;
struct sljit_jump *brazero = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *once = NULL;
@@ -12912,7 +13135,18 @@
{
ccprev = cc + 1 + LINK_SIZE;
cc += GET(cc, 1);
- if (opcode != OP_COND && opcode != OP_SCOND)
+
+ has_vreverse = FALSE;
+ if (opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NA)
+ {
+ SLJIT_ASSERT(private_data_ptr != 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+
+ has_vreverse = (*ccprev == OP_VREVERSE);
+ if (*ccprev == OP_REVERSE || has_vreverse)
+ ccprev = compile_reverse_matchingpath(common, ccprev, current);
+ }
+ else if (opcode != OP_COND && opcode != OP_SCOND)
{
if (opcode != OP_ONCE)
{
@@ -12924,15 +13158,30 @@
else
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
}
+
compile_matchingpath(common, ccprev, cc, current);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return;
- if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+ switch (opcode)
+ {
+ case OP_ASSERTBACK_NA:
+ if (has_vreverse)
+ {
+ SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));
+ add_jump(compiler, &current->top->nextbacktracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
+ }
- if (opcode == OP_SCRIPT_RUN)
- match_script_run_common(common, private_data_ptr, current);
+ if (PRIVATE_DATA(ccbegin + 1))
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
+ break;
+ case OP_ASSERT_NA:
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+ break;
+ case OP_SCRIPT_RUN:
+ match_script_run_common(common, private_data_ptr, current);
+ break;
+ }
}
/* Instructions after the current alternative is successfully matched. */
@@ -13063,6 +13312,15 @@
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
}
}
+else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
+ free_stack(common, 4);
+ }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
{
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
@@ -13270,6 +13528,23 @@
add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
}
+static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+struct sljit_label *label;
+/* Backtracking for OP_VREVERSE: reload the position saved in STACK(2); while it is below the value in STACK(3), step one character forward, save it back and re-enter the matching path. */
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
+jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));
+skip_valid_char(common);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
+JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);
+/* No position left to retry: execution continues here, as do all jumps collected in current->topbacktracks. */
+label = LABEL();
+sljit_set_label(jump, label);
+set_jumps(current->topbacktracks, label);
+}
+
static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
@@ -13480,6 +13755,10 @@
set_jumps(current->topbacktracks, LABEL());
break;
+ case OP_VREVERSE:
+ compile_vreverse_backtrackingpath(common, current);
+ break;
+
case OP_THEN_TRAP:
/* A virtual opcode for then traps. */
compile_then_trap_backtrackingpath(common, current);
diff --git a/testdata/testinput4 b/testdata/testinput4
index 8a20991..34f187a 100644
--- a/testdata/testinput4
+++ b/testdata/testinput4
@@ -1497,7 +1497,7 @@
Az_\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
/^[[:xdigit:]]*/utf,ucp
- 1a\x{660}\x{bef}\x{16ee}\=no_jit
+ 1a\x{660}\x{bef}\x{16ee}
/^\d+/utf,ucp
1\x{660}\x{bef}\x{16ee}
@@ -2845,21 +2845,21 @@
23AB56
/\w+/utf,ucp
- --cafe\x{300}_au\x{203f}lait!\=no_jit
+ --cafe\x{300}_au\x{203f}lait!
/[\w]+/utf,ucp
- --cafe\x{300}_au\x{203f}lait!\=no_jit
+ --cafe\x{300}_au\x{203f}lait!
/[[:word:]]+/utf,ucp
- --cafe\x{300}_au\x{203f}lait!\=no_jit
+ --cafe\x{300}_au\x{203f}lait!
/[[:xdigit:]]+/utf,ucp
- --123ef\x{ff10}\x{ff19}\x{ff21}\x{ff26}\x{ff1a}\=no_jit
+ --123ef\x{ff10}\x{ff19}\x{ff21}\x{ff26}\x{ff1a}
/\b.+?\b/utf,ucp
- --cafe\x{300}_au\x{203f}lait!\=no_jit
+ --cafe\x{300}_au\x{203f}lait!
/caf\B.+?\B/utf,ucp
- --cafe\x{300}_au\x{203f}lait!\=no_jit
+ --cafe\x{300}_au\x{203f}lait!
# End of testinput4
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index 4b89fb8..cb121fa 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -2450,7 +2450,7 @@
0: Az_\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
/^[[:xdigit:]]*/utf,ucp
- 1a\x{660}\x{bef}\x{16ee}\=no_jit
+ 1a\x{660}\x{bef}\x{16ee}
0: 1a
/^\d+/utf,ucp
@@ -4552,27 +4552,27 @@
0: A
/\w+/utf,ucp
- --cafe\x{300}_au\x{203f}lait!\=no_jit
+ --cafe\x{300}_au\x{203f}lait!
0: cafe\x{300}_au\x{203f}lait
/[\w]+/utf,ucp
- --cafe\x{300}_au\x{203f}lait!\=no_jit
+ --cafe\x{300}_au\x{203f}lait!
0: cafe\x{300}_au\x{203f}lait
/[[:word:]]+/utf,ucp
- --cafe\x{300}_au\x{203f}lait!\=no_jit
+ --cafe\x{300}_au\x{203f}lait!
0: cafe\x{300}_au\x{203f}lait
/[[:xdigit:]]+/utf,ucp
- --123ef\x{ff10}\x{ff19}\x{ff21}\x{ff26}\x{ff1a}\=no_jit
+ --123ef\x{ff10}\x{ff19}\x{ff21}\x{ff26}\x{ff1a}
0: 123ef\x{ff10}\x{ff19}\x{ff21}\x{ff26}
/\b.+?\b/utf,ucp
- --cafe\x{300}_au\x{203f}lait!\=no_jit
+ --cafe\x{300}_au\x{203f}lait!
0: cafe\x{300}_au\x{203f}lait
/caf\B.+?\B/utf,ucp
- --cafe\x{300}_au\x{203f}lait!\=no_jit
+ --cafe\x{300}_au\x{203f}lait!
0: cafe
# End of testinput4