Make data for OP_REVERSE use IMM2_SIZE instead of LINK_SIZE, for consistency with OP_VREVERSE
diff --git a/ChangeLog b/ChangeLog
index 0dfb0e1..003d3ad 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -153,6 +153,9 @@
39. Add a test for ridiculous ovector offset values to the substring extraction
functions.
+40. Make OP_REVERSE use IMM2_SIZE for its data instead of LINK_SIZE, for
+consistency with OP_VREVERSE.
+
Version 10.42 11-December-2022
------------------------------
diff --git a/HACKING b/HACKING
index 3c450c9..b806a6f 100644
--- a/HACKING
+++ b/HACKING
@@ -735,15 +735,16 @@
Backward assertions use the opcodes OP_ASSERTBACK, OP_ASSERTBACK_NA, and
OP_ASSERTBACK_NOT. If all the branches of a backward assertion are of fixed
length (not necessarily the same), the first opcode inside each branch is
-OP_REVERSE, followed by a LINK_SIZE count of the number of characters to move
+OP_REVERSE, followed by an IMM2_SIZE count of the number of characters to move
back the pointer in the subject string, thus allowing each branch to have a
different (but fixed) length.
Variable-length backward assertions whose maximum matching length is limited
are also supported. For such assertions, the first opcode inside each branch is
OP_VREVERSE, followed by the minimum and maximum lengths for that branch,
-unless these happen to be equal, in which case OP_REVERSE is used. These values
-occupy two code units each in 8-bit mode, and 1 code unit in 16/32 bit modes.
+unless these happen to be equal, in which case OP_REVERSE is used. Each of
+these IMM2_SIZE values occupies two code units in 8-bit mode, and one code
+unit in 16-bit and 32-bit modes.
In ASCII or UTF-32 mode, the character counts in OP_REVERSE and OP_VREVERSE are
also the number of code units, but in UTF-8/16 mode each character may occupy
@@ -849,4 +850,4 @@
opcode are the correct length, in order to catch updating errors.
Philip Hazel
-July 2023
+November 2023
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index a7abcc7..feb5bcd 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -8450,8 +8450,8 @@
lookbehindminlength == lookbehindlength)
{
*code++ = OP_REVERSE;
- PUTINC(code, 0, lookbehindlength);
- length += 1 + LINK_SIZE;
+ PUT2INC(code, 0, lookbehindlength);
+ length += 1 + IMM2_SIZE;
}
else
{
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index 9eb4fca..e90c984 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -635,8 +635,8 @@
end_code = this_start_code;
do
{
- uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + LINK_SIZE : 0;
- size_t back = (revlen == 0)? 0 : (size_t)GET(end_code, 2+LINK_SIZE);
+ uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + IMM2_SIZE : 0;
+ size_t back = (revlen == 0)? 0 : (size_t)GET2(end_code, 2+LINK_SIZE);
if (back <= gone_back)
{
int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen);
diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
index b10dcad..e580818 100644
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@@ -1778,7 +1778,7 @@
1+LINK_SIZE, /* KetRmax */ \
1+LINK_SIZE, /* KetRmin */ \
1+LINK_SIZE, /* KetRpos */ \
- 1+LINK_SIZE, /* Reverse */ \
+ 1+IMM2_SIZE, /* Reverse */ \
1+2*IMM2_SIZE, /* VReverse */ \
1+LINK_SIZE, /* Assert */ \
1+LINK_SIZE, /* Assert not */ \
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index c5fbbc0..7fac32b 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -5718,7 +5718,7 @@
we move back a number of characters, not bytes. */
case OP_REVERSE:
- number = GET(Fecode, 1);
+ number = GET2(Fecode, 1);
#ifdef SUPPORT_UNICODE
if (utf)
{
@@ -5742,7 +5742,7 @@
/* Save the earliest consulted character, then skip to next opcode */
if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
- Fecode += 1 + LINK_SIZE;
+ Fecode += 1 + IMM2_SIZE;
break;
diff --git a/src/pcre2_printint.c b/src/pcre2_printint.c
index 4eb04db..c2d8b3e 100644
--- a/src/pcre2_printint.c
+++ b/src/pcre2_printint.c
@@ -429,7 +429,7 @@
case OP_COND:
case OP_SCOND:
case OP_REVERSE:
- if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
+ if (print_lengths) fprintf(f, "%3d ", GET2(code, 1));
else fprintf(f, " ");
fprintf(f, "%s", OP_names[*code]);
break;