Fix auto-possessification bug in variable length lookbehinds
diff --git a/ChangeLog b/ChangeLog
index 299bd04..765de17 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -56,10 +56,16 @@
of a partial match in some cases. It involves partial matching when (*F) is
present so is unlikely to have actually affected anyone.
-9. Tidy the wording and formatting of some pcre2test error messages concerned
+9. Tidy the wording and formatting of some pcre2test error messages concerned
with bad modifiers. Also restrict single-letter modifier sequences to the first
item in a modifier list, as documented and always intended.
+10. An iterator at the end of many assertions can always be auto-possessified,
+but not at the end of variable-length lookbehinds. There was a bug in the code
+that checks for such a lookbehind; it was looking only at the first branch,
+which is wrong because some branches can be fixed length when others are not,
+for example (?<=AB|CD?). Now all branches are checked for variability.
+
Version 10.44 07-June-2024
--------------------------
diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c
index 566c464..e801a92 100644
--- a/src/pcre2_auto_possess.c
+++ b/src/pcre2_auto_possess.c
@@ -581,7 +581,7 @@
continue;
}
- /* At the end of a branch, skip to the end of the group. */
+ /* At the end of a branch, skip to the end of the group and process it. */
if (c == OP_ALT)
{
@@ -638,19 +638,29 @@
return FALSE;
break;
- /* Atomic sub-patterns and assertions can always auto-possessify their
- last iterator except for variable length lookbehinds. However, if the
- group was entered as a result of checking a previous iterator, this is
- not possible. */
+ /* Atomic sub-patterns and forward assertions can always auto-possessify
+ their last iterator. However, if the group was entered as a result of
+ checking a previous iterator, this is not possible. */
case OP_ASSERT:
case OP_ASSERT_NOT:
case OP_ONCE:
return !entered_a_group;
+ /* Fixed-length lookbehinds can be treated the same way, but variable
+ length lookbehinds must not auto-possessify their last iterator. Note
+ that in order to identify a variable length lookbehind we must check
+ through all branches, because some may be of fixed length. */
+
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
- return (bracode[1+LINK_SIZE] == OP_VREVERSE)? FALSE : !entered_a_group;
+ do
+ {
+ if (bracode[1+LINK_SIZE] == OP_VREVERSE) return FALSE; /* Variable */
+ bracode += GET(bracode, 1);
+ }
+ while (*bracode == OP_ALT);
+ return !entered_a_group; /* Not variable length */
/* Non-atomic assertions - don't possessify last iterator. This needs
more thought. */
diff --git a/testdata/testinput1 b/testdata/testinput1
index 46ad307..69457b3 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -6668,4 +6668,7 @@
\= Expect no match
xaax
+/(?<=PQ|Pc.b?)(.?)(b?)/
+ Pc.b
+
# End of testinput1
diff --git a/testdata/testinput2 b/testdata/testinput2
index 2f6d70b..5ebd787 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -6128,5 +6128,15 @@
xa{1,2}x
\= Expect no match
xaax
+
+# --------------
+
+/(?<=|b?)./B
+
+/(?=|b?)./B
+
+/(?>|b?)./B
+
+/(?<=xy|a.b?|cd)/B
# End of testinput2
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 06a64dd..bfaa544 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -10522,4 +10522,10 @@
xaax
No match
+/(?<=PQ|Pc.b?)(.?)(b?)/
+ Pc.b
+ 0: b
+ 1: b
+ 2:
+
# End of testinput1
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 4767db7..22ee248 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -18081,6 +18081,64 @@
\= Expect no match
xaax
No match
+
+# --------------
+
+/(?<=|b?)./B
+------------------------------------------------------------------
+ Bra
+ Assert back
+ Alt
+ VReverse
+ b?
+ Ket
+ Any
+ Ket
+ End
+------------------------------------------------------------------
+
+/(?=|b?)./B
+------------------------------------------------------------------
+ Bra
+ Assert
+ Alt
+ b?+
+ Ket
+ Any
+ Ket
+ End
+------------------------------------------------------------------
+
+/(?>|b?)./B
+------------------------------------------------------------------
+ Bra
+ Once
+ Alt
+ b?+
+ Ket
+ Any
+ Ket
+ End
+------------------------------------------------------------------
+
+/(?<=xy|a.b?|cd)/B
+------------------------------------------------------------------
+ Bra
+ Assert back
+ Reverse
+ xy
+ Alt
+ VReverse
+ a
+ Any
+ b?
+ Alt
+ Reverse
+ cd
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)