Snap for 11685116 from 899a2f6ca30351cefdc39bda1142ab5ccaf85276 to mainline-adservices-release

Change-Id: Ie3fd1ce9a91932060c08d71e4652bb2060987b52
diff --git a/apf2java/apf2java.sh b/apf2java/apf2java.sh
index 77f1c6f..b46fea6 100755
--- a/apf2java/apf2java.sh
+++ b/apf2java/apf2java.sh
@@ -13,7 +13,7 @@
 's@: swap +@: gen.addSwap();@;'\
 's@: neg +r([01])@: gen.addNeg(R\1);@;'\
 's@: jmp +(PASS|DROP)@: gen.addJump(\1_LABEL);@;'\
-'s@: jnebs +r0, 0x([0-9a-f]+), ([0-9]+), ([0-9a-f]+)@: gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("\3"), LABEL_\2);@;'\
+'s@: jbsne +r0, 0x([0-9a-f]+), ([0-9]+), ([0-9a-f]+)@: gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("\3"), LABEL_\2);@;'\
 's@: jeq +r([01]), 0x([0-9a-f]+), ([0-9]+)@: gen.addJumpIfR\1Equals(0x\2, LABEL_\3);@;'\
 's@: jne +r([01]), 0x([0-9a-f]+), ([0-9]+)@: gen.addJumpIfR\1NotEquals(0x\2, LABEL_\3);@;'\
 's@: jlt +r([01]), 0x([0-9a-f]+), ([0-9]+)@: gen.addJumpIfR\1LessThan(0x\2, LABEL_\3);@;'\
diff --git a/apf2java/apf2java.txt b/apf2java/apf2java.txt
index 761a8e1..6012332 100644
--- a/apf2java/apf2java.txt
+++ b/apf2java/apf2java.txt
@@ -15,7 +15,7 @@
       47: jne         r0, 0x806, 122
       52: li          r0, 14
       54: li          r1, -152
-      57: jnebs       r0, 0x6, 570, 000108000604
+      57: jbsne       r0, 0x6, 570, 000108000604
       68: ldh         r0, [20]
       70: jeq         r0, 0x1, 104
       73: li          r1, -156
@@ -25,10 +25,10 @@
       85: jeq         r0, 0x0, 570
       90: li          r0, 0
       91: li          r1, -56
-      93: jnebs       r0, 0x6, 564, ffffffffffff
+      93: jbsne       r0, 0x6, 564, ffffffffffff
      104: li          r0, 38
      106: li          r1, -80
-     108: jnebs       r0, 0x4, 570, c0a801ec
+     108: jbsne       r0, 0x4, 570, c0a801ec
      117: li          r1, -20
      119: jmp         564
      122: ldh         r0, [12]
@@ -42,7 +42,7 @@
      145: jne         r0, 0x44, 165
      148: li          r0, 50
      150: add         r0, r1
-     151: jnebs       r0, 0x6, 165, 7e9046bc7008
+     151: jbsne       r0, 0x6, 165, 7e9046bc7008
      160: li          r1, -24
      162: jmp         564
      165: ldb         r0, [30]
@@ -65,7 +65,7 @@
      222: jmp         570
      225: li          r1, -36
      227: li          r0, 0
-     228: jnebs       r0, 0x6, 564, ffffffffffff
+     228: jbsne       r0, 0x6, 564, ffffffffffff
      239: li          r1, -84
      241: jmp         570
      244: li          r1, -28
@@ -73,7 +73,7 @@
      249: jeq         r0, 0x86dd, 273
      254: li          r0, 0
      255: li          r1, -60
-     257: jnebs       r0, 0x6, 564, ffffffffffff
+     257: jbsne       r0, 0x6, 564, ffffffffffff
      268: li          r1, -68
      270: jmp         570
      273: ldb         r0, [20]
@@ -89,7 +89,7 @@
      301: jeq         r0, 0x85, 570
      306: jne         r0, 0x88, 333
      309: li          r0, 38
-     311: jnebs       r0, 0xf, 333, ff0200000000000000000000000000
+     311: jbsne       r0, 0xf, 333, ff0200000000000000000000000000
      329: li          r1, -104
      331: jmp         570
      333: ldm         r0, m[14]
@@ -97,18 +97,18 @@
      338: ldm         r0, m[15]
      340: jgt         r0, 0x48e, 562
      345: li          r0, 0
-     346: jnebs       r0, 0xf, 562, 7e9046bc700828c68e23672c86dd60
+     346: jbsne       r0, 0xf, 562, 7e9046bc700828c68e23672c86dd60
      364: li          r0, 18
-     366: jnebs       r0, 0x14, 562, 00603afffe800000000000002ac68efffe23672c
+     366: jbsne       r0, 0x14, 562, 00603afffe800000000000002ac68efffe23672c
      389: li          r0, 54
-     391: jnebs       r0, 0x2, 562, 8600
+     391: jbsne       r0, 0x2, 562, 8600
      396: li          r0, 58
-     398: jnebs       r0, 0x2, 562, 40c0
+     398: jbsne       r0, 0x2, 562, 40c0
      403: ldh         r0, [60]
      405: jlt         r0, 0x91e, 562
      410: jgt         r0, 0x1b58, 562
      415: li          r0, 62
-     417: jnebs       r0, 0x1c, 562, 0000000000000000010128c68e23672c05010000000005dc030440c0
+     417: jbsne       r0, 0x1c, 562, 0000000000000000010128c68e23672c05010000000005dc030440c0
      448: ldw         r0, [90]
      450: jlt         r0, 0x55555555, 562
      459: jgt         r0, 0xffffffff, 562
@@ -116,14 +116,14 @@
      470: jlt         r0, 0x55555555, 562
      479: jgt         r0, 0xffffffff, 562
      488: li          r0, 98
-     490: jnebs       r0, 0x18, 562, 000000002401fa000480f000000000000000000019030000
+     490: jbsne       r0, 0x18, 562, 000000002401fa000480f000000000000000000019030000
      517: ldw         r0, [122]
      519: jeq         r0, 0x0, 562
      522: jlt         r0, 0x78, 535
      525: jlt         r0, 0x91e, 562
      530: jgt         r0, 0x1b58, 562
      535: li          r0, 126
-     537: jnebs       r0, 0x12, 562, 2401fa000480f00000000000000000010701
+     537: jbsne       r0, 0x12, 562, 2401fa000480f00000000000000000010701
      558: li          r1, -72
      560: jmp         570
      562: li          r1, -40
diff --git a/apf_checksum_test.cc b/apf_checksum_test.cc
index b6526c9..7e54f3d 100644
--- a/apf_checksum_test.cc
+++ b/apf_checksum_test.cc
@@ -3,10 +3,12 @@
 #include <gtest/gtest.h>
 #include <linux/icmpv6.h>
 #include <linux/if_ether.h>
+#include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/udp.h>
+#include <linux/igmp.h>
 #include "apf_defs.h"
 #include "apf_utils.h"
 #include "apf_checksum.h"
@@ -190,4 +192,48 @@
     EXPECT_EQ(read_be16((uint8_t *)&ether_ipv6_hopopts_icmp6_pkt.pkt.icmp6hdr.icmp6_cksum), 0xf760);
 }
 
+TEST(ApfChecksumTest, CalcIGMPv2Checksum) {
+    // An IGMPv2 packet with ip checksum field set to 0
+    union packed {
+        uint8_t data[46];
+        struct packed {
+          struct ethhdr ethhdr;
+          struct iphdr iphdr;
+          uint8_t router_alert_option[4];
+          struct igmphdr igmphdr;
+        } pkt;
+    } ether_ipv4_igmpv2_pkt = {{
+        0x01, 0x00, 0x5e, 0x00, 0x00, 0xfb,
+        0xa2, 0x29, 0xae, 0xb3, 0x56, 0x6b,
+        0x08, 0x00, // end of ethernet header
+        0x46,
+        0x00,
+        0x00, 0x20,
+        0xf8, 0xf3,
+        0x00, 0x00,
+        0x01,
+        0x02,
+        0x00, 0x00,
+        0xc0, 0xa8, 0x01, 0xed,
+        0xe0, 0x00, 0x00, 0xfb, // end of ipv4 header without option
+        0x94, 0x04, 0x00, 0x00, // router alert option
+        0x16,
+        0x00,
+        0x09, 0x04,
+        0xe0, 0x00, 0x00, 0xfb // end of igmp payload
+    }};
+
+    // Set IPv4 checksum to 0x9404 + 0x0000 = 0x9404
+    ether_ipv4_igmpv2_pkt.pkt.iphdr.check = htons(0x9404);
+    uint8_t dscp = csum_and_return_dscp((uint8_t *)&ether_ipv4_igmpv2_pkt,
+                                        sizeof(ether_ipv4_igmpv2_pkt),
+                                        ETH_HLEN /* ip_ofs */, IPPROTO_IGMP /* partial_csum */,
+                                        0 /* csum_start */,
+                                        255 /* csum_ofs */,
+                                        false /* udp */);
+    EXPECT_EQ(dscp, 0);
+    // Verify IPv4 header checksum
+    EXPECT_EQ(read_be16((uint8_t *)&ether_ipv4_igmpv2_pkt.pkt.iphdr.check), 0x8853);
+}
+
 }  // namespace apf
diff --git a/apf_defs.h b/apf_defs.h
index af63025..e8426e6 100644
--- a/apf_defs.h
+++ b/apf_defs.h
@@ -16,22 +16,6 @@
 #define ETH_P_IP	0x0800
 #define ETH_P_IPV6	0x86DD
 
-#ifndef IPPROTO_ICMP
-#define IPPROTO_ICMP	1
-#endif
-
-#ifndef IPPROTO_TCP
-#define IPPROTO_TCP	6
-#endif
-
-#ifndef IPPROTO_UDP
-#define IPPROTO_UDP	17
-#endif
-
-#ifndef IPPROTO_ICMPV6
-#define IPPROTO_ICMPV6	58
-#endif
-
 #define ETH_HLEN	14
 #define IPV4_HLEN	20
 #define IPV6_HLEN	40
diff --git a/apf_interpreter.c b/apf_interpreter.c
new file mode 120000
index 0000000..7844a0d
--- /dev/null
+++ b/apf_interpreter.c
@@ -0,0 +1 @@
+v4/apf_interpreter.c
\ No newline at end of file
diff --git a/apf_interpreter.h b/apf_interpreter.h
new file mode 120000
index 0000000..a07ceaf
--- /dev/null
+++ b/apf_interpreter.h
@@ -0,0 +1 @@
+v4/apf_interpreter.h
\ No newline at end of file
diff --git a/apf_interpreter.c b/v4/apf_interpreter.c
similarity index 100%
rename from apf_interpreter.c
rename to v4/apf_interpreter.c
diff --git a/apf_interpreter.h b/v4/apf_interpreter.h
similarity index 100%
rename from apf_interpreter.h
rename to v4/apf_interpreter.h
diff --git a/v5/apf.h b/v5/apf.h
index b1bafce..d94dba6 100644
--- a/v5/apf.h
+++ b/v5/apf.h
@@ -93,7 +93,8 @@
  *    When the APF program begins execution, six of the sixteen memory slots
  *    are pre-filled by the interpreter with values that may be useful for
  *    programs:
- *      #0 to #8 are zero initialized.
+ *      #0 to #7 are zero initialized.
+ *      Slot #8  is initialized with apf version (on APF >4).
  *      Slot #9  this is slot #15 with greater resolution (1/16384ths of a second)
  *      Slot #10 starts at zero, implicitly used as tx buffer output pointer.
  *      Slot #11 contains the size (in bytes) of the APF program.
@@ -138,7 +139,8 @@
 
 typedef union {
   struct {
-    u32 pad[9];               // 0..8
+    u32 pad[8];               // 0..7
+    u32 apf_version;          // 8:  Initialized with apf_version()
     u32 filter_age_16384ths;  // 9:  Age since filter installed in 1/16384 seconds.
     u32 tx_buf_offset;        // 10: Offset in tx_buf where next byte will be written
     u32 program_size;         // 11: Size of program (in bytes)
@@ -183,8 +185,10 @@
 #define JLT_OPCODE 18   // Compare less than and branch, e.g. "jlt R0,5,label"
 #define JSET_OPCODE 19  // Compare any bits set and branch, e.g. "jset R0,5,label"
 #define JBSMATCH_OPCODE 20 // Compare byte sequence [R=0 not] equal, e.g. "jbsne R0,2,label,0x1122"
+                           // NOTE: Only APFv6+ implements R=1 'jbseq' version
 #define EXT_OPCODE 21   // Immediate value is one of *_EXT_OPCODE
 #define LDDW_OPCODE 22  // Load 4 bytes from data address (register + signed imm): "lddw R0, [5+R1]"
+                        // LDDW/STDW in APFv6+ *mode* load/store from counter specified in imm.
 #define STDW_OPCODE 23  // Store 4 bytes to data address (register + signed imm): "stdw R0, [5+R1]"
 
 /* Write 1, 2 or 4 byte immediate to the output buffer and auto-increment the output buffer pointer.
@@ -279,6 +283,17 @@
 #define JDNSAMATCH_EXT_OPCODE 44
 #define JDNSAMATCHSAFE_EXT_OPCODE 46
 
+/* Jump if register is [not] one of the list of values
+ * R bit - specifies the register (R0/R1) to test
+ * imm1: Extended opcode
+ * imm2: Jump label offset
+ * imm3(u8): top 5 bits - number of following u8/be16/be32 values - 1
+ *        middle 2 bits - 1..4 length of immediates
+ *        bottom 1 bit  - =0 jmp if in set, =1 if not in set
+ * imm4(imm3 * 1/2/3/4 bytes): the *UNIQUE* values to compare against
+ */
+#define JONEOF_EXT_OPCODE 47
+
 // This extended opcode is used to implement PKTDATACOPY_OPCODE
 #define PKTDATACOPYIMM_EXT_OPCODE 65536
 
diff --git a/v5/apf_interpreter.c b/v5/apf_interpreter.c
index 68d2f6b..16fce48 100644
--- a/v5/apf_interpreter.c
+++ b/v5/apf_interpreter.c
@@ -45,22 +45,6 @@
 #define ETH_P_IP	0x0800
 #define ETH_P_IPV6	0x86DD
 
-#ifndef IPPROTO_ICMP
-#define IPPROTO_ICMP	1
-#endif
-
-#ifndef IPPROTO_TCP
-#define IPPROTO_TCP	6
-#endif
-
-#ifndef IPPROTO_UDP
-#define IPPROTO_UDP	17
-#endif
-
-#ifndef IPPROTO_ICMPV6
-#define IPPROTO_ICMPV6	58
-#endif
-
 #define ETH_HLEN	14
 #define IPV4_HLEN	20
 #define IPV6_HLEN	40
@@ -165,7 +149,8 @@
  *    When the APF program begins execution, six of the sixteen memory slots
  *    are pre-filled by the interpreter with values that may be useful for
  *    programs:
- *      #0 to #8 are zero initialized.
+ *      #0 to #7 are zero initialized.
+ *      Slot #8  is initialized with apf version (on APF >4).
  *      Slot #9  this is slot #15 with greater resolution (1/16384ths of a second)
  *      Slot #10 starts at zero, implicitly used as tx buffer output pointer.
  *      Slot #11 contains the size (in bytes) of the APF program.
@@ -210,7 +195,8 @@
 
 typedef union {
   struct {
-    u32 pad[9];               /* 0..8 */
+    u32 pad[8];               /* 0..7 */
+    u32 apf_version;          /* 8:  Initialized with apf_version() */
     u32 filter_age_16384ths;  /* 9:  Age since filter installed in 1/16384 seconds. */
     u32 tx_buf_offset;        /* 10: Offset in tx_buf where next byte will be written */
     u32 program_size;         /* 11: Size of program (in bytes) */
@@ -255,8 +241,10 @@
 #define JLT_OPCODE 18   /* Compare less than and branch, e.g. "jlt R0,5,label" */
 #define JSET_OPCODE 19  /* Compare any bits set and branch, e.g. "jset R0,5,label" */
 #define JBSMATCH_OPCODE 20 /* Compare byte sequence [R=0 not] equal, e.g. "jbsne R0,2,label,0x1122" */
+                           /* NOTE: Only APFv6+ implements R=1 'jbseq' version */
 #define EXT_OPCODE 21   /* Immediate value is one of *_EXT_OPCODE */
 #define LDDW_OPCODE 22  /* Load 4 bytes from data address (register + signed imm): "lddw R0, [5+R1]" */
+                        /* LDDW/STDW in APFv6+ *mode* load/store from counter specified in imm. */
 #define STDW_OPCODE 23  /* Store 4 bytes to data address (register + signed imm): "stdw R0, [5+R1]" */
 
 /* Write 1, 2 or 4 byte immediate to the output buffer and auto-increment the output buffer pointer.
@@ -351,6 +339,17 @@
 #define JDNSAMATCH_EXT_OPCODE 44
 #define JDNSAMATCHSAFE_EXT_OPCODE 46
 
+/* Jump if register is [not] one of the list of values
+ * R bit - specifies the register (R0/R1) to test
+ * imm1: Extended opcode
+ * imm2: Jump label offset
+ * imm3(u8): top 5 bits - number of following u8/be16/be32 values - 1
+ *        middle 2 bits - 1..4 length of immediates
+ *        bottom 1 bit  - =0 jmp if in set, =1 if not in set
+ * imm4(imm3 * 1/2/3/4 bytes): the *UNIQUE* values to compare against
+ */
+#define JONEOF_EXT_OPCODE 47
+
 /* This extended opcode is used to implement PKTDATACOPY_OPCODE */
 #define PKTDATACOPYIMM_EXT_OPCODE 65536
 
@@ -592,7 +591,7 @@
 #define ENFORCE_UNSIGNED(c) ((c)==(u32)(c))
 
 u32 apf_version(void) {
-    return 20240226;
+    return 20240315;
 }
 
 typedef struct {
@@ -657,19 +656,19 @@
 /* Accept packet if not within data bounds */
 #define ASSERT_IN_DATA_BOUNDS(p, size) ASSERT_RETURN(IN_DATA_BOUNDS(p, size))
 
-  /* Counters start at end of RAM and count *backwards* so this array takes negative integers. */
-  u32 *counter = (u32*)(ctx->program + ctx->ram_len);
+    /* Counters start at end of RAM and count *backwards* so this array takes negative integers. */
+    u32 *counter = (u32*)(ctx->program + ctx->ram_len);
 
-  ASSERT_IN_PACKET_BOUNDS(ETH_HLEN);
-  /* Only populate if IP version is IPv4. */
-  if ((ctx->packet[ETH_HLEN] & 0xf0) == 0x40) {
-      ctx->mem.named.ipv4_header_size = (ctx->packet[ETH_HLEN] & 15) * 4;
-  }
-  /* Count of instructions remaining to execute. This is done to ensure an */
-  /* upper bound on execution time. It should never be hit and is only for */
-  /* safety. Initialize to the number of bytes in the program which is an */
-  /* upper bound on the number of instructions in the program. */
-  u32 instructions_remaining = ctx->program_len;
+    ASSERT_IN_PACKET_BOUNDS(ETH_HLEN);
+    /* Only populate if IP version is IPv4. */
+    if ((ctx->packet[ETH_HLEN] & 0xf0) == 0x40) {
+        ctx->mem.named.ipv4_header_size = (ctx->packet[ETH_HLEN] & 15) * 4;
+    }
+    /* Count of instructions remaining to execute. This is done to ensure an */
+    /* upper bound on execution time. It should never be hit and is only for */
+    /* safety. Initialize to the number of bytes in the program which is an */
+    /* upper bound on the number of instructions in the program. */
+    u32 instructions_remaining = ctx->program_len;
 
 /* Is access to offset |p| length |size| within output buffer bounds? */
 #define IN_OUTPUT_BOUNDS(p, size) (ENFORCE_UNSIGNED(p) && \
@@ -679,38 +678,43 @@
 /* Accept packet if not write within allocated output buffer */
 #define ASSERT_IN_OUTPUT_BOUNDS(p, size) ASSERT_RETURN(IN_OUTPUT_BOUNDS(p, size))
 
-  do {
-      APF_TRACE_HOOK(ctx->pc, ctx->R, ctx->program, ctx->program_len,
-                     ctx->packet, ctx->packet_len, ctx->mem.slot, ctx->ram_len);
-      if (ctx->pc == ctx->program_len + 1) return DROP_PACKET;
-      if (ctx->pc >= ctx->program_len) return PASS_PACKET;
+    do {
+        APF_TRACE_HOOK(ctx->pc, ctx->R, ctx->program, ctx->program_len,
+                       ctx->packet, ctx->packet_len, ctx->mem.slot, ctx->ram_len);
+        if (ctx->pc == ctx->program_len + 1) return DROP_PACKET;
+        if (ctx->pc >= ctx->program_len) return PASS_PACKET;
 
-      const u8 bytecode = ctx->program[ctx->pc++];
-      const u32 opcode = EXTRACT_OPCODE(bytecode);
-      const u32 reg_num = EXTRACT_REGISTER(bytecode);
+        const u8 bytecode = ctx->program[ctx->pc++];
+        const u32 opcode = EXTRACT_OPCODE(bytecode);
+        const u32 reg_num = EXTRACT_REGISTER(bytecode);
 #define REG (ctx->R[reg_num])
 #define OTHER_REG (ctx->R[reg_num ^ 1])
-      /* All instructions have immediate fields, so load them now. */
-      const u32 len_field = EXTRACT_IMM_LENGTH(bytecode);
-      u32 imm = 0;
-      s32 signed_imm = 0;
-      if (len_field != 0) {
-          const u32 imm_len = 1 << (len_field - 1);
-          imm = decode_imm(ctx, imm_len); /* 1st imm, at worst bytes 1-4 past opcode/program_len */
-          /* Sign extend imm into signed_imm. */
-          signed_imm = (s32)(imm << ((4 - imm_len) * 8));
-          signed_imm >>= (4 - imm_len) * 8;
-      }
+        /* All instructions have immediate fields, so load them now. */
+        const u32 len_field = EXTRACT_IMM_LENGTH(bytecode);
+        u32 imm = 0;
+        s32 signed_imm = 0;
+        if (len_field != 0) {
+            const u32 imm_len = 1 << (len_field - 1);
+            imm = decode_imm(ctx, imm_len); /* 1st imm, at worst bytes 1-4 past opcode/program_len */
+            /* Sign extend imm into signed_imm. */
+            signed_imm = (s32)(imm << ((4 - imm_len) * 8));
+            signed_imm >>= (4 - imm_len) * 8;
+        }
 
-      u32 pktcopy_src_offset = 0;  /* used for various pktdatacopy opcodes */
-      switch (opcode) {
-          case PASSDROP_OPCODE: {
-              if (len_field > 2) return PASS_PACKET;  /* max 64K counters (ie. imm < 64K) */
-              if (imm) {
-                  if (4 * imm > ctx->ram_len) return PASS_PACKET;
-                  counter[-(s32)imm]++;
-              }
-              return reg_num ? DROP_PACKET : PASS_PACKET;
+        /* See comment at ADD_OPCODE for the reason for ARITH_REG/arith_imm/arith_signed_imm. */
+#define ARITH_REG (ctx->R[reg_num & ctx->v6])
+        u32 arith_imm = (ctx->v6) ? (len_field ? imm : OTHER_REG) : (reg_num ? ctx->R[1] : imm);
+        s32 arith_signed_imm = (ctx->v6) ? (len_field ? signed_imm : (s32)OTHER_REG) : (reg_num ? (s32)ctx->R[1] : signed_imm);
+
+        u32 pktcopy_src_offset = 0;  /* used for various pktdatacopy opcodes */
+        switch (opcode) {
+          case PASSDROP_OPCODE: {  /* APFv6+ */
+            if (len_field > 2) return PASS_PACKET;  /* max 64K counters (ie. imm < 64K) */
+            if (imm) {
+                if (4 * imm > ctx->ram_len) return PASS_PACKET;
+                counter[-(s32)imm]++;
+            }
+            return reg_num ? DROP_PACKET : PASS_PACKET;
           }
           case LDB_OPCODE:
           case LDH_OPCODE:
@@ -718,324 +722,356 @@
           case LDBX_OPCODE:
           case LDHX_OPCODE:
           case LDWX_OPCODE: {
-              u32 offs = imm;
-              /* Note: this can overflow and actually decrease offs. */
-              if (opcode >= LDBX_OPCODE) offs += ctx->R[1];
-              ASSERT_IN_PACKET_BOUNDS(offs);
-              u32 load_size = 0;
-              switch (opcode) {
-                  case LDB_OPCODE:
-                  case LDBX_OPCODE:
-                    load_size = 1;
-                    break;
-                  case LDH_OPCODE:
-                  case LDHX_OPCODE:
-                    load_size = 2;
-                    break;
-                  case LDW_OPCODE:
-                  case LDWX_OPCODE:
-                    load_size = 4;
-                    break;
-                  /* Immediately enclosing switch statement guarantees */
-                  /* opcode cannot be any other value. */
-              }
-              const u32 end_offs = offs + (load_size - 1);
-              /* Catch overflow/wrap-around. */
-              ASSERT_RETURN(end_offs >= offs);
-              ASSERT_IN_PACKET_BOUNDS(end_offs);
-              u32 val = 0;
-              while (load_size--) val = (val << 8) | ctx->packet[offs++];
-              REG = val;
-              break;
+            u32 offs = imm;
+            /* Note: this can overflow and actually decrease offs. */
+            if (opcode >= LDBX_OPCODE) offs += ctx->R[1];
+            ASSERT_IN_PACKET_BOUNDS(offs);
+            u32 load_size = 0;
+            switch (opcode) {
+              case LDB_OPCODE:
+              case LDBX_OPCODE:
+                load_size = 1;
+                break;
+              case LDH_OPCODE:
+              case LDHX_OPCODE:
+                load_size = 2;
+                break;
+              case LDW_OPCODE:
+              case LDWX_OPCODE:
+                load_size = 4;
+                break;
+              /* Immediately enclosing switch statement guarantees */
+              /* opcode cannot be any other value. */
+            }
+            const u32 end_offs = offs + (load_size - 1);
+            /* Catch overflow/wrap-around. */
+            ASSERT_RETURN(end_offs >= offs);
+            ASSERT_IN_PACKET_BOUNDS(end_offs);
+            u32 val = 0;
+            while (load_size--) val = (val << 8) | ctx->packet[offs++];
+            REG = val;
+            break;
           }
           case JMP_OPCODE:
-              if (reg_num && !ctx->v6) {
+            if (reg_num && !ctx->v6) {  /* APFv6+ */
                 /* First invocation of APFv6 jmpdata instruction */
                 counter[-1] = 0x12345678;  /* endianness marker */
                 counter[-2]++;  /* total packets ++ */
                 ctx->v6 = (u8)True;
-              }
-              /* This can jump backwards. Infinite looping prevented by instructions_remaining. */
-              ctx->pc += imm;
-              break;
+            }
+            /* This can jump backwards. Infinite looping prevented by instructions_remaining. */
+            ctx->pc += imm;
+            break;
           case JEQ_OPCODE:
           case JNE_OPCODE:
           case JGT_OPCODE:
           case JLT_OPCODE:
-          case JSET_OPCODE:
+          case JSET_OPCODE: {
+            /* with len_field == 0, we have imm == 0 and thus a jmp +0, ie. a no-op */
+            if (len_field == 0) break;
+            /* Load second immediate field. */
+            u32 cmp_imm = 0;
+            if (reg_num == 1) {
+                cmp_imm = ctx->R[1];
+            } else {
+                u32 cmp_imm_len = 1 << (len_field - 1);
+                cmp_imm = decode_imm(ctx, cmp_imm_len); /* 2nd imm, at worst 8 bytes past prog_len */
+            }
+            switch (opcode) {
+              case JEQ_OPCODE:  if (ctx->R[0] == cmp_imm) ctx->pc += imm; break;
+              case JNE_OPCODE:  if (ctx->R[0] != cmp_imm) ctx->pc += imm; break;
+              case JGT_OPCODE:  if (ctx->R[0] >  cmp_imm) ctx->pc += imm; break;
+              case JLT_OPCODE:  if (ctx->R[0] <  cmp_imm) ctx->pc += imm; break;
+              case JSET_OPCODE: if (ctx->R[0] &  cmp_imm) ctx->pc += imm; break;
+            }
+            break;
+          }
           case JBSMATCH_OPCODE: {
-              /* Load second immediate field. */
-              u32 cmp_imm = 0;
-              if (reg_num == 1) {
-                  cmp_imm = ctx->R[1];
-              } else if (len_field != 0) {
-                  u32 cmp_imm_len = 1 << (len_field - 1);
-                  cmp_imm = decode_imm(ctx, cmp_imm_len); /* 2nd imm, at worst 8 bytes past prog_len */
-              }
-              switch (opcode) {
-                  case JEQ_OPCODE:  if (ctx->R[0] == cmp_imm) ctx->pc += imm; break;
-                  case JNE_OPCODE:  if (ctx->R[0] != cmp_imm) ctx->pc += imm; break;
-                  case JGT_OPCODE:  if (ctx->R[0] >  cmp_imm) ctx->pc += imm; break;
-                  case JLT_OPCODE:  if (ctx->R[0] <  cmp_imm) ctx->pc += imm; break;
-                  case JSET_OPCODE: if (ctx->R[0] &  cmp_imm) ctx->pc += imm; break;
-                  case JBSMATCH_OPCODE: {
-                      /* cmp_imm is size in bytes of data to compare. */
-                      /* pc is offset of program bytes to compare. */
-                      /* imm is jump target offset. */
-                      /* REG is offset of packet bytes to compare. */
-                      if (len_field > 2) return PASS_PACKET; /* guarantees cmp_imm <= 0xFFFF */
-                      /* pc < program_len < ram_len < 2GiB, thus pc + cmp_imm cannot wrap */
-                      if (!IN_RAM_BOUNDS(ctx->pc + cmp_imm - 1)) return PASS_PACKET;
-                      ASSERT_IN_PACKET_BOUNDS(REG);
-                      const u32 last_packet_offs = REG + cmp_imm - 1;
-                      ASSERT_RETURN(last_packet_offs >= REG);
-                      ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
-                      if (memcmp(ctx->program + ctx->pc, ctx->packet + REG, cmp_imm))
-                          ctx->pc += imm;
-                      /* skip past comparison bytes */
-                      ctx->pc += cmp_imm;
-                      break;
-                  }
-              }
-              break;
+            /* with len_field == 0, we have imm == cmp_imm == 0 and thus a jmp +0, ie. a no-op */
+            if (len_field == 0) break;
+            /* Load second immediate field. */
+            u32 cmp_imm_len = 1 << (len_field - 1);
+            u32 cmp_imm = decode_imm(ctx, cmp_imm_len); /* 2nd imm, at worst 8 bytes past prog_len */
+            /* cmp_imm is size in bytes of data to compare. */
+            /* pc is offset of program bytes to compare. */
+            /* imm is jump target offset. */
+            /* R0 is offset of packet bytes to compare. */
+            if (cmp_imm > 0xFFFF) return PASS_PACKET;
+            Boolean do_jump = !reg_num;
+            /* pc < program_len < ram_len < 2GiB, thus pc + cmp_imm cannot wrap */
+            if (!IN_RAM_BOUNDS(ctx->pc + cmp_imm - 1)) return PASS_PACKET;
+            ASSERT_IN_PACKET_BOUNDS(ctx->R[0]);
+            const u32 last_packet_offs = ctx->R[0] + cmp_imm - 1;
+            ASSERT_RETURN(last_packet_offs >= ctx->R[0]);
+            ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
+            do_jump ^= !memcmp(ctx->program + ctx->pc, ctx->packet + ctx->R[0], cmp_imm);
+            /* skip past comparison bytes */
+            ctx->pc += cmp_imm;
+            if (do_jump) ctx->pc += imm;
+            break;
           }
-          case ADD_OPCODE: ctx->R[0] += reg_num ? ctx->R[1] : imm; break;
-          case MUL_OPCODE: ctx->R[0] *= reg_num ? ctx->R[1] : imm; break;
-          case AND_OPCODE: ctx->R[0] &= reg_num ? ctx->R[1] : imm; break;
-          case OR_OPCODE:  ctx->R[0] |= reg_num ? ctx->R[1] : imm; break;
-          case DIV_OPCODE: {
-              const u32 div_operand = reg_num ? ctx->R[1] : imm;
-              ASSERT_RETURN(div_operand);
-              ctx->R[0] /= div_operand;
-              break;
+          /* There is a difference in APFv4 and APFv6 arithmetic behaviour! */
+          /* APFv4:  R[0] op= Rbit ? R[1] : imm;  (and it thus doesn't make sense to have R=1 && len_field>0) */
+          /* APFv6+: REG  op= len_field ? imm : OTHER_REG;  (note: this is *DIFFERENT* with R=1 len_field==0) */
+          /* Furthermore APFv4 uses unsigned imm (except SH), while APFv6 uses signed_imm for ADD/AND/SH. */
+          case ADD_OPCODE: ARITH_REG += (ctx->v6) ? (u32)arith_signed_imm : arith_imm; break;
+          case MUL_OPCODE: ARITH_REG *= arith_imm; break;
+          case AND_OPCODE: ARITH_REG &= (ctx->v6) ? (u32)arith_signed_imm : arith_imm; break;
+          case OR_OPCODE:  ARITH_REG |= arith_imm; break;
+          case DIV_OPCODE: {  /* see above comment! */
+            const u32 div_operand = arith_imm;
+            ASSERT_RETURN(div_operand);
+            ARITH_REG /= div_operand;
+            break;
           }
-          case SH_OPCODE: {
-              const s32 shift_val = reg_num ? (s32)ctx->R[1] : signed_imm;
-              if (shift_val > 0)
-                  ctx->R[0] <<= shift_val;
-              else
-                  ctx->R[0] >>= -shift_val;
-              break;
+          case SH_OPCODE: {  /* see above comment! */
+            if (arith_signed_imm >= 0)
+                ARITH_REG <<= arith_signed_imm;
+            else
+                ARITH_REG >>= -arith_signed_imm;
+            break;
           }
           case LI_OPCODE:
-              REG = (u32)signed_imm;
-              break;
+            REG = (u32)signed_imm;
+            break;
           case PKTDATACOPY_OPCODE:
-              pktcopy_src_offset = imm;
-              imm = PKTDATACOPYIMM_EXT_OPCODE;
-              FALLTHROUGH;
+            pktcopy_src_offset = imm;
+            imm = PKTDATACOPYIMM_EXT_OPCODE;
+            FALLTHROUGH;
           case EXT_OPCODE:
-              if (/* imm >= LDM_EXT_OPCODE &&  -- but note imm is u32 and LDM_EXT_OPCODE is 0 */
-                  imm < (LDM_EXT_OPCODE + MEMORY_ITEMS)) {
+            if (/* imm >= LDM_EXT_OPCODE &&  -- but note imm is u32 and LDM_EXT_OPCODE is 0 */
+                imm < (LDM_EXT_OPCODE + MEMORY_ITEMS)) {
                 REG = ctx->mem.slot[imm - LDM_EXT_OPCODE];
-              } else if (imm >= STM_EXT_OPCODE && imm < (STM_EXT_OPCODE + MEMORY_ITEMS)) {
+            } else if (imm >= STM_EXT_OPCODE && imm < (STM_EXT_OPCODE + MEMORY_ITEMS)) {
                 ctx->mem.slot[imm - STM_EXT_OPCODE] = REG;
-              } else switch (imm) {
-                  case NOT_EXT_OPCODE: REG = ~REG;      break;
-                  case NEG_EXT_OPCODE: REG = -REG;      break;
-                  case MOV_EXT_OPCODE: REG = OTHER_REG; break;
-                  case SWAP_EXT_OPCODE: {
-                    u32 tmp = REG;
-                    REG = OTHER_REG;
-                    OTHER_REG = tmp;
-                    break;
-                  }
-                  case ALLOCATE_EXT_OPCODE:
-                    ASSERT_RETURN(ctx->tx_buf == NULL);
-                    if (reg_num == 0) {
-                        ctx->tx_buf_len = REG;
-                    } else {
-                        ctx->tx_buf_len = decode_be16(ctx); /* 2nd imm, at worst 6 B past prog_len */
-                    }
-                    /* checksumming functions requires minimum 266 byte buffer for correctness */
-                    if (ctx->tx_buf_len < 266) ctx->tx_buf_len = 266;
-                    ctx->tx_buf = apf_allocate_buffer(ctx->caller_ctx, ctx->tx_buf_len);
-                    if (!ctx->tx_buf) {  /* allocate failure */
-                        ctx->tx_buf_len = 0;
-                        counter[-3]++;
-                        return PASS_PACKET;
-                    }
-                    memset(ctx->tx_buf, 0, ctx->tx_buf_len);
-                    ctx->mem.named.tx_buf_offset = 0;
-                    break;
-                  case TRANSMIT_EXT_OPCODE:
-                    ASSERT_RETURN(ctx->tx_buf);
-                    u32 pkt_len = ctx->mem.named.tx_buf_offset;
-                    /* If pkt_len > allocate_buffer_len, it means sth. wrong */
-                    /* happened and the tx_buf should be deallocated. */
-                    if (pkt_len > ctx->tx_buf_len) {
-                        do_discard_buffer(ctx);
-                        return PASS_PACKET;
-                    }
-                    /* tx_buf_len cannot be large because we'd run out of RAM, */
-                    /* so the above unsigned comparison effectively guarantees casting pkt_len */
-                    /* to a signed value does not result in it going negative. */
-                    u8 ip_ofs = DECODE_U8();              /* 2nd imm, at worst 5 B past prog_len */
-                    u8 csum_ofs = DECODE_U8();            /* 3rd imm, at worst 6 B past prog_len */
-                    u8 csum_start = 0;
-                    u16 partial_csum = 0;
-                    if (csum_ofs < 255) {
-                        csum_start = DECODE_U8();         /* 4th imm, at worst 7 B past prog_len */
-                        partial_csum = decode_be16(ctx);  /* 5th imm, at worst 9 B past prog_len */
-                    }
-                    int dscp = apf_internal_csum_and_return_dscp(ctx->tx_buf, (s32)pkt_len, ip_ofs,
-                                                    partial_csum, csum_start, csum_ofs,
-                                                    (Boolean)reg_num);
-                    int ret = apf_internal_do_transmit_buffer(ctx, pkt_len, dscp);
-                    if (ret) { counter[-4]++; return PASS_PACKET; } /* transmit failure */
-                    break;
-                  case EPKTDATACOPYIMM_EXT_OPCODE:  /* 41 */
-                  case EPKTDATACOPYR1_EXT_OPCODE:   /* 42 */
-                    pktcopy_src_offset = ctx->R[0];
-                    FALLTHROUGH;
-                  case PKTDATACOPYIMM_EXT_OPCODE: { /* 65536 */
-                    u32 copy_len = ctx->R[1];
-                    if (imm != EPKTDATACOPYR1_EXT_OPCODE) {
-                        copy_len = DECODE_U8();  /* 2nd imm, at worst 8 bytes past prog_len */
-                    }
-                    ASSERT_RETURN(ctx->tx_buf);
-                    u32 dst_offs = ctx->mem.named.tx_buf_offset;
-                    ASSERT_IN_OUTPUT_BOUNDS(dst_offs, copy_len);
-                    if (reg_num == 0) {  /* copy from packet */
-                        ASSERT_IN_PACKET_BOUNDS(pktcopy_src_offset);
-                        const u32 last_packet_offs = pktcopy_src_offset + copy_len - 1;
-                        ASSERT_RETURN(last_packet_offs >= pktcopy_src_offset);
-                        ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
-                        memcpy(ctx->tx_buf + dst_offs, ctx->packet + pktcopy_src_offset, copy_len);
-                    } else {  /* copy from data */
-                        ASSERT_IN_RAM_BOUNDS(pktcopy_src_offset + copy_len - 1);
-                        memcpy(ctx->tx_buf + dst_offs, ctx->program + pktcopy_src_offset, copy_len);
-                    }
-                    dst_offs += copy_len;
-                    ctx->mem.named.tx_buf_offset = dst_offs;
-                    break;
-                  }
-                  case JDNSQMATCH_EXT_OPCODE:       /* 43 */
-                  case JDNSAMATCH_EXT_OPCODE:       /* 44 */
-                  case JDNSQMATCHSAFE_EXT_OPCODE:   /* 45 */
-                  case JDNSAMATCHSAFE_EXT_OPCODE: { /* 46 */
-                    const u32 imm_len = 1 << (len_field - 1);
-                    u32 jump_offs = decode_imm(ctx, imm_len); /* 2nd imm, at worst 8 B past prog_len */
-                    int qtype = -1;
-                    if (imm & 1) { /* JDNSQMATCH & JDNSQMATCHSAFE are *odd* extended opcodes */
-                        qtype = DECODE_U8();  /* 3rd imm, at worst 9 bytes past prog_len */
-                    }
-                    u32 udp_payload_offset = ctx->R[0];
-                    match_result_type match_rst = apf_internal_match_names(ctx->program + ctx->pc,
-                                                              ctx->program + ctx->program_len,
-                                                              ctx->packet + udp_payload_offset,
-                                                              ctx->packet_len - udp_payload_offset,
-                                                              qtype);
-                    if (match_rst == error_program) return PASS_PACKET;
-                    if (match_rst == error_packet) {
-                        counter[-5]++; /* increment error dns packet counter */
-                        return (imm >= JDNSQMATCHSAFE_EXT_OPCODE) ? PASS_PACKET : DROP_PACKET;
-                    }
-                    while (ctx->pc + 1 < ctx->program_len &&
-                           (ctx->program[ctx->pc] || ctx->program[ctx->pc + 1])) {
-                        ctx->pc++;
-                    }
-                    ctx->pc += 2;  /* skip the final double 0 needle end */
-                    /* relies on reg_num in {0,1} and match_rst being {False=0, True=1} */
-                    if (!(reg_num ^ (u32)match_rst)) ctx->pc += jump_offs;
-                    break;
-                  }
-                  case EWRITE1_EXT_OPCODE:
-                  case EWRITE2_EXT_OPCODE:
-                  case EWRITE4_EXT_OPCODE: {
-                    ASSERT_RETURN(ctx->tx_buf);
-                    const u32 write_len = 1 << (imm - EWRITE1_EXT_OPCODE);
-                    ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
-                    u32 i;
-                    for (i = 0; i < write_len; ++i) {
-                        ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
-                            (u8)(REG >> (write_len - 1 - i) * 8);
-                    }
-                    break;
-                  }
-                  default:  /* Unknown extended opcode */
-                    return PASS_PACKET;  /* Bail out */
+            } else switch (imm) {
+              case NOT_EXT_OPCODE: REG = ~REG;      break;
+              case NEG_EXT_OPCODE: REG = -REG;      break;
+              case MOV_EXT_OPCODE: REG = OTHER_REG; break;
+              case SWAP_EXT_OPCODE: {
+                u32 tmp = REG;
+                REG = OTHER_REG;
+                OTHER_REG = tmp;
+                break;
               }
-              break;
-          case LDDW_OPCODE: {
-              u32 offs = OTHER_REG + (u32)signed_imm;
-              u32 size = 4;
-              u32 val = 0;
-              /* Negative offsets wrap around the end of the address space. */
-              /* This allows us to efficiently access the end of the */
-              /* address space with one-byte immediates without using %=. */
-              if (offs & 0x80000000) offs += ctx->ram_len;  /* unsigned overflow intended */
-              ASSERT_IN_DATA_BOUNDS(offs, size);
-              while (size--) val = (val << 8) | ctx->program[offs++];
-              REG = val;
-              break;
-          }
-          case STDW_OPCODE: {
-              u32 offs = OTHER_REG + (u32)signed_imm;
-              u32 size = 4;
-              u32 val = REG;
-              /* Negative offsets wrap around the end of the address space. */
-              /* This allows us to efficiently access the end of the */
-              /* address space with one-byte immediates without using %=. */
-              if (offs & 0x80000000) offs += ctx->ram_len;  /* unsigned overflow intended */
-              ASSERT_IN_DATA_BOUNDS(offs, size);
-              while (size--) {
-                  ctx->program[offs++] = (val >> 24);
-                  val <<= 8;
+              case ALLOCATE_EXT_OPCODE:
+                ASSERT_RETURN(ctx->tx_buf == NULL);
+                if (reg_num == 0) {
+                    ctx->tx_buf_len = REG;
+                } else {
+                    ctx->tx_buf_len = decode_be16(ctx); /* 2nd imm, at worst 6 B past prog_len */
+                }
+                /* checksumming functions require a minimum 266 byte buffer for correctness */
+                if (ctx->tx_buf_len < 266) ctx->tx_buf_len = 266;
+                ctx->tx_buf = apf_allocate_buffer(ctx->caller_ctx, ctx->tx_buf_len);
+                if (!ctx->tx_buf) {  /* allocate failure */
+                    ctx->tx_buf_len = 0;
+                    counter[-3]++;
+                    return PASS_PACKET;
+                }
+                memset(ctx->tx_buf, 0, ctx->tx_buf_len);
+                ctx->mem.named.tx_buf_offset = 0;
+                break;
+              case TRANSMIT_EXT_OPCODE:
+                ASSERT_RETURN(ctx->tx_buf);
+                u32 pkt_len = ctx->mem.named.tx_buf_offset;
+                /* If pkt_len > allocate_buffer_len, it means something went */
+                /* wrong and the tx_buf should be deallocated. */
+                if (pkt_len > ctx->tx_buf_len) {
+                    do_discard_buffer(ctx);
+                    return PASS_PACKET;
+                }
+                /* tx_buf_len cannot be large because we'd run out of RAM, */
+                /* so the above unsigned comparison effectively guarantees casting pkt_len */
+                /* to a signed value does not result in it going negative. */
+                u8 ip_ofs = DECODE_U8();              /* 2nd imm, at worst 5 B past prog_len */
+                u8 csum_ofs = DECODE_U8();            /* 3rd imm, at worst 6 B past prog_len */
+                u8 csum_start = 0;
+                u16 partial_csum = 0;
+                if (csum_ofs < 255) {
+                    csum_start = DECODE_U8();         /* 4th imm, at worst 7 B past prog_len */
+                    partial_csum = decode_be16(ctx);  /* 5th imm, at worst 9 B past prog_len */
+                }
+                int dscp = apf_internal_csum_and_return_dscp(ctx->tx_buf, (s32)pkt_len, ip_ofs,
+                                                partial_csum, csum_start, csum_ofs,
+                                                (Boolean)reg_num);
+                int ret = apf_internal_do_transmit_buffer(ctx, pkt_len, dscp);
+                if (ret) { counter[-4]++; return PASS_PACKET; } /* transmit failure */
+                break;
+              case EPKTDATACOPYIMM_EXT_OPCODE:  /* 41 */
+              case EPKTDATACOPYR1_EXT_OPCODE:   /* 42 */
+                pktcopy_src_offset = ctx->R[0];
+                FALLTHROUGH;
+              case PKTDATACOPYIMM_EXT_OPCODE: { /* 65536 */
+                u32 copy_len = ctx->R[1];
+                if (imm != EPKTDATACOPYR1_EXT_OPCODE) {
+                    copy_len = DECODE_U8();  /* 2nd imm, at worst 8 bytes past prog_len */
+                }
+                ASSERT_RETURN(ctx->tx_buf);
+                u32 dst_offs = ctx->mem.named.tx_buf_offset;
+                ASSERT_IN_OUTPUT_BOUNDS(dst_offs, copy_len);
+                if (reg_num == 0) {  /* copy from packet */
+                    ASSERT_IN_PACKET_BOUNDS(pktcopy_src_offset);
+                    const u32 last_packet_offs = pktcopy_src_offset + copy_len - 1;
+                    ASSERT_RETURN(last_packet_offs >= pktcopy_src_offset);
+                    ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
+                    memcpy(ctx->tx_buf + dst_offs, ctx->packet + pktcopy_src_offset, copy_len);
+                } else {  /* copy from data */
+                    ASSERT_IN_RAM_BOUNDS(pktcopy_src_offset + copy_len - 1);
+                    memcpy(ctx->tx_buf + dst_offs, ctx->program + pktcopy_src_offset, copy_len);
+                }
+                dst_offs += copy_len;
+                ctx->mem.named.tx_buf_offset = dst_offs;
+                break;
               }
-              break;
-          }
+              case JDNSQMATCH_EXT_OPCODE:       /* 43 */
+              case JDNSAMATCH_EXT_OPCODE:       /* 44 */
+              case JDNSQMATCHSAFE_EXT_OPCODE:   /* 45 */
+              case JDNSAMATCHSAFE_EXT_OPCODE: { /* 46 */
+                const u32 imm_len = 1 << (len_field - 1); /* EXT_OPCODE, thus len_field > 0 */
+                u32 jump_offs = decode_imm(ctx, imm_len); /* 2nd imm, at worst 8 B past prog_len */
+                int qtype = -1;
+                if (imm & 1) { /* JDNSQMATCH & JDNSQMATCHSAFE are *odd* extended opcodes */
+                    qtype = DECODE_U8();  /* 3rd imm, at worst 9 bytes past prog_len */
+                }
+                u32 udp_payload_offset = ctx->R[0];
+                match_result_type match_rst = apf_internal_match_names(ctx->program + ctx->pc,
+                                                          ctx->program + ctx->program_len,
+                                                          ctx->packet + udp_payload_offset,
+                                                          ctx->packet_len - udp_payload_offset,
+                                                          qtype);
+                if (match_rst == error_program) return PASS_PACKET;
+                if (match_rst == error_packet) {
+                    counter[-5]++; /* increment error dns packet counter */
+                    return (imm >= JDNSQMATCHSAFE_EXT_OPCODE) ? PASS_PACKET : DROP_PACKET;
+                }
+                while (ctx->pc + 1 < ctx->program_len &&
+                       (ctx->program[ctx->pc] || ctx->program[ctx->pc + 1])) {
+                    ctx->pc++;
+                }
+                ctx->pc += 2;  /* skip the final double 0 needle end */
+                /* relies on reg_num in {0,1} and match_rst being {False=0, True=1} */
+                if (!(reg_num ^ (u32)match_rst)) ctx->pc += jump_offs;
+                break;
+              }
+              case EWRITE1_EXT_OPCODE:
+              case EWRITE2_EXT_OPCODE:
+              case EWRITE4_EXT_OPCODE: {
+                ASSERT_RETURN(ctx->tx_buf);
+                const u32 write_len = 1 << (imm - EWRITE1_EXT_OPCODE);
+                ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
+                u32 i;
+                for (i = 0; i < write_len; ++i) {
+                    ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
+                        (u8)(REG >> (write_len - 1 - i) * 8);
+                }
+                break;
+              }
+              case JONEOF_EXT_OPCODE: {
+                const u32 imm_len = 1 << (len_field - 1); /* ext opcode len_field guaranteed > 0 */
+                u32 jump_offs = decode_imm(ctx, imm_len); /* 2nd imm, at worst 8 B past prog_len */
+                u8 imm3 = DECODE_U8();  /* 3rd imm, at worst 9 bytes past prog_len */
+                Boolean jmp = imm3 & 1;  /* =0 jmp on match, =1 jmp on no match */
+                u8 len = ((imm3 >> 1) & 3) + 1;  /* size [1..4] in bytes of an element */
+                u8 cnt = (imm3 >> 3) + 1;  /* number [1..32] of elements in set */
+                if (ctx->pc + cnt * len > ctx->program_len) return PASS_PACKET;
+                while (cnt--) {
+                    u32 v = 0;
+                    int i;
+                    for (i = 0; i < len; ++i) v = (v << 8) | DECODE_U8();
+                    if (REG == v) jmp ^= True;
+                }
+                if (jmp) ctx->pc += jump_offs;
+                return PASS_PACKET;
+              }
+              default:  /* Unknown extended opcode */
+                return PASS_PACKET;  /* Bail out */
+            }
+            break;
+          case LDDW_OPCODE:
+          case STDW_OPCODE:
+            if (ctx->v6) {
+                if (!imm) return PASS_PACKET;
+                if (imm > 0xFFFF) return PASS_PACKET;
+                if (imm * 4 > ctx->ram_len) return PASS_PACKET;
+                if (opcode == LDDW_OPCODE) {
+                    REG = counter[-(s32)imm];
+                } else {
+                    counter[-(s32)imm] = REG;
+                }
+            } else {
+                u32 offs = OTHER_REG + (u32)signed_imm;
+                /* Negative offsets wrap around the end of the address space. */
+                /* This allows us to efficiently access the end of the */
+                /* address space with one-byte immediates without using %=. */
+                if (offs & 0x80000000) offs += ctx->ram_len;  /* unsigned overflow intended */
+                u32 size = 4;
+                ASSERT_IN_DATA_BOUNDS(offs, size);
+                if (opcode == LDDW_OPCODE) {
+                    u32 val = 0;
+                    while (size--) val = (val << 8) | ctx->program[offs++];
+                    REG = val;
+                } else {
+                    u32 val = REG;
+                    while (size--) {
+                        ctx->program[offs++] = (val >> 24);
+                        val <<= 8;
+                    }
+                }
+            }
+            break;
           case WRITE_OPCODE: {
-              ASSERT_RETURN(ctx->tx_buf);
-              ASSERT_RETURN(len_field);
-              const u32 write_len = 1 << (len_field - 1);
-              ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
-              u32 i;
-              for (i = 0; i < write_len; ++i) {
-                  ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
-                      (u8)(imm >> (write_len - 1 - i) * 8);
-              }
-              break;
+            ASSERT_RETURN(ctx->tx_buf);
+            ASSERT_RETURN(len_field);
+            const u32 write_len = 1 << (len_field - 1);
+            ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
+            u32 i;
+            for (i = 0; i < write_len; ++i) {
+                ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
+                    (u8)(imm >> (write_len - 1 - i) * 8);
+            }
+            break;
           }
           default:  /* Unknown opcode */
-              return PASS_PACKET;  /* Bail out */
-      }
-  } while (instructions_remaining--);
-  return PASS_PACKET;
+            return PASS_PACKET;  /* Bail out */
+        }
+    } while (instructions_remaining--);
+    return PASS_PACKET;
 }
 
 int apf_run(void* ctx, u32* const program, const u32 program_len,
             const u32 ram_len, const u8* const packet,
             const u32 packet_len, const u32 filter_age_16384ths) {
-  /* Due to direct 32-bit read/write access to counters at end of ram */
-  /* APFv6 interpreter requires program & ram_len to be 4 byte aligned. */
-  if (3 & (uintptr_t)program) return PASS_PACKET;
-  if (3 & ram_len) return PASS_PACKET;
+    /* Due to direct 32-bit read/write access to counters at end of ram */
+    /* APFv6 interpreter requires program & ram_len to be 4 byte aligned. */
+    if (3 & (uintptr_t)program) return PASS_PACKET;
+    if (3 & ram_len) return PASS_PACKET;
 
-  /* We rely on ram_len + 65536 not overflowing, so require ram_len < 2GiB */
-  /* Similarly LDDW/STDW have special meaning for negative ram offsets. */
-  /* We also don't want garbage like program_len == 0xFFFFFFFF */
-  if ((program_len | ram_len) >> 31) return PASS_PACKET;
+    /* We rely on ram_len + 65536 not overflowing, so require ram_len < 2GiB */
+    /* Similarly LDDW/STDW have special meaning for negative ram offsets. */
+    /* We also don't want garbage like program_len == 0xFFFFFFFF */
+    if ((program_len | ram_len) >> 31) return PASS_PACKET;
 
-  /* APFv6 requires at least 5 u32 counters at the end of ram, this makes counter[-5]++ valid */
-  /* This cannot wrap due to previous check. */
-  if (program_len + 20 > ram_len) return PASS_PACKET;
+    /* APFv6 requires at least 5 u32 counters at the end of ram, this makes counter[-5]++ valid */
+    /* This cannot wrap due to previous check. */
+    if (program_len + 20 > ram_len) return PASS_PACKET;
 
-  apf_context apf_ctx = {};
-  apf_ctx.caller_ctx = ctx;
-  apf_ctx.program = (u8*)program;
-  apf_ctx.program_len = program_len;
-  apf_ctx.ram_len = ram_len;
-  apf_ctx.packet = packet;
-  apf_ctx.packet_len = packet_len;
-  /* Fill in pre-filled memory slot values. */
-  apf_ctx.mem.named.program_size = program_len;
-  apf_ctx.mem.named.ram_len = ram_len;
-  apf_ctx.mem.named.packet_size = packet_len;
-  apf_ctx.mem.named.filter_age = filter_age_16384ths >> 14;
-  apf_ctx.mem.named.filter_age_16384ths = filter_age_16384ths;
+    apf_context apf_ctx = {};
+    apf_ctx.caller_ctx = ctx;
+    apf_ctx.program = (u8*)program;
+    apf_ctx.program_len = program_len;
+    apf_ctx.ram_len = ram_len;
+    apf_ctx.packet = packet;
+    apf_ctx.packet_len = packet_len;
+    /* Fill in pre-filled memory slot values. */
+    apf_ctx.mem.named.program_size = program_len;
+    apf_ctx.mem.named.ram_len = ram_len;
+    apf_ctx.mem.named.packet_size = packet_len;
+    apf_ctx.mem.named.apf_version = apf_version();
+    apf_ctx.mem.named.filter_age = filter_age_16384ths >> 14;
+    apf_ctx.mem.named.filter_age_16384ths = filter_age_16384ths;
 
-  int ret = do_apf_run(&apf_ctx);
-  if (apf_ctx.tx_buf) do_discard_buffer(&apf_ctx);
-  return ret;
+    int ret = do_apf_run(&apf_ctx);
+    if (apf_ctx.tx_buf) do_discard_buffer(&apf_ctx);
+    return ret;
 }
diff --git a/v5/apf_interpreter_source.c b/v5/apf_interpreter_source.c
index 24dbbca..197c152 100644
--- a/v5/apf_interpreter_source.c
+++ b/v5/apf_interpreter_source.c
@@ -61,7 +61,7 @@
 #define ENFORCE_UNSIGNED(c) ((c)==(u32)(c))
 
 u32 apf_version(void) {
-    return 20240226;
+    return 20240315;
 }
 
 typedef struct {
@@ -126,19 +126,19 @@
 // Accept packet if not within data bounds
 #define ASSERT_IN_DATA_BOUNDS(p, size) ASSERT_RETURN(IN_DATA_BOUNDS(p, size))
 
-  // Counters start at end of RAM and count *backwards* so this array takes negative integers.
-  u32 *counter = (u32*)(ctx->program + ctx->ram_len);
+    // Counters start at end of RAM and count *backwards* so this array takes negative integers.
+    u32 *counter = (u32*)(ctx->program + ctx->ram_len);
 
-  ASSERT_IN_PACKET_BOUNDS(ETH_HLEN);
-  // Only populate if IP version is IPv4.
-  if ((ctx->packet[ETH_HLEN] & 0xf0) == 0x40) {
-      ctx->mem.named.ipv4_header_size = (ctx->packet[ETH_HLEN] & 15) * 4;
-  }
-  // Count of instructions remaining to execute. This is done to ensure an
-  // upper bound on execution time. It should never be hit and is only for
-  // safety. Initialize to the number of bytes in the program which is an
-  // upper bound on the number of instructions in the program.
-  u32 instructions_remaining = ctx->program_len;
+    ASSERT_IN_PACKET_BOUNDS(ETH_HLEN);
+    // Only populate if IP version is IPv4.
+    if ((ctx->packet[ETH_HLEN] & 0xf0) == 0x40) {
+        ctx->mem.named.ipv4_header_size = (ctx->packet[ETH_HLEN] & 15) * 4;
+    }
+    // Count of instructions remaining to execute. This is done to ensure an
+    // upper bound on execution time. It should never be hit and is only for
+    // safety. Initialize to the number of bytes in the program which is an
+    // upper bound on the number of instructions in the program.
+    u32 instructions_remaining = ctx->program_len;
 
 // Is access to offset |p| length |size| within output buffer bounds?
 #define IN_OUTPUT_BOUNDS(p, size) (ENFORCE_UNSIGNED(p) && \
@@ -148,38 +148,43 @@
 // Accept packet if not write within allocated output buffer
 #define ASSERT_IN_OUTPUT_BOUNDS(p, size) ASSERT_RETURN(IN_OUTPUT_BOUNDS(p, size))
 
-  do {
-      APF_TRACE_HOOK(ctx->pc, ctx->R, ctx->program, ctx->program_len,
-                     ctx->packet, ctx->packet_len, ctx->mem.slot, ctx->ram_len);
-      if (ctx->pc == ctx->program_len + 1) return DROP_PACKET;
-      if (ctx->pc >= ctx->program_len) return PASS_PACKET;
+    do {
+        APF_TRACE_HOOK(ctx->pc, ctx->R, ctx->program, ctx->program_len,
+                       ctx->packet, ctx->packet_len, ctx->mem.slot, ctx->ram_len);
+        if (ctx->pc == ctx->program_len + 1) return DROP_PACKET;
+        if (ctx->pc >= ctx->program_len) return PASS_PACKET;
 
-      const u8 bytecode = ctx->program[ctx->pc++];
-      const u32 opcode = EXTRACT_OPCODE(bytecode);
-      const u32 reg_num = EXTRACT_REGISTER(bytecode);
+        const u8 bytecode = ctx->program[ctx->pc++];
+        const u32 opcode = EXTRACT_OPCODE(bytecode);
+        const u32 reg_num = EXTRACT_REGISTER(bytecode);
 #define REG (ctx->R[reg_num])
 #define OTHER_REG (ctx->R[reg_num ^ 1])
-      // All instructions have immediate fields, so load them now.
-      const u32 len_field = EXTRACT_IMM_LENGTH(bytecode);
-      u32 imm = 0;
-      s32 signed_imm = 0;
-      if (len_field != 0) {
-          const u32 imm_len = 1 << (len_field - 1);
-          imm = decode_imm(ctx, imm_len); // 1st imm, at worst bytes 1-4 past opcode/program_len
-          // Sign extend imm into signed_imm.
-          signed_imm = (s32)(imm << ((4 - imm_len) * 8));
-          signed_imm >>= (4 - imm_len) * 8;
-      }
+        // All instructions have immediate fields, so load them now.
+        const u32 len_field = EXTRACT_IMM_LENGTH(bytecode);
+        u32 imm = 0;
+        s32 signed_imm = 0;
+        if (len_field != 0) {
+            const u32 imm_len = 1 << (len_field - 1);
+            imm = decode_imm(ctx, imm_len); // 1st imm, at worst bytes 1-4 past opcode/program_len
+            // Sign extend imm into signed_imm.
+            signed_imm = (s32)(imm << ((4 - imm_len) * 8));
+            signed_imm >>= (4 - imm_len) * 8;
+        }
 
-      u32 pktcopy_src_offset = 0;  // used for various pktdatacopy opcodes
-      switch (opcode) {
-          case PASSDROP_OPCODE: {
-              if (len_field > 2) return PASS_PACKET;  // max 64K counters (ie. imm < 64K)
-              if (imm) {
-                  if (4 * imm > ctx->ram_len) return PASS_PACKET;
-                  counter[-(s32)imm]++;
-              }
-              return reg_num ? DROP_PACKET : PASS_PACKET;
+        // See comment at ADD_OPCODE for the reason for ARITH_REG/arith_imm/arith_signed_imm.
+#define ARITH_REG (ctx->R[reg_num & ctx->v6])
+        u32 arith_imm = (ctx->v6) ? (len_field ? imm : OTHER_REG) : (reg_num ? ctx->R[1] : imm);
+        s32 arith_signed_imm = (ctx->v6) ? (len_field ? signed_imm : (s32)OTHER_REG) : (reg_num ? (s32)ctx->R[1] : signed_imm);
+
+        u32 pktcopy_src_offset = 0;  // used for various pktdatacopy opcodes
+        switch (opcode) {
+          case PASSDROP_OPCODE: {  // APFv6+
+            if (len_field > 2) return PASS_PACKET;  // max 64K counters (ie. imm < 64K)
+            if (imm) {
+                if (4 * imm > ctx->ram_len) return PASS_PACKET;
+                counter[-(s32)imm]++;
+            }
+            return reg_num ? DROP_PACKET : PASS_PACKET;
           }
           case LDB_OPCODE:
           case LDH_OPCODE:
@@ -187,324 +192,356 @@
           case LDBX_OPCODE:
           case LDHX_OPCODE:
           case LDWX_OPCODE: {
-              u32 offs = imm;
-              // Note: this can overflow and actually decrease offs.
-              if (opcode >= LDBX_OPCODE) offs += ctx->R[1];
-              ASSERT_IN_PACKET_BOUNDS(offs);
-              u32 load_size = 0;
-              switch (opcode) {
-                  case LDB_OPCODE:
-                  case LDBX_OPCODE:
-                    load_size = 1;
-                    break;
-                  case LDH_OPCODE:
-                  case LDHX_OPCODE:
-                    load_size = 2;
-                    break;
-                  case LDW_OPCODE:
-                  case LDWX_OPCODE:
-                    load_size = 4;
-                    break;
-                  // Immediately enclosing switch statement guarantees
-                  // opcode cannot be any other value.
-              }
-              const u32 end_offs = offs + (load_size - 1);
-              // Catch overflow/wrap-around.
-              ASSERT_RETURN(end_offs >= offs);
-              ASSERT_IN_PACKET_BOUNDS(end_offs);
-              u32 val = 0;
-              while (load_size--) val = (val << 8) | ctx->packet[offs++];
-              REG = val;
-              break;
+            u32 offs = imm;
+            // Note: this can overflow and actually decrease offs.
+            if (opcode >= LDBX_OPCODE) offs += ctx->R[1];
+            ASSERT_IN_PACKET_BOUNDS(offs);
+            u32 load_size = 0;
+            switch (opcode) {
+              case LDB_OPCODE:
+              case LDBX_OPCODE:
+                load_size = 1;
+                break;
+              case LDH_OPCODE:
+              case LDHX_OPCODE:
+                load_size = 2;
+                break;
+              case LDW_OPCODE:
+              case LDWX_OPCODE:
+                load_size = 4;
+                break;
+              // Immediately enclosing switch statement guarantees
+              // opcode cannot be any other value.
+            }
+            const u32 end_offs = offs + (load_size - 1);
+            // Catch overflow/wrap-around.
+            ASSERT_RETURN(end_offs >= offs);
+            ASSERT_IN_PACKET_BOUNDS(end_offs);
+            u32 val = 0;
+            while (load_size--) val = (val << 8) | ctx->packet[offs++];
+            REG = val;
+            break;
           }
           case JMP_OPCODE:
-              if (reg_num && !ctx->v6) {
+            if (reg_num && !ctx->v6) {  // APFv6+
                 // First invocation of APFv6 jmpdata instruction
                 counter[-1] = 0x12345678;  // endianness marker
                 counter[-2]++;  // total packets ++
                 ctx->v6 = (u8)true;
-              }
-              // This can jump backwards. Infinite looping prevented by instructions_remaining.
-              ctx->pc += imm;
-              break;
+            }
+            // This can jump backwards. Infinite looping prevented by instructions_remaining.
+            ctx->pc += imm;
+            break;
           case JEQ_OPCODE:
           case JNE_OPCODE:
           case JGT_OPCODE:
           case JLT_OPCODE:
-          case JSET_OPCODE:
+          case JSET_OPCODE: {
+            // with len_field == 0, we have imm == 0 and thus a jmp +0, ie. a no-op
+            if (len_field == 0) break;
+            // Load second immediate field.
+            u32 cmp_imm = 0;
+            if (reg_num == 1) {
+                cmp_imm = ctx->R[1];
+            } else {
+                u32 cmp_imm_len = 1 << (len_field - 1);
+                cmp_imm = decode_imm(ctx, cmp_imm_len); // 2nd imm, at worst 8 bytes past prog_len
+            }
+            switch (opcode) {
+              case JEQ_OPCODE:  if (ctx->R[0] == cmp_imm) ctx->pc += imm; break;
+              case JNE_OPCODE:  if (ctx->R[0] != cmp_imm) ctx->pc += imm; break;
+              case JGT_OPCODE:  if (ctx->R[0] >  cmp_imm) ctx->pc += imm; break;
+              case JLT_OPCODE:  if (ctx->R[0] <  cmp_imm) ctx->pc += imm; break;
+              case JSET_OPCODE: if (ctx->R[0] &  cmp_imm) ctx->pc += imm; break;
+            }
+            break;
+          }
           case JBSMATCH_OPCODE: {
-              // Load second immediate field.
-              u32 cmp_imm = 0;
-              if (reg_num == 1) {
-                  cmp_imm = ctx->R[1];
-              } else if (len_field != 0) {
-                  u32 cmp_imm_len = 1 << (len_field - 1);
-                  cmp_imm = decode_imm(ctx, cmp_imm_len); // 2nd imm, at worst 8 bytes past prog_len
-              }
-              switch (opcode) {
-                  case JEQ_OPCODE:  if (ctx->R[0] == cmp_imm) ctx->pc += imm; break;
-                  case JNE_OPCODE:  if (ctx->R[0] != cmp_imm) ctx->pc += imm; break;
-                  case JGT_OPCODE:  if (ctx->R[0] >  cmp_imm) ctx->pc += imm; break;
-                  case JLT_OPCODE:  if (ctx->R[0] <  cmp_imm) ctx->pc += imm; break;
-                  case JSET_OPCODE: if (ctx->R[0] &  cmp_imm) ctx->pc += imm; break;
-                  case JBSMATCH_OPCODE: {
-                      // cmp_imm is size in bytes of data to compare.
-                      // pc is offset of program bytes to compare.
-                      // imm is jump target offset.
-                      // REG is offset of packet bytes to compare.
-                      if (len_field > 2) return PASS_PACKET; // guarantees cmp_imm <= 0xFFFF
-                      // pc < program_len < ram_len < 2GiB, thus pc + cmp_imm cannot wrap
-                      if (!IN_RAM_BOUNDS(ctx->pc + cmp_imm - 1)) return PASS_PACKET;
-                      ASSERT_IN_PACKET_BOUNDS(REG);
-                      const u32 last_packet_offs = REG + cmp_imm - 1;
-                      ASSERT_RETURN(last_packet_offs >= REG);
-                      ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
-                      if (memcmp(ctx->program + ctx->pc, ctx->packet + REG, cmp_imm))
-                          ctx->pc += imm;
-                      // skip past comparison bytes
-                      ctx->pc += cmp_imm;
-                      break;
-                  }
-              }
-              break;
+            // with len_field == 0, we have imm == cmp_imm == 0 and thus a jmp +0, ie. a no-op
+            if (len_field == 0) break;
+            // Load second immediate field.
+            u32 cmp_imm_len = 1 << (len_field - 1);
+            u32 cmp_imm = decode_imm(ctx, cmp_imm_len); // 2nd imm, at worst 8 bytes past prog_len
+            // cmp_imm is size in bytes of data to compare.
+            // pc is offset of program bytes to compare.
+            // imm is jump target offset.
+            // R0 is offset of packet bytes to compare.
+            if (cmp_imm > 0xFFFF) return PASS_PACKET;
+            bool do_jump = !reg_num;
+            // pc < program_len < ram_len < 2GiB, thus pc + cmp_imm cannot wrap
+            if (!IN_RAM_BOUNDS(ctx->pc + cmp_imm - 1)) return PASS_PACKET;
+            ASSERT_IN_PACKET_BOUNDS(ctx->R[0]);
+            const u32 last_packet_offs = ctx->R[0] + cmp_imm - 1;
+            ASSERT_RETURN(last_packet_offs >= ctx->R[0]);
+            ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
+            do_jump ^= !memcmp(ctx->program + ctx->pc, ctx->packet + ctx->R[0], cmp_imm);
+            // skip past comparison bytes
+            ctx->pc += cmp_imm;
+            if (do_jump) ctx->pc += imm;
+            break;
           }
-          case ADD_OPCODE: ctx->R[0] += reg_num ? ctx->R[1] : imm; break;
-          case MUL_OPCODE: ctx->R[0] *= reg_num ? ctx->R[1] : imm; break;
-          case AND_OPCODE: ctx->R[0] &= reg_num ? ctx->R[1] : imm; break;
-          case OR_OPCODE:  ctx->R[0] |= reg_num ? ctx->R[1] : imm; break;
-          case DIV_OPCODE: {
-              const u32 div_operand = reg_num ? ctx->R[1] : imm;
-              ASSERT_RETURN(div_operand);
-              ctx->R[0] /= div_operand;
-              break;
+          // There is a difference in APFv4 and APFv6 arithmetic behaviour!
+          // APFv4:  R[0] op= Rbit ? R[1] : imm;  (and it thus doesn't make sense to have R=1 && len_field>0)
+          // APFv6+: REG  op= len_field ? imm : OTHER_REG;  (note: this is *DIFFERENT* with R=1 len_field==0)
+          // Furthermore APFv4 uses unsigned imm (except SH), while APFv6 uses signed_imm for ADD/AND/SH.
+          case ADD_OPCODE: ARITH_REG += (ctx->v6) ? (u32)arith_signed_imm : arith_imm; break;
+          case MUL_OPCODE: ARITH_REG *= arith_imm; break;
+          case AND_OPCODE: ARITH_REG &= (ctx->v6) ? (u32)arith_signed_imm : arith_imm; break;
+          case OR_OPCODE:  ARITH_REG |= arith_imm; break;
+          case DIV_OPCODE: {  // see above comment!
+            const u32 div_operand = arith_imm;
+            ASSERT_RETURN(div_operand);
+            ARITH_REG /= div_operand;
+            break;
           }
-          case SH_OPCODE: {
-              const s32 shift_val = reg_num ? (s32)ctx->R[1] : signed_imm;
-              if (shift_val > 0)
-                  ctx->R[0] <<= shift_val;
-              else
-                  ctx->R[0] >>= -shift_val;
-              break;
+          case SH_OPCODE: {  // see above comment!
+            if (arith_signed_imm >= 0)
+                ARITH_REG <<= arith_signed_imm;
+            else
+                ARITH_REG >>= -arith_signed_imm;
+            break;
           }
           case LI_OPCODE:
-              REG = (u32)signed_imm;
-              break;
+            REG = (u32)signed_imm;
+            break;
           case PKTDATACOPY_OPCODE:
-              pktcopy_src_offset = imm;
-              imm = PKTDATACOPYIMM_EXT_OPCODE;
-              FALLTHROUGH;
+            pktcopy_src_offset = imm;
+            imm = PKTDATACOPYIMM_EXT_OPCODE;
+            FALLTHROUGH;
           case EXT_OPCODE:
-              if (// imm >= LDM_EXT_OPCODE &&  -- but note imm is u32 and LDM_EXT_OPCODE is 0
-                  imm < (LDM_EXT_OPCODE + MEMORY_ITEMS)) {
+            if (// imm >= LDM_EXT_OPCODE &&  -- but note imm is u32 and LDM_EXT_OPCODE is 0
+                imm < (LDM_EXT_OPCODE + MEMORY_ITEMS)) {
                 REG = ctx->mem.slot[imm - LDM_EXT_OPCODE];
-              } else if (imm >= STM_EXT_OPCODE && imm < (STM_EXT_OPCODE + MEMORY_ITEMS)) {
+            } else if (imm >= STM_EXT_OPCODE && imm < (STM_EXT_OPCODE + MEMORY_ITEMS)) {
                 ctx->mem.slot[imm - STM_EXT_OPCODE] = REG;
-              } else switch (imm) {
-                  case NOT_EXT_OPCODE: REG = ~REG;      break;
-                  case NEG_EXT_OPCODE: REG = -REG;      break;
-                  case MOV_EXT_OPCODE: REG = OTHER_REG; break;
-                  case SWAP_EXT_OPCODE: {
-                    u32 tmp = REG;
-                    REG = OTHER_REG;
-                    OTHER_REG = tmp;
-                    break;
-                  }
-                  case ALLOCATE_EXT_OPCODE:
-                    ASSERT_RETURN(ctx->tx_buf == NULL);
-                    if (reg_num == 0) {
-                        ctx->tx_buf_len = REG;
-                    } else {
-                        ctx->tx_buf_len = decode_be16(ctx); // 2nd imm, at worst 6 B past prog_len
-                    }
-                    // checksumming functions requires minimum 266 byte buffer for correctness
-                    if (ctx->tx_buf_len < 266) ctx->tx_buf_len = 266;
-                    ctx->tx_buf = apf_allocate_buffer(ctx->caller_ctx, ctx->tx_buf_len);
-                    if (!ctx->tx_buf) {  // allocate failure
-                        ctx->tx_buf_len = 0;
-                        counter[-3]++;
-                        return PASS_PACKET;
-                    }
-                    memset(ctx->tx_buf, 0, ctx->tx_buf_len);
-                    ctx->mem.named.tx_buf_offset = 0;
-                    break;
-                  case TRANSMIT_EXT_OPCODE:
-                    ASSERT_RETURN(ctx->tx_buf);
-                    u32 pkt_len = ctx->mem.named.tx_buf_offset;
-                    // If pkt_len > allocate_buffer_len, it means sth. wrong
-                    // happened and the tx_buf should be deallocated.
-                    if (pkt_len > ctx->tx_buf_len) {
-                        do_discard_buffer(ctx);
-                        return PASS_PACKET;
-                    }
-                    // tx_buf_len cannot be large because we'd run out of RAM,
-                    // so the above unsigned comparison effectively guarantees casting pkt_len
-                    // to a signed value does not result in it going negative.
-                    u8 ip_ofs = DECODE_U8();              // 2nd imm, at worst 5 B past prog_len
-                    u8 csum_ofs = DECODE_U8();            // 3rd imm, at worst 6 B past prog_len
-                    u8 csum_start = 0;
-                    u16 partial_csum = 0;
-                    if (csum_ofs < 255) {
-                        csum_start = DECODE_U8();         // 4th imm, at worst 7 B past prog_len
-                        partial_csum = decode_be16(ctx);  // 5th imm, at worst 9 B past prog_len
-                    }
-                    int dscp = csum_and_return_dscp(ctx->tx_buf, (s32)pkt_len, ip_ofs,
-                                                    partial_csum, csum_start, csum_ofs,
-                                                    (bool)reg_num);
-                    int ret = do_transmit_buffer(ctx, pkt_len, dscp);
-                    if (ret) { counter[-4]++; return PASS_PACKET; } // transmit failure
-                    break;
-                  case EPKTDATACOPYIMM_EXT_OPCODE:  // 41
-                  case EPKTDATACOPYR1_EXT_OPCODE:   // 42
-                    pktcopy_src_offset = ctx->R[0];
-                    FALLTHROUGH;
-                  case PKTDATACOPYIMM_EXT_OPCODE: { // 65536
-                    u32 copy_len = ctx->R[1];
-                    if (imm != EPKTDATACOPYR1_EXT_OPCODE) {
-                        copy_len = DECODE_U8();  // 2nd imm, at worst 8 bytes past prog_len
-                    }
-                    ASSERT_RETURN(ctx->tx_buf);
-                    u32 dst_offs = ctx->mem.named.tx_buf_offset;
-                    ASSERT_IN_OUTPUT_BOUNDS(dst_offs, copy_len);
-                    if (reg_num == 0) {  // copy from packet
-                        ASSERT_IN_PACKET_BOUNDS(pktcopy_src_offset);
-                        const u32 last_packet_offs = pktcopy_src_offset + copy_len - 1;
-                        ASSERT_RETURN(last_packet_offs >= pktcopy_src_offset);
-                        ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
-                        memcpy(ctx->tx_buf + dst_offs, ctx->packet + pktcopy_src_offset, copy_len);
-                    } else {  // copy from data
-                        ASSERT_IN_RAM_BOUNDS(pktcopy_src_offset + copy_len - 1);
-                        memcpy(ctx->tx_buf + dst_offs, ctx->program + pktcopy_src_offset, copy_len);
-                    }
-                    dst_offs += copy_len;
-                    ctx->mem.named.tx_buf_offset = dst_offs;
-                    break;
-                  }
-                  case JDNSQMATCH_EXT_OPCODE:       // 43
-                  case JDNSAMATCH_EXT_OPCODE:       // 44
-                  case JDNSQMATCHSAFE_EXT_OPCODE:   // 45
-                  case JDNSAMATCHSAFE_EXT_OPCODE: { // 46
-                    const u32 imm_len = 1 << (len_field - 1);
-                    u32 jump_offs = decode_imm(ctx, imm_len); // 2nd imm, at worst 8 B past prog_len
-                    int qtype = -1;
-                    if (imm & 1) { // JDNSQMATCH & JDNSQMATCHSAFE are *odd* extended opcodes
-                        qtype = DECODE_U8();  // 3rd imm, at worst 9 bytes past prog_len
-                    }
-                    u32 udp_payload_offset = ctx->R[0];
-                    match_result_type match_rst = match_names(ctx->program + ctx->pc,
-                                                              ctx->program + ctx->program_len,
-                                                              ctx->packet + udp_payload_offset,
-                                                              ctx->packet_len - udp_payload_offset,
-                                                              qtype);
-                    if (match_rst == error_program) return PASS_PACKET;
-                    if (match_rst == error_packet) {
-                        counter[-5]++; // increment error dns packet counter
-                        return (imm >= JDNSQMATCHSAFE_EXT_OPCODE) ? PASS_PACKET : DROP_PACKET;
-                    }
-                    while (ctx->pc + 1 < ctx->program_len &&
-                           (ctx->program[ctx->pc] || ctx->program[ctx->pc + 1])) {
-                        ctx->pc++;
-                    }
-                    ctx->pc += 2;  // skip the final double 0 needle end
-                    // relies on reg_num in {0,1} and match_rst being {false=0, true=1}
-                    if (!(reg_num ^ (u32)match_rst)) ctx->pc += jump_offs;
-                    break;
-                  }
-                  case EWRITE1_EXT_OPCODE:
-                  case EWRITE2_EXT_OPCODE:
-                  case EWRITE4_EXT_OPCODE: {
-                    ASSERT_RETURN(ctx->tx_buf);
-                    const u32 write_len = 1 << (imm - EWRITE1_EXT_OPCODE);
-                    ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
-                    u32 i;
-                    for (i = 0; i < write_len; ++i) {
-                        ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
-                            (u8)(REG >> (write_len - 1 - i) * 8);
-                    }
-                    break;
-                  }
-                  default:  // Unknown extended opcode
-                    return PASS_PACKET;  // Bail out
+            } else switch (imm) {
+              case NOT_EXT_OPCODE: REG = ~REG;      break;
+              case NEG_EXT_OPCODE: REG = -REG;      break;
+              case MOV_EXT_OPCODE: REG = OTHER_REG; break;
+              case SWAP_EXT_OPCODE: {
+                u32 tmp = REG;
+                REG = OTHER_REG;
+                OTHER_REG = tmp;
+                break;
               }
-              break;
-          case LDDW_OPCODE: {
-              u32 offs = OTHER_REG + (u32)signed_imm;
-              u32 size = 4;
-              u32 val = 0;
-              // Negative offsets wrap around the end of the address space.
-              // This allows us to efficiently access the end of the
-              // address space with one-byte immediates without using %=.
-              if (offs & 0x80000000) offs += ctx->ram_len;  // unsigned overflow intended
-              ASSERT_IN_DATA_BOUNDS(offs, size);
-              while (size--) val = (val << 8) | ctx->program[offs++];
-              REG = val;
-              break;
-          }
-          case STDW_OPCODE: {
-              u32 offs = OTHER_REG + (u32)signed_imm;
-              u32 size = 4;
-              u32 val = REG;
-              // Negative offsets wrap around the end of the address space.
-              // This allows us to efficiently access the end of the
-              // address space with one-byte immediates without using %=.
-              if (offs & 0x80000000) offs += ctx->ram_len;  // unsigned overflow intended
-              ASSERT_IN_DATA_BOUNDS(offs, size);
-              while (size--) {
-                  ctx->program[offs++] = (val >> 24);
-                  val <<= 8;
+              case ALLOCATE_EXT_OPCODE:
+                ASSERT_RETURN(ctx->tx_buf == NULL);
+                if (reg_num == 0) {
+                    ctx->tx_buf_len = REG;
+                } else {
+                    ctx->tx_buf_len = decode_be16(ctx); // 2nd imm, at worst 6 B past prog_len
+                }
+                // checksumming functions requires minimum 266 byte buffer for correctness
+                if (ctx->tx_buf_len < 266) ctx->tx_buf_len = 266;
+                ctx->tx_buf = apf_allocate_buffer(ctx->caller_ctx, ctx->tx_buf_len);
+                if (!ctx->tx_buf) {  // allocate failure
+                    ctx->tx_buf_len = 0;
+                    counter[-3]++;
+                    return PASS_PACKET;
+                }
+                memset(ctx->tx_buf, 0, ctx->tx_buf_len);
+                ctx->mem.named.tx_buf_offset = 0;
+                break;
+              case TRANSMIT_EXT_OPCODE:
+                ASSERT_RETURN(ctx->tx_buf);
+                u32 pkt_len = ctx->mem.named.tx_buf_offset;
+                // If pkt_len > allocate_buffer_len, it means sth. wrong
+                // happened and the tx_buf should be deallocated.
+                if (pkt_len > ctx->tx_buf_len) {
+                    do_discard_buffer(ctx);
+                    return PASS_PACKET;
+                }
+                // tx_buf_len cannot be large because we'd run out of RAM,
+                // so the above unsigned comparison effectively guarantees casting pkt_len
+                // to a signed value does not result in it going negative.
+                u8 ip_ofs = DECODE_U8();              // 2nd imm, at worst 5 B past prog_len
+                u8 csum_ofs = DECODE_U8();            // 3rd imm, at worst 6 B past prog_len
+                u8 csum_start = 0;
+                u16 partial_csum = 0;
+                if (csum_ofs < 255) {
+                    csum_start = DECODE_U8();         // 4th imm, at worst 7 B past prog_len
+                    partial_csum = decode_be16(ctx);  // 5th imm, at worst 9 B past prog_len
+                }
+                int dscp = csum_and_return_dscp(ctx->tx_buf, (s32)pkt_len, ip_ofs,
+                                                partial_csum, csum_start, csum_ofs,
+                                                (bool)reg_num);
+                int ret = do_transmit_buffer(ctx, pkt_len, dscp);
+                if (ret) { counter[-4]++; return PASS_PACKET; } // transmit failure
+                break;
+              case EPKTDATACOPYIMM_EXT_OPCODE:  // 41
+              case EPKTDATACOPYR1_EXT_OPCODE:   // 42
+                pktcopy_src_offset = ctx->R[0];
+                FALLTHROUGH;
+              case PKTDATACOPYIMM_EXT_OPCODE: { // 65536
+                u32 copy_len = ctx->R[1];
+                if (imm != EPKTDATACOPYR1_EXT_OPCODE) {
+                    copy_len = DECODE_U8();  // 2nd imm, at worst 8 bytes past prog_len
+                }
+                ASSERT_RETURN(ctx->tx_buf);
+                u32 dst_offs = ctx->mem.named.tx_buf_offset;
+                ASSERT_IN_OUTPUT_BOUNDS(dst_offs, copy_len);
+                if (reg_num == 0) {  // copy from packet
+                    ASSERT_IN_PACKET_BOUNDS(pktcopy_src_offset);
+                    const u32 last_packet_offs = pktcopy_src_offset + copy_len - 1;
+                    ASSERT_RETURN(last_packet_offs >= pktcopy_src_offset);
+                    ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
+                    memcpy(ctx->tx_buf + dst_offs, ctx->packet + pktcopy_src_offset, copy_len);
+                } else {  // copy from data
+                    ASSERT_IN_RAM_BOUNDS(pktcopy_src_offset + copy_len - 1);
+                    memcpy(ctx->tx_buf + dst_offs, ctx->program + pktcopy_src_offset, copy_len);
+                }
+                dst_offs += copy_len;
+                ctx->mem.named.tx_buf_offset = dst_offs;
+                break;
               }
-              break;
-          }
+              case JDNSQMATCH_EXT_OPCODE:       // 43
+              case JDNSAMATCH_EXT_OPCODE:       // 44
+              case JDNSQMATCHSAFE_EXT_OPCODE:   // 45
+              case JDNSAMATCHSAFE_EXT_OPCODE: { // 46
+                const u32 imm_len = 1 << (len_field - 1); // EXT_OPCODE, thus len_field > 0
+                u32 jump_offs = decode_imm(ctx, imm_len); // 2nd imm, at worst 8 B past prog_len
+                int qtype = -1;
+                if (imm & 1) { // JDNSQMATCH & JDNSQMATCHSAFE are *odd* extended opcodes
+                    qtype = DECODE_U8();  // 3rd imm, at worst 9 bytes past prog_len
+                }
+                u32 udp_payload_offset = ctx->R[0];
+                match_result_type match_rst = match_names(ctx->program + ctx->pc,
+                                                          ctx->program + ctx->program_len,
+                                                          ctx->packet + udp_payload_offset,
+                                                          ctx->packet_len - udp_payload_offset,
+                                                          qtype);
+                if (match_rst == error_program) return PASS_PACKET;
+                if (match_rst == error_packet) {
+                    counter[-5]++; // increment error dns packet counter
+                    return (imm >= JDNSQMATCHSAFE_EXT_OPCODE) ? PASS_PACKET : DROP_PACKET;
+                }
+                while (ctx->pc + 1 < ctx->program_len &&
+                       (ctx->program[ctx->pc] || ctx->program[ctx->pc + 1])) {
+                    ctx->pc++;
+                }
+                ctx->pc += 2;  // skip the final double 0 needle end
+                // relies on reg_num in {0,1} and match_rst being {false=0, true=1}
+                if (!(reg_num ^ (u32)match_rst)) ctx->pc += jump_offs;
+                break;
+              }
+              case EWRITE1_EXT_OPCODE:
+              case EWRITE2_EXT_OPCODE:
+              case EWRITE4_EXT_OPCODE: {
+                ASSERT_RETURN(ctx->tx_buf);
+                const u32 write_len = 1 << (imm - EWRITE1_EXT_OPCODE);
+                ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
+                u32 i;
+                for (i = 0; i < write_len; ++i) {
+                    ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
+                        (u8)(REG >> (write_len - 1 - i) * 8);
+                }
+                break;
+              }
+              case JONEOF_EXT_OPCODE: {
+                const u32 imm_len = 1 << (len_field - 1); // ext opcode len_field guaranteed > 0
+                u32 jump_offs = decode_imm(ctx, imm_len); // 2nd imm, at worst 8 B past prog_len
+                u8 imm3 = DECODE_U8();  // 3rd imm, at worst 9 bytes past prog_len
+                bool jmp = imm3 & 1;  // =0 jmp on match, =1 jmp on no match
+                u8 len = ((imm3 >> 1) & 3) + 1;  // size [1..4] in bytes of an element
+                u8 cnt = (imm3 >> 3) + 1;  // number [1..32] of elements in set
+                if (ctx->pc + cnt * len > ctx->program_len) return PASS_PACKET;
+                while (cnt--) {
+                    u32 v = 0;
+                    int i;
+                    for (i = 0; i < len; ++i) v = (v << 8) | DECODE_U8();
+                    if (REG == v) jmp ^= true;
+                }
+                if (jmp) ctx->pc += jump_offs;
+                return PASS_PACKET;
+              }
+              default:  // Unknown extended opcode
+                return PASS_PACKET;  // Bail out
+            }
+            break;
+          case LDDW_OPCODE:
+          case STDW_OPCODE:
+            if (ctx->v6) {
+                if (!imm) return PASS_PACKET;
+                if (imm > 0xFFFF) return PASS_PACKET;
+                if (imm * 4 > ctx->ram_len) return PASS_PACKET;
+                if (opcode == LDDW_OPCODE) {
+                    REG = counter[-(s32)imm];
+                } else {
+                    counter[-(s32)imm] = REG;
+                }
+            } else {
+                u32 offs = OTHER_REG + (u32)signed_imm;
+                // Negative offsets wrap around the end of the address space.
+                // This allows us to efficiently access the end of the
+                // address space with one-byte immediates without using %=.
+                if (offs & 0x80000000) offs += ctx->ram_len;  // unsigned overflow intended
+                u32 size = 4;
+                ASSERT_IN_DATA_BOUNDS(offs, size);
+                if (opcode == LDDW_OPCODE) {
+                    u32 val = 0;
+                    while (size--) val = (val << 8) | ctx->program[offs++];
+                    REG = val;
+                } else {
+                    u32 val = REG;
+                    while (size--) {
+                        ctx->program[offs++] = (val >> 24);
+                        val <<= 8;
+                    }
+                }
+            }
+            break;
           case WRITE_OPCODE: {
-              ASSERT_RETURN(ctx->tx_buf);
-              ASSERT_RETURN(len_field);
-              const u32 write_len = 1 << (len_field - 1);
-              ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
-              u32 i;
-              for (i = 0; i < write_len; ++i) {
-                  ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
-                      (u8)(imm >> (write_len - 1 - i) * 8);
-              }
-              break;
+            ASSERT_RETURN(ctx->tx_buf);
+            ASSERT_RETURN(len_field);
+            const u32 write_len = 1 << (len_field - 1);
+            ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
+            u32 i;
+            for (i = 0; i < write_len; ++i) {
+                ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
+                    (u8)(imm >> (write_len - 1 - i) * 8);
+            }
+            break;
           }
           default:  // Unknown opcode
-              return PASS_PACKET;  // Bail out
-      }
-  } while (instructions_remaining--);
-  return PASS_PACKET;
+            return PASS_PACKET;  // Bail out
+        }
+    } while (instructions_remaining--);
+    return PASS_PACKET;
 }
 
 int apf_run(void* ctx, u32* const program, const u32 program_len,
             const u32 ram_len, const u8* const packet,
             const u32 packet_len, const u32 filter_age_16384ths) {
-  // Due to direct 32-bit read/write access to counters at end of ram
-  // APFv6 interpreter requires program & ram_len to be 4 byte aligned.
-  if (3 & (uintptr_t)program) return PASS_PACKET;
-  if (3 & ram_len) return PASS_PACKET;
+    // Due to direct 32-bit read/write access to counters at end of ram
+    // APFv6 interpreter requires program & ram_len to be 4 byte aligned.
+    if (3 & (uintptr_t)program) return PASS_PACKET;
+    if (3 & ram_len) return PASS_PACKET;
 
-  // We rely on ram_len + 65536 not overflowing, so require ram_len < 2GiB
-  // Similarly LDDW/STDW have special meaning for negative ram offsets.
-  // We also don't want garbage like program_len == 0xFFFFFFFF
-  if ((program_len | ram_len) >> 31) return PASS_PACKET;
+    // We rely on ram_len + 65536 not overflowing, so require ram_len < 2GiB
+    // Similarly LDDW/STDW have special meaning for negative ram offsets.
+    // We also don't want garbage like program_len == 0xFFFFFFFF
+    if ((program_len | ram_len) >> 31) return PASS_PACKET;
 
-  // APFv6 requires at least 5 u32 counters at the end of ram, this makes counter[-5]++ valid
-  // This cannot wrap due to previous check.
-  if (program_len + 20 > ram_len) return PASS_PACKET;
+    // APFv6 requires at least 5 u32 counters at the end of ram, this makes counter[-5]++ valid
+    // This cannot wrap due to previous check.
+    if (program_len + 20 > ram_len) return PASS_PACKET;
 
-  apf_context apf_ctx = {};
-  apf_ctx.caller_ctx = ctx;
-  apf_ctx.program = (u8*)program;
-  apf_ctx.program_len = program_len;
-  apf_ctx.ram_len = ram_len;
-  apf_ctx.packet = packet;
-  apf_ctx.packet_len = packet_len;
-  // Fill in pre-filled memory slot values.
-  apf_ctx.mem.named.program_size = program_len;
-  apf_ctx.mem.named.ram_len = ram_len;
-  apf_ctx.mem.named.packet_size = packet_len;
-  apf_ctx.mem.named.filter_age = filter_age_16384ths >> 14;
-  apf_ctx.mem.named.filter_age_16384ths = filter_age_16384ths;
+    apf_context apf_ctx = {};
+    apf_ctx.caller_ctx = ctx;
+    apf_ctx.program = (u8*)program;
+    apf_ctx.program_len = program_len;
+    apf_ctx.ram_len = ram_len;
+    apf_ctx.packet = packet;
+    apf_ctx.packet_len = packet_len;
+    // Fill in pre-filled memory slot values.
+    apf_ctx.mem.named.program_size = program_len;
+    apf_ctx.mem.named.ram_len = ram_len;
+    apf_ctx.mem.named.packet_size = packet_len;
+    apf_ctx.mem.named.apf_version = apf_version();
+    apf_ctx.mem.named.filter_age = filter_age_16384ths >> 14;
+    apf_ctx.mem.named.filter_age_16384ths = filter_age_16384ths;
 
-  int ret = do_apf_run(&apf_ctx);
-  if (apf_ctx.tx_buf) do_discard_buffer(&apf_ctx);
-  return ret;
+    int ret = do_apf_run(&apf_ctx);
+    if (apf_ctx.tx_buf) do_discard_buffer(&apf_ctx);
+    return ret;
 }
diff --git a/v6/apf_interpreter.c b/v6/apf_interpreter.c
new file mode 100644
index 0000000..ef7a6b8
--- /dev/null
+++ b/v6/apf_interpreter.c
@@ -0,0 +1,2 @@
+APFv6 is not yet finalized.
+There is a beta version available at v5/
diff --git a/v6/apf_interpreter.h b/v6/apf_interpreter.h
new file mode 100644
index 0000000..ef7a6b8
--- /dev/null
+++ b/v6/apf_interpreter.h
@@ -0,0 +1,2 @@
+APFv6 is not yet finalized.
+There is a beta version available at v5/