Snap for 11651058 from f508de53726c4f6e981ace469b0f5f9091dd5f6c to mainline-healthfitness-release

Change-Id: Ib657bf762c741de719da3eb77360522aca8b3a79
diff --git a/v5/apf_interpreter.c b/v5/apf_interpreter.c
index 5f22427..4bba410 100644
--- a/v5/apf_interpreter.c
+++ b/v5/apf_interpreter.c
@@ -672,19 +672,19 @@
 /* Accept packet if not within data bounds */
 #define ASSERT_IN_DATA_BOUNDS(p, size) ASSERT_RETURN(IN_DATA_BOUNDS(p, size))
 
-  /* Counters start at end of RAM and count *backwards* so this array takes negative integers. */
-  u32 *counter = (u32*)(ctx->program + ctx->ram_len);
+    /* Counters start at end of RAM and count *backwards* so this array takes negative integers. */
+    u32 *counter = (u32*)(ctx->program + ctx->ram_len);
 
-  ASSERT_IN_PACKET_BOUNDS(ETH_HLEN);
-  /* Only populate if IP version is IPv4. */
-  if ((ctx->packet[ETH_HLEN] & 0xf0) == 0x40) {
-      ctx->mem.named.ipv4_header_size = (ctx->packet[ETH_HLEN] & 15) * 4;
-  }
-  /* Count of instructions remaining to execute. This is done to ensure an */
-  /* upper bound on execution time. It should never be hit and is only for */
-  /* safety. Initialize to the number of bytes in the program which is an */
-  /* upper bound on the number of instructions in the program. */
-  u32 instructions_remaining = ctx->program_len;
+    ASSERT_IN_PACKET_BOUNDS(ETH_HLEN);
+    /* Only populate if IP version is IPv4. */
+    if ((ctx->packet[ETH_HLEN] & 0xf0) == 0x40) {
+        ctx->mem.named.ipv4_header_size = (ctx->packet[ETH_HLEN] & 15) * 4;
+    }
+    /* Count of instructions remaining to execute. This is done to ensure an */
+    /* upper bound on execution time. It should never be hit and is only for */
+    /* safety. Initialize to the number of bytes in the program which is an */
+    /* upper bound on the number of instructions in the program. */
+    u32 instructions_remaining = ctx->program_len;
 
 /* Is access to offset |p| length |size| within output buffer bounds? */
 #define IN_OUTPUT_BOUNDS(p, size) (ENFORCE_UNSIGNED(p) && \
@@ -694,43 +694,43 @@
 /* Accept packet if not write within allocated output buffer */
 #define ASSERT_IN_OUTPUT_BOUNDS(p, size) ASSERT_RETURN(IN_OUTPUT_BOUNDS(p, size))
 
-  do {
-      APF_TRACE_HOOK(ctx->pc, ctx->R, ctx->program, ctx->program_len,
-                     ctx->packet, ctx->packet_len, ctx->mem.slot, ctx->ram_len);
-      if (ctx->pc == ctx->program_len + 1) return DROP_PACKET;
-      if (ctx->pc >= ctx->program_len) return PASS_PACKET;
+    do {
+        APF_TRACE_HOOK(ctx->pc, ctx->R, ctx->program, ctx->program_len,
+                       ctx->packet, ctx->packet_len, ctx->mem.slot, ctx->ram_len);
+        if (ctx->pc == ctx->program_len + 1) return DROP_PACKET;
+        if (ctx->pc >= ctx->program_len) return PASS_PACKET;
 
-      const u8 bytecode = ctx->program[ctx->pc++];
-      const u32 opcode = EXTRACT_OPCODE(bytecode);
-      const u32 reg_num = EXTRACT_REGISTER(bytecode);
+        const u8 bytecode = ctx->program[ctx->pc++];
+        const u32 opcode = EXTRACT_OPCODE(bytecode);
+        const u32 reg_num = EXTRACT_REGISTER(bytecode);
 #define REG (ctx->R[reg_num])
 #define OTHER_REG (ctx->R[reg_num ^ 1])
-      /* All instructions have immediate fields, so load them now. */
-      const u32 len_field = EXTRACT_IMM_LENGTH(bytecode);
-      u32 imm = 0;
-      s32 signed_imm = 0;
-      if (len_field != 0) {
-          const u32 imm_len = 1 << (len_field - 1);
-          imm = decode_imm(ctx, imm_len); /* 1st imm, at worst bytes 1-4 past opcode/program_len */
-          /* Sign extend imm into signed_imm. */
-          signed_imm = (s32)(imm << ((4 - imm_len) * 8));
-          signed_imm >>= (4 - imm_len) * 8;
-      }
+        /* All instructions have immediate fields, so load them now. */
+        const u32 len_field = EXTRACT_IMM_LENGTH(bytecode);
+        u32 imm = 0;
+        s32 signed_imm = 0;
+        if (len_field != 0) {
+            const u32 imm_len = 1 << (len_field - 1);
+            imm = decode_imm(ctx, imm_len); /* 1st imm, at worst bytes 1-4 past opcode/program_len */
+            /* Sign extend imm into signed_imm. */
+            signed_imm = (s32)(imm << ((4 - imm_len) * 8));
+            signed_imm >>= (4 - imm_len) * 8;
+        }
 
-      /* See comment at ADD_OPCODE for the reason for ARITH_REG/arith_imm/arith_signed_imm. */
+        /* See comment at ADD_OPCODE for the reason for ARITH_REG/arith_imm/arith_signed_imm. */
 #define ARITH_REG (ctx->R[reg_num & ctx->v6])
-      u32 arith_imm = (ctx->v6) ? (len_field ? imm : OTHER_REG) : (reg_num ? ctx->R[1] : imm);
-      s32 arith_signed_imm = (ctx->v6) ? (len_field ? signed_imm : (s32)OTHER_REG) : (reg_num ? (s32)ctx->R[1] : signed_imm);
+        u32 arith_imm = (ctx->v6) ? (len_field ? imm : OTHER_REG) : (reg_num ? ctx->R[1] : imm);
+        s32 arith_signed_imm = (ctx->v6) ? (len_field ? signed_imm : (s32)OTHER_REG) : (reg_num ? (s32)ctx->R[1] : signed_imm);
 
-      u32 pktcopy_src_offset = 0;  /* used for various pktdatacopy opcodes */
-      switch (opcode) {
+        u32 pktcopy_src_offset = 0;  /* used for various pktdatacopy opcodes */
+        switch (opcode) {
           case PASSDROP_OPCODE: {  /* APFv6+ */
-              if (len_field > 2) return PASS_PACKET;  /* max 64K counters (ie. imm < 64K) */
-              if (imm) {
-                  if (4 * imm > ctx->ram_len) return PASS_PACKET;
-                  counter[-(s32)imm]++;
-              }
-              return reg_num ? DROP_PACKET : PASS_PACKET;
+            if (len_field > 2) return PASS_PACKET;  /* max 64K counters (ie. imm < 64K) */
+            if (imm) {
+                if (4 * imm > ctx->ram_len) return PASS_PACKET;
+                counter[-(s32)imm]++;
+            }
+            return reg_num ? DROP_PACKET : PASS_PACKET;
           }
           case LDB_OPCODE:
           case LDH_OPCODE:
@@ -738,93 +738,93 @@
           case LDBX_OPCODE:
           case LDHX_OPCODE:
           case LDWX_OPCODE: {
-              u32 offs = imm;
-              /* Note: this can overflow and actually decrease offs. */
-              if (opcode >= LDBX_OPCODE) offs += ctx->R[1];
-              ASSERT_IN_PACKET_BOUNDS(offs);
-              u32 load_size = 0;
-              switch (opcode) {
-                  case LDB_OPCODE:
-                  case LDBX_OPCODE:
-                    load_size = 1;
-                    break;
-                  case LDH_OPCODE:
-                  case LDHX_OPCODE:
-                    load_size = 2;
-                    break;
-                  case LDW_OPCODE:
-                  case LDWX_OPCODE:
-                    load_size = 4;
-                    break;
-                  /* Immediately enclosing switch statement guarantees */
-                  /* opcode cannot be any other value. */
-              }
-              const u32 end_offs = offs + (load_size - 1);
-              /* Catch overflow/wrap-around. */
-              ASSERT_RETURN(end_offs >= offs);
-              ASSERT_IN_PACKET_BOUNDS(end_offs);
-              u32 val = 0;
-              while (load_size--) val = (val << 8) | ctx->packet[offs++];
-              REG = val;
-              break;
+            u32 offs = imm;
+            /* Note: this can overflow and actually decrease offs. */
+            if (opcode >= LDBX_OPCODE) offs += ctx->R[1];
+            ASSERT_IN_PACKET_BOUNDS(offs);
+            u32 load_size = 0;
+            switch (opcode) {
+              case LDB_OPCODE:
+              case LDBX_OPCODE:
+                load_size = 1;
+                break;
+              case LDH_OPCODE:
+              case LDHX_OPCODE:
+                load_size = 2;
+                break;
+              case LDW_OPCODE:
+              case LDWX_OPCODE:
+                load_size = 4;
+                break;
+              /* Immediately enclosing switch statement guarantees */
+              /* opcode cannot be any other value. */
+            }
+            const u32 end_offs = offs + (load_size - 1);
+            /* Catch overflow/wrap-around. */
+            ASSERT_RETURN(end_offs >= offs);
+            ASSERT_IN_PACKET_BOUNDS(end_offs);
+            u32 val = 0;
+            while (load_size--) val = (val << 8) | ctx->packet[offs++];
+            REG = val;
+            break;
           }
           case JMP_OPCODE:
-              if (reg_num && !ctx->v6) {  /* APFv6+ */
+            if (reg_num && !ctx->v6) {  /* APFv6+ */
                 /* First invocation of APFv6 jmpdata instruction */
                 counter[-1] = 0x12345678;  /* endianness marker */
                 counter[-2]++;  /* total packets ++ */
                 ctx->v6 = (u8)True;
-              }
-              /* This can jump backwards. Infinite looping prevented by instructions_remaining. */
-              ctx->pc += imm;
-              break;
+            }
+            /* This can jump backwards. Infinite looping prevented by instructions_remaining. */
+            ctx->pc += imm;
+            break;
           case JEQ_OPCODE:
           case JNE_OPCODE:
           case JGT_OPCODE:
           case JLT_OPCODE:
           case JSET_OPCODE: {
-              /* with len_field == 0, we have imm == 0 and thus a jmp +0, ie. a no-op */
-              if (len_field == 0) break;
-              /* Load second immediate field. */
-              u32 cmp_imm = 0;
-              if (reg_num == 1) {
-                  cmp_imm = ctx->R[1];
-              } else {
-                  u32 cmp_imm_len = 1 << (len_field - 1);
-                  cmp_imm = decode_imm(ctx, cmp_imm_len); /* 2nd imm, at worst 8 bytes past prog_len */
-              }
-              switch (opcode) {
-                  case JEQ_OPCODE:  if (ctx->R[0] == cmp_imm) ctx->pc += imm; break;
-                  case JNE_OPCODE:  if (ctx->R[0] != cmp_imm) ctx->pc += imm; break;
-                  case JGT_OPCODE:  if (ctx->R[0] >  cmp_imm) ctx->pc += imm; break;
-                  case JLT_OPCODE:  if (ctx->R[0] <  cmp_imm) ctx->pc += imm; break;
-                  case JSET_OPCODE: if (ctx->R[0] &  cmp_imm) ctx->pc += imm; break;
-              }
-              break;
+            /* with len_field == 0, we have imm == 0 and thus a jmp +0, ie. a no-op */
+            if (len_field == 0) break;
+            /* Load second immediate field. */
+            u32 cmp_imm = 0;
+            if (reg_num == 1) {
+                cmp_imm = ctx->R[1];
+            } else {
+                u32 cmp_imm_len = 1 << (len_field - 1);
+                cmp_imm = decode_imm(ctx, cmp_imm_len); /* 2nd imm, at worst 8 bytes past prog_len */
+            }
+            switch (opcode) {
+              case JEQ_OPCODE:  if (ctx->R[0] == cmp_imm) ctx->pc += imm; break;
+              case JNE_OPCODE:  if (ctx->R[0] != cmp_imm) ctx->pc += imm; break;
+              case JGT_OPCODE:  if (ctx->R[0] >  cmp_imm) ctx->pc += imm; break;
+              case JLT_OPCODE:  if (ctx->R[0] <  cmp_imm) ctx->pc += imm; break;
+              case JSET_OPCODE: if (ctx->R[0] &  cmp_imm) ctx->pc += imm; break;
+            }
+            break;
           }
           case JBSMATCH_OPCODE: {
-              /* with len_field == 0, we have imm == cmp_imm == 0 and thus a jmp +0, ie. a no-op */
-              if (len_field == 0) break;
-              /* Load second immediate field. */
-              u32 cmp_imm_len = 1 << (len_field - 1);
-              u32 cmp_imm = decode_imm(ctx, cmp_imm_len); /* 2nd imm, at worst 8 bytes past prog_len */
-              /* cmp_imm is size in bytes of data to compare. */
-              /* pc is offset of program bytes to compare. */
-              /* imm is jump target offset. */
-              /* R0 is offset of packet bytes to compare. */
-              if (cmp_imm > 0xFFFF) return PASS_PACKET;
-              Boolean do_jump = !reg_num;
-              /* pc < program_len < ram_len < 2GiB, thus pc + cmp_imm cannot wrap */
-              if (!IN_RAM_BOUNDS(ctx->pc + cmp_imm - 1)) return PASS_PACKET;
-              ASSERT_IN_PACKET_BOUNDS(ctx->R[0]);
-              const u32 last_packet_offs = ctx->R[0] + cmp_imm - 1;
-              ASSERT_RETURN(last_packet_offs >= ctx->R[0]);
-              ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
-              do_jump ^= !memcmp(ctx->program + ctx->pc, ctx->packet + ctx->R[0], cmp_imm);
-              /* skip past comparison bytes */
-              ctx->pc += cmp_imm;
-              if (do_jump) ctx->pc += imm;
-              break;
+            /* with len_field == 0, we have imm == cmp_imm == 0 and thus a jmp +0, ie. a no-op */
+            if (len_field == 0) break;
+            /* Load second immediate field. */
+            u32 cmp_imm_len = 1 << (len_field - 1);
+            u32 cmp_imm = decode_imm(ctx, cmp_imm_len); /* 2nd imm, at worst 8 bytes past prog_len */
+            /* cmp_imm is size in bytes of data to compare. */
+            /* pc is offset of program bytes to compare. */
+            /* imm is jump target offset. */
+            /* R0 is offset of packet bytes to compare. */
+            if (cmp_imm > 0xFFFF) return PASS_PACKET;
+            Boolean do_jump = !reg_num;
+            /* pc < program_len < ram_len < 2GiB, thus pc + cmp_imm cannot wrap */
+            if (!IN_RAM_BOUNDS(ctx->pc + cmp_imm - 1)) return PASS_PACKET;
+            ASSERT_IN_PACKET_BOUNDS(ctx->R[0]);
+            const u32 last_packet_offs = ctx->R[0] + cmp_imm - 1;
+            ASSERT_RETURN(last_packet_offs >= ctx->R[0]);
+            ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
+            do_jump ^= !memcmp(ctx->program + ctx->pc, ctx->packet + ctx->R[0], cmp_imm);
+            /* skip past comparison bytes */
+            ctx->pc += cmp_imm;
+            if (do_jump) ctx->pc += imm;
+            break;
           }
           /* There is a difference in APFv4 and APFv6 arithmetic behaviour! */
           /* APFv4:  R[0] op= Rbit ? R[1] : imm;  (and it thus doesn't make sense to have R=1 && len_field>0) */
@@ -835,259 +835,259 @@
           case AND_OPCODE: ARITH_REG &= (ctx->v6) ? (u32)arith_signed_imm : arith_imm; break;
           case OR_OPCODE:  ARITH_REG |= arith_imm; break;
           case DIV_OPCODE: {  /* see above comment! */
-              const u32 div_operand = arith_imm;
-              ASSERT_RETURN(div_operand);
-              ARITH_REG /= div_operand;
-              break;
+            const u32 div_operand = arith_imm;
+            ASSERT_RETURN(div_operand);
+            ARITH_REG /= div_operand;
+            break;
           }
           case SH_OPCODE: {  /* see above comment! */
-              if (arith_signed_imm >= 0)
-                  ARITH_REG <<= arith_signed_imm;
-              else
-                  ARITH_REG >>= -arith_signed_imm;
-              break;
+            if (arith_signed_imm >= 0)
+                ARITH_REG <<= arith_signed_imm;
+            else
+                ARITH_REG >>= -arith_signed_imm;
+            break;
           }
           case LI_OPCODE:
-              REG = (u32)signed_imm;
-              break;
+            REG = (u32)signed_imm;
+            break;
           case PKTDATACOPY_OPCODE:
-              pktcopy_src_offset = imm;
-              imm = PKTDATACOPYIMM_EXT_OPCODE;
-              FALLTHROUGH;
+            pktcopy_src_offset = imm;
+            imm = PKTDATACOPYIMM_EXT_OPCODE;
+            FALLTHROUGH;
           case EXT_OPCODE:
-              if (/* imm >= LDM_EXT_OPCODE &&  -- but note imm is u32 and LDM_EXT_OPCODE is 0 */
-                  imm < (LDM_EXT_OPCODE + MEMORY_ITEMS)) {
+            if (/* imm >= LDM_EXT_OPCODE &&  -- but note imm is u32 and LDM_EXT_OPCODE is 0 */
+                imm < (LDM_EXT_OPCODE + MEMORY_ITEMS)) {
                 REG = ctx->mem.slot[imm - LDM_EXT_OPCODE];
-              } else if (imm >= STM_EXT_OPCODE && imm < (STM_EXT_OPCODE + MEMORY_ITEMS)) {
+            } else if (imm >= STM_EXT_OPCODE && imm < (STM_EXT_OPCODE + MEMORY_ITEMS)) {
                 ctx->mem.slot[imm - STM_EXT_OPCODE] = REG;
-              } else switch (imm) {
-                  case NOT_EXT_OPCODE: REG = ~REG;      break;
-                  case NEG_EXT_OPCODE: REG = -REG;      break;
-                  case MOV_EXT_OPCODE: REG = OTHER_REG; break;
-                  case SWAP_EXT_OPCODE: {
-                    u32 tmp = REG;
-                    REG = OTHER_REG;
-                    OTHER_REG = tmp;
-                    break;
-                  }
-                  case ALLOCATE_EXT_OPCODE:
-                    ASSERT_RETURN(ctx->tx_buf == NULL);
-                    if (reg_num == 0) {
-                        ctx->tx_buf_len = REG;
-                    } else {
-                        ctx->tx_buf_len = decode_be16(ctx); /* 2nd imm, at worst 6 B past prog_len */
-                    }
-                    /* checksumming functions requires minimum 266 byte buffer for correctness */
-                    if (ctx->tx_buf_len < 266) ctx->tx_buf_len = 266;
-                    ctx->tx_buf = apf_allocate_buffer(ctx->caller_ctx, ctx->tx_buf_len);
-                    if (!ctx->tx_buf) {  /* allocate failure */
-                        ctx->tx_buf_len = 0;
-                        counter[-3]++;
-                        return PASS_PACKET;
-                    }
-                    memset(ctx->tx_buf, 0, ctx->tx_buf_len);
-                    ctx->mem.named.tx_buf_offset = 0;
-                    break;
-                  case TRANSMIT_EXT_OPCODE:
-                    ASSERT_RETURN(ctx->tx_buf);
-                    u32 pkt_len = ctx->mem.named.tx_buf_offset;
-                    /* If pkt_len > allocate_buffer_len, it means sth. wrong */
-                    /* happened and the tx_buf should be deallocated. */
-                    if (pkt_len > ctx->tx_buf_len) {
-                        do_discard_buffer(ctx);
-                        return PASS_PACKET;
-                    }
-                    /* tx_buf_len cannot be large because we'd run out of RAM, */
-                    /* so the above unsigned comparison effectively guarantees casting pkt_len */
-                    /* to a signed value does not result in it going negative. */
-                    u8 ip_ofs = DECODE_U8();              /* 2nd imm, at worst 5 B past prog_len */
-                    u8 csum_ofs = DECODE_U8();            /* 3rd imm, at worst 6 B past prog_len */
-                    u8 csum_start = 0;
-                    u16 partial_csum = 0;
-                    if (csum_ofs < 255) {
-                        csum_start = DECODE_U8();         /* 4th imm, at worst 7 B past prog_len */
-                        partial_csum = decode_be16(ctx);  /* 5th imm, at worst 9 B past prog_len */
-                    }
-                    int dscp = apf_internal_csum_and_return_dscp(ctx->tx_buf, (s32)pkt_len, ip_ofs,
-                                                    partial_csum, csum_start, csum_ofs,
-                                                    (Boolean)reg_num);
-                    int ret = apf_internal_do_transmit_buffer(ctx, pkt_len, dscp);
-                    if (ret) { counter[-4]++; return PASS_PACKET; } /* transmit failure */
-                    break;
-                  case EPKTDATACOPYIMM_EXT_OPCODE:  /* 41 */
-                  case EPKTDATACOPYR1_EXT_OPCODE:   /* 42 */
-                    pktcopy_src_offset = ctx->R[0];
-                    FALLTHROUGH;
-                  case PKTDATACOPYIMM_EXT_OPCODE: { /* 65536 */
-                    u32 copy_len = ctx->R[1];
-                    if (imm != EPKTDATACOPYR1_EXT_OPCODE) {
-                        copy_len = DECODE_U8();  /* 2nd imm, at worst 8 bytes past prog_len */
-                    }
-                    ASSERT_RETURN(ctx->tx_buf);
-                    u32 dst_offs = ctx->mem.named.tx_buf_offset;
-                    ASSERT_IN_OUTPUT_BOUNDS(dst_offs, copy_len);
-                    if (reg_num == 0) {  /* copy from packet */
-                        ASSERT_IN_PACKET_BOUNDS(pktcopy_src_offset);
-                        const u32 last_packet_offs = pktcopy_src_offset + copy_len - 1;
-                        ASSERT_RETURN(last_packet_offs >= pktcopy_src_offset);
-                        ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
-                        memcpy(ctx->tx_buf + dst_offs, ctx->packet + pktcopy_src_offset, copy_len);
-                    } else {  /* copy from data */
-                        ASSERT_IN_RAM_BOUNDS(pktcopy_src_offset + copy_len - 1);
-                        memcpy(ctx->tx_buf + dst_offs, ctx->program + pktcopy_src_offset, copy_len);
-                    }
-                    dst_offs += copy_len;
-                    ctx->mem.named.tx_buf_offset = dst_offs;
-                    break;
-                  }
-                  case JDNSQMATCH_EXT_OPCODE:       /* 43 */
-                  case JDNSAMATCH_EXT_OPCODE:       /* 44 */
-                  case JDNSQMATCHSAFE_EXT_OPCODE:   /* 45 */
-                  case JDNSAMATCHSAFE_EXT_OPCODE: { /* 46 */
-                    const u32 imm_len = 1 << (len_field - 1); /* EXT_OPCODE, thus len_field > 0 */
-                    u32 jump_offs = decode_imm(ctx, imm_len); /* 2nd imm, at worst 8 B past prog_len */
-                    int qtype = -1;
-                    if (imm & 1) { /* JDNSQMATCH & JDNSQMATCHSAFE are *odd* extended opcodes */
-                        qtype = DECODE_U8();  /* 3rd imm, at worst 9 bytes past prog_len */
-                    }
-                    u32 udp_payload_offset = ctx->R[0];
-                    match_result_type match_rst = apf_internal_match_names(ctx->program + ctx->pc,
-                                                              ctx->program + ctx->program_len,
-                                                              ctx->packet + udp_payload_offset,
-                                                              ctx->packet_len - udp_payload_offset,
-                                                              qtype);
-                    if (match_rst == error_program) return PASS_PACKET;
-                    if (match_rst == error_packet) {
-                        counter[-5]++; /* increment error dns packet counter */
-                        return (imm >= JDNSQMATCHSAFE_EXT_OPCODE) ? PASS_PACKET : DROP_PACKET;
-                    }
-                    while (ctx->pc + 1 < ctx->program_len &&
-                           (ctx->program[ctx->pc] || ctx->program[ctx->pc + 1])) {
-                        ctx->pc++;
-                    }
-                    ctx->pc += 2;  /* skip the final double 0 needle end */
-                    /* relies on reg_num in {0,1} and match_rst being {False=0, True=1} */
-                    if (!(reg_num ^ (u32)match_rst)) ctx->pc += jump_offs;
-                    break;
-                  }
-                  case EWRITE1_EXT_OPCODE:
-                  case EWRITE2_EXT_OPCODE:
-                  case EWRITE4_EXT_OPCODE: {
-                    ASSERT_RETURN(ctx->tx_buf);
-                    const u32 write_len = 1 << (imm - EWRITE1_EXT_OPCODE);
-                    ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
-                    u32 i;
-                    for (i = 0; i < write_len; ++i) {
-                        ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
-                            (u8)(REG >> (write_len - 1 - i) * 8);
-                    }
-                    break;
-                  }
-                  case JONEOF_EXT_OPCODE: {
-                    const u32 imm_len = 1 << (len_field - 1); /* ext opcode len_field guaranteed > 0 */
-                    u32 jump_offs = decode_imm(ctx, imm_len); /* 2nd imm, at worst 8 B past prog_len */
-                    u8 imm3 = DECODE_U8();  /* 3rd imm, at worst 9 bytes past prog_len */
-                    Boolean jmp = imm3 & 1;  /* =0 jmp on match, =1 jmp on no match */
-                    u8 len = ((imm3 >> 1) & 3) + 1;  /* size [1..4] in bytes of an element */
-                    u8 cnt = (imm3 >> 3) + 1;  /* number [1..32] of elements in set */
-                    if (ctx->pc + cnt * len > ctx->program_len) return PASS_PACKET;
-                    while (cnt--) {
-                        u32 v = 0;
-                        int i;
-                        for (i = 0; i < len; ++i) v = (v << 8) | DECODE_U8();
-                        if (REG == v) jmp ^= True;
-                    }
-                    if (jmp) ctx->pc += jump_offs;
-                    return PASS_PACKET;
-                  }
-                  default:  /* Unknown extended opcode */
-                    return PASS_PACKET;  /* Bail out */
+            } else switch (imm) {
+              case NOT_EXT_OPCODE: REG = ~REG;      break;
+              case NEG_EXT_OPCODE: REG = -REG;      break;
+              case MOV_EXT_OPCODE: REG = OTHER_REG; break;
+              case SWAP_EXT_OPCODE: {
+                u32 tmp = REG;
+                REG = OTHER_REG;
+                OTHER_REG = tmp;
+                break;
               }
-              break;
+              case ALLOCATE_EXT_OPCODE:
+                ASSERT_RETURN(ctx->tx_buf == NULL);
+                if (reg_num == 0) {
+                    ctx->tx_buf_len = REG;
+                } else {
+                    ctx->tx_buf_len = decode_be16(ctx); /* 2nd imm, at worst 6 B past prog_len */
+                }
+                /* checksumming functions requires minimum 266 byte buffer for correctness */
+                if (ctx->tx_buf_len < 266) ctx->tx_buf_len = 266;
+                ctx->tx_buf = apf_allocate_buffer(ctx->caller_ctx, ctx->tx_buf_len);
+                if (!ctx->tx_buf) {  /* allocate failure */
+                    ctx->tx_buf_len = 0;
+                    counter[-3]++;
+                    return PASS_PACKET;
+                }
+                memset(ctx->tx_buf, 0, ctx->tx_buf_len);
+                ctx->mem.named.tx_buf_offset = 0;
+                break;
+              case TRANSMIT_EXT_OPCODE:
+                ASSERT_RETURN(ctx->tx_buf);
+                u32 pkt_len = ctx->mem.named.tx_buf_offset;
+                /* If pkt_len > allocate_buffer_len, it means sth. wrong */
+                /* happened and the tx_buf should be deallocated. */
+                if (pkt_len > ctx->tx_buf_len) {
+                    do_discard_buffer(ctx);
+                    return PASS_PACKET;
+                }
+                /* tx_buf_len cannot be large because we'd run out of RAM, */
+                /* so the above unsigned comparison effectively guarantees casting pkt_len */
+                /* to a signed value does not result in it going negative. */
+                u8 ip_ofs = DECODE_U8();              /* 2nd imm, at worst 5 B past prog_len */
+                u8 csum_ofs = DECODE_U8();            /* 3rd imm, at worst 6 B past prog_len */
+                u8 csum_start = 0;
+                u16 partial_csum = 0;
+                if (csum_ofs < 255) {
+                    csum_start = DECODE_U8();         /* 4th imm, at worst 7 B past prog_len */
+                    partial_csum = decode_be16(ctx);  /* 5th imm, at worst 9 B past prog_len */
+                }
+                int dscp = apf_internal_csum_and_return_dscp(ctx->tx_buf, (s32)pkt_len, ip_ofs,
+                                                partial_csum, csum_start, csum_ofs,
+                                                (Boolean)reg_num);
+                int ret = apf_internal_do_transmit_buffer(ctx, pkt_len, dscp);
+                if (ret) { counter[-4]++; return PASS_PACKET; } /* transmit failure */
+                break;
+              case EPKTDATACOPYIMM_EXT_OPCODE:  /* 41 */
+              case EPKTDATACOPYR1_EXT_OPCODE:   /* 42 */
+                pktcopy_src_offset = ctx->R[0];
+                FALLTHROUGH;
+              case PKTDATACOPYIMM_EXT_OPCODE: { /* 65536 */
+                u32 copy_len = ctx->R[1];
+                if (imm != EPKTDATACOPYR1_EXT_OPCODE) {
+                    copy_len = DECODE_U8();  /* 2nd imm, at worst 8 bytes past prog_len */
+                }
+                ASSERT_RETURN(ctx->tx_buf);
+                u32 dst_offs = ctx->mem.named.tx_buf_offset;
+                ASSERT_IN_OUTPUT_BOUNDS(dst_offs, copy_len);
+                if (reg_num == 0) {  /* copy from packet */
+                    ASSERT_IN_PACKET_BOUNDS(pktcopy_src_offset);
+                    const u32 last_packet_offs = pktcopy_src_offset + copy_len - 1;
+                    ASSERT_RETURN(last_packet_offs >= pktcopy_src_offset);
+                    ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
+                    memcpy(ctx->tx_buf + dst_offs, ctx->packet + pktcopy_src_offset, copy_len);
+                } else {  /* copy from data */
+                    ASSERT_IN_RAM_BOUNDS(pktcopy_src_offset + copy_len - 1);
+                    memcpy(ctx->tx_buf + dst_offs, ctx->program + pktcopy_src_offset, copy_len);
+                }
+                dst_offs += copy_len;
+                ctx->mem.named.tx_buf_offset = dst_offs;
+                break;
+              }
+              case JDNSQMATCH_EXT_OPCODE:       /* 43 */
+              case JDNSAMATCH_EXT_OPCODE:       /* 44 */
+              case JDNSQMATCHSAFE_EXT_OPCODE:   /* 45 */
+              case JDNSAMATCHSAFE_EXT_OPCODE: { /* 46 */
+                const u32 imm_len = 1 << (len_field - 1); /* EXT_OPCODE, thus len_field > 0 */
+                u32 jump_offs = decode_imm(ctx, imm_len); /* 2nd imm, at worst 8 B past prog_len */
+                int qtype = -1;
+                if (imm & 1) { /* JDNSQMATCH & JDNSQMATCHSAFE are *odd* extended opcodes */
+                    qtype = DECODE_U8();  /* 3rd imm, at worst 9 bytes past prog_len */
+                }
+                u32 udp_payload_offset = ctx->R[0];
+                match_result_type match_rst = apf_internal_match_names(ctx->program + ctx->pc,
+                                                          ctx->program + ctx->program_len,
+                                                          ctx->packet + udp_payload_offset,
+                                                          ctx->packet_len - udp_payload_offset,
+                                                          qtype);
+                if (match_rst == error_program) return PASS_PACKET;
+                if (match_rst == error_packet) {
+                    counter[-5]++; /* increment error dns packet counter */
+                    return (imm >= JDNSQMATCHSAFE_EXT_OPCODE) ? PASS_PACKET : DROP_PACKET;
+                }
+                while (ctx->pc + 1 < ctx->program_len &&
+                       (ctx->program[ctx->pc] || ctx->program[ctx->pc + 1])) {
+                    ctx->pc++;
+                }
+                ctx->pc += 2;  /* skip the final double 0 needle end */
+                /* relies on reg_num in {0,1} and match_rst being {False=0, True=1} */
+                if (!(reg_num ^ (u32)match_rst)) ctx->pc += jump_offs;
+                break;
+              }
+              case EWRITE1_EXT_OPCODE:
+              case EWRITE2_EXT_OPCODE:
+              case EWRITE4_EXT_OPCODE: {
+                ASSERT_RETURN(ctx->tx_buf);
+                const u32 write_len = 1 << (imm - EWRITE1_EXT_OPCODE);
+                ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
+                u32 i;
+                for (i = 0; i < write_len; ++i) {
+                    ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
+                        (u8)(REG >> (write_len - 1 - i) * 8);
+                }
+                break;
+              }
+              case JONEOF_EXT_OPCODE: {
+                const u32 imm_len = 1 << (len_field - 1); /* ext opcode len_field guaranteed > 0 */
+                u32 jump_offs = decode_imm(ctx, imm_len); /* 2nd imm, at worst 8 B past prog_len */
+                u8 imm3 = DECODE_U8();  /* 3rd imm, at worst 9 bytes past prog_len */
+                Boolean jmp = imm3 & 1;  /* =0 jmp on match, =1 jmp on no match */
+                u8 len = ((imm3 >> 1) & 3) + 1;  /* size [1..4] in bytes of an element */
+                u8 cnt = (imm3 >> 3) + 1;  /* number [1..32] of elements in set */
+                if (ctx->pc + cnt * len > ctx->program_len) return PASS_PACKET;
+                while (cnt--) {
+                    u32 v = 0;
+                    int i;
+                    for (i = 0; i < len; ++i) v = (v << 8) | DECODE_U8();
+                    if (REG == v) jmp ^= True;
+                }
+                if (jmp) ctx->pc += jump_offs;
+                return PASS_PACKET;
+              }
+              default:  /* Unknown extended opcode */
+                return PASS_PACKET;  /* Bail out */
+            }
+            break;
           case LDDW_OPCODE:
           case STDW_OPCODE:
-              if (ctx->v6) {
-                  if (!imm) return PASS_PACKET;
-                  if (imm > 0xFFFF) return PASS_PACKET;
-                  if (imm * 4 > ctx->ram_len) return PASS_PACKET;
-                  if (opcode == LDDW_OPCODE) {
-                     REG = counter[-(s32)imm];
-                  } else {
-                     counter[-(s32)imm] = REG;
-                  }
-              } else {
-                  u32 offs = OTHER_REG + (u32)signed_imm;
-                  /* Negative offsets wrap around the end of the address space. */
-                  /* This allows us to efficiently access the end of the */
-                  /* address space with one-byte immediates without using %=. */
-                  if (offs & 0x80000000) offs += ctx->ram_len;  /* unsigned overflow intended */
-                  u32 size = 4;
-                  ASSERT_IN_DATA_BOUNDS(offs, size);
-                  if (opcode == LDDW_OPCODE) {
-                      u32 val = 0;
-                      while (size--) val = (val << 8) | ctx->program[offs++];
-                      REG = val;
-                  } else {
-                      u32 val = REG;
-                      while (size--) {
-                          ctx->program[offs++] = (val >> 24);
-                          val <<= 8;
-                      }
-                  }
-              }
-              break;
+            if (ctx->v6) {
+                if (!imm) return PASS_PACKET;
+                if (imm > 0xFFFF) return PASS_PACKET;
+                if (imm * 4 > ctx->ram_len) return PASS_PACKET;
+                if (opcode == LDDW_OPCODE) {
+                    REG = counter[-(s32)imm];
+                } else {
+                    counter[-(s32)imm] = REG;
+                }
+            } else {
+                u32 offs = OTHER_REG + (u32)signed_imm;
+                /* Negative offsets wrap around the end of the address space. */
+                /* This allows us to efficiently access the end of the */
+                /* address space with one-byte immediates without using %=. */
+                if (offs & 0x80000000) offs += ctx->ram_len;  /* unsigned overflow intended */
+                u32 size = 4;
+                ASSERT_IN_DATA_BOUNDS(offs, size);
+                if (opcode == LDDW_OPCODE) {
+                    u32 val = 0;
+                    while (size--) val = (val << 8) | ctx->program[offs++];
+                    REG = val;
+                } else {
+                    u32 val = REG;
+                    while (size--) {
+                        ctx->program[offs++] = (val >> 24);
+                        val <<= 8;
+                    }
+                }
+            }
+            break;
           case WRITE_OPCODE: {
-              ASSERT_RETURN(ctx->tx_buf);
-              ASSERT_RETURN(len_field);
-              const u32 write_len = 1 << (len_field - 1);
-              ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
-              u32 i;
-              for (i = 0; i < write_len; ++i) {
-                  ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
-                      (u8)(imm >> (write_len - 1 - i) * 8);
-              }
-              break;
+            ASSERT_RETURN(ctx->tx_buf);
+            ASSERT_RETURN(len_field);
+            const u32 write_len = 1 << (len_field - 1);
+            ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
+            u32 i;
+            for (i = 0; i < write_len; ++i) {
+                ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
+                    (u8)(imm >> (write_len - 1 - i) * 8);
+            }
+            break;
           }
           default:  /* Unknown opcode */
-              return PASS_PACKET;  /* Bail out */
-      }
-  } while (instructions_remaining--);
-  return PASS_PACKET;
+            return PASS_PACKET;  /* Bail out */
+        }
+    } while (instructions_remaining--);
+    return PASS_PACKET;
 }
 
 int apf_run(void* ctx, u32* const program, const u32 program_len,
             const u32 ram_len, const u8* const packet,
             const u32 packet_len, const u32 filter_age_16384ths) {
-  /* Due to direct 32-bit read/write access to counters at end of ram */
-  /* APFv6 interpreter requires program & ram_len to be 4 byte aligned. */
-  if (3 & (uintptr_t)program) return PASS_PACKET;
-  if (3 & ram_len) return PASS_PACKET;
+    /* Due to direct 32-bit read/write access to counters at end of ram */
+    /* APFv6 interpreter requires program & ram_len to be 4 byte aligned. */
+    if (3 & (uintptr_t)program) return PASS_PACKET;
+    if (3 & ram_len) return PASS_PACKET;
 
-  /* We rely on ram_len + 65536 not overflowing, so require ram_len < 2GiB */
-  /* Similarly LDDW/STDW have special meaning for negative ram offsets. */
-  /* We also don't want garbage like program_len == 0xFFFFFFFF */
-  if ((program_len | ram_len) >> 31) return PASS_PACKET;
+    /* We rely on ram_len + 65536 not overflowing, so require ram_len < 2GiB */
+    /* Similarly LDDW/STDW have special meaning for negative ram offsets. */
+    /* We also don't want garbage like program_len == 0xFFFFFFFF */
+    if ((program_len | ram_len) >> 31) return PASS_PACKET;
 
-  /* APFv6 requires at least 5 u32 counters at the end of ram, this makes counter[-5]++ valid */
-  /* This cannot wrap due to previous check. */
-  if (program_len + 20 > ram_len) return PASS_PACKET;
+    /* APFv6 requires at least 5 u32 counters at the end of ram, this makes counter[-5]++ valid */
+    /* This cannot wrap due to previous check. */
+    if (program_len + 20 > ram_len) return PASS_PACKET;
 
-  apf_context apf_ctx = {};
-  apf_ctx.caller_ctx = ctx;
-  apf_ctx.program = (u8*)program;
-  apf_ctx.program_len = program_len;
-  apf_ctx.ram_len = ram_len;
-  apf_ctx.packet = packet;
-  apf_ctx.packet_len = packet_len;
-  /* Fill in pre-filled memory slot values. */
-  apf_ctx.mem.named.program_size = program_len;
-  apf_ctx.mem.named.ram_len = ram_len;
-  apf_ctx.mem.named.packet_size = packet_len;
-  apf_ctx.mem.named.apf_version = apf_version();
-  apf_ctx.mem.named.filter_age = filter_age_16384ths >> 14;
-  apf_ctx.mem.named.filter_age_16384ths = filter_age_16384ths;
+    apf_context apf_ctx = {};
+    apf_ctx.caller_ctx = ctx;
+    apf_ctx.program = (u8*)program;
+    apf_ctx.program_len = program_len;
+    apf_ctx.ram_len = ram_len;
+    apf_ctx.packet = packet;
+    apf_ctx.packet_len = packet_len;
+    /* Fill in pre-filled memory slot values. */
+    apf_ctx.mem.named.program_size = program_len;
+    apf_ctx.mem.named.ram_len = ram_len;
+    apf_ctx.mem.named.packet_size = packet_len;
+    apf_ctx.mem.named.apf_version = apf_version();
+    apf_ctx.mem.named.filter_age = filter_age_16384ths >> 14;
+    apf_ctx.mem.named.filter_age_16384ths = filter_age_16384ths;
 
-  int ret = do_apf_run(&apf_ctx);
-  if (apf_ctx.tx_buf) do_discard_buffer(&apf_ctx);
-  return ret;
+    int ret = do_apf_run(&apf_ctx);
+    if (apf_ctx.tx_buf) do_discard_buffer(&apf_ctx);
+    return ret;
 }
diff --git a/v5/apf_interpreter_source.c b/v5/apf_interpreter_source.c
index 7438029..197c152 100644
--- a/v5/apf_interpreter_source.c
+++ b/v5/apf_interpreter_source.c
@@ -126,19 +126,19 @@
 // Accept packet if not within data bounds
 #define ASSERT_IN_DATA_BOUNDS(p, size) ASSERT_RETURN(IN_DATA_BOUNDS(p, size))
 
-  // Counters start at end of RAM and count *backwards* so this array takes negative integers.
-  u32 *counter = (u32*)(ctx->program + ctx->ram_len);
+    // Counters start at end of RAM and count *backwards* so this array takes negative integers.
+    u32 *counter = (u32*)(ctx->program + ctx->ram_len);
 
-  ASSERT_IN_PACKET_BOUNDS(ETH_HLEN);
-  // Only populate if IP version is IPv4.
-  if ((ctx->packet[ETH_HLEN] & 0xf0) == 0x40) {
-      ctx->mem.named.ipv4_header_size = (ctx->packet[ETH_HLEN] & 15) * 4;
-  }
-  // Count of instructions remaining to execute. This is done to ensure an
-  // upper bound on execution time. It should never be hit and is only for
-  // safety. Initialize to the number of bytes in the program which is an
-  // upper bound on the number of instructions in the program.
-  u32 instructions_remaining = ctx->program_len;
+    ASSERT_IN_PACKET_BOUNDS(ETH_HLEN);
+    // Only populate if IP version is IPv4.
+    if ((ctx->packet[ETH_HLEN] & 0xf0) == 0x40) {
+        ctx->mem.named.ipv4_header_size = (ctx->packet[ETH_HLEN] & 15) * 4;
+    }
+    // Count of instructions remaining to execute. This is done to ensure an
+    // upper bound on execution time. It should never be hit and is only for
+    // safety. Initialize to the number of bytes in the program which is an
+    // upper bound on the number of instructions in the program.
+    u32 instructions_remaining = ctx->program_len;
 
 // Is access to offset |p| length |size| within output buffer bounds?
 #define IN_OUTPUT_BOUNDS(p, size) (ENFORCE_UNSIGNED(p) && \
@@ -148,43 +148,43 @@
 // Accept packet if not write within allocated output buffer
 #define ASSERT_IN_OUTPUT_BOUNDS(p, size) ASSERT_RETURN(IN_OUTPUT_BOUNDS(p, size))
 
-  do {
-      APF_TRACE_HOOK(ctx->pc, ctx->R, ctx->program, ctx->program_len,
-                     ctx->packet, ctx->packet_len, ctx->mem.slot, ctx->ram_len);
-      if (ctx->pc == ctx->program_len + 1) return DROP_PACKET;
-      if (ctx->pc >= ctx->program_len) return PASS_PACKET;
+    do {
+        APF_TRACE_HOOK(ctx->pc, ctx->R, ctx->program, ctx->program_len,
+                       ctx->packet, ctx->packet_len, ctx->mem.slot, ctx->ram_len);
+        if (ctx->pc == ctx->program_len + 1) return DROP_PACKET;
+        if (ctx->pc >= ctx->program_len) return PASS_PACKET;
 
-      const u8 bytecode = ctx->program[ctx->pc++];
-      const u32 opcode = EXTRACT_OPCODE(bytecode);
-      const u32 reg_num = EXTRACT_REGISTER(bytecode);
+        const u8 bytecode = ctx->program[ctx->pc++];
+        const u32 opcode = EXTRACT_OPCODE(bytecode);
+        const u32 reg_num = EXTRACT_REGISTER(bytecode);
 #define REG (ctx->R[reg_num])
 #define OTHER_REG (ctx->R[reg_num ^ 1])
-      // All instructions have immediate fields, so load them now.
-      const u32 len_field = EXTRACT_IMM_LENGTH(bytecode);
-      u32 imm = 0;
-      s32 signed_imm = 0;
-      if (len_field != 0) {
-          const u32 imm_len = 1 << (len_field - 1);
-          imm = decode_imm(ctx, imm_len); // 1st imm, at worst bytes 1-4 past opcode/program_len
-          // Sign extend imm into signed_imm.
-          signed_imm = (s32)(imm << ((4 - imm_len) * 8));
-          signed_imm >>= (4 - imm_len) * 8;
-      }
+        // All instructions have immediate fields, so load them now.
+        const u32 len_field = EXTRACT_IMM_LENGTH(bytecode);
+        u32 imm = 0;
+        s32 signed_imm = 0;
+        if (len_field != 0) {
+            const u32 imm_len = 1 << (len_field - 1);
+            imm = decode_imm(ctx, imm_len); // 1st imm, at worst bytes 1-4 past opcode/program_len
+            // Sign extend imm into signed_imm.
+            signed_imm = (s32)(imm << ((4 - imm_len) * 8));
+            signed_imm >>= (4 - imm_len) * 8;
+        }
 
-      // See comment at ADD_OPCODE for the reason for ARITH_REG/arith_imm/arith_signed_imm.
+        // See comment at ADD_OPCODE for the reason for ARITH_REG/arith_imm/arith_signed_imm.
 #define ARITH_REG (ctx->R[reg_num & ctx->v6])
-      u32 arith_imm = (ctx->v6) ? (len_field ? imm : OTHER_REG) : (reg_num ? ctx->R[1] : imm);
-      s32 arith_signed_imm = (ctx->v6) ? (len_field ? signed_imm : (s32)OTHER_REG) : (reg_num ? (s32)ctx->R[1] : signed_imm);
+        u32 arith_imm = (ctx->v6) ? (len_field ? imm : OTHER_REG) : (reg_num ? ctx->R[1] : imm);
+        s32 arith_signed_imm = (ctx->v6) ? (len_field ? signed_imm : (s32)OTHER_REG) : (reg_num ? (s32)ctx->R[1] : signed_imm);
 
-      u32 pktcopy_src_offset = 0;  // used for various pktdatacopy opcodes
-      switch (opcode) {
+        u32 pktcopy_src_offset = 0;  // used for various pktdatacopy opcodes
+        switch (opcode) {
           case PASSDROP_OPCODE: {  // APFv6+
-              if (len_field > 2) return PASS_PACKET;  // max 64K counters (ie. imm < 64K)
-              if (imm) {
-                  if (4 * imm > ctx->ram_len) return PASS_PACKET;
-                  counter[-(s32)imm]++;
-              }
-              return reg_num ? DROP_PACKET : PASS_PACKET;
+            if (len_field > 2) return PASS_PACKET;  // max 64K counters (ie. imm < 64K)
+            if (imm) {
+                if (4 * imm > ctx->ram_len) return PASS_PACKET;
+                counter[-(s32)imm]++;
+            }
+            return reg_num ? DROP_PACKET : PASS_PACKET;
           }
           case LDB_OPCODE:
           case LDH_OPCODE:
@@ -192,93 +192,93 @@
           case LDBX_OPCODE:
           case LDHX_OPCODE:
           case LDWX_OPCODE: {
-              u32 offs = imm;
-              // Note: this can overflow and actually decrease offs.
-              if (opcode >= LDBX_OPCODE) offs += ctx->R[1];
-              ASSERT_IN_PACKET_BOUNDS(offs);
-              u32 load_size = 0;
-              switch (opcode) {
-                  case LDB_OPCODE:
-                  case LDBX_OPCODE:
-                    load_size = 1;
-                    break;
-                  case LDH_OPCODE:
-                  case LDHX_OPCODE:
-                    load_size = 2;
-                    break;
-                  case LDW_OPCODE:
-                  case LDWX_OPCODE:
-                    load_size = 4;
-                    break;
-                  // Immediately enclosing switch statement guarantees
-                  // opcode cannot be any other value.
-              }
-              const u32 end_offs = offs + (load_size - 1);
-              // Catch overflow/wrap-around.
-              ASSERT_RETURN(end_offs >= offs);
-              ASSERT_IN_PACKET_BOUNDS(end_offs);
-              u32 val = 0;
-              while (load_size--) val = (val << 8) | ctx->packet[offs++];
-              REG = val;
-              break;
+            u32 offs = imm;
+            // Note: this can overflow and actually decrease offs.
+            if (opcode >= LDBX_OPCODE) offs += ctx->R[1];
+            ASSERT_IN_PACKET_BOUNDS(offs);
+            u32 load_size = 0;
+            switch (opcode) {
+              case LDB_OPCODE:
+              case LDBX_OPCODE:
+                load_size = 1;
+                break;
+              case LDH_OPCODE:
+              case LDHX_OPCODE:
+                load_size = 2;
+                break;
+              case LDW_OPCODE:
+              case LDWX_OPCODE:
+                load_size = 4;
+                break;
+              // Immediately enclosing switch statement guarantees
+              // opcode cannot be any other value.
+            }
+            const u32 end_offs = offs + (load_size - 1);
+            // Catch overflow/wrap-around.
+            ASSERT_RETURN(end_offs >= offs);
+            ASSERT_IN_PACKET_BOUNDS(end_offs);
+            u32 val = 0;
+            while (load_size--) val = (val << 8) | ctx->packet[offs++];
+            REG = val;
+            break;
           }
           case JMP_OPCODE:
-              if (reg_num && !ctx->v6) {  // APFv6+
+            if (reg_num && !ctx->v6) {  // APFv6+
                 // First invocation of APFv6 jmpdata instruction
                 counter[-1] = 0x12345678;  // endianness marker
                 counter[-2]++;  // total packets ++
                 ctx->v6 = (u8)true;
-              }
-              // This can jump backwards. Infinite looping prevented by instructions_remaining.
-              ctx->pc += imm;
-              break;
+            }
+            // This can jump backwards. Infinite looping prevented by instructions_remaining.
+            ctx->pc += imm;
+            break;
           case JEQ_OPCODE:
           case JNE_OPCODE:
           case JGT_OPCODE:
           case JLT_OPCODE:
           case JSET_OPCODE: {
-              // with len_field == 0, we have imm == 0 and thus a jmp +0, ie. a no-op
-              if (len_field == 0) break;
-              // Load second immediate field.
-              u32 cmp_imm = 0;
-              if (reg_num == 1) {
-                  cmp_imm = ctx->R[1];
-              } else {
-                  u32 cmp_imm_len = 1 << (len_field - 1);
-                  cmp_imm = decode_imm(ctx, cmp_imm_len); // 2nd imm, at worst 8 bytes past prog_len
-              }
-              switch (opcode) {
-                  case JEQ_OPCODE:  if (ctx->R[0] == cmp_imm) ctx->pc += imm; break;
-                  case JNE_OPCODE:  if (ctx->R[0] != cmp_imm) ctx->pc += imm; break;
-                  case JGT_OPCODE:  if (ctx->R[0] >  cmp_imm) ctx->pc += imm; break;
-                  case JLT_OPCODE:  if (ctx->R[0] <  cmp_imm) ctx->pc += imm; break;
-                  case JSET_OPCODE: if (ctx->R[0] &  cmp_imm) ctx->pc += imm; break;
-              }
-              break;
+            // with len_field == 0, we have imm == 0 and thus a jmp +0, ie. a no-op
+            if (len_field == 0) break;
+            // Load second immediate field.
+            u32 cmp_imm = 0;
+            if (reg_num == 1) {
+                cmp_imm = ctx->R[1];
+            } else {
+                u32 cmp_imm_len = 1 << (len_field - 1);
+                cmp_imm = decode_imm(ctx, cmp_imm_len); // 2nd imm, at worst 8 bytes past prog_len
+            }
+            switch (opcode) {
+              case JEQ_OPCODE:  if (ctx->R[0] == cmp_imm) ctx->pc += imm; break;
+              case JNE_OPCODE:  if (ctx->R[0] != cmp_imm) ctx->pc += imm; break;
+              case JGT_OPCODE:  if (ctx->R[0] >  cmp_imm) ctx->pc += imm; break;
+              case JLT_OPCODE:  if (ctx->R[0] <  cmp_imm) ctx->pc += imm; break;
+              case JSET_OPCODE: if (ctx->R[0] &  cmp_imm) ctx->pc += imm; break;
+            }
+            break;
           }
           case JBSMATCH_OPCODE: {
-              // with len_field == 0, we have imm == cmp_imm == 0 and thus a jmp +0, ie. a no-op
-              if (len_field == 0) break;
-              // Load second immediate field.
-              u32 cmp_imm_len = 1 << (len_field - 1);
-              u32 cmp_imm = decode_imm(ctx, cmp_imm_len); // 2nd imm, at worst 8 bytes past prog_len
-              // cmp_imm is size in bytes of data to compare.
-              // pc is offset of program bytes to compare.
-              // imm is jump target offset.
-              // R0 is offset of packet bytes to compare.
-              if (cmp_imm > 0xFFFF) return PASS_PACKET;
-              bool do_jump = !reg_num;
-              // pc < program_len < ram_len < 2GiB, thus pc + cmp_imm cannot wrap
-              if (!IN_RAM_BOUNDS(ctx->pc + cmp_imm - 1)) return PASS_PACKET;
-              ASSERT_IN_PACKET_BOUNDS(ctx->R[0]);
-              const u32 last_packet_offs = ctx->R[0] + cmp_imm - 1;
-              ASSERT_RETURN(last_packet_offs >= ctx->R[0]);
-              ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
-              do_jump ^= !memcmp(ctx->program + ctx->pc, ctx->packet + ctx->R[0], cmp_imm);
-              // skip past comparison bytes
-              ctx->pc += cmp_imm;
-              if (do_jump) ctx->pc += imm;
-              break;
+            // with len_field == 0, we have imm == cmp_imm == 0 and thus a jmp +0, ie. a no-op
+            if (len_field == 0) break;
+            // Load second immediate field.
+            u32 cmp_imm_len = 1 << (len_field - 1);
+            u32 cmp_imm = decode_imm(ctx, cmp_imm_len); // 2nd imm, at worst 8 bytes past prog_len
+            // cmp_imm is size in bytes of data to compare.
+            // pc is offset of program bytes to compare.
+            // imm is jump target offset.
+            // R0 is offset of packet bytes to compare.
+            if (cmp_imm > 0xFFFF) return PASS_PACKET;
+            bool do_jump = !reg_num;
+            // pc < program_len < ram_len < 2GiB, thus pc + cmp_imm cannot wrap
+            if (!IN_RAM_BOUNDS(ctx->pc + cmp_imm - 1)) return PASS_PACKET;
+            ASSERT_IN_PACKET_BOUNDS(ctx->R[0]);
+            const u32 last_packet_offs = ctx->R[0] + cmp_imm - 1;
+            ASSERT_RETURN(last_packet_offs >= ctx->R[0]);
+            ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
+            do_jump ^= !memcmp(ctx->program + ctx->pc, ctx->packet + ctx->R[0], cmp_imm);
+            // skip past comparison bytes
+            ctx->pc += cmp_imm;
+            if (do_jump) ctx->pc += imm;
+            break;
           }
           // There is a difference in APFv4 and APFv6 arithmetic behaviour!
           // APFv4:  R[0] op= Rbit ? R[1] : imm;  (and it thus doesn't make sense to have R=1 && len_field>0)
@@ -289,259 +289,259 @@
           case AND_OPCODE: ARITH_REG &= (ctx->v6) ? (u32)arith_signed_imm : arith_imm; break;
           case OR_OPCODE:  ARITH_REG |= arith_imm; break;
           case DIV_OPCODE: {  // see above comment!
-              const u32 div_operand = arith_imm;
-              ASSERT_RETURN(div_operand);
-              ARITH_REG /= div_operand;
-              break;
+            const u32 div_operand = arith_imm;
+            ASSERT_RETURN(div_operand);
+            ARITH_REG /= div_operand;
+            break;
           }
           case SH_OPCODE: {  // see above comment!
-              if (arith_signed_imm >= 0)
-                  ARITH_REG <<= arith_signed_imm;
-              else
-                  ARITH_REG >>= -arith_signed_imm;
-              break;
+            if (arith_signed_imm >= 0)
+                ARITH_REG <<= arith_signed_imm;
+            else
+                ARITH_REG >>= -arith_signed_imm;
+            break;
           }
           case LI_OPCODE:
-              REG = (u32)signed_imm;
-              break;
+            REG = (u32)signed_imm;
+            break;
           case PKTDATACOPY_OPCODE:
-              pktcopy_src_offset = imm;
-              imm = PKTDATACOPYIMM_EXT_OPCODE;
-              FALLTHROUGH;
+            pktcopy_src_offset = imm;
+            imm = PKTDATACOPYIMM_EXT_OPCODE;
+            FALLTHROUGH;
           case EXT_OPCODE:
-              if (// imm >= LDM_EXT_OPCODE &&  -- but note imm is u32 and LDM_EXT_OPCODE is 0
-                  imm < (LDM_EXT_OPCODE + MEMORY_ITEMS)) {
+            if (// imm >= LDM_EXT_OPCODE &&  -- but note imm is u32 and LDM_EXT_OPCODE is 0
+                imm < (LDM_EXT_OPCODE + MEMORY_ITEMS)) {
                 REG = ctx->mem.slot[imm - LDM_EXT_OPCODE];
-              } else if (imm >= STM_EXT_OPCODE && imm < (STM_EXT_OPCODE + MEMORY_ITEMS)) {
+            } else if (imm >= STM_EXT_OPCODE && imm < (STM_EXT_OPCODE + MEMORY_ITEMS)) {
                 ctx->mem.slot[imm - STM_EXT_OPCODE] = REG;
-              } else switch (imm) {
-                  case NOT_EXT_OPCODE: REG = ~REG;      break;
-                  case NEG_EXT_OPCODE: REG = -REG;      break;
-                  case MOV_EXT_OPCODE: REG = OTHER_REG; break;
-                  case SWAP_EXT_OPCODE: {
-                    u32 tmp = REG;
-                    REG = OTHER_REG;
-                    OTHER_REG = tmp;
-                    break;
-                  }
-                  case ALLOCATE_EXT_OPCODE:
-                    ASSERT_RETURN(ctx->tx_buf == NULL);
-                    if (reg_num == 0) {
-                        ctx->tx_buf_len = REG;
-                    } else {
-                        ctx->tx_buf_len = decode_be16(ctx); // 2nd imm, at worst 6 B past prog_len
-                    }
-                    // checksumming functions requires minimum 266 byte buffer for correctness
-                    if (ctx->tx_buf_len < 266) ctx->tx_buf_len = 266;
-                    ctx->tx_buf = apf_allocate_buffer(ctx->caller_ctx, ctx->tx_buf_len);
-                    if (!ctx->tx_buf) {  // allocate failure
-                        ctx->tx_buf_len = 0;
-                        counter[-3]++;
-                        return PASS_PACKET;
-                    }
-                    memset(ctx->tx_buf, 0, ctx->tx_buf_len);
-                    ctx->mem.named.tx_buf_offset = 0;
-                    break;
-                  case TRANSMIT_EXT_OPCODE:
-                    ASSERT_RETURN(ctx->tx_buf);
-                    u32 pkt_len = ctx->mem.named.tx_buf_offset;
-                    // If pkt_len > allocate_buffer_len, it means sth. wrong
-                    // happened and the tx_buf should be deallocated.
-                    if (pkt_len > ctx->tx_buf_len) {
-                        do_discard_buffer(ctx);
-                        return PASS_PACKET;
-                    }
-                    // tx_buf_len cannot be large because we'd run out of RAM,
-                    // so the above unsigned comparison effectively guarantees casting pkt_len
-                    // to a signed value does not result in it going negative.
-                    u8 ip_ofs = DECODE_U8();              // 2nd imm, at worst 5 B past prog_len
-                    u8 csum_ofs = DECODE_U8();            // 3rd imm, at worst 6 B past prog_len
-                    u8 csum_start = 0;
-                    u16 partial_csum = 0;
-                    if (csum_ofs < 255) {
-                        csum_start = DECODE_U8();         // 4th imm, at worst 7 B past prog_len
-                        partial_csum = decode_be16(ctx);  // 5th imm, at worst 9 B past prog_len
-                    }
-                    int dscp = csum_and_return_dscp(ctx->tx_buf, (s32)pkt_len, ip_ofs,
-                                                    partial_csum, csum_start, csum_ofs,
-                                                    (bool)reg_num);
-                    int ret = do_transmit_buffer(ctx, pkt_len, dscp);
-                    if (ret) { counter[-4]++; return PASS_PACKET; } // transmit failure
-                    break;
-                  case EPKTDATACOPYIMM_EXT_OPCODE:  // 41
-                  case EPKTDATACOPYR1_EXT_OPCODE:   // 42
-                    pktcopy_src_offset = ctx->R[0];
-                    FALLTHROUGH;
-                  case PKTDATACOPYIMM_EXT_OPCODE: { // 65536
-                    u32 copy_len = ctx->R[1];
-                    if (imm != EPKTDATACOPYR1_EXT_OPCODE) {
-                        copy_len = DECODE_U8();  // 2nd imm, at worst 8 bytes past prog_len
-                    }
-                    ASSERT_RETURN(ctx->tx_buf);
-                    u32 dst_offs = ctx->mem.named.tx_buf_offset;
-                    ASSERT_IN_OUTPUT_BOUNDS(dst_offs, copy_len);
-                    if (reg_num == 0) {  // copy from packet
-                        ASSERT_IN_PACKET_BOUNDS(pktcopy_src_offset);
-                        const u32 last_packet_offs = pktcopy_src_offset + copy_len - 1;
-                        ASSERT_RETURN(last_packet_offs >= pktcopy_src_offset);
-                        ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
-                        memcpy(ctx->tx_buf + dst_offs, ctx->packet + pktcopy_src_offset, copy_len);
-                    } else {  // copy from data
-                        ASSERT_IN_RAM_BOUNDS(pktcopy_src_offset + copy_len - 1);
-                        memcpy(ctx->tx_buf + dst_offs, ctx->program + pktcopy_src_offset, copy_len);
-                    }
-                    dst_offs += copy_len;
-                    ctx->mem.named.tx_buf_offset = dst_offs;
-                    break;
-                  }
-                  case JDNSQMATCH_EXT_OPCODE:       // 43
-                  case JDNSAMATCH_EXT_OPCODE:       // 44
-                  case JDNSQMATCHSAFE_EXT_OPCODE:   // 45
-                  case JDNSAMATCHSAFE_EXT_OPCODE: { // 46
-                    const u32 imm_len = 1 << (len_field - 1); // EXT_OPCODE, thus len_field > 0
-                    u32 jump_offs = decode_imm(ctx, imm_len); // 2nd imm, at worst 8 B past prog_len
-                    int qtype = -1;
-                    if (imm & 1) { // JDNSQMATCH & JDNSQMATCHSAFE are *odd* extended opcodes
-                        qtype = DECODE_U8();  // 3rd imm, at worst 9 bytes past prog_len
-                    }
-                    u32 udp_payload_offset = ctx->R[0];
-                    match_result_type match_rst = match_names(ctx->program + ctx->pc,
-                                                              ctx->program + ctx->program_len,
-                                                              ctx->packet + udp_payload_offset,
-                                                              ctx->packet_len - udp_payload_offset,
-                                                              qtype);
-                    if (match_rst == error_program) return PASS_PACKET;
-                    if (match_rst == error_packet) {
-                        counter[-5]++; // increment error dns packet counter
-                        return (imm >= JDNSQMATCHSAFE_EXT_OPCODE) ? PASS_PACKET : DROP_PACKET;
-                    }
-                    while (ctx->pc + 1 < ctx->program_len &&
-                           (ctx->program[ctx->pc] || ctx->program[ctx->pc + 1])) {
-                        ctx->pc++;
-                    }
-                    ctx->pc += 2;  // skip the final double 0 needle end
-                    // relies on reg_num in {0,1} and match_rst being {false=0, true=1}
-                    if (!(reg_num ^ (u32)match_rst)) ctx->pc += jump_offs;
-                    break;
-                  }
-                  case EWRITE1_EXT_OPCODE:
-                  case EWRITE2_EXT_OPCODE:
-                  case EWRITE4_EXT_OPCODE: {
-                    ASSERT_RETURN(ctx->tx_buf);
-                    const u32 write_len = 1 << (imm - EWRITE1_EXT_OPCODE);
-                    ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
-                    u32 i;
-                    for (i = 0; i < write_len; ++i) {
-                        ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
-                            (u8)(REG >> (write_len - 1 - i) * 8);
-                    }
-                    break;
-                  }
-                  case JONEOF_EXT_OPCODE: {
-                    const u32 imm_len = 1 << (len_field - 1); // ext opcode len_field guaranteed > 0
-                    u32 jump_offs = decode_imm(ctx, imm_len); // 2nd imm, at worst 8 B past prog_len
-                    u8 imm3 = DECODE_U8();  // 3rd imm, at worst 9 bytes past prog_len
-                    bool jmp = imm3 & 1;  // =0 jmp on match, =1 jmp on no match
-                    u8 len = ((imm3 >> 1) & 3) + 1;  // size [1..4] in bytes of an element
-                    u8 cnt = (imm3 >> 3) + 1;  // number [1..32] of elements in set
-                    if (ctx->pc + cnt * len > ctx->program_len) return PASS_PACKET;
-                    while (cnt--) {
-                        u32 v = 0;
-                        int i;
-                        for (i = 0; i < len; ++i) v = (v << 8) | DECODE_U8();
-                        if (REG == v) jmp ^= true;
-                    }
-                    if (jmp) ctx->pc += jump_offs;
-                    return PASS_PACKET;
-                  }
-                  default:  // Unknown extended opcode
-                    return PASS_PACKET;  // Bail out
+            } else switch (imm) {
+              case NOT_EXT_OPCODE: REG = ~REG;      break;
+              case NEG_EXT_OPCODE: REG = -REG;      break;
+              case MOV_EXT_OPCODE: REG = OTHER_REG; break;
+              case SWAP_EXT_OPCODE: {
+                u32 tmp = REG;
+                REG = OTHER_REG;
+                OTHER_REG = tmp;
+                break;
               }
-              break;
+              case ALLOCATE_EXT_OPCODE:
+                ASSERT_RETURN(ctx->tx_buf == NULL);
+                if (reg_num == 0) {
+                    ctx->tx_buf_len = REG;
+                } else {
+                    ctx->tx_buf_len = decode_be16(ctx); // 2nd imm, at worst 6 B past prog_len
+                }
+                // the checksumming function requires a minimum 266 byte buffer for correctness
+                if (ctx->tx_buf_len < 266) ctx->tx_buf_len = 266;
+                ctx->tx_buf = apf_allocate_buffer(ctx->caller_ctx, ctx->tx_buf_len);
+                if (!ctx->tx_buf) {  // allocate failure
+                    ctx->tx_buf_len = 0;
+                    counter[-3]++;
+                    return PASS_PACKET;
+                }
+                memset(ctx->tx_buf, 0, ctx->tx_buf_len);
+                ctx->mem.named.tx_buf_offset = 0;
+                break;
+              case TRANSMIT_EXT_OPCODE:
+                ASSERT_RETURN(ctx->tx_buf);
+                u32 pkt_len = ctx->mem.named.tx_buf_offset;
+                // If pkt_len > tx_buf_len, something went wrong and the
+                // tx_buf should be deallocated.
+                if (pkt_len > ctx->tx_buf_len) {
+                    do_discard_buffer(ctx);
+                    return PASS_PACKET;
+                }
+                // tx_buf_len cannot be large because we'd run out of RAM,
+                // so the above unsigned comparison effectively guarantees casting pkt_len
+                // to a signed value does not result in it going negative.
+                u8 ip_ofs = DECODE_U8();              // 2nd imm, at worst 5 B past prog_len
+                u8 csum_ofs = DECODE_U8();            // 3rd imm, at worst 6 B past prog_len
+                u8 csum_start = 0;
+                u16 partial_csum = 0;
+                if (csum_ofs < 255) {
+                    csum_start = DECODE_U8();         // 4th imm, at worst 7 B past prog_len
+                    partial_csum = decode_be16(ctx);  // 5th imm, at worst 9 B past prog_len
+                }
+                int dscp = csum_and_return_dscp(ctx->tx_buf, (s32)pkt_len, ip_ofs,
+                                                partial_csum, csum_start, csum_ofs,
+                                                (bool)reg_num);
+                int ret = do_transmit_buffer(ctx, pkt_len, dscp);
+                if (ret) { counter[-4]++; return PASS_PACKET; } // transmit failure
+                break;
+              case EPKTDATACOPYIMM_EXT_OPCODE:  // 41
+              case EPKTDATACOPYR1_EXT_OPCODE:   // 42
+                pktcopy_src_offset = ctx->R[0];
+                FALLTHROUGH;
+              case PKTDATACOPYIMM_EXT_OPCODE: { // 65536
+                u32 copy_len = ctx->R[1];
+                if (imm != EPKTDATACOPYR1_EXT_OPCODE) {
+                    copy_len = DECODE_U8();  // 2nd imm, at worst 8 bytes past prog_len
+                }
+                ASSERT_RETURN(ctx->tx_buf);
+                u32 dst_offs = ctx->mem.named.tx_buf_offset;
+                ASSERT_IN_OUTPUT_BOUNDS(dst_offs, copy_len);
+                if (reg_num == 0) {  // copy from packet
+                    ASSERT_IN_PACKET_BOUNDS(pktcopy_src_offset);
+                    const u32 last_packet_offs = pktcopy_src_offset + copy_len - 1;
+                    ASSERT_RETURN(last_packet_offs >= pktcopy_src_offset);
+                    ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
+                    memcpy(ctx->tx_buf + dst_offs, ctx->packet + pktcopy_src_offset, copy_len);
+                } else {  // copy from data
+                    ASSERT_IN_RAM_BOUNDS(pktcopy_src_offset + copy_len - 1);
+                    memcpy(ctx->tx_buf + dst_offs, ctx->program + pktcopy_src_offset, copy_len);
+                }
+                dst_offs += copy_len;
+                ctx->mem.named.tx_buf_offset = dst_offs;
+                break;
+              }
+              case JDNSQMATCH_EXT_OPCODE:       // 43
+              case JDNSAMATCH_EXT_OPCODE:       // 44
+              case JDNSQMATCHSAFE_EXT_OPCODE:   // 45
+              case JDNSAMATCHSAFE_EXT_OPCODE: { // 46
+                const u32 imm_len = 1 << (len_field - 1); // EXT_OPCODE, thus len_field > 0
+                u32 jump_offs = decode_imm(ctx, imm_len); // 2nd imm, at worst 8 B past prog_len
+                int qtype = -1;
+                if (imm & 1) { // JDNSQMATCH & JDNSQMATCHSAFE are *odd* extended opcodes
+                    qtype = DECODE_U8();  // 3rd imm, at worst 9 bytes past prog_len
+                }
+                u32 udp_payload_offset = ctx->R[0];
+                match_result_type match_rst = match_names(ctx->program + ctx->pc,
+                                                          ctx->program + ctx->program_len,
+                                                          ctx->packet + udp_payload_offset,
+                                                          ctx->packet_len - udp_payload_offset,
+                                                          qtype);
+                if (match_rst == error_program) return PASS_PACKET;
+                if (match_rst == error_packet) {
+                    counter[-5]++; // increment error dns packet counter
+                    return (imm >= JDNSQMATCHSAFE_EXT_OPCODE) ? PASS_PACKET : DROP_PACKET;
+                }
+                while (ctx->pc + 1 < ctx->program_len &&
+                       (ctx->program[ctx->pc] || ctx->program[ctx->pc + 1])) {
+                    ctx->pc++;
+                }
+                ctx->pc += 2;  // skip the final double 0 needle end
+                // relies on reg_num in {0,1} and match_rst being {false=0, true=1}
+                if (!(reg_num ^ (u32)match_rst)) ctx->pc += jump_offs;
+                break;
+              }
+              case EWRITE1_EXT_OPCODE:
+              case EWRITE2_EXT_OPCODE:
+              case EWRITE4_EXT_OPCODE: {
+                ASSERT_RETURN(ctx->tx_buf);
+                const u32 write_len = 1 << (imm - EWRITE1_EXT_OPCODE);
+                ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
+                u32 i;
+                for (i = 0; i < write_len; ++i) {
+                    ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
+                        (u8)(REG >> (write_len - 1 - i) * 8);
+                }
+                break;
+              }
+              case JONEOF_EXT_OPCODE: {
+                const u32 imm_len = 1 << (len_field - 1); // ext opcode len_field guaranteed > 0
+                u32 jump_offs = decode_imm(ctx, imm_len); // 2nd imm, at worst 8 B past prog_len
+                u8 imm3 = DECODE_U8();  // 3rd imm, at worst 9 bytes past prog_len
+                bool jmp = imm3 & 1;  // =0 jmp on match, =1 jmp on no match
+                u8 len = ((imm3 >> 1) & 3) + 1;  // size [1..4] in bytes of an element
+                u8 cnt = (imm3 >> 3) + 1;  // number [1..32] of elements in set
+                if (ctx->pc + cnt * len > ctx->program_len) return PASS_PACKET;
+                while (cnt--) {
+                    u32 v = 0;
+                    int i;
+                    for (i = 0; i < len; ++i) v = (v << 8) | DECODE_U8();
+                    if (REG == v) jmp ^= true;
+                }
+                if (jmp) ctx->pc += jump_offs;
+                return PASS_PACKET;
+              }
+              default:  // Unknown extended opcode
+                return PASS_PACKET;  // Bail out
+            }
+            break;
           case LDDW_OPCODE:
           case STDW_OPCODE:
-              if (ctx->v6) {
-                  if (!imm) return PASS_PACKET;
-                  if (imm > 0xFFFF) return PASS_PACKET;
-                  if (imm * 4 > ctx->ram_len) return PASS_PACKET;
-                  if (opcode == LDDW_OPCODE) {
-                     REG = counter[-(s32)imm];
-                  } else {
-                     counter[-(s32)imm] = REG;
-                  }
-              } else {
-                  u32 offs = OTHER_REG + (u32)signed_imm;
-                  // Negative offsets wrap around the end of the address space.
-                  // This allows us to efficiently access the end of the
-                  // address space with one-byte immediates without using %=.
-                  if (offs & 0x80000000) offs += ctx->ram_len;  // unsigned overflow intended
-                  u32 size = 4;
-                  ASSERT_IN_DATA_BOUNDS(offs, size);
-                  if (opcode == LDDW_OPCODE) {
-                      u32 val = 0;
-                      while (size--) val = (val << 8) | ctx->program[offs++];
-                      REG = val;
-                  } else {
-                      u32 val = REG;
-                      while (size--) {
-                          ctx->program[offs++] = (val >> 24);
-                          val <<= 8;
-                      }
-                  }
-              }
-              break;
+            if (ctx->v6) {
+                if (!imm) return PASS_PACKET;
+                if (imm > 0xFFFF) return PASS_PACKET;
+                if (imm * 4 > ctx->ram_len) return PASS_PACKET;
+                if (opcode == LDDW_OPCODE) {
+                    REG = counter[-(s32)imm];
+                } else {
+                    counter[-(s32)imm] = REG;
+                }
+            } else {
+                u32 offs = OTHER_REG + (u32)signed_imm;
+                // Negative offsets wrap around the end of the address space.
+                // This allows us to efficiently access the end of the
+                // address space with one-byte immediates without using %=.
+                if (offs & 0x80000000) offs += ctx->ram_len;  // unsigned overflow intended
+                u32 size = 4;
+                ASSERT_IN_DATA_BOUNDS(offs, size);
+                if (opcode == LDDW_OPCODE) {
+                    u32 val = 0;
+                    while (size--) val = (val << 8) | ctx->program[offs++];
+                    REG = val;
+                } else {
+                    u32 val = REG;
+                    while (size--) {
+                        ctx->program[offs++] = (val >> 24);
+                        val <<= 8;
+                    }
+                }
+            }
+            break;
           case WRITE_OPCODE: {
-              ASSERT_RETURN(ctx->tx_buf);
-              ASSERT_RETURN(len_field);
-              const u32 write_len = 1 << (len_field - 1);
-              ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
-              u32 i;
-              for (i = 0; i < write_len; ++i) {
-                  ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
-                      (u8)(imm >> (write_len - 1 - i) * 8);
-              }
-              break;
+            ASSERT_RETURN(ctx->tx_buf);
+            ASSERT_RETURN(len_field);
+            const u32 write_len = 1 << (len_field - 1);
+            ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
+            u32 i;
+            for (i = 0; i < write_len; ++i) {
+                ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
+                    (u8)(imm >> (write_len - 1 - i) * 8);
+            }
+            break;
           }
           default:  // Unknown opcode
-              return PASS_PACKET;  // Bail out
-      }
-  } while (instructions_remaining--);
-  return PASS_PACKET;
+            return PASS_PACKET;  // Bail out
+        }
+    } while (instructions_remaining--);
+    return PASS_PACKET;
 }
 
 int apf_run(void* ctx, u32* const program, const u32 program_len,
             const u32 ram_len, const u8* const packet,
             const u32 packet_len, const u32 filter_age_16384ths) {
-  // Due to direct 32-bit read/write access to counters at end of ram
-  // APFv6 interpreter requires program & ram_len to be 4 byte aligned.
-  if (3 & (uintptr_t)program) return PASS_PACKET;
-  if (3 & ram_len) return PASS_PACKET;
+    // Due to direct 32-bit read/write access to counters at end of ram
+    // APFv6 interpreter requires program & ram_len to be 4 byte aligned.
+    if (3 & (uintptr_t)program) return PASS_PACKET;
+    if (3 & ram_len) return PASS_PACKET;
 
-  // We rely on ram_len + 65536 not overflowing, so require ram_len < 2GiB
-  // Similarly LDDW/STDW have special meaning for negative ram offsets.
-  // We also don't want garbage like program_len == 0xFFFFFFFF
-  if ((program_len | ram_len) >> 31) return PASS_PACKET;
+    // We rely on ram_len + 65536 not overflowing, so require ram_len < 2GiB
+    // Similarly LDDW/STDW have special meaning for negative ram offsets.
+    // We also don't want garbage like program_len == 0xFFFFFFFF
+    if ((program_len | ram_len) >> 31) return PASS_PACKET;
 
-  // APFv6 requires at least 5 u32 counters at the end of ram, this makes counter[-5]++ valid
-  // This cannot wrap due to previous check.
-  if (program_len + 20 > ram_len) return PASS_PACKET;
+    // APFv6 requires at least 5 u32 counters at the end of ram, this makes counter[-5]++ valid
+    // This cannot wrap due to previous check.
+    if (program_len + 20 > ram_len) return PASS_PACKET;
 
-  apf_context apf_ctx = {};
-  apf_ctx.caller_ctx = ctx;
-  apf_ctx.program = (u8*)program;
-  apf_ctx.program_len = program_len;
-  apf_ctx.ram_len = ram_len;
-  apf_ctx.packet = packet;
-  apf_ctx.packet_len = packet_len;
-  // Fill in pre-filled memory slot values.
-  apf_ctx.mem.named.program_size = program_len;
-  apf_ctx.mem.named.ram_len = ram_len;
-  apf_ctx.mem.named.packet_size = packet_len;
-  apf_ctx.mem.named.apf_version = apf_version();
-  apf_ctx.mem.named.filter_age = filter_age_16384ths >> 14;
-  apf_ctx.mem.named.filter_age_16384ths = filter_age_16384ths;
+    apf_context apf_ctx = {};
+    apf_ctx.caller_ctx = ctx;
+    apf_ctx.program = (u8*)program;
+    apf_ctx.program_len = program_len;
+    apf_ctx.ram_len = ram_len;
+    apf_ctx.packet = packet;
+    apf_ctx.packet_len = packet_len;
+    // Fill in pre-filled memory slot values.
+    apf_ctx.mem.named.program_size = program_len;
+    apf_ctx.mem.named.ram_len = ram_len;
+    apf_ctx.mem.named.packet_size = packet_len;
+    apf_ctx.mem.named.apf_version = apf_version();
+    apf_ctx.mem.named.filter_age = filter_age_16384ths >> 14;
+    apf_ctx.mem.named.filter_age_16384ths = filter_age_16384ths;
 
-  int ret = do_apf_run(&apf_ctx);
-  if (apf_ctx.tx_buf) do_discard_buffer(&apf_ctx);
-  return ret;
+    int ret = do_apf_run(&apf_ctx);
+    if (apf_ctx.tx_buf) do_discard_buffer(&apf_ctx);
+    return ret;
 }