Snap for 11698527 from a6a7a71ee9527ee0190205f82ac3b2745fc54e6b to mainline-appsearch-release

Change-Id: I1f06099f617aeae6f9436ecced52908f39b04b6f
diff --git a/Android.bp b/Android.bp
index a1abee1..c6871f7 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1,6 +1,7 @@
 // Copyright 2016 The Android Open Source Project
 
 package {
+    default_team: "trendy_team_fwk_core_networking",
     default_applicable_licenses: ["hardware_google_apf_license"],
 }
 
@@ -69,7 +70,7 @@
         "apf_interpreter.c",
         "disassembler.c",
         "v5/apf_interpreter.c",
-        "v5/test_buf_allocator.c"
+        "v5/test_buf_allocator.c",
     ],
     cflags: [
         "-DAPF_TRACE_HOOK=apf_trace_hook",
@@ -103,3 +104,27 @@
         unit_test: true,
     },
 }
+
+cc_test_host {
+    name: "apf_checksum_test",
+    srcs: [
+        "apf_checksum_test.cc",
+    ],
+    cflags: [
+        "-Wall",
+        "-Wno-unused-function",
+    ],
+    stl: "c++_static",
+}
+
+cc_test_host {
+    name: "apf_dns_test",
+    srcs: [
+        "apf_dns_test.cc",
+    ],
+    cflags: [
+        "-Wall",
+        "-Wno-unused-function",
+    ],
+    stl: "c++_static",
+}
diff --git a/apf2java/apf2java.in b/apf2java/apf2java.in
new file mode 100644
index 0000000..0bc5910
--- /dev/null
+++ b/apf2java/apf2java.in
@@ -0,0 +1 @@
+6bf8b03a01b8120c6b8894022b06006b847c022488a27c021f88a47c021a88b87c021588cd7c021088e17c020b88e384004608066a0e6dff68a401f6000600010800060412147a1f016dff648401e900021a1c6b807c01e00000686bc8a401cc0006ffffffffffff6a266bb0a401c50004c0a801ec6bec7401ba120c84007808000a17821f1112149c00181fffab0d2a108211446a3239a205067e9046bc70086be874018f0a1e52f06ba07c018a00e06ba81a1e7e0000017dffffffff6ba47e00000172c0a801ff0a1782140612149c000d1fffab0d2a108206076dff6c7401596bdc68a401450006ffffffffffff6bac7401466be474013b7c001386dd686bc4a401280006ffffffffffff6bbc7401290a147c011c00007a0e3a6b8c0a267c011600ff6bd474010b0a366b9c7c010800858218886a26a2040fff02000000000000000000000000006b9872edaa0e82e096aa0f8c00d9048e68a2c60f7e9046bc700828c68e23672c86dd606a12a2ad1400603afffe800000000000002ac68efffe23672c6a36a2a60286006a3aa29f0240c0123c940098091e8c00931b586a3ea2721c0000000000000000010128c68e23672c05010000000005dc030440c01a5a9600000067555555558e0000005effffffff1a5e9600000053555555558e0000004affffffff6a62a22d18000000002401fa000480f0000000000000000000190300001a7a7a2800920a78940020091e8c001b1b586a7ea204122401fa000480f000000000000000000107016bb872086bd8b03a01b87206b03a01b87201
diff --git a/apf2java/apf2java.out b/apf2java/apf2java.out
new file mode 100644
index 0000000..04cb137
--- /dev/null
+++ b/apf2java/apf2java.out
@@ -0,0 +1,170 @@
+    @Test
+    public void testFullApfV4ProgramGeneration() throws IllegalInstructionException {
+        ApfV4Generator gen = new ApfV4Generator(APF_VERSION_4);
+        gen.addLoadImmediate(R1, -8);
+        gen.addLoadData(R0, 0);
+        gen.addAdd(1);
+        gen.addStoreData(R0, 0);
+        gen.addLoad16(R0, 12);
+        gen.addLoadImmediate(R1, -120);
+        gen.addJumpIfR0LessThan(0x600, "LABEL_INC_AND_DROP");
+        gen.addLoadImmediate(R1, -124);
+        gen.addJumpIfR0Equals(0x88a2, "LABEL_INC_AND_DROP");
+        gen.addJumpIfR0Equals(0x88a4, "LABEL_INC_AND_DROP");
+        gen.addJumpIfR0Equals(0x88b8, "LABEL_INC_AND_DROP");
+        gen.addJumpIfR0Equals(0x88cd, "LABEL_INC_AND_DROP");
+        gen.addJumpIfR0Equals(0x88e1, "LABEL_INC_AND_DROP");
+        gen.addJumpIfR0Equals(0x88e3, "LABEL_INC_AND_DROP");
+        gen.addJumpIfR0NotEquals(0x806, "LABEL_122");
+        gen.addLoadImmediate(R0, 14);
+        gen.addLoadImmediate(R1, -152);
+        gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("000108000604"), "LABEL_INC_AND_DROP");
+        gen.addLoad16(R0, 20);
+        gen.addJumpIfR0Equals(0x1, "LABEL_104");
+        gen.addLoadImmediate(R1, -156);
+        gen.addJumpIfR0NotEquals(0x2, "LABEL_INC_AND_DROP");
+        gen.addLoad32(R0, 28);
+        gen.addLoadImmediate(R1, -128);
+        gen.addJumpIfR0Equals(0x0, "LABEL_INC_AND_DROP");
+        gen.addLoadImmediate(R0, 0);
+        gen.addLoadImmediate(R1, -56);
+        gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("ffffffffffff"), "LABEL_INC_AND_PASS");
+
+        gen.defineLabel("LABEL_104");
+        gen.addLoadImmediate(R0, 38);
+        gen.addLoadImmediate(R1, -80);
+        gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("c0a801ec"), "LABEL_INC_AND_DROP");
+        gen.addLoadImmediate(R1, -20);
+        gen.addJump("LABEL_INC_AND_PASS");
+
+        gen.defineLabel("LABEL_122");
+        gen.addLoad16(R0, 12);
+        gen.addJumpIfR0NotEquals(0x800, "LABEL_249");
+        gen.addLoad8(R0, 23);
+        gen.addJumpIfR0NotEquals(0x11, "LABEL_165");
+        gen.addLoad16(R0, 20);
+        gen.addJumpIfR0AnyBitsSet(0x1fff, "LABEL_165");
+        gen.addLoadFromMemory(R1, 13);
+        gen.addLoad16Indexed(R0, 16);
+        gen.addJumpIfR0NotEquals(0x44, "LABEL_165");
+        gen.addLoadImmediate(R0, 50);
+        gen.addAddR1();
+        gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("7e9046bc7008"), "LABEL_165");
+        gen.addLoadImmediate(R1, -24);
+        gen.addJump("LABEL_INC_AND_PASS");
+
+        gen.defineLabel("LABEL_165");
+        gen.addLoad8(R0, 30);
+        gen.addAnd(240);
+        gen.addLoadImmediate(R1, -96);
+        gen.addJumpIfR0Equals(0xe0, "LABEL_INC_AND_DROP");
+        gen.addLoadImmediate(R1, -88);
+        gen.addLoad32(R0, 30);
+        gen.addJumpIfR0Equals(0xffffffff, "LABEL_INC_AND_DROP");
+        gen.addLoadImmediate(R1, -92);
+        gen.addJumpIfR0Equals(0xc0a801ff, "LABEL_INC_AND_DROP");
+        gen.addLoad8(R0, 23);
+        gen.addJumpIfR0NotEquals(0x6, "LABEL_225");
+        gen.addLoad16(R0, 20);
+        gen.addJumpIfR0AnyBitsSet(0x1fff, "LABEL_225");
+        gen.addLoadFromMemory(R1, 13);
+        gen.addLoad16Indexed(R0, 16);
+        gen.addJumpIfR0NotEquals(0x7, "LABEL_225");
+        gen.addLoadImmediate(R1, -148);
+        gen.addJump("LABEL_INC_AND_DROP");
+
+        gen.defineLabel("LABEL_225");
+        gen.addLoadImmediate(R1, -36);
+        gen.addLoadImmediate(R0, 0);
+        gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("ffffffffffff"), "LABEL_INC_AND_PASS");
+        gen.addLoadImmediate(R1, -84);
+        gen.addJump("LABEL_INC_AND_DROP");
+        gen.addLoadImmediate(R1, -28);
+        gen.addJump("LABEL_INC_AND_PASS");
+
+        gen.defineLabel("LABEL_249");
+        gen.addJumpIfR0Equals(0x86dd, "LABEL_273");
+        gen.addLoadImmediate(R0, 0);
+        gen.addLoadImmediate(R1, -60);
+        gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("ffffffffffff"), "LABEL_INC_AND_PASS");
+        gen.addLoadImmediate(R1, -68);
+        gen.addJump("LABEL_INC_AND_DROP");
+
+        gen.defineLabel("LABEL_273");
+        gen.addLoad8(R0, 20);
+        gen.addJumpIfR0Equals(0x0, "LABEL_INC_AND_PASS");
+        gen.addJumpIfR0Equals(0x3a, "LABEL_297");
+        gen.addLoadImmediate(R1, -116);
+        gen.addLoad8(R0, 38);
+        gen.addJumpIfR0Equals(0xff, "LABEL_INC_AND_DROP");
+        gen.addLoadImmediate(R1, -44);
+        gen.addJump("LABEL_INC_AND_PASS");
+
+        gen.defineLabel("LABEL_297");
+        gen.addLoad8(R0, 54);
+        gen.addLoadImmediate(R1, -100);
+        gen.addJumpIfR0Equals(0x85, "LABEL_INC_AND_DROP");
+        gen.addJumpIfR0NotEquals(0x88, "LABEL_333");
+        gen.addLoadImmediate(R0, 38);
+        gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("ff0200000000000000000000000000"), "LABEL_333");
+        gen.addLoadImmediate(R1, -104);
+        gen.addJump("LABEL_INC_AND_DROP");
+
+        gen.defineLabel("LABEL_333");
+        gen.addLoadFromMemory(R0, 14);
+        gen.addJumpIfR0NotEquals(0x96, "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addLoadFromMemory(R0, 15);
+        gen.addJumpIfR0GreaterThan(0x48e, "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addLoadImmediate(R0, 0);
+        gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("7e9046bc700828c68e23672c86dd60"), "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addLoadImmediate(R0, 18);
+        gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("00603afffe800000000000002ac68efffe23672c"), "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addLoadImmediate(R0, 54);
+        gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("8600"), "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addLoadImmediate(R0, 58);
+        gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("40c0"), "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addLoad16(R0, 60);
+        gen.addJumpIfR0LessThan(0x91e, "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addJumpIfR0GreaterThan(0x1b58, "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addLoadImmediate(R0, 62);
+        gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("0000000000000000010128c68e23672c05010000000005dc030440c0"), "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addLoad32(R0, 90);
+        gen.addJumpIfR0LessThan(0x55555555, "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addJumpIfR0GreaterThan(0xffffffffL, "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addLoad32(R0, 94);
+        gen.addJumpIfR0LessThan(0x55555555, "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addJumpIfR0GreaterThan(0xffffffffL, "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addLoadImmediate(R0, 98);
+        gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("000000002401fa000480f000000000000000000019030000"), "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addLoad32(R0, 122);
+        gen.addJumpIfR0Equals(0x0, "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addJumpIfR0LessThan(0x78, "LABEL_535");
+        gen.addJumpIfR0LessThan(0x91e, "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addJumpIfR0GreaterThan(0x1b58, "LABEL_UNSOLICITED_MULTICAST_NA");
+
+        gen.defineLabel("LABEL_535");
+        gen.addLoadImmediate(R0, 126);
+        gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("2401fa000480f00000000000000000010701"), "LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addLoadImmediate(R1, -72);
+        gen.addJump("LABEL_INC_AND_DROP");
+
+        gen.defineLabel("LABEL_UNSOLICITED_MULTICAST_NA");
+        gen.addLoadImmediate(R1, -40);
+
+        gen.defineLabel("LABEL_INC_AND_PASS");
+        gen.addLoadData(R0, 0);
+        gen.addAdd(1);
+        gen.addStoreData(R0, 0);
+        gen.addJump(PASS_LABEL);
+
+        gen.defineLabel("LABEL_INC_AND_DROP");
+        gen.addLoadData(R0, 0);
+        gen.addAdd(1);
+        gen.addStoreData(R0, 0);
+        gen.addJump(DROP_LABEL);
+
+        byte[] program = gen.generate();
+        final String programString = toHexString(program).toLowerCase();
+        final String referenceProgramHexString = "6bf8b03a01b8120c6b8894022b06006b847c022488a27c021f88a47c021a88b87c021588cd7c021088e17c020b88e384004608066a0e6dff68a401f6000600010800060412147a1f016dff648401e900021a1c6b807c01e00000686bc8a401cc0006ffffffffffff6a266bb0a401c50004c0a801ec6bec7401ba120c84007808000a17821f1112149c00181fffab0d2a108211446a3239a205067e9046bc70086be874018f0a1e52f06ba07c018a00e06ba81a1e7e0000017dffffffff6ba47e00000172c0a801ff0a1782140612149c000d1fffab0d2a108206076dff6c7401596bdc68a401450006ffffffffffff6bac7401466be474013b7c001386dd686bc4a401280006ffffffffffff6bbc7401290a147c011c00007a0e3a6b8c0a267c011600ff6bd474010b0a366b9c7c010800858218886a26a2040fff02000000000000000000000000006b9872edaa0e82e096aa0f8c00d9048e68a2c60f7e9046bc700828c68e23672c86dd606a12a2ad1400603afffe800000000000002ac68efffe23672c6a36a2a60286006a3aa29f0240c0123c940098091e8c00931b586a3ea2721c0000000000000000010128c68e23672c05010000000005dc030440c01a5a9600000067555555558e0000005effffffff1a5e9600000053555555558e0000004affffffff6a62a22d18000000002401fa000480f0000000000000000000190300001a7a7a2800920a78940020091e8c001b1b586a7ea204122401fa000480f000000000000000000107016bb872086bd8b03a01b87206b03a01b87201";
+        assertEquals(referenceProgramHexString, programString);
+    }
diff --git a/apf2java/apf2java.sh b/apf2java/apf2java.sh
new file mode 100755
index 0000000..b46fea6
--- /dev/null
+++ b/apf2java/apf2java.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+set -e
+set -u
+
+# depends on 'm apf_disassembler'
+"${ANDROID_HOST_OUT}/bin/apf_disassembler" < apf2java.in > apf2java.txt
+
+sed -r \
+'s@: li +r([01]), (-?[0-9]+)@: gen.addLoadImmediate(R\1, \2);@;'\
+'s@: and +r0, (-?[0-9]+)@: gen.addAnd(\1);@;'\
+'s@: add +r0, (-?[0-9]+)@: gen.addAdd(\1);@;'\
+'s@: add +r0, r1@: gen.addAddR1();@;'\
+'s@: swap +@: gen.addSwap();@;'\
+'s@: neg +r([01])@: gen.addNeg(R\1);@;'\
+'s@: jmp +(PASS|DROP)@: gen.addJump(\1_LABEL);@;'\
+'s@: jbsne +r0, 0x([0-9a-f]+), ([0-9]+), ([0-9a-f]+)@: gen.addJumpIfBytesAtR0NotEqual(hexStringToByteArray("\3"), LABEL_\2);@;'\
+'s@: jeq +r([01]), 0x([0-9a-f]+), ([0-9]+)@: gen.addJumpIfR\1Equals(0x\2, LABEL_\3);@;'\
+'s@: jne +r([01]), 0x([0-9a-f]+), ([0-9]+)@: gen.addJumpIfR\1NotEquals(0x\2, LABEL_\3);@;'\
+'s@: jlt +r([01]), 0x([0-9a-f]+), ([0-9]+)@: gen.addJumpIfR\1LessThan(0x\2, LABEL_\3);@;'\
+'s@: jgt +r([01]), 0x([0-9a-f]+), ([0-9]+)@: gen.addJumpIfR\1GreaterThan(0x\2, LABEL_\3);@;'\
+'s@: jset +r([01]), 0x([0-9a-f]+), ([0-9]+)@: gen.addJumpIfR\1AnyBitsSet(0x\2, LABEL_\3);@;'\
+'s@: jmp +([0-9]+)@: gen.addJump(LABEL_\1);@;'\
+'s@: lddw +r0, \[r1\]@: gen.addLoadData(R0, 0);@;'\
+'s@: stdw +r0, \[r1\]@: gen.addStoreData(R0, 0);@;'\
+'s@: ldb +r([01]), \[([0-9]+)\]@: gen.addLoad8(R\1, \2);@;'\
+'s@: ldh +r([01]), \[([0-9]+)\]@: gen.addLoad16(R\1, \2);@;'\
+'s@: ldw +r([01]), \[([0-9]+)\]@: gen.addLoad32(R\1, \2);@;'\
+'s@: ldbx +r([01]), \[r1\+([0-9]+)\]@: gen.addLoad8Indexed(R\1, \2);@;'\
+'s@: ldhx +r([01]), \[r1\+([0-9]+)\]@: gen.addLoad16Indexed(R\1, \2);@;'\
+'s@: ldwx +r([01]), \[r1\+([0-9]+)\]@: gen.addLoad32Indexed(R\1, \2);@;'\
+'s@: ldm +r([01]), m\[([0-9]+)\]@: gen.addLoadFromMemory(R\1, \2);@;'\
+'/addJumpIfR0(Greater|Less)Than/s@(0x[89a-f][0-9a-f]{7})@\1L@;'\
+< apf2java.txt > tmp
+declare -ar LABELS=($(sed -rn 's@.*LABEL_([0-9]+).*@\1@p' < tmp | sort -nu))
+for L in "${LABELS[@]}"; do
+  # Insert a defineLabel() line just ahead of the instruction at offset ${L}; LABELS is sorted numerically so [-1] is the highest offset.
+  sed -r "s@^( +${L}:)@\ngen.defineLabel(LABEL_${L});\n\1@" < tmp > tmp2
+  cat tmp2 > tmp
+done
+
+sed -r \
+'s@^ +[0-9]+: @@;'\
+'s@(LABEL_[0-9]+)@"\1"@;'\
+"s@\"LABEL_${LABELS[-1]}\"@\"LABEL_INC_AND_DROP\"@;"\
+"s@\"LABEL_${LABELS[-2]}\"@\"LABEL_INC_AND_PASS\"@;"\
+"s@\"LABEL_${LABELS[-3]}\"@\"LABEL_UNSOLICITED_MULTICAST_NA\"@;"\
+< tmp > tmp2
+# The above label renames assume the last three labels are the trampolines our current generator emits at the end of the program.
+
+if grep -Ev '^$|gen' < tmp2 > /dev/null; then
+  echo 'Failure to translate:'
+  grep -Ev '^$|gen' < tmp2
+  exit 1
+fi
+
+{
+  echo '    @Test'
+  echo '    public void testFullApfV4ProgramGeneration() throws IllegalInstructionException {'
+  echo '        ApfV4Generator gen = new ApfV4Generator(APF_VERSION_4);'
+  sed -r 's@^(.+)$@        \1@' < tmp2
+  echo
+  echo '        byte[] program = gen.generate();'
+  echo '        final String programString = toHexString(program).toLowerCase();'
+  echo -n '        final String referenceProgramHexString = "'
+  tr -d '\n' < apf2java.in
+  echo '";'
+  echo '        assertEquals(referenceProgramHexString, programString);'
+  echo '    }'
+} > apf2java.out
+
+rm -f tmp tmp2
diff --git a/apf2java/apf2java.txt b/apf2java/apf2java.txt
new file mode 100644
index 0000000..6012332
--- /dev/null
+++ b/apf2java/apf2java.txt
@@ -0,0 +1,137 @@
+       0: li          r1, -8
+       2: lddw        r0, [r1]
+       3: add         r0, 1
+       5: stdw        r0, [r1]
+       6: ldh         r0, [12]
+       8: li          r1, -120
+      10: jlt         r0, 0x600, 570
+      15: li          r1, -124
+      17: jeq         r0, 0x88a2, 570
+      22: jeq         r0, 0x88a4, 570
+      27: jeq         r0, 0x88b8, 570
+      32: jeq         r0, 0x88cd, 570
+      37: jeq         r0, 0x88e1, 570
+      42: jeq         r0, 0x88e3, 570
+      47: jne         r0, 0x806, 122
+      52: li          r0, 14
+      54: li          r1, -152
+      57: jbsne       r0, 0x6, 570, 000108000604
+      68: ldh         r0, [20]
+      70: jeq         r0, 0x1, 104
+      73: li          r1, -156
+      76: jne         r0, 0x2, 570
+      81: ldw         r0, [28]
+      83: li          r1, -128
+      85: jeq         r0, 0x0, 570
+      90: li          r0, 0
+      91: li          r1, -56
+      93: jbsne       r0, 0x6, 564, ffffffffffff
+     104: li          r0, 38
+     106: li          r1, -80
+     108: jbsne       r0, 0x4, 570, c0a801ec
+     117: li          r1, -20
+     119: jmp         564
+     122: ldh         r0, [12]
+     124: jne         r0, 0x800, 249
+     129: ldb         r0, [23]
+     131: jne         r0, 0x11, 165
+     134: ldh         r0, [20]
+     136: jset        r0, 0x1fff, 165
+     141: ldm         r1, m[13]
+     143: ldhx        r0, [r1+16]
+     145: jne         r0, 0x44, 165
+     148: li          r0, 50
+     150: add         r0, r1
+     151: jbsne       r0, 0x6, 165, 7e9046bc7008
+     160: li          r1, -24
+     162: jmp         564
+     165: ldb         r0, [30]
+     167: and         r0, 240
+     169: li          r1, -96
+     171: jeq         r0, 0xe0, 570
+     176: li          r1, -88
+     178: ldw         r0, [30]
+     180: jeq         r0, 0xffffffff, 570
+     189: li          r1, -92
+     191: jeq         r0, 0xc0a801ff, 570
+     200: ldb         r0, [23]
+     202: jne         r0, 0x6, 225
+     205: ldh         r0, [20]
+     207: jset        r0, 0x1fff, 225
+     212: ldm         r1, m[13]
+     214: ldhx        r0, [r1+16]
+     216: jne         r0, 0x7, 225
+     219: li          r1, -148
+     222: jmp         570
+     225: li          r1, -36
+     227: li          r0, 0
+     228: jbsne       r0, 0x6, 564, ffffffffffff
+     239: li          r1, -84
+     241: jmp         570
+     244: li          r1, -28
+     246: jmp         564
+     249: jeq         r0, 0x86dd, 273
+     254: li          r0, 0
+     255: li          r1, -60
+     257: jbsne       r0, 0x6, 564, ffffffffffff
+     268: li          r1, -68
+     270: jmp         570
+     273: ldb         r0, [20]
+     275: jeq         r0, 0x0, 564
+     280: jeq         r0, 0x3a, 297
+     283: li          r1, -116
+     285: ldb         r0, [38]
+     287: jeq         r0, 0xff, 570
+     292: li          r1, -44
+     294: jmp         564
+     297: ldb         r0, [54]
+     299: li          r1, -100
+     301: jeq         r0, 0x85, 570
+     306: jne         r0, 0x88, 333
+     309: li          r0, 38
+     311: jbsne       r0, 0xf, 333, ff0200000000000000000000000000
+     329: li          r1, -104
+     331: jmp         570
+     333: ldm         r0, m[14]
+     335: jne         r0, 0x96, 562
+     338: ldm         r0, m[15]
+     340: jgt         r0, 0x48e, 562
+     345: li          r0, 0
+     346: jbsne       r0, 0xf, 562, 7e9046bc700828c68e23672c86dd60
+     364: li          r0, 18
+     366: jbsne       r0, 0x14, 562, 00603afffe800000000000002ac68efffe23672c
+     389: li          r0, 54
+     391: jbsne       r0, 0x2, 562, 8600
+     396: li          r0, 58
+     398: jbsne       r0, 0x2, 562, 40c0
+     403: ldh         r0, [60]
+     405: jlt         r0, 0x91e, 562
+     410: jgt         r0, 0x1b58, 562
+     415: li          r0, 62
+     417: jbsne       r0, 0x1c, 562, 0000000000000000010128c68e23672c05010000000005dc030440c0
+     448: ldw         r0, [90]
+     450: jlt         r0, 0x55555555, 562
+     459: jgt         r0, 0xffffffff, 562
+     468: ldw         r0, [94]
+     470: jlt         r0, 0x55555555, 562
+     479: jgt         r0, 0xffffffff, 562
+     488: li          r0, 98
+     490: jbsne       r0, 0x18, 562, 000000002401fa000480f000000000000000000019030000
+     517: ldw         r0, [122]
+     519: jeq         r0, 0x0, 562
+     522: jlt         r0, 0x78, 535
+     525: jlt         r0, 0x91e, 562
+     530: jgt         r0, 0x1b58, 562
+     535: li          r0, 126
+     537: jbsne       r0, 0x12, 562, 2401fa000480f00000000000000000010701
+     558: li          r1, -72
+     560: jmp         570
+     562: li          r1, -40
+     564: lddw        r0, [r1]
+     565: add         r0, 1
+     567: stdw        r0, [r1]
+     568: jmp         PASS
+     570: lddw        r0, [r1]
+     571: add         r0, 1
+     573: stdw        r0, [r1]
+     574: jmp         DROP
diff --git a/apf_checksum.h b/apf_checksum.h
new file mode 100644
index 0000000..3d9e2bc
--- /dev/null
+++ b/apf_checksum.h
@@ -0,0 +1,65 @@
+/**
+ * Sum a buffer (max 128kB) as big endian 16-bit words on top of 'sum',
+ * then fold the carries and negate, giving a 16-bit result in [0..FFFE].
+ */
+FUNC(u16 calc_csum(u32 sum, const u8* const buf, const s32 len)) {
+    s32 pos;
+    for (pos = 0; pos < len; ++pos) sum += (pos & 1) ? buf[pos] : buf[pos] * 256u;
+
+    sum = (sum >> 16) + (sum & 0xFFFF);  /* cannot exceed 1FFFE after this fold */
+    u16 folded = sum + (sum >> 16);
+    return ~folded;  /* in [0..FFFE] provided the input sum was > 0 */
+}
+
+static u16 fix_udp_csum(u16 csum) {
+    return (csum == 0) ? 0xFFFF : csum;  /* UDP style: a computed 0 is stored as 0xFFFF */
+}
+
+/**
+ * Compute and write the packet checksums, then report the packet's DSCP.
+ *
+ * @param pkt - pointer to the very start of the to-be-transmitted packet,
+ *              ie. the start of the ethernet header (if one is present)
+ *     WARNING: at minimum 266 bytes of buffer pointed to by 'pkt' pointer
+ *              *MUST* be writable.
+ * (The IPv4 header checksum is a 2 byte value located 10 bytes past ip_ofs,
+ * whose largest value is 254.  Hence 254[ip_ofs] + 10 + 2[u16] = 266)
+ *
+ * @param len - length of the packet (this may be < 266).
+ * @param ip_ofs - offset from beginning of pkt to IPv4 or IPv6 header:
+ *                 the IP version is taken from the top nibble of this byte,
+ *                 for IPv4 the IP header checksum is calculated and stored,
+ *                 covering only the first 20 bytes of the header,
+ *                 so before calling this the IPv4 header checksum field
+ *                 must be initialized to the partial checksum of the IPv4
+ *                 options (0 if none)
+ *                 255 means there is no IP header (for example ARP)
+ *                 DSCP will be retrieved from this IP header (0 if none).
+ * @param partial_csum - additional value to include in L4 checksum
+ * @param csum_start - offset from beginning of pkt to begin L4 checksum
+ *                     calculation (until end of pkt specified by len)
+ * @param csum_ofs - offset from beginning of pkt to store L4 checksum
+ *                   255 means do not calculate/store L4 checksum
+ * @param udp - true iff we should generate a UDP style L4 checksum (0 -> 0xFFFF)
+ *
+ * @return 6-bit DSCP value [0..63] (0 if no/unknown IP header), garbage on parse error.
+ */
+FUNC(int csum_and_return_dscp(u8* const pkt, const s32 len, const u8 ip_ofs,
+  const u16 partial_csum, const u8 csum_start, const u8 csum_ofs, const bool udp)) {
+    if (csum_ofs != 255) {
+        /* a negative length is harmless: calc_csum() then sums zero bytes */
+        u16 l4_csum = calc_csum(partial_csum, pkt + csum_start, len - csum_start);
+        store_be16(pkt + csum_ofs, udp ? fix_udp_csum(l4_csum) : l4_csum);
+    }
+    if (ip_ofs == 255) return 0;  /* no IP header, so DSCP is reported as 0 */
+    u8* const ip_hdr = pkt + ip_ofs;
+    switch (ip_hdr[0] >> 4) {  /* IP version nibble */
+        case 4:
+            store_be16(ip_hdr + 10, calc_csum(0, ip_hdr, IPV4_HLEN));
+            return ip_hdr[1] >> 2;  /* DSCP */
+        case 6:
+            return (read_be16(ip_hdr) >> 6) & 0x3F;  /* DSCP */
+        default:
+            return 0;  /* neither IPv4 nor IPv6 */
+    }
+}
diff --git a/apf_checksum_test.cc b/apf_checksum_test.cc
new file mode 100644
index 0000000..7e54f3d
--- /dev/null
+++ b/apf_checksum_test.cc
@@ -0,0 +1,239 @@
+#include <cstdint>
+#include <cstddef>
+#include <gtest/gtest.h>
+#include <linux/icmpv6.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <linux/igmp.h>
+#include "apf_defs.h"
+#include "apf_utils.h"
+#include "apf_checksum.h"
+
+namespace apf {
+
+#define htons(x) __builtin_bswap16(x)
+#define packed __attribute__((packed))
+
+
+
+TEST(ApfChecksumTest, CalcIPv4UDPChecksum) {
+    // An IPv4 UDP packet with IPv4 header checksum and UDP checksum set to 0
+    union packed {
+        uint8_t data[77];
+        struct packed {
+          struct ethhdr ethhdr;
+          struct iphdr iphdr;
+          struct udphdr udphdr;
+          uint8_t udp_payload[];
+        } pkt;
+    } ether_ipv4_udp_pkt = {{
+        0x01, 0x00, 0x5e, 0x00, 0x00, 0xfb,
+        0x38, 0xca, 0x84, 0xb7, 0x7f, 0x16,
+        0x08, 0x00, // end of ethernet header
+        0x45,
+        0x04,
+        0x00, 0x3f,
+        0x43, 0xcd,
+        0x40, 0x00,
+        0xff,
+        0x11,
+        0x00, 0x00,
+        0xc0, 0xa8, 0x01, 0x03,
+        0xe0, 0x00, 0x00, 0xfb, // end of ipv4 header
+        0x14, 0xe9,
+        0x14, 0xe9,
+        0x00, 0x2b,
+        0x00, 0x00, // end of udp header
+        0x00, 0x00, 0x84, 0x00, 0x00, 0x00,
+        0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x62, 0x05, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x00, 0x00,
+        0x01, 0x80, 0x01, 0x00, 0x00, 0x00, 0x78, 0x00, 0x04, 0xc0, 0xa8, 0x01, 0x09,
+    }};
+    // Reset IPv4 header checksum to 0
+    ether_ipv4_udp_pkt.pkt.iphdr.check = 0;
+    // Seed the UDP checksum field with the UDP length (UDP header + payload)
+    ether_ipv4_udp_pkt.pkt.udphdr.check = htons(sizeof(ether_ipv4_udp_pkt) - IPV4_HLEN - ETH_HLEN);
+    uint8_t dscp = csum_and_return_dscp((uint8_t *)&ether_ipv4_udp_pkt, sizeof(ether_ipv4_udp_pkt),
+                                ETH_HLEN /* ip_ofs */, IPPROTO_UDP /* partial_csum */,
+                                ETH_HLEN + offsetof(iphdr, saddr) /* csum_start */,
+                                ETH_HLEN + IPV4_HLEN + offsetof(udphdr, check) /* csum_ofs */,
+                                true /* udp */);
+    EXPECT_EQ(dscp, 1);
+    // Verify IPv4 header checksum
+    EXPECT_EQ(read_be16((uint8_t *)&ether_ipv4_udp_pkt.pkt.iphdr.check), 0x9535);
+    EXPECT_EQ(read_be16((uint8_t *)&ether_ipv4_udp_pkt.pkt.udphdr.check), 0xa73d);
+}
+
+TEST(ApfChecksumTest, CalcIPv6UDPChecksum) {
+    // An IPv6 UDP packet with UDP checksum set to 0
+    union packed {
+        uint8_t data[97];
+        struct packed {
+          struct ethhdr ethhdr;
+          struct ipv6hdr ipv6hdr;
+          struct udphdr udphdr;
+          uint8_t udp_payload[];
+        } pkt;
+    } ether_ipv6_udp_pkt = {{
+        0x33, 0x33, 0x00, 0x00, 0x00, 0xfb,
+        0x38, 0xca, 0x84, 0xb7, 0x7f, 0x16,
+        0x86, 0xdd, // end of ethernet header
+        0x61, 0x89, 0xf4, 0x6b,
+        0x00, 0x2b,
+        0x11,
+        0xff,
+        0x24, 0x0d, 0x00, 0x1a, 0x03, 0xa6, 0xc4, 0x00, 0xb7, 0x5a, 0xb4, 0x85, 0x28, 0x10, 0xad, 0x6b,
+        0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfb, // end of ipv6 header
+        0x14, 0xe9,
+        0x14, 0xe9,
+        0x00, 0x2b,
+        0x00, 0x00, // end of udp header
+        0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x62, 0x05, 0x6c,
+        0x6f, 0x63, 0x61, 0x6c, 0x00, 0x00, 0x01, 0x80, 0x01, 0x00, 0x00, 0x00, 0x78, 0x00, 0x04, 0xc0,
+        0xa8, 0x01, 0x09
+    }};
+    // Seed the UDP checksum field with the UDP length (UDP header + payload)
+    ether_ipv6_udp_pkt.pkt.udphdr.check = htons(sizeof(ether_ipv6_udp_pkt) - IPV6_HLEN - ETH_HLEN);
+    uint8_t dscp = csum_and_return_dscp((uint8_t *)&ether_ipv6_udp_pkt, sizeof(ether_ipv6_udp_pkt),
+                                ETH_HLEN /* ip_ofs */, IPPROTO_UDP /* partial_csum */,
+                                ETH_HLEN + offsetof(ipv6hdr, saddr) /* csum_start */,
+                                ETH_HLEN + IPV6_HLEN + offsetof(udphdr, check) /* csum_ofs */,
+                                true /* udp */);
+    EXPECT_EQ(dscp, 6);
+    // verify UDP checksum
+    EXPECT_EQ(read_be16((uint8_t *)&ether_ipv6_udp_pkt.pkt.udphdr.check), 0x1cbd);
+}
+
+TEST(ApfChecksumTest, CalcICMPv6Checksum) {
+    // An ICMPv6 packet with checksum field set to 0
+    union packed {
+        uint8_t data[78];
+        struct packed {
+          struct ethhdr ethhdr;
+          struct ipv6hdr ipv6hdr;
+          struct icmp6hdr icmp6hdr;
+          uint8_t icmpv6_payload[];
+        } pkt;
+    } ether_ipv6_icmp6_pkt = {{
+        0xcc, 0x1a, 0xfa, 0xc7, 0xd2, 0xd8,
+        0xbc, 0xd0, 0x74, 0x58, 0xf1, 0x4f,
+        0x86, 0xdd, // end of ethernet header
+        0x61, 0x80, 0x00, 0x00,
+        0x00, 0x18,
+        0x3a,
+        0xff,
+        0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x12, 0x11, 0x2c, 0xdc, 0x04, 0x35, 0x11,
+        0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // end of ipv6 header
+        0x88,
+        0x00,
+        0x00, 0x00, // end of icmpv6 header
+        0x40, 0x00, 0x00, 0x00, 0x24, 0x0d, 0x00, 0x1a, 0x03, 0xa6, 0xc4, 0x00, 0xfd, 0x3d, 0x12, 0xb7,
+        0x90, 0xb6, 0xe9, 0xd2
+    }};
+    // Set the ICMPv6 checksum to ICMPv6 (header + payload) size
+    ether_ipv6_icmp6_pkt.pkt.icmp6hdr.icmp6_cksum = htons(sizeof(ether_ipv6_icmp6_pkt) - IPV6_HLEN - ETH_HLEN);
+    uint8_t dscp = csum_and_return_dscp((uint8_t *)&ether_ipv6_icmp6_pkt, sizeof(ether_ipv6_icmp6_pkt),
+                                ETH_HLEN /* ip_ofs */, IPPROTO_ICMPV6 /* partial_csum */,
+                                ETH_HLEN + offsetof(ipv6hdr, saddr) /* csum_start */,
+                                ETH_HLEN + IPV6_HLEN + offsetof(icmp6hdr, icmp6_cksum) /* csum_ofs */,
+                                false /* udp */);
+    EXPECT_EQ(dscp, 6);
+    // verify layer 4 checksum
+    EXPECT_EQ(read_be16((uint8_t *)&ether_ipv6_icmp6_pkt.pkt.icmp6hdr.icmp6_cksum), 0x8a09);
+}
+
+TEST(ApfChecksumTest, CalcICMPv6ChecksumWithHopByHopOption) {
+    // An ICMPv6 packet(including hop-by-hop option) with checksum field set to 0
+    union packed {
+        uint8_t data[90];
+        struct packed {
+          struct ethhdr ethhdr;
+          struct ipv6hdr ipv6hdr;
+          uint8_t hopopts[8];
+          struct icmp6hdr icmp6hdr;
+          uint8_t icmpv6_payload[];
+        } pkt;
+    } ether_ipv6_hopopts_icmp6_pkt = {{
+        0x33, 0x33, 0x00, 0x00, 0x00, 0x16,
+        0xe0, 0x4f, 0x43, 0xe6, 0xfb, 0xcf,
+        0x86, 0xdd, // end of ethernet header
+        0x60, 0x00, 0x00, 0x00,
+        0x00, 0x24,
+        0x00,
+        0x01,
+        0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x71, 0x6b, 0xe2, 0xfe, 0xd6, 0x53, 0x4e, 0xe0,
+        0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, // end of ipv6 header
+        0x3a, 0x00, 0x05, 0x02, 0x00, 0x00, 0x01, 0x00, // end of hop-by-hop option
+        0x8f,
+        0x00,
+        0x00, 0x00, // end of icmpv6 header
+        0x00, 0x00, 0x00, 0x01, 0x03, 0x00, 0x00, 0x00, 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c
+    }};
+
+    // Set the ICMPv6 checksum to ICMPv6 (header + payload) size + ~{16-bit sum of hop-by-hop header}
+    ether_ipv6_hopopts_icmp6_pkt.pkt.icmp6hdr.icmp6_cksum =
+        htons(sizeof(ether_ipv6_hopopts_icmp6_pkt) - IPV6_HLEN - ETH_HLEN
+              - sizeof(ether_ipv6_hopopts_icmp6_pkt.pkt.hopopts)
+              + 0xbffd); // 0xffff - (0x3a00 + 0x0502 + 0x0000 + 0x0100) = 0xbffd
+    uint8_t dscp = csum_and_return_dscp((uint8_t *)&ether_ipv6_hopopts_icmp6_pkt,
+                                        sizeof(ether_ipv6_hopopts_icmp6_pkt),
+                                        ETH_HLEN /* ip_ofs */, IPPROTO_ICMPV6 /* partial_csum */,
+                                        ETH_HLEN + offsetof(ipv6hdr, saddr) /* csum_start */,
+                                        ETH_HLEN + IPV6_HLEN
+                                        + sizeof(ether_ipv6_hopopts_icmp6_pkt.pkt.hopopts)
+                                        + offsetof(icmp6hdr, icmp6_cksum) /* csum_ofs */,
+                                        false /* udp */);
+    EXPECT_EQ(dscp, 0);
+    // verify layer 4 checksum
+    EXPECT_EQ(read_be16((uint8_t *)&ether_ipv6_hopopts_icmp6_pkt.pkt.icmp6hdr.icmp6_cksum), 0xf760);
+}
+
+TEST(ApfChecksumTest, CalcIGMPv2Checksum) {
+    // An IGMPv2 packet with ip checksum field set to 0
+    union packed {
+        uint8_t data[46];
+        struct packed {
+          struct ethhdr ethhdr;
+          struct iphdr iphdr;
+          uint8_t router_alert_option[4];
+          struct igmphdr igmphdr;
+        } pkt;
+    } ether_ipv4_igmpv2_pkt = {{
+        0x01, 0x00, 0x5e, 0x00, 0x00, 0xfb,
+        0xa2, 0x29, 0xae, 0xb3, 0x56, 0x6b,
+        0x08, 0x00, // end of ethernet header
+        0x46,
+        0x00,
+        0x00, 0x20,
+        0xf8, 0xf3,
+        0x00, 0x00,
+        0x01,
+        0x02,
+        0x00, 0x00,
+        0xc0, 0xa8, 0x01, 0xed,
+        0xe0, 0x00, 0x00, 0xfb, // end of ipv4 header without option
+        0x94, 0x04, 0x00, 0x00, // router alert option
+        0x16,
+        0x00,
+        0x09, 0x04,
+        0xe0, 0x00, 0x00, 0xfb // end of igmp payload
+    }};
+
+    // Set IPv4 checksum to 0x9404 + 0x0000 = 0x9404
+    ether_ipv4_igmpv2_pkt.pkt.iphdr.check = htons(0x9404);
+    uint8_t dscp = csum_and_return_dscp((uint8_t *)&ether_ipv4_igmpv2_pkt,
+                                        sizeof(ether_ipv4_igmpv2_pkt),
+                                        ETH_HLEN /* ip_ofs */, IPPROTO_IGMP /* partial_csum */,
+                                        0 /* csum_start */,
+                                        255 /* csum_ofs */,
+                                        false /* udp */);
+    EXPECT_EQ(dscp, 0);
+    // Verify IPv4 header checksum
+    EXPECT_EQ(read_be16((uint8_t *)&ether_ipv4_igmpv2_pkt.pkt.iphdr.check), 0x8853);
+}
+
+}  // namespace apf
diff --git a/apf_defs.h b/apf_defs.h
new file mode 100644
index 0000000..e619dce
--- /dev/null
+++ b/apf_defs.h
@@ -0,0 +1,25 @@
+typedef int8_t s8;  /* short aliases for the fixed-width signed integer types */
+typedef int16_t s16;
+typedef int32_t s32;
+
+typedef uint8_t u8;  /* short aliases for the fixed-width unsigned integer types */
+typedef uint16_t u16;
+typedef uint32_t u32;
+
+typedef enum {
+  error_program = -2,  /* error in program */
+  error_packet = -1,  /* error in packet */
+  nomatch = false,  /* no error, not matched (0) */
+  match = true  /* no error, matched (1) */
+} match_result_type;
+
+#define ETH_P_IP	0x0800  /* EtherType: IPv4 */
+#define ETH_P_IPV6	0x86DD  /* EtherType: IPv6 */
+
+#define ETH_HLEN	14  /* header lengths in bytes */
+#define IPV4_HLEN	20
+#define IPV6_HLEN	40
+#define TCP_HLEN	20
+#define UDP_HLEN	8
+
+#define FUNC(x) x; x  /* emits x twice: as a prototype (before the ';'), then as the head of the definition that follows */
diff --git a/apf_disassembler.c b/apf_disassembler.c
index 738079a..e77d832 100644
--- a/apf_disassembler.c
+++ b/apf_disassembler.c
@@ -29,7 +29,6 @@
 int main(void) {
   uint32_t program_len = 0;
   uint8_t program[10000];
-  char output_buffer[512];
 
   // Read in hex program bytes
   int byte;
@@ -38,8 +37,6 @@
   }
 
   for (uint32_t pc = 0; pc < program_len;) {
-      pc = apf_disassemble(program, program_len, pc, output_buffer,
-                           sizeof(output_buffer) / sizeof(output_buffer[0]));
-      printf("%s\n", output_buffer);
+      printf("%s\n", apf_disassemble(program, program_len, &pc));
   }
 }
diff --git a/apf_dns.h b/apf_dns.h
new file mode 100644
index 0000000..db7f724
--- /dev/null
+++ b/apf_dns.h
@@ -0,0 +1,125 @@
+/**
+ * Compares a (Q)NAME starting at udp[*ofs] with the target name.
+ * Packet bytes are ASCII-uppercased before comparison, so needle letters must be uppercase.
+ * @param needle - non-NULL - pointer to DNS encoded target name to match against.
+ *   example: [11]_GOOGLECAST[4]_TCP[5]LOCAL[0]  (where [11] is a byte with value 11, [255] matches any one label)
+ * @param needle_bound - non-NULL - points at first invalid byte past needle.
+ * @param udp - non-NULL - pointer to the start of the UDP payload (DNS header).
+ * @param udp_len - length of the UDP payload.
+ * @param ofs - non-NULL - pointer to the offset of the beginning of the (Q)NAME.
+ *   On non-error return will be updated to point to the first unread offset,
+ *   ie. the next position after the (Q)NAME.
+ *
+ * @return 1 if matched, 0 if not matched, -1 if error in packet, -2 if error in program.
+ */
+FUNC(match_result_type match_single_name(const u8* needle,
+                                    const u8* const needle_bound,
+                                    const u8* const udp,
+                                    const u32 udp_len,
+                                    u32* const ofs)) {
+    u32 first_unread_offset = *ofs;  /* highest offset consumed before following any compression pointer */
+    bool is_qname_match = true;
+    int lvl;
+
+    /* DNS names are <= 255 characters including terminating 0, since >= 1 char + '.' per level => max. 127 levels */
+    for (lvl = 1; lvl <= 127; ++lvl) {
+        if (*ofs >= udp_len) return error_packet;
+        u8 v = udp[(*ofs)++];
+        if (v >= 0xC0) { /* RFC 1035 4.1.4 - handle message compression */
+            if (*ofs >= udp_len) return error_packet;
+            u8 w = udp[(*ofs)++];
+            if (*ofs > first_unread_offset) first_unread_offset = *ofs;
+            u32 new_ofs = (v - 0xC0) * 256u + w;
+            if (new_ofs >= *ofs) return error_packet;  /* RFC 1035 4.1.4 allows only backward pointers */
+            *ofs = new_ofs;  /* a pointer onto itself still passes the check above; the level limit ends that loop */
+        } else if (v > 63) {
+            return error_packet;  /* RFC 1035 2.3.4 - label size is 1..63. */
+        } else if (v) {
+            u8 label_size = v;
+            if (*ofs + label_size > udp_len) return error_packet;
+            if (needle >= needle_bound) return error_program;
+            if (is_qname_match) {
+                u8 len = *needle++;
+                if (len == label_size) {
+                    if (needle + label_size > needle_bound) return error_program;
+                    while (label_size--) {
+                        u8 w = udp[(*ofs)++];
+                        is_qname_match &= (uppercase(w) == *needle++);
+                    }
+                } else {
+                    if (len != 0xFF) is_qname_match = false;  /* 0xFF is a wildcard: skip this label without a mismatch */
+                    *ofs += label_size;
+                }
+            } else {
+                *ofs += label_size;  /* already mismatched: only keep walking to find the end of the name */
+            }
+        } else { /* reached the end of the name */
+            if (first_unread_offset > *ofs) *ofs = first_unread_offset;
+            if (is_qname_match && needle >= needle_bound) return error_program;  /* unterminated needle: do not read past its bound */
+            return (is_qname_match && *needle == 0) ? match : nomatch;
+        }
+    }
+    return error_packet;  /* too many dns domain name levels */
+}
+
+/**
+ * Check if DNS packet contains any of the target names with the provided
+ * question_type.
+ * Needle letters must be uppercase (packet bytes are uppercased when compared); a 0xFF length byte matches any one label.
+ * @param needles - non-NULL - pointer to DNS encoded target nameS to match against.
+ *   example: [3]FOO[3]COM[0][3]BAR[3]NET[0][0]  -- note ends with an extra NULL byte.
+ * @param needle_bound - non-NULL - points at first invalid byte past needles.
+ * @param udp - non-NULL - pointer to the start of the UDP payload (DNS header).
+ * @param udp_len - length of the UDP payload.
+ * @param question_type - question type to match against or -1 to match answers.
+ *
+ * @return 1 if matched, 0 if not matched, -1 if error in packet, -2 if error in program.
+ */
+FUNC(match_result_type match_names(const u8* needles,
+                              const u8* const needle_bound,
+                              const u8* const udp,
+                              const u32 udp_len,
+                              const int question_type)) {
+    if (udp_len < 12) return error_packet;  /* lack of dns header */
+
+    /* dns header: be16 tid, flags, num_{questions,answers,authority,additional} */
+    u32 num_questions = read_be16(udp + 4);
+    u32 num_answers = read_be16(udp + 6) + read_be16(udp + 8) + read_be16(udp + 10);  /* answer + authority + additional records */
+
+    /* loop until we hit final needle, which is a null byte */
+    while (true) {
+        if (needles >= needle_bound) return error_program;
+        if (!*needles) return nomatch;  /* we've run out of needles without finding a match */
+        u32 ofs = 12;  /* dns header is 12 bytes */
+        u32 i;
+        /* match questions */
+        for (i = 0; i < num_questions; ++i) {
+            match_result_type m = match_single_name(needles, needle_bound, udp, udp_len, &ofs);
+            if (m < nomatch) return m;
+            if (ofs + 2 > udp_len) return error_packet;
+            int qtype = (int)read_be16(udp + ofs);
+            ofs += 4; /* skip be16 qtype & qclass */
+            if (question_type == -1) continue;  /* answers mode: questions are only walked to find where the records start */
+            if (m == nomatch) continue;
+            if (qtype == 0xFF /* QTYPE_ANY */ || qtype == question_type) return match;
+        }
+        /* match answers */
+        if (question_type == -1) for (i = 0; i < num_answers; ++i) {
+            match_result_type m = match_single_name(needles, needle_bound, udp, udp_len, &ofs);
+            if (m < nomatch) return m;
+            ofs += 8; /* skip be16 type, class & be32 ttl */
+            if (ofs + 2 > udp_len) return error_packet;
+            ofs += 2 + read_be16(udp + ofs);  /* skip be16 rdata length field, plus length bytes */
+            if (m == match) return match;
+        }
+        /* move needles pointer to the next needle. */
+        do {
+            u8 len = *needles++;
+            if (len == 0xFF) len = 0;  /* wildcard label: only its length byte needs skipping */
+            if (len > 63) return error_program;
+            if (needle_bound - needles <= len) return error_program;  /* check before advancing; also guards the read below */
+            needles += len;
+        } while (*needles);
+        needles++;  /* skip the NULL byte at the end of *a* DNS name */
+    }
+}
diff --git a/apf_dns_test.cc b/apf_dns_test.cc
new file mode 100644
index 0000000..9497ccd
--- /dev/null
+++ b/apf_dns_test.cc
@@ -0,0 +1,251 @@
+#include <stdint.h>
+#include <gtest/gtest.h>
+#include <arpa/inet.h>
+#include "apf_defs.h"
+#include "apf_utils.h"
+#include "apf_dns.h"
+
+namespace apf {
+
+TEST(ApfDnsTest, MatchSingleNameWithNoNameCompression) {  // one uncompressed qname vs. exact, wildcard and mismatching needles
+    const uint8_t needle_match[] = {
+        0x04, '_', 'N', 'M', 'T',
+        0x04, '_', 'T', 'C', 'P',
+        0x05, 'L', 'O', 'C', 'A', 'L',
+        0x00 // needle = _NMT._TCP.LOCAL
+    };
+    const uint8_t udp_payload[] = {
+        0x00, 0x00, 0x00, 0x00, // tid = 0x00, flags = 0x00,
+        0x00, 0x01, // qdcount = 1
+        0x00, 0x00, // ancount = 0
+        0x00, 0x00, // nscount = 0
+        0x00, 0x00, // arcount = 0
+        0x04, '_', 'n', 'm', 't',
+        0x04, '_', 't', 'c', 'p',
+        0x05, 'l', 'o', 'c', 'a', 'l',
+        0x00, // qname1 = _nmt._tcp.local
+        0x00, 0x0c, 0x00, 0x01  // type = PTR, class = 0x0001
+    };
+    u32 ofs = 12;  // qname1 starts right after the 12 byte dns header
+    EXPECT_EQ(match_single_name(needle_match, needle_match + sizeof(needle_match), udp_payload, sizeof(udp_payload), &ofs), match);
+    EXPECT_EQ(ofs, 29);  // 12 + 17 byte qname: first offset past the name
+    const uint8_t needle_match_star[] = {
+        0x04, '_', 'N', 'M', 'T',
+        0xff,
+        0x05, 'L', 'O', 'C', 'A', 'L',
+        0x00 // needle = _NMT.*.LOCAL
+    };
+    ofs = 12;
+    EXPECT_EQ(match_single_name(needle_match_star, needle_match_star + sizeof(needle_match_star), udp_payload, sizeof(udp_payload), &ofs), match);
+    EXPECT_EQ(ofs, 29);
+    const uint8_t needle_nomatch[] = {
+        0x04, '_', 'M', 'M', 'M',
+        0x04, '_', 't', 'c', 'p',
+        0x05, 'l', 'o', 'c', 'a', 'l',
+        0x00 // needle = _MMM._tcp.local
+    };
+    ofs = 12;
+    EXPECT_EQ(match_single_name(needle_nomatch, needle_nomatch + sizeof(needle_nomatch), udp_payload, sizeof(udp_payload), &ofs), nomatch);
+    EXPECT_EQ(ofs, 29);  // a mismatch still consumes the whole name
+    const uint8_t needle_nomatch_star[] = {
+        0xff,
+        0x04, '_', 'u', 'd', 'p',
+        0x05, 'l', 'o', 'c', 'a', 'l',
+        0x00 // needle = *._udp.local
+    };
+    ofs = 12;
+    EXPECT_EQ(match_single_name(needle_nomatch_star, needle_nomatch_star + sizeof(needle_nomatch_star), udp_payload, sizeof(udp_payload), &ofs), nomatch);
+    EXPECT_EQ(ofs, 29);
+}
+
+TEST(ApfDnsTest, MatchSingleNameWithoutNameCompression) {  // NOTE(review): despite the name, qname2 below IS a compression pointer
+    const uint8_t needle_match[] = {
+        0x01, 'B',
+        0x05, 'L', 'O', 'C', 'A', 'L',
+        0x00 // needle = B.LOCAL
+    };
+    const uint8_t udp_payload[] = {
+        0x00, 0x00, 0x00, 0x00, // tid = 0x00, flags = 0x00,
+        0x00, 0x02, // qdcount = 2
+        0x00, 0x00, // ancount = 0
+        0x00, 0x00, // nscount = 0
+        0x00, 0x00, // arcount = 0
+        0x01, 'a',
+        0x01, 'b',
+        0x05, 'l', 'o', 'c', 'a', 'l',
+        0x00, // qname1 = a.b.local
+        0x00, 0x01, 0x00, 0x01,  // type = A, class = 0x0001
+        0xc0, 0x0e, // qname2 = b.local (name compression)
+        0x00, 0x01, 0x00, 0x01 // type = A, class = 0x0001
+    };
+    u32 ofs = 27;  // qname2: 12 byte header + 11 byte qname1 + 4 bytes type/class
+    EXPECT_EQ(match_single_name(needle_match, needle_match + sizeof(needle_match), udp_payload, sizeof(udp_payload), &ofs), match);
+    EXPECT_EQ(ofs, 29);  // just past the 2 byte pointer, not past the pointer's target
+    const uint8_t needle_match_star[] = {
+        0x01, 'B',
+        0xff,
+        0x00 // needle = B.*
+    };
+    ofs = 27;
+    EXPECT_EQ(match_single_name(needle_match_star, needle_match_star + sizeof(needle_match_star), udp_payload, sizeof(udp_payload), &ofs), match);
+    EXPECT_EQ(ofs, 29);
+}
+
+TEST(ApfDnsTest, MatchSingleNameWithInfiniteloop) {  // a self-referencing compression pointer must end in error_packet
+    const uint8_t needle_match[] = {
+        0x01, 'B',
+        0x05, 'L', 'O', 'C', 'A', 'L',
+        0x00 // needle = B.LOCAL
+    };
+    const uint8_t udp_payload[] = {
+        0x00, 0x00, 0x00, 0x00, // tid = 0x00, flags = 0x00,
+        0x00, 0x02, // qdcount = 2
+        0x00, 0x00, // ancount = 0
+        0x00, 0x00, // nscount = 0
+        0x00, 0x00, // arcount = 0
+        0x01, 'a',
+        0x01, 'b',
+        0x05, 'l', 'o', 'c', 'a', 'l',
+        0x00, // qname1 = a.b.local
+        0x00, 0x01, 0x00, 0x01,  // type = A, class = 0x0001
+        0xc0, 0x1b, // corrupted pointer to its own offset (27) would cause an infinite loop
+        0x00, 0x01, 0x00, 0x01 // type = A, class = 0x0001
+    };
+    u32 ofs = 27;  // start at the corrupted pointer
+    EXPECT_EQ(match_single_name(needle_match, needle_match + sizeof(needle_match), udp_payload, sizeof(udp_payload), &ofs), error_packet);
+    const uint8_t needle_match_star[] = {
+        0x01, 'B',
+        0xff,
+        0x00 // needle = B.*
+    };
+    ofs = 27;
+    EXPECT_EQ(match_single_name(needle_match_star, needle_match_star + sizeof(needle_match_star), udp_payload, sizeof(udp_payload), &ofs), error_packet);
+}
+
+TEST(ApfDnsTest, MatchNamesInQuestions) {  // needle lists vs. two questions; question_type 0x01 equals the qtype A in the payload
+    // needles = { A.B.LOCAL }
+    const uint8_t needles_match1[] = {
+        0x01, 'A',
+        0x01, 'B',
+        0x05, 'L', 'O', 'C', 'A', 'L',
+        0x00,
+        0x00
+    };
+    const uint8_t udp_payload[] = {
+        0x00, 0x00, 0x00, 0x00, // tid = 0x00, flags = 0x00,
+        0x00, 0x02, // qdcount = 2
+        0x00, 0x00, // ancount = 0
+        0x00, 0x00, // nscount = 0
+        0x00, 0x00, // arcount = 0
+        0x01, 'a',
+        0x01, 'b',
+        0x05, 'l', 'o', 'c', 'a', 'l',
+        0x00, // qname1 = a.b.local
+        0x00, 0x01, 0x00, 0x01,// type = A, class = 0x0001
+        0xc0, 0x0e, // qname2 = b.local (name compression)
+        0x00, 0x01, 0x00, 0x01 // type = A, class = 0x0001
+    };
+    EXPECT_EQ(match_names(needles_match1, needles_match1 + sizeof(needles_match1),  udp_payload, sizeof(udp_payload), 0x01), match);
+    // needles = { A, B.LOCAL }
+    const uint8_t needles_match2[] = {
+        0x01, 'A',
+        0x00,
+        0x01, 'B',
+        0x05, 'L', 'O', 'C', 'A', 'L',
+        0x00,
+        0x00
+    };
+    EXPECT_EQ(match_names(needles_match2, needles_match2 + sizeof(needles_match2), udp_payload, sizeof(udp_payload), 0x01), match);  // via the second needle and qname2
+    // needles = { *, B.* }
+    const uint8_t needles_match2_star[] = {
+        0xff,
+        0x00,
+        0x01, 'B',
+        0xff,
+        0x00,
+        0x00
+    };
+    EXPECT_EQ(match_names(needles_match2_star, needles_match2_star + sizeof(needles_match2_star), udp_payload, sizeof(udp_payload), 0x01), match);  // '*' alone fits neither multi-label name; B.* fits qname2
+    // needles = { C.LOCAL }
+    const uint8_t needles_nomatch[] = {
+        0x01, 'C',
+        0x05, 'L', 'O', 'C', 'A', 'L',
+        0x00,
+        0x00
+    };
+    EXPECT_EQ(match_names(needles_nomatch, needles_nomatch + sizeof(needles_nomatch), udp_payload, sizeof(udp_payload), 0x01), nomatch);
+    // needles = { C.* }
+    const uint8_t needles_nomatch_star[] = {
+        0x01, 'C',
+        0xff,
+        0x00,
+        0x00
+    };
+    EXPECT_EQ(match_names(needles_nomatch_star, needles_nomatch_star + sizeof(needles_nomatch_star), udp_payload, sizeof(udp_payload), 0x01), nomatch);
+}
+
+TEST(ApfDnsTest, MatchNamesInAnswers) {  // needle lists vs. two answer records; question_type -1 selects answer matching
+    // needles = { A.B.LOCAL }
+    const uint8_t needles_match1[] = {
+        0x01, 'A',
+        0x01, 'B',
+        0x05, 'L', 'O', 'C', 'A', 'L',
+        0x00,
+        0x00
+    };
+    const uint8_t udp_payload[] = {
+        0x00, 0x00, 0x84, 0x00, // tid = 0x00, flags = 0x8400,
+        0x00, 0x00, // qdcount = 0
+        0x00, 0x02, // ancount = 2
+        0x00, 0x00, // nscount = 0
+        0x00, 0x00, // arcount = 0
+        0x01, 'a',
+        0x01, 'b',
+        0x05, 'l', 'o', 'c', 'a', 'l',
+        0x00, // name1 = a.b.local
+        0x00, 0x01, 0x80, 0x01, // type = A, class = 0x8001
+        0x00, 0x00, 0x00, 0x78, // ttl = 120
+        0x00, 0x04, 0xc0, 0xa8, 0x01, 0x09, // rdlength = 4, rdata = 192.168.1.9
+        0xc0, 0x0e, // name2 = b.local (name compression)
+        0x00, 0x01, 0x80, 0x01, // type = A, class = 0x8001
+        0x00, 0x00, 0x00, 0x78, // ttl = 120
+        0x00, 0x04, 0xc0, 0xa8, 0x01, 0x09 // rdlength = 4, rdata = 192.168.1.9
+    };
+    EXPECT_EQ(match_names(needles_match1, needles_match1 + sizeof(needles_match1), udp_payload, sizeof(udp_payload), -1), match);
+    // needles = { A, B.LOCAL }
+    const uint8_t needles_match2[] = {
+        0x01, 'A', 0x00,
+        0x01, 'B',
+        0x05, 'L', 'O', 'C', 'A', 'L',
+        0x00,
+        0x00
+    };
+    EXPECT_EQ(match_names(needles_match2, needles_match2 + sizeof(needles_match2), udp_payload, sizeof(udp_payload), -1), match);  // via the second needle and name2
+    // needles = { *, B.* }
+    const uint8_t needles_match2_star[] = {
+        0xff, 0x00,  // terminator ends the lone '*' needle; without it the list would encode the single needle *.B.*
+        0x01, 'B',
+        0xff,
+        0x00,
+        0x00
+    };
+    EXPECT_EQ(match_names(needles_match2_star, needles_match2_star + sizeof(needles_match2_star), udp_payload, sizeof(udp_payload), -1), match);  // '*' alone fits neither name; B.* fits name2
+    // needles = { C.LOCAL }
+    const uint8_t needles_nomatch[] = {
+        0x01, 'C',
+        0x05, 'L', 'O', 'C', 'A', 'L',
+        0x00,
+        0x00
+    };
+    EXPECT_EQ(match_names(needles_nomatch, needles_nomatch + sizeof(needles_nomatch), udp_payload, sizeof(udp_payload), -1), nomatch);
+    // needles = { C.* }
+    const uint8_t needles_nomatch_star[] = {
+        0x01, 'C',
+        0xff,
+        0x00,
+        0x00
+    };
+    EXPECT_EQ(match_names(needles_nomatch_star, needles_nomatch_star + sizeof(needles_nomatch_star), udp_payload, sizeof(udp_payload), -1), nomatch);
+}
+
+} // namespace apf
diff --git a/apf_interpreter.c b/apf_interpreter.c
new file mode 120000
index 0000000..7844a0d
--- /dev/null
+++ b/apf_interpreter.c
@@ -0,0 +1 @@
+v4/apf_interpreter.c
\ No newline at end of file
diff --git a/apf_interpreter.h b/apf_interpreter.h
new file mode 120000
index 0000000..a07ceaf
--- /dev/null
+++ b/apf_interpreter.h
@@ -0,0 +1 @@
+v4/apf_interpreter.h
\ No newline at end of file
diff --git a/apf_run.c b/apf_run.c
index 242a7bb..90a8a8f 100644
--- a/apf_run.c
+++ b/apf_run.c
@@ -162,7 +162,7 @@
 
 void print_transmitted_packet() {
     printf("transmitted packet: ");
-    print_hex(apf_test_tx_packet, (int) apf_test_tx_packet_len);
+    print_hex(apf_test_buffer, (int) apf_test_tx_packet_len);
     printf("\n");
 }
 
@@ -176,7 +176,7 @@
 
     int ret;
     if (use_apf_v6_interpreter) {
-        ret = apf_run(NULL, program, program_len, ram_len, packet, packet_len,
+        ret = apf_run(NULL, (uint32_t*)program, program_len, ram_len, packet, packet_len,
                             filter_age);
     } else {
         ret = accept_packet(program, program_len, ram_len, packet, packet_len,
@@ -187,7 +187,6 @@
     free(packet);
 }
 
-static char output_buffer[512];
 
 void apf_trace_hook(uint32_t pc, const uint32_t* regs, const uint8_t* program, uint32_t program_len,
                     const uint8_t* packet __unused, uint32_t packet_len __unused,
@@ -195,9 +194,7 @@
     if (!tracing_enabled) return;
 
     printf("%8" PRIx32 " %8" PRIx32 " ", regs[0], regs[1]);
-    apf_disassemble(program, program_len, pc, output_buffer,
-                    sizeof(output_buffer) / sizeof(output_buffer[0]));
-    printf("%s\n", output_buffer);
+    printf("%s\n", apf_disassemble(program, program_len, &pc));
 }
 
 // Process pcap file through APF filter and generate output files
@@ -234,7 +231,7 @@
 
         int result;
         if (use_apf_v6_interpreter) {
-            result = apf_run(NULL, program, program_len, ram_len, apf_packet,
+            result = apf_run(NULL, (uint32_t*)program, program_len, ram_len, apf_packet,
                              apf_header.len, filter_age);
         } else {
             result = accept_packet(program, program_len, ram_len, apf_packet,
diff --git a/apf_utils.h b/apf_utils.h
new file mode 100644
index 0000000..a95a94e
--- /dev/null
+++ b/apf_utils.h
@@ -0,0 +1,12 @@
+static u32 read_be16(const u8* buf) {  /* loads the 16-bit big-endian value stored at buf[0..1] */
+    return ((u32)buf[0] << 8) | buf[1];
+}
+
+static void store_be16(u8* const buf, const u16 v) {  /* writes v to buf[0..1], most significant byte first */
+    buf[1] = (u8)(v & 0xFF);
+    buf[0] = (u8)((v >> 8) & 0xFF);
+}
+
+static u8 uppercase(u8 c) {  /* maps 'a'..'z' to 'A'..'Z'; every other byte value is returned unchanged */
+    return (c < 'a' || c > 'z') ? c : c - ('a' - 'A');
+}
diff --git a/devtools/.gitignore b/devtools/.gitignore
new file mode 100644
index 0000000..a5c36b1
--- /dev/null
+++ b/devtools/.gitignore
@@ -0,0 +1,2 @@
+apf_interpreter.arm.o
+apf_interpreter.x86.o
diff --git a/devtools/apf_interpreter.c b/devtools/apf_interpreter.c
new file mode 120000
index 0000000..de07dc7
--- /dev/null
+++ b/devtools/apf_interpreter.c
@@ -0,0 +1 @@
+../v5/apf_interpreter.c
\ No newline at end of file
diff --git a/devtools/apf_interpreter.h b/devtools/apf_interpreter.h
new file mode 120000
index 0000000..9a19beb
--- /dev/null
+++ b/devtools/apf_interpreter.h
@@ -0,0 +1 @@
+../v5/apf_interpreter.h
\ No newline at end of file
diff --git a/devtools/apf_interpreter_minimal.c b/devtools/apf_interpreter_minimal.c
new file mode 100644
index 0000000..7373d6f
--- /dev/null
+++ b/devtools/apf_interpreter_minimal.c
@@ -0,0 +1,15 @@
+typedef signed char int8_t;  /* local stand-ins for the fixed-width types; assumes 8/16/32-bit char/short/int - TODO confirm per target */
+typedef signed short int16_t;
+typedef signed int int32_t;
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+
+#define NULL ((void*)0)  /* defined locally: no system header is included by this file */
+
+typedef enum { false, true } bool;  /* local boolean type with false == 0 and true == 1 */
+#include "../apf_defs.h"  /* u8/u16/u32 aliases, match_result_type and FUNC, used by the headers below */
+#include "../apf_utils.h"  /* read_be16 / store_be16 / uppercase helpers */
+#include "../apf_dns.h"  /* match_single_name / match_names */
+#include "../apf_checksum.h"
diff --git a/devtools/mk b/devtools/mk
new file mode 100755
index 0000000..408cc0f
--- /dev/null
+++ b/devtools/mk
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Requires:
+#   sudo apt install gcc-arm-linux-gnueabihf gcc-arm-linux-gnueabi
+
+set -e
+set -u
+
+cd "$(dirname "$0")"  # build next to this script; dirname also copes with a slash-less $0 (e.g. 'bash mk')
+
+declare -ar FLAGS=(
+  '-std=c89'
+  '-pedantic'
+  '-Wall'
+  '-Werror'
+  '-Werror=implicit-fallthrough'
+  '-Werror=strict-prototypes'
+  '-Wsign-compare'
+  '-Wsign-conversion'
+  '-Wunused-parameter'
+  '-Wuninitialized'
+  '-Os'
+  '-fomit-frame-pointer'
+  '-Wno-declaration-after-statement'
+)
+
+arm-linux-gnueabi-gcc "${FLAGS[@]}" apf_interpreter.c -c -o apf_interpreter.arm.o  # ARM object
+clang -m32 "${FLAGS[@]}" -Wnullable-to-nonnull-conversion -Wthread-safety apf_interpreter.c -c -o apf_interpreter.x86.o  # 32-bit x86 object
+size apf_interpreter.arm.o  # report section sizes of both objects
+size apf_interpreter.x86.o
diff --git a/disassembler.c b/disassembler.c
index 06a164b..ac6f625 100644
--- a/disassembler.c
+++ b/disassembler.c
@@ -16,19 +16,38 @@
 
 #include <stdint.h>
 #include <stdio.h>
+#include <stdarg.h>
 
+typedef enum { false, true } bool;
+
+#include "v5/apf_defs.h"
 #include "v5/apf.h"
+#include "disassembler.h"
 
 // If "c" is of a signed type, generate a compile warning that gets promoted to an error.
 // This makes bounds checking simpler because ">= 0" can be avoided. Otherwise adding
 // superfluous ">= 0" with unsigned expressions generates compile warnings.
 #define ENFORCE_UNSIGNED(c) ((c)==(uint32_t)(c))
 
-static int print_opcode(const char* opcode, char* output_buffer,
-                        int output_buffer_len, int offset) {
-    int ret = snprintf(output_buffer + offset, output_buffer_len - offset,
-                      "%-6s", opcode);
-    return ret;
+char print_buf[1024];
+char* buf_ptr;
+int buf_remain;
+bool v6_mode = false;
+
+__attribute__ ((format (printf, 1, 2) ))  // have the compiler check the arguments against the format string
+static void bprintf(const char* format, ...) {  // appends formatted text at buf_ptr, using at most buf_remain bytes
+    va_list args;
+    va_start(args, format);
+    int ret = vsnprintf(buf_ptr, buf_remain, format, args);
+    va_end(args);
+    if (ret < 0) return;  // formatting error: leave the cursor where it was
+    if (ret >= buf_remain) ret = buf_remain;  // output was truncated: use up all remaining space so later calls add nothing
+    buf_ptr += ret;
+    buf_remain -= ret;
+}
+
+static void print_opcode(const char* opcode) {
+    bprintf("%-12s", opcode);
 }
 
 // Mapping from opcode number to opcode name.
@@ -52,70 +71,53 @@
     [JGT_OPCODE] = "jgt",
     [JLT_OPCODE] = "jlt",
     [JSET_OPCODE] = "jset",
-    [JNEBS_OPCODE] = "jnebs",
+    [JBSMATCH_OPCODE] = NULL,
     [LDDW_OPCODE] = "lddw",
     [STDW_OPCODE] = "stdw",
     [WRITE_OPCODE] = "write",
 };
 
-static int print_jump_target(uint32_t target, uint32_t program_len,
-                             char* output_buffer, int output_buffer_len,
-                             int offset) {
-    int ret;
+static void print_jump_target(uint32_t target, uint32_t program_len) {
     if (target == program_len) {
-        ret = snprintf(output_buffer + offset, output_buffer_len - offset,
-                       "PASS");
+        bprintf("PASS");
     } else if (target == program_len + 1) {
-        ret = snprintf(output_buffer + offset, output_buffer_len - offset,
-                       "DROP");
+        bprintf("DROP");
     } else {
-        ret = snprintf(output_buffer + offset, output_buffer_len - offset,
-                       "%u", target);
+        bprintf("%u", target);
     }
-    return ret;
 }
 
-uint32_t apf_disassemble(const uint8_t* program, uint32_t program_len,
-                         uint32_t pc, char* output_buffer,
-                         int output_buffer_len) {
-    if (pc > program_len + 1) {
-        fprintf(stderr, "pc is overflow: pc %d, program_len: %d", pc,
-                program_len);
-        return pc;
-    }
-#define ASSERT_RET_INBOUND(x)                                               \
-    if ((x) < 0 || (x) >= (output_buffer_len - offset)) return pc + 2
-
-    int offset = 0;
-    int ret;
-    ret = snprintf(output_buffer + offset, output_buffer_len - offset,
-                       "%8u: ", pc);
-    ASSERT_RET_INBOUND(ret);
-    offset += ret;
-
-    if (pc == program_len) {
-        ret = snprintf(output_buffer + offset, output_buffer_len - offset,
-                       "PASS");
-        ASSERT_RET_INBOUND(ret);
-        offset += ret;
-        return ++pc;
+const char* apf_disassemble(const uint8_t* program, uint32_t program_len, uint32_t* const ptr2pc) {
+    buf_ptr = print_buf;
+    buf_remain = sizeof(print_buf);
+    if (*ptr2pc > program_len + 1) {
+        bprintf("pc is overflow: pc %d, program_len: %d", *ptr2pc, program_len);
+        return print_buf;
     }
 
-    if (pc == program_len + 1) {
-        ret = snprintf(output_buffer + offset, output_buffer_len - offset,
-                       "DROP");
-        ASSERT_RET_INBOUND(ret);
-        offset += ret;
-        return ++pc;
+    bprintf("%8u: ", *ptr2pc);
+
+    if (*ptr2pc == program_len) {
+        bprintf("PASS");
+        ++(*ptr2pc);
+        return print_buf;
     }
 
-    const uint8_t bytecode = program[pc++];
+    if (*ptr2pc == program_len + 1) {
+        bprintf("DROP");
+        ++(*ptr2pc);
+        return print_buf;
+    }
+
+    const uint8_t bytecode = program[(*ptr2pc)++];
     const uint32_t opcode = EXTRACT_OPCODE(bytecode);
-#define PRINT_OPCODE()                                                         \
-    print_opcode(opcode_names[opcode], output_buffer, output_buffer_len, offset)
-#define DECODE_IMM(value, length)                                              \
-    for (uint32_t i = 0; i < (length) && pc < program_len; i++)                \
-        value = (value << 8) | program[pc++]
+
+#define PRINT_OPCODE() print_opcode(opcode_names[opcode])
+#define DECODE_IMM(length)  ({                                        \
+    uint32_t value = 0;                                               \
+    for (uint32_t i = 0; i < (length) && *ptr2pc < program_len; i++)  \
+        value = (value << 8) | program[(*ptr2pc)++];                  \
+    value;})
 
     const uint32_t reg_num = EXTRACT_REGISTER(bytecode);
     // All instructions have immediate fields, so load them now.
@@ -124,123 +126,92 @@
     int32_t signed_imm = 0;
     if (len_field != 0) {
         const uint32_t imm_len = 1 << (len_field - 1);
-        DECODE_IMM(imm, imm_len);
+        imm = DECODE_IMM(imm_len);
         // Sign extend imm into signed_imm.
         signed_imm = imm << ((4 - imm_len) * 8);
         signed_imm >>= (4 - imm_len) * 8;
     }
     switch (opcode) {
+        case PASSDROP_OPCODE:
+            if (reg_num == 0) {
+                print_opcode("pass");
+            } else {
+                print_opcode("drop");
+            }
+            if (imm > 0) {
+                bprintf("counter=%d", imm);
+            }
+            break;
         case LDB_OPCODE:
         case LDH_OPCODE:
         case LDW_OPCODE:
-            ret = PRINT_OPCODE();
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
-            ret = snprintf(output_buffer + offset, output_buffer_len - offset,
-                          "r%d, [%u]", reg_num, imm);
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
+            PRINT_OPCODE();
+            bprintf("r%d, [%u]", reg_num, imm);
             break;
         case LDBX_OPCODE:
         case LDHX_OPCODE:
         case LDWX_OPCODE:
-            ret = PRINT_OPCODE();
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
+            PRINT_OPCODE();
             if (imm) {
-                ret =
-                    snprintf(output_buffer + offset, output_buffer_len - offset,
-                             "r%d, [r1+%u]", reg_num, imm);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                bprintf("r%d, [r1+%u]", reg_num, imm);
             } else {
-                ret =
-                    snprintf(output_buffer + offset, output_buffer_len - offset,
-                             "r%d, [r1]", reg_num);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                bprintf("r%d, [r1]", reg_num);
             }
             break;
         case JMP_OPCODE:
-            ret = PRINT_OPCODE();
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
-            ret = print_jump_target(pc + imm, program_len, output_buffer,
-                                    output_buffer_len, offset);
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
+            if (reg_num == 0) {
+                PRINT_OPCODE();
+                print_jump_target(*ptr2pc + imm, program_len);
+            } else {
+                v6_mode = true;
+                print_opcode("data");
+                bprintf("%d, ", imm);
+                uint32_t len = imm;
+                while (len--) bprintf("%02x", program[(*ptr2pc)++]);
+            }
             break;
         case JEQ_OPCODE:
         case JNE_OPCODE:
         case JGT_OPCODE:
         case JLT_OPCODE:
-        case JSET_OPCODE:
-        case JNEBS_OPCODE: {
-            ret = PRINT_OPCODE();
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
-            ret = snprintf(output_buffer + offset, output_buffer_len - offset,
-                          "r0, ");
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
+        case JSET_OPCODE: {
+            PRINT_OPCODE();
+            bprintf("r0, ");
             // Load second immediate field.
-            uint32_t cmp_imm = 0;
             if (reg_num == 1) {
-                ret = snprintf(output_buffer + offset,
-                               output_buffer_len - offset, "r1, ");
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                bprintf("r1, ");
             } else if (len_field == 0) {
-                ret = snprintf(output_buffer + offset,
-                               output_buffer_len - offset, "0, ");
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                bprintf("0, ");
             } else {
-                DECODE_IMM(cmp_imm, 1 << (len_field - 1));
-                ret = snprintf(output_buffer + offset,
-                              output_buffer_len - offset, "0x%x, ", cmp_imm);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                uint32_t cmp_imm = DECODE_IMM(1 << (len_field - 1));
+                bprintf("0x%x, ", cmp_imm);
             }
-            if (opcode == JNEBS_OPCODE) {
-                ret = print_jump_target(pc + imm + cmp_imm, program_len,
-                                  output_buffer, output_buffer_len, offset);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
-                ret = snprintf(output_buffer + offset,
-                               output_buffer_len - offset, ", ");
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
-                while (cmp_imm--) {
-                    uint8_t byte = program[pc++];
-                    ret = snprintf(output_buffer + offset,
-                                  output_buffer_len - offset, "%02x", byte);
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
-                }
+            print_jump_target(*ptr2pc + imm, program_len);
+            break;
+        }
+        case JBSMATCH_OPCODE: {
+            if (reg_num == 0) {
+                print_opcode("jbsne");
             } else {
-                ret = print_jump_target(pc + imm, program_len, output_buffer,
-                                  output_buffer_len, offset);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                print_opcode("jbseq");
+            }
+            bprintf("r0, ");
+            uint32_t cmp_imm = DECODE_IMM(1 << (len_field - 1));
+            bprintf("0x%x, ", cmp_imm);
+            print_jump_target(*ptr2pc + imm + cmp_imm, program_len);
+            bprintf(", ");
+            while (cmp_imm--) {
+                uint8_t byte = program[(*ptr2pc)++];
+                bprintf("%02x", byte);
             }
             break;
         }
         case SH_OPCODE:
-            ret = PRINT_OPCODE();
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
+            PRINT_OPCODE();
             if (reg_num) {
-                ret = snprintf(output_buffer + offset,
-                               output_buffer_len - offset, "r0, r1");
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                bprintf("r0, r1");
             } else {
-                ret =
-                    snprintf(output_buffer + offset, output_buffer_len - offset,
-                             "r0, %d", signed_imm);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                bprintf("r0, %d", signed_imm);
             }
             break;
         case ADD_OPCODE:
@@ -248,34 +219,18 @@
         case DIV_OPCODE:
         case AND_OPCODE:
         case OR_OPCODE:
-            ret = PRINT_OPCODE();
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
+            PRINT_OPCODE();
             if (reg_num) {
-                ret = snprintf(output_buffer + offset,
-                               output_buffer_len - offset, "r0, r1");
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                bprintf("r0, r1");
             } else if (!imm && opcode == DIV_OPCODE) {
-                ret = snprintf(output_buffer + offset,
-                               output_buffer_len - offset, "pass (div 0)");
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                bprintf("pass (div 0)");
             } else {
-                ret = snprintf(output_buffer + offset,
-                               output_buffer_len - offset, "r0, %u", imm);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                bprintf("r0, %u", imm);
             }
             break;
         case LI_OPCODE:
-            ret = PRINT_OPCODE();
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
-            ret = snprintf(output_buffer + offset, output_buffer_len - offset,
-                           "r%d, %d", reg_num, signed_imm);
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
+            PRINT_OPCODE();
+            bprintf("r%d, %d", reg_num, signed_imm);
             break;
         case EXT_OPCODE:
             if (
@@ -287,211 +242,167 @@
                 imm >= LDM_EXT_OPCODE &&
 #endif
                 imm < (LDM_EXT_OPCODE + MEMORY_ITEMS)) {
-                ret = print_opcode("ldm", output_buffer, output_buffer_len,
-                                   offset);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
-                ret =
-                    snprintf(output_buffer + offset, output_buffer_len - offset,
-                             "r%d, m[%u]", reg_num, imm - LDM_EXT_OPCODE);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                print_opcode("ldm");
+                bprintf("r%d, m[%u]", reg_num, imm - LDM_EXT_OPCODE);
             } else if (imm >= STM_EXT_OPCODE && imm < (STM_EXT_OPCODE + MEMORY_ITEMS)) {
-                ret = print_opcode("stm", output_buffer, output_buffer_len,
-                                   offset);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
-                ret =
-                    snprintf(output_buffer + offset, output_buffer_len - offset,
-                             "r%d, m[%u]", reg_num, imm - STM_EXT_OPCODE);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                print_opcode("stm");
+                bprintf("r%d, m[%u]", reg_num, imm - STM_EXT_OPCODE);
             } else switch (imm) {
                 case NOT_EXT_OPCODE:
-                    ret = print_opcode("not", output_buffer,
-                                       output_buffer_len, offset);
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
-                    ret = snprintf(output_buffer + offset,
-                                   output_buffer_len - offset, "r%d",
-                                   reg_num);
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
+                    print_opcode("not");
+                    bprintf("r%d", reg_num);
                     break;
                 case NEG_EXT_OPCODE:
-                    ret = print_opcode("neg", output_buffer, output_buffer_len,
-                                       offset);
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
-                    ret = snprintf(output_buffer + offset,
-                                  output_buffer_len - offset, "r%d", reg_num);
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
+                    print_opcode("neg");
+                    bprintf("r%d", reg_num);
                     break;
                 case SWAP_EXT_OPCODE:
-                    ret = print_opcode("swap", output_buffer, output_buffer_len,
-                                       offset);
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
+                    print_opcode("swap");
                     break;
                 case MOV_EXT_OPCODE:
-                    ret = print_opcode("mov", output_buffer, output_buffer_len,
-                                       offset);
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
-                    ret = snprintf(output_buffer + offset,
-                                   output_buffer_len - offset, "r%d, r%d",
-                                   reg_num, reg_num ^ 1);
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
+                    print_opcode("mov");
+                    bprintf("r%d, r%d", reg_num, reg_num ^ 1);
                     break;
-                case ALLOC_EXT_OPCODE:
-                    ret = print_opcode("alloc", output_buffer,
-                                       output_buffer_len, offset);
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
-                    ret =
-                        snprintf(output_buffer + offset,
-                                 output_buffer_len - offset, "r%d", reg_num);
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
+                case ALLOCATE_EXT_OPCODE:
+                    print_opcode("allocate");
+                    if (reg_num == 0) {
+                        bprintf("r%d", reg_num);
+                    } else {
+                        uint32_t alloc_len = DECODE_IMM(2);
+                        bprintf("%d", alloc_len);
+                    }
                     break;
-                case TRANS_EXT_OPCODE:
-                    ret = print_opcode("trans", output_buffer,
-                                       output_buffer_len, offset);
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
-                    ret =
-                        snprintf(output_buffer + offset,
-                                 output_buffer_len - offset, "r%d", reg_num);
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
+                case TRANSMIT_EXT_OPCODE:
+                    print_opcode(reg_num ? "transmitudp" : "transmit");
+                    u8 ip_ofs = DECODE_IMM(1);
+                    u8 csum_ofs = DECODE_IMM(1);
+                    if (csum_ofs < 255) {
+                        u8 csum_start = DECODE_IMM(1);
+                        u16 partial_csum = DECODE_IMM(2);
+                        bprintf("ip_ofs=%d, csum_ofs=%d, csum_start=%d, partial_csum=0x%04x",
+                                ip_ofs, csum_ofs, csum_start, partial_csum);
+                    } else {
+                        bprintf("ip_ofs=%d", ip_ofs);
+                    }
                     break;
-                case EWRITE1_EXT_OPCODE:
-                case EWRITE2_EXT_OPCODE:
-                case EWRITE4_EXT_OPCODE: {
-                    ret = print_opcode("write", output_buffer,
-                                       output_buffer_len, offset);
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
-                    ret = snprintf(output_buffer + offset,
-                                   output_buffer_len - offset, "r%d, %d",
-                                   reg_num, 1 << (imm - EWRITE1_EXT_OPCODE));
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
+                case EWRITE1_EXT_OPCODE: print_opcode("ewrite1"); bprintf("r%d", reg_num); break;
+                case EWRITE2_EXT_OPCODE: print_opcode("ewrite2"); bprintf("r%d", reg_num); break;
+                case EWRITE4_EXT_OPCODE: print_opcode("ewrite4"); bprintf("r%d", reg_num); break;
+                case EPKTDATACOPYIMM_EXT_OPCODE:
+                case EPKTDATACOPYR1_EXT_OPCODE: {
+                    if (reg_num == 0) {
+                        print_opcode("epktcopy");
+                    } else {
+                        print_opcode("edatacopy");
+                    }
+                    if (imm == EPKTDATACOPYIMM_EXT_OPCODE) {
+                        uint32_t len = DECODE_IMM(1);
+                        bprintf(" src=r0, len=%d", len);
+                    } else {
+                        bprintf(" src=r0, len=r1");
+                    }
+
                     break;
                 }
-                case EDATACOPY:
-                case EPKTCOPY: {
-                    if (imm == EPKTCOPY) {
-                        ret = print_opcode("pcopy", output_buffer,
-                                           output_buffer_len, offset);
-                    } else {
-                        ret = print_opcode("dcopy", output_buffer,
-                                           output_buffer_len, offset);
+                case JDNSQMATCH_EXT_OPCODE:       // 43
+                case JDNSAMATCH_EXT_OPCODE:       // 44
+                case JDNSQMATCHSAFE_EXT_OPCODE:   // 45
+                case JDNSAMATCHSAFE_EXT_OPCODE: { // 46
+                    uint32_t offs = DECODE_IMM(1 << (len_field - 1));
+                    int qtype = -1;
+                    switch(imm) {
+                        case JDNSQMATCH_EXT_OPCODE:
+                            print_opcode(reg_num ? "jdnsqeq" : "jdnsqne");
+                            qtype = DECODE_IMM(1);
+                            break;
+                        case JDNSQMATCHSAFE_EXT_OPCODE:
+                            print_opcode(reg_num ? "jdnsqeqsafe" : "jdnsqnesafe");
+                            qtype = DECODE_IMM(1);
+                            break;
+                        case JDNSAMATCH_EXT_OPCODE:
+                            print_opcode(reg_num ? "jdnsaeq" : "jdnsane"); break;
+                        case JDNSAMATCHSAFE_EXT_OPCODE:
+                            print_opcode(reg_num ? "jdnsaeqsafe" : "jdnsanesafe"); break;
+                        default:
+                            bprintf("unknown_ext %u", imm); break;
                     }
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
-                    if (len_field > 0) {
-                        const uint32_t imm_len = 1 << (len_field - 1);
-                        uint32_t relative_offs = 0;
-                        DECODE_IMM(relative_offs, imm_len);
-                        uint32_t copy_len = 0;
-                        DECODE_IMM(copy_len, 1);
-
-                        ret = snprintf(
-                            output_buffer + offset, output_buffer_len - offset,
-                            "[r%u+%d], %d", reg_num, relative_offs, copy_len);
-                        ASSERT_RET_INBOUND(ret);
-                        offset += ret;
+                    bprintf("r0, ");
+                    uint32_t end = *ptr2pc;
+                    while (end + 1 < program_len && !(program[end] == 0 && program[end + 1] == 0)) {
+                        end++;
+                    }
+                    end += 2;
+                    print_jump_target(end + offs, program_len);
+                    bprintf(", ");
+                    if (imm == JDNSQMATCH_EXT_OPCODE || imm == JDNSQMATCHSAFE_EXT_OPCODE) {
+                        bprintf("%d, ", qtype);
+                    }
+                    while (*ptr2pc < end) {
+                        uint8_t byte = program[(*ptr2pc)++];
+                        // Values < 0x40 could be lengths, but '-' and '0'..'9' are in practice
+                        // usually too long to be lengths, so print them as characters. All other
+                        // chars < 0x40 are not valid DNS characters.
+                        if (byte == '-' || (byte >= '0' && byte <= '9') || byte >= 0x40) {
+                            bprintf("%c", byte);
+                        } else {
+                            bprintf("(%d)", byte);
+                        }
                     }
                     break;
                 }
                 default:
-                    ret = snprintf(output_buffer + offset,
-                                   output_buffer_len - offset, "unknown_ext %u",
-                                   imm);
-                    ASSERT_RET_INBOUND(ret);
-                    offset += ret;
+                    bprintf("unknown_ext %u", imm);
                     break;
             }
             break;
         case LDDW_OPCODE:
         case STDW_OPCODE:
-            ret = PRINT_OPCODE();
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
-            if (signed_imm > 0) {
-                ret = snprintf(output_buffer + offset,
-                           output_buffer_len - offset, "r%u, [r%u+%d]", reg_num,
-                           reg_num ^ 1, signed_imm);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
-            } else if (signed_imm < 0) {
-                ret = snprintf(output_buffer + offset,
-                               output_buffer_len - offset, "r%u, [r%u-%d]",
-                               reg_num, reg_num ^ 1, -signed_imm);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+            PRINT_OPCODE();
+            if (v6_mode) {
+                if (opcode == LDDW_OPCODE) {
+                    bprintf("r%u, counter=%d", reg_num, imm);
+                } else {
+                    bprintf("counter=%d, r%u", imm, reg_num);
+                }
             } else {
-                ret = snprintf(output_buffer + offset,
-                               output_buffer_len - offset, "r%u, [r%u]", reg_num,
-                               reg_num ^ 1);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                if (signed_imm > 0) {
+                    bprintf("r%u, [r%u+%d]", reg_num, reg_num ^ 1, signed_imm);
+                } else if (signed_imm < 0) {
+                    bprintf("r%u, [r%u-%d]", reg_num, reg_num ^ 1, -signed_imm);
+                } else {
+                    bprintf("r%u, [r%u]", reg_num, reg_num ^ 1);
+                }
             }
             break;
         case WRITE_OPCODE: {
-            ret = PRINT_OPCODE();
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
+            PRINT_OPCODE();
             uint32_t write_len = 1 << (len_field - 1);
             if (write_len > 0) {
-                ret = snprintf(output_buffer + offset,
-                               output_buffer_len - offset, "0x");
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                bprintf("0x");
             }
             for (uint32_t i = 0; i < write_len; ++i) {
                 uint8_t byte =
                     (uint8_t) ((imm >> (write_len - 1 - i) * 8) & 0xff);
-                ret = snprintf(output_buffer + offset,
-                               output_buffer_len - offset, "%02x", byte);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
+                bprintf("%02x", byte);
 
             }
             break;
         }
-        case MEMCOPY_OPCODE: {
+        case PKTDATACOPY_OPCODE: {
             if (reg_num == 0) {
-                ret = print_opcode("pcopy", output_buffer, output_buffer_len,
-                                   offset);
+                print_opcode("pktcopy");
             } else {
-                ret = print_opcode("dcopy", output_buffer, output_buffer_len,
-                                   offset);
+                print_opcode("datacopy");
             }
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
-            if (len_field > 0) {
-                uint32_t src_offs = imm;
-                uint32_t copy_len = 0;
-                DECODE_IMM(copy_len, 1);
-                ret =
-                    snprintf(output_buffer + offset, output_buffer_len - offset,
-                             "%d, %d", src_offs, copy_len);
-                ASSERT_RET_INBOUND(ret);
-                offset += ret;
-            }
+            uint32_t src_offs = imm;
+            uint32_t copy_len = DECODE_IMM(1);
+            bprintf("src=%d, len=%d", src_offs, copy_len);
             break;
         }
         // Unknown opcode
         default:
-            ret = snprintf(output_buffer + offset, output_buffer_len - offset,
-                           "unknown %u", opcode);
-            ASSERT_RET_INBOUND(ret);
-            offset += ret;
+            bprintf("unknown %u", opcode);
             break;
     }
-    return pc;
+    return print_buf;
 }
diff --git a/disassembler.h b/disassembler.h
index cf10166..3c40cd6 100644
--- a/disassembler.h
+++ b/disassembler.h
@@ -23,20 +23,17 @@
 #endif
 
 /**
- * Disassembles a APF program into a human-readable format.
+ * Disassembles an APF program into a human-readable format.
  *
  * @param program the program bytecode.
  * @param program_len the length of the program bytecode.
- * @param pc The program counter which point to the current instruction.
- * @param output_buffer A pointer to a buffer where the disassembled
- *                      instruction will be stored.
- * @param output_buffer_len the length of the output buffer.
+ * @param ptr2pc pointer to the program counter which points to the current instruction.
+ *           After the function call, the program counter will be updated to point to the
+ *           next instruction.
  *
- * @return the program counter which point to the next instruction.
+ * @return pointer to a static buffer which contains human-readable text.
  */
-uint32_t apf_disassemble(const uint8_t* program, uint32_t program_len,
-                         uint32_t pc, char* output_buffer,
-                         int output_buffer_len);
+const char* apf_disassemble(const uint8_t* program, uint32_t program_len, uint32_t* ptr2pc);
 
 #ifdef __cplusplus
 }
diff --git a/doc/packet_formats.txt b/doc/packet_formats.txt
new file mode 100644
index 0000000..3abf976
--- /dev/null
+++ b/doc/packet_formats.txt
@@ -0,0 +1,82 @@
+https://en.wikipedia.org/wiki/Internet_Protocol_version_4
+
+IPv4 header format
++---------+-------+---------------------------------------+---------------------------------------+---------------------------------------+---------------------------------------+
+| Offsets | Octet |                   0                   |                   1                   |                   2                   |                   3                   |
+|  Octet  |  Bit  |  0 |  1 |  2 |  3 |  4 |  5 |  6 |  7 |  8 |  9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
++---------+-------+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
+|    0    |   0   | Version [4]       | IHL [>=5]         | DSCP                        | ECN     | Total length [>=20, >=4*IHL]                                                  |
++---------+-------+-------------------+-------------------+-----------------------------+---------+----+----+----+----------------------------------------------------------------+
+|    4    |  32   | Identification                                                                |  0 | DF | MF | Fragment offset                                                |
++---------+-------+---------------------------------------+---------------------------------------+----+----+----+----------------------------------------------------------------+
+|    8    |  64   | Time to Live                          | Protocol                              | Header checksum                                                               |
++---------+-------+---------------------------------------+---------------------------------------+-------------------------------------------------------------------------------+
+|   12    |  96   | Source address                                                                                                                                                |
++---------+-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|   16    | 128   | Destination address                                                                                                                                           |
++---------+-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|   20    | 160   | Options (if IHL > 5)                                                                                                                                          |
+|   ..    | ...   |                                                                                                                                                               |
+|   56    | 448   |                                                                                                                                                               |
++---------+-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+https://en.wikipedia.org/wiki/IPv6_packet
+
+Fixed header format
++---------+-------+---------------------------------------+---------------------------------------+---------------------------------------+---------------------------------------+
+| Offsets | Octet |                   0                   |                   1                   |                   2                   |                   3                   |
+|  Octet  |  Bit  |  0 |  1 |  2 |  3 |  4 |  5 |  6 |  7 |  8 |  9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
++---------+-------+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
+|    0    |   0   | Version [6]       | Traffic class (DSCP 6 + ECN 2 bits)   | Flow label                                                                                        |
++---------+-------+-------------------+---------------------------------------+-------------------+---------------------------------------+---------------------------------------+
+|    4    |  32   | Payload length                                                                | Next header                           | Hop limit                             |
++---------+-------+-------------------------------------------------------------------------------+---------------------------------------+---------------------------------------+
+|    8    |  64   | Source Address                                                                                                                                                |
+|   12    |  96   |                                                                                                                                                               |
+|   16    | 128   |                                                                                                                                                               |
+|   20    | 160   |                                                                                                                                                               |
++---------+-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|   24    | 192   | Destination address                                                                                                                                           |
+|   28    | 224   |                                                                                                                                                               |
+|   32    | 256   |                                                                                                                                                               |
+|   36    | 288   |                                                                                                                                                               |
++---------+-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+https://en.wikipedia.org/wiki/ICMPv6
+
+ICMPv6 header (4 bytes): u8 type, u8 code, u16 checksum
+(4 bytes in) Message body...  (but note in practice it's actually 4 bytes reserved, then 'real' message body at 8 bytes in)
+
+https://en.wikipedia.org/wiki/Transmission_Control_Protocol
+
+TCP segment header
++---------+-------+---------------------------------------+---------------------------------------+---------------------------------------+---------------------------------------+
+| Offsets | Octet |                   0                   |                   1                   |                   2                   |                   3                   |
+|  Octet  |  Bit  |  0 |  1 |  2 |  3 |  4 |  5 |  6 |  7 |  8 |  9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
++---------+-------+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
+|    0    |   0   | Source port                                                                   | Destination port                                                              |
++---------+-------+-------------------------------------------------------------------------------+-------------------------------------------------------------------------------+
+|    4    |  32   | Sequence number                                                                                                                                               |
++---------+-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|    8    |  64   | Acknowledgment number (if ACK set)                                                                                                                            |
++---------+-------+-------------------+-------------------+----+----+----+----+----+----+----+----+-------------------------------------------------------------------------------+
+|   12    |  96   | Data Offset       | Reserved (zero)   | CWR| ECE| URG| ACK| PSH| RST| SYN| FIN| Window size                                                                   |
++---------+-------+-------------------+-------------------+----+----+----+----+----+----+----+----+-------------------------------------------------------------------------------+
+|   16    | 128   | Checksum                                                                      | Urgent pointer (if URG set)                                                   |
++---------+-------+-------------------------------------------------------------------------------+-------------------------------------------------------------------------------+
+|   20    | 160   | Options (if data offset > 5. Padded at the end with "0" bits if necessary.)                                                                                   |
+|   ..    | ...   |                                                                                                                                                               |
+|   56    | 448   |                                                                                                                                                               |
++---------+-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+https://en.wikipedia.org/wiki/User_Datagram_Protocol
+
+UDP datagram header (8 bytes): be16 source port, destination port, length, checksum
++---------+-------+---------------------------------------+---------------------------------------+---------------------------------------+---------------------------------------+
+| Offsets | Octet |                   0                   |                   1                   |                   2                   |                   3                   |
+|  Octet  |  Bit  |  0 |  1 |  2 |  3 |  4 |  5 |  6 |  7 |  8 |  9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
++---------+-------+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
+|    0    |   0   | Source port                                                                   | Destination port                                                              |
++---------+-------+-------------------------------------------------------------------------------+-------------------------------------------------------------------------------+
+|    4    |  32   | Length                                                                        | Checksum                                                                      |
++---------+-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
diff --git a/testdata/large_ra_without_counters.output b/testdata/large_ra_without_counters.output
index 7db6c33..967b204 100644
--- a/testdata/large_ra_without_counters.output
+++ b/testdata/large_ra_without_counters.output
@@ -1,26 +1,26 @@
       R0       R1       PC  Instruction
 -------------------------------------------------
-       0        0        0: ldh   r0, [12]
-    86dd        0        2: jlt   r0, 0x600, DROP
-    86dd        0        7: jne   r0, 0x806, 64
-    86dd        0       64: jne   r0, 0x800, 141
-    86dd        0      141: jeq   r0, 0x86dd, 161
-    86dd        0      161: ldb   r0, [20]
-      3a        0      163: jeq   r0, 0x3a, 176
-      3a        0      176: ldb   r0, [54]
-      86        0      178: jeq   r0, 0x85, DROP
-      86        0      183: jne   r0, 0x88, 210
-      86        0      210: ldm   r0, m[14]
-      ee        0      212: jne   r0, 0x46, 297
-      ee        0      297: ldm   r0, m[14]
-      ee        0      299: jne   r0, 0x66, 433
-      ee        0      433: ldm   r0, m[14]
-      ee        0      435: jne   r0, 0x6e, 571
-      ee        0      571: ldm   r0, m[14]
-      ee        0      573: jne   r0, 0x5e, 687
-      ee        0      687: ldm   r0, m[14]
-      ee        0      689: jne   r0, 0x5e, 808
-      ee        0      808: ldm   r0, m[14]
-      ee        0      810: jne   r0, 0x4e, PASS
+       0        0        0: ldh         r0, [12]
+    86dd        0        2: jlt         r0, 0x600, DROP
+    86dd        0        7: jne         r0, 0x806, 64
+    86dd        0       64: jne         r0, 0x800, 141
+    86dd        0      141: jeq         r0, 0x86dd, 161
+    86dd        0      161: ldb         r0, [20]
+      3a        0      163: jeq         r0, 0x3a, 176
+      3a        0      176: ldb         r0, [54]
+      86        0      178: jeq         r0, 0x85, DROP
+      86        0      183: jne         r0, 0x88, 210
+      86        0      210: ldm         r0, m[14]
+      ee        0      212: jne         r0, 0x46, 297
+      ee        0      297: ldm         r0, m[14]
+      ee        0      299: jne         r0, 0x66, 433
+      ee        0      433: ldm         r0, m[14]
+      ee        0      435: jne         r0, 0x6e, 571
+      ee        0      571: ldm         r0, m[14]
+      ee        0      573: jne         r0, 0x5e, 687
+      ee        0      687: ldm         r0, m[14]
+      ee        0      689: jne         r0, 0x5e, 808
+      ee        0      808: ldm         r0, m[14]
+      ee        0      810: jne         r0, 0x4e, PASS
       ee        0      908: PASS
 Packet passed
diff --git a/testdata/one_ra_with_counters.output b/testdata/one_ra_with_counters.output
index 000c0bb..4cd2b65 100644
--- a/testdata/one_ra_with_counters.output
+++ b/testdata/one_ra_with_counters.output
@@ -1,57 +1,57 @@
       R0       R1       PC  Instruction
 -------------------------------------------------
-       0        0        0: li    r1, -4
-       0 fffffffc        2: lddw  r0, [r1]
-      29 fffffffc        3: add   r0, 1
-      2a fffffffc        5: stdw  r0, [r1]
-      2a fffffffc        6: ldh   r0, [12]
-    86dd fffffffc        8: li    r1, -104
-    86dd ffffff98       10: jlt   r0, 0x600, 503
-    86dd ffffff98       15: li    r1, -108
-    86dd ffffff94       17: jeq   r0, 0x88a2, 503
-    86dd ffffff94       22: jeq   r0, 0x88a4, 503
-    86dd ffffff94       27: jeq   r0, 0x88b8, 503
-    86dd ffffff94       32: jeq   r0, 0x88cd, 503
-    86dd ffffff94       37: jeq   r0, 0x88e3, 503
-    86dd ffffff94       42: jne   r0, 0x806, 115
-    86dd ffffff94      115: jne   r0, 0x800, 215
-    86dd ffffff94      215: jeq   r0, 0x86dd, 239
-    86dd ffffff94      239: ldb   r0, [20]
-      3a ffffff94      241: jeq   r0, 0x3a, 255
-      3a ffffff94      255: ldb   r0, [54]
-      86 ffffff94      257: li    r1, -84
-      86 ffffffac      259: jeq   r0, 0x85, 503
-      86 ffffffac      262: jne   r0, 0x88, 290
-      86 ffffffac      290: ldm   r0, m[14]
-      96 ffffffac      292: jne   r0, 0x96, 495
-      96 ffffffac      295: ldm   r0, m[15]
-       0 ffffffac      297: jgt   r0, 0x258, 495
-       0 ffffffac      302: li    r0, 0
-       0 ffffffac      303: jnebs r0, 0xf, 495, 428e66343deb28a24b792e9086dd68
-       0 ffffffac      321: li    r0, 18
-      12 ffffffac      323: jnebs r0, 0x26, 495, 00603afffe8000000000000002005efffe000265fe80000000000000408e66fffe343deb8600
-      12 ffffffac      364: li    r0, 58
-      3a ffffffac      366: jnebs r0, 0x2, 495, 4000
-      3a ffffffac      371: ldh   r0, [60]
-     e10 ffffffac      373: jlt   r0, 0x258, 495
-     e10 ffffffac      378: li    r0, 62
-      3e ffffffac      380: jnebs r0, 0x14, 495, 0000000000000000010100005e00026519050000
-      3e ffffffac      403: ldw   r0, [82]
-     e10 ffffffac      405: jlt   r0, 0x258, 495
-     e10 ffffffac      410: li    r0, 86
-      56 ffffffac      412: jnebs r0, 0x24, 495, 2001486048600000000000000000884420014860486000000000000000008888030440c0
-      56 ffffffac      451: ldw   r0, [122]
-  278d00 ffffffac      453: jlt   r0, 0x258, 495
-  278d00 ffffffac      458: ldw   r0, [126]
-   93a80 ffffffac      460: jlt   r0, 0x258, 495
-   93a80 ffffffac      465: li    r0, 130
-      82 ffffffac      468: jnebs r0, 0x14, 495, 000000002a0079e10abc0e000000000000000000
-      82 ffffffac      491: li    r1, -56
-      82 ffffffc8      493: jmp   503
-      82 ffffffc8      503: lddw  r0, [r1]
-      1b ffffffc8      504: add   r0, 1
-      1c ffffffc8      506: stdw  r0, [r1]
-      1c ffffffc8      507: jmp   DROP
+       0        0        0: li          r1, -4
+       0 fffffffc        2: lddw        r0, [r1]
+      29 fffffffc        3: add         r0, 1
+      2a fffffffc        5: stdw        r0, [r1]
+      2a fffffffc        6: ldh         r0, [12]
+    86dd fffffffc        8: li          r1, -104
+    86dd ffffff98       10: jlt         r0, 0x600, 503
+    86dd ffffff98       15: li          r1, -108
+    86dd ffffff94       17: jeq         r0, 0x88a2, 503
+    86dd ffffff94       22: jeq         r0, 0x88a4, 503
+    86dd ffffff94       27: jeq         r0, 0x88b8, 503
+    86dd ffffff94       32: jeq         r0, 0x88cd, 503
+    86dd ffffff94       37: jeq         r0, 0x88e3, 503
+    86dd ffffff94       42: jne         r0, 0x806, 115
+    86dd ffffff94      115: jne         r0, 0x800, 215
+    86dd ffffff94      215: jeq         r0, 0x86dd, 239
+    86dd ffffff94      239: ldb         r0, [20]
+      3a ffffff94      241: jeq         r0, 0x3a, 255
+      3a ffffff94      255: ldb         r0, [54]
+      86 ffffff94      257: li          r1, -84
+      86 ffffffac      259: jeq         r0, 0x85, 503
+      86 ffffffac      262: jne         r0, 0x88, 290
+      86 ffffffac      290: ldm         r0, m[14]
+      96 ffffffac      292: jne         r0, 0x96, 495
+      96 ffffffac      295: ldm         r0, m[15]
+       0 ffffffac      297: jgt         r0, 0x258, 495
+       0 ffffffac      302: li          r0, 0
+       0 ffffffac      303: jbsne       r0, 0xf, 495, 428e66343deb28a24b792e9086dd68
+       0 ffffffac      321: li          r0, 18
+      12 ffffffac      323: jbsne       r0, 0x26, 495, 00603afffe8000000000000002005efffe000265fe80000000000000408e66fffe343deb8600
+      12 ffffffac      364: li          r0, 58
+      3a ffffffac      366: jbsne       r0, 0x2, 495, 4000
+      3a ffffffac      371: ldh         r0, [60]
+     e10 ffffffac      373: jlt         r0, 0x258, 495
+     e10 ffffffac      378: li          r0, 62
+      3e ffffffac      380: jbsne       r0, 0x14, 495, 0000000000000000010100005e00026519050000
+      3e ffffffac      403: ldw         r0, [82]
+     e10 ffffffac      405: jlt         r0, 0x258, 495
+     e10 ffffffac      410: li          r0, 86
+      56 ffffffac      412: jbsne       r0, 0x24, 495, 2001486048600000000000000000884420014860486000000000000000008888030440c0
+      56 ffffffac      451: ldw         r0, [122]
+  278d00 ffffffac      453: jlt         r0, 0x258, 495
+  278d00 ffffffac      458: ldw         r0, [126]
+   93a80 ffffffac      460: jlt         r0, 0x258, 495
+   93a80 ffffffac      465: li          r0, 130
+      82 ffffffac      468: jbsne       r0, 0x14, 495, 000000002a0079e10abc0e000000000000000000
+      82 ffffffac      491: li          r1, -56
+      82 ffffffc8      493: jmp         503
+      82 ffffffc8      503: lddw        r0, [r1]
+      1b ffffffc8      504: add         r0, 1
+      1c ffffffc8      506: stdw        r0, [r1]
+      1c ffffffc8      507: jmp         DROP
       1c ffffffc8      510: DROP
 Packet dropped
 Data: 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001c0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002a
diff --git a/testdata/one_ra_with_counters_age_30.output b/testdata/one_ra_with_counters_age_30.output
index bf0d6ed..b2727f6 100644
--- a/testdata/one_ra_with_counters_age_30.output
+++ b/testdata/one_ra_with_counters_age_30.output
@@ -1,57 +1,57 @@
       R0       R1       PC  Instruction
 -------------------------------------------------
-       0        0        0: li    r1, -4
-       0 fffffffc        2: lddw  r0, [r1]
-      29 fffffffc        3: add   r0, 1
-      2a fffffffc        5: stdw  r0, [r1]
-      2a fffffffc        6: ldh   r0, [12]
-    86dd fffffffc        8: li    r1, -104
-    86dd ffffff98       10: jlt   r0, 0x600, 503
-    86dd ffffff98       15: li    r1, -108
-    86dd ffffff94       17: jeq   r0, 0x88a2, 503
-    86dd ffffff94       22: jeq   r0, 0x88a4, 503
-    86dd ffffff94       27: jeq   r0, 0x88b8, 503
-    86dd ffffff94       32: jeq   r0, 0x88cd, 503
-    86dd ffffff94       37: jeq   r0, 0x88e3, 503
-    86dd ffffff94       42: jne   r0, 0x806, 115
-    86dd ffffff94      115: jne   r0, 0x800, 215
-    86dd ffffff94      215: jeq   r0, 0x86dd, 239
-    86dd ffffff94      239: ldb   r0, [20]
-      3a ffffff94      241: jeq   r0, 0x3a, 255
-      3a ffffff94      255: ldb   r0, [54]
-      86 ffffff94      257: li    r1, -84
-      86 ffffffac      259: jeq   r0, 0x85, 503
-      86 ffffffac      262: jne   r0, 0x88, 290
-      86 ffffffac      290: ldm   r0, m[14]
-      96 ffffffac      292: jne   r0, 0x96, 495
-      96 ffffffac      295: ldm   r0, m[15]
-      1e ffffffac      297: jgt   r0, 0x258, 495
-      1e ffffffac      302: li    r0, 0
-       0 ffffffac      303: jnebs r0, 0xf, 495, 428e66343deb28a24b792e9086dd68
-       0 ffffffac      321: li    r0, 18
-      12 ffffffac      323: jnebs r0, 0x26, 495, 00603afffe8000000000000002005efffe000265fe80000000000000408e66fffe343deb8600
-      12 ffffffac      364: li    r0, 58
-      3a ffffffac      366: jnebs r0, 0x2, 495, 4000
-      3a ffffffac      371: ldh   r0, [60]
-     e10 ffffffac      373: jlt   r0, 0x258, 495
-     e10 ffffffac      378: li    r0, 62
-      3e ffffffac      380: jnebs r0, 0x14, 495, 0000000000000000010100005e00026519050000
-      3e ffffffac      403: ldw   r0, [82]
-     e10 ffffffac      405: jlt   r0, 0x258, 495
-     e10 ffffffac      410: li    r0, 86
-      56 ffffffac      412: jnebs r0, 0x24, 495, 2001486048600000000000000000884420014860486000000000000000008888030440c0
-      56 ffffffac      451: ldw   r0, [122]
-  278d00 ffffffac      453: jlt   r0, 0x258, 495
-  278d00 ffffffac      458: ldw   r0, [126]
-   93a80 ffffffac      460: jlt   r0, 0x258, 495
-   93a80 ffffffac      465: li    r0, 130
-      82 ffffffac      468: jnebs r0, 0x14, 495, 000000002a0079e10abc0e000000000000000000
-      82 ffffffac      491: li    r1, -56
-      82 ffffffc8      493: jmp   503
-      82 ffffffc8      503: lddw  r0, [r1]
-      1b ffffffc8      504: add   r0, 1
-      1c ffffffc8      506: stdw  r0, [r1]
-      1c ffffffc8      507: jmp   DROP
+       0        0        0: li          r1, -4
+       0 fffffffc        2: lddw        r0, [r1]
+      29 fffffffc        3: add         r0, 1
+      2a fffffffc        5: stdw        r0, [r1]
+      2a fffffffc        6: ldh         r0, [12]
+    86dd fffffffc        8: li          r1, -104
+    86dd ffffff98       10: jlt         r0, 0x600, 503
+    86dd ffffff98       15: li          r1, -108
+    86dd ffffff94       17: jeq         r0, 0x88a2, 503
+    86dd ffffff94       22: jeq         r0, 0x88a4, 503
+    86dd ffffff94       27: jeq         r0, 0x88b8, 503
+    86dd ffffff94       32: jeq         r0, 0x88cd, 503
+    86dd ffffff94       37: jeq         r0, 0x88e3, 503
+    86dd ffffff94       42: jne         r0, 0x806, 115
+    86dd ffffff94      115: jne         r0, 0x800, 215
+    86dd ffffff94      215: jeq         r0, 0x86dd, 239
+    86dd ffffff94      239: ldb         r0, [20]
+      3a ffffff94      241: jeq         r0, 0x3a, 255
+      3a ffffff94      255: ldb         r0, [54]
+      86 ffffff94      257: li          r1, -84
+      86 ffffffac      259: jeq         r0, 0x85, 503
+      86 ffffffac      262: jne         r0, 0x88, 290
+      86 ffffffac      290: ldm         r0, m[14]
+      96 ffffffac      292: jne         r0, 0x96, 495
+      96 ffffffac      295: ldm         r0, m[15]
+      1e ffffffac      297: jgt         r0, 0x258, 495
+      1e ffffffac      302: li          r0, 0
+       0 ffffffac      303: jbsne       r0, 0xf, 495, 428e66343deb28a24b792e9086dd68
+       0 ffffffac      321: li          r0, 18
+      12 ffffffac      323: jbsne       r0, 0x26, 495, 00603afffe8000000000000002005efffe000265fe80000000000000408e66fffe343deb8600
+      12 ffffffac      364: li          r0, 58
+      3a ffffffac      366: jbsne       r0, 0x2, 495, 4000
+      3a ffffffac      371: ldh         r0, [60]
+     e10 ffffffac      373: jlt         r0, 0x258, 495
+     e10 ffffffac      378: li          r0, 62
+      3e ffffffac      380: jbsne       r0, 0x14, 495, 0000000000000000010100005e00026519050000
+      3e ffffffac      403: ldw         r0, [82]
+     e10 ffffffac      405: jlt         r0, 0x258, 495
+     e10 ffffffac      410: li          r0, 86
+      56 ffffffac      412: jbsne       r0, 0x24, 495, 2001486048600000000000000000884420014860486000000000000000008888030440c0
+      56 ffffffac      451: ldw         r0, [122]
+  278d00 ffffffac      453: jlt         r0, 0x258, 495
+  278d00 ffffffac      458: ldw         r0, [126]
+   93a80 ffffffac      460: jlt         r0, 0x258, 495
+   93a80 ffffffac      465: li          r0, 130
+      82 ffffffac      468: jbsne       r0, 0x14, 495, 000000002a0079e10abc0e000000000000000000
+      82 ffffffac      491: li          r1, -56
+      82 ffffffc8      493: jmp         503
+      82 ffffffc8      503: lddw        r0, [r1]
+      1b ffffffc8      504: add         r0, 1
+      1c ffffffc8      506: stdw        r0, [r1]
+      1c ffffffc8      507: jmp         DROP
       1c ffffffc8      510: DROP
 Packet dropped
 Data: 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001c0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002a
diff --git a/testdata/one_ra_with_counters_age_600.output b/testdata/one_ra_with_counters_age_600.output
index 231cac7..6538ceb 100644
--- a/testdata/one_ra_with_counters_age_600.output
+++ b/testdata/one_ra_with_counters_age_600.output
@@ -1,57 +1,57 @@
       R0       R1       PC  Instruction
 -------------------------------------------------
-       0        0        0: li    r1, -4
-       0 fffffffc        2: lddw  r0, [r1]
-      29 fffffffc        3: add   r0, 1
-      2a fffffffc        5: stdw  r0, [r1]
-      2a fffffffc        6: ldh   r0, [12]
-    86dd fffffffc        8: li    r1, -104
-    86dd ffffff98       10: jlt   r0, 0x600, 503
-    86dd ffffff98       15: li    r1, -108
-    86dd ffffff94       17: jeq   r0, 0x88a2, 503
-    86dd ffffff94       22: jeq   r0, 0x88a4, 503
-    86dd ffffff94       27: jeq   r0, 0x88b8, 503
-    86dd ffffff94       32: jeq   r0, 0x88cd, 503
-    86dd ffffff94       37: jeq   r0, 0x88e3, 503
-    86dd ffffff94       42: jne   r0, 0x806, 115
-    86dd ffffff94      115: jne   r0, 0x800, 215
-    86dd ffffff94      215: jeq   r0, 0x86dd, 239
-    86dd ffffff94      239: ldb   r0, [20]
-      3a ffffff94      241: jeq   r0, 0x3a, 255
-      3a ffffff94      255: ldb   r0, [54]
-      86 ffffff94      257: li    r1, -84
-      86 ffffffac      259: jeq   r0, 0x85, 503
-      86 ffffffac      262: jne   r0, 0x88, 290
-      86 ffffffac      290: ldm   r0, m[14]
-      96 ffffffac      292: jne   r0, 0x96, 495
-      96 ffffffac      295: ldm   r0, m[15]
-     258 ffffffac      297: jgt   r0, 0x258, 495
-     258 ffffffac      302: li    r0, 0
-       0 ffffffac      303: jnebs r0, 0xf, 495, 428e66343deb28a24b792e9086dd68
-       0 ffffffac      321: li    r0, 18
-      12 ffffffac      323: jnebs r0, 0x26, 495, 00603afffe8000000000000002005efffe000265fe80000000000000408e66fffe343deb8600
-      12 ffffffac      364: li    r0, 58
-      3a ffffffac      366: jnebs r0, 0x2, 495, 4000
-      3a ffffffac      371: ldh   r0, [60]
-     e10 ffffffac      373: jlt   r0, 0x258, 495
-     e10 ffffffac      378: li    r0, 62
-      3e ffffffac      380: jnebs r0, 0x14, 495, 0000000000000000010100005e00026519050000
-      3e ffffffac      403: ldw   r0, [82]
-     e10 ffffffac      405: jlt   r0, 0x258, 495
-     e10 ffffffac      410: li    r0, 86
-      56 ffffffac      412: jnebs r0, 0x24, 495, 2001486048600000000000000000884420014860486000000000000000008888030440c0
-      56 ffffffac      451: ldw   r0, [122]
-  278d00 ffffffac      453: jlt   r0, 0x258, 495
-  278d00 ffffffac      458: ldw   r0, [126]
-   93a80 ffffffac      460: jlt   r0, 0x258, 495
-   93a80 ffffffac      465: li    r0, 130
-      82 ffffffac      468: jnebs r0, 0x14, 495, 000000002a0079e10abc0e000000000000000000
-      82 ffffffac      491: li    r1, -56
-      82 ffffffc8      493: jmp   503
-      82 ffffffc8      503: lddw  r0, [r1]
-      1b ffffffc8      504: add   r0, 1
-      1c ffffffc8      506: stdw  r0, [r1]
-      1c ffffffc8      507: jmp   DROP
+       0        0        0: li          r1, -4
+       0 fffffffc        2: lddw        r0, [r1]
+      29 fffffffc        3: add         r0, 1
+      2a fffffffc        5: stdw        r0, [r1]
+      2a fffffffc        6: ldh         r0, [12]
+    86dd fffffffc        8: li          r1, -104
+    86dd ffffff98       10: jlt         r0, 0x600, 503
+    86dd ffffff98       15: li          r1, -108
+    86dd ffffff94       17: jeq         r0, 0x88a2, 503
+    86dd ffffff94       22: jeq         r0, 0x88a4, 503
+    86dd ffffff94       27: jeq         r0, 0x88b8, 503
+    86dd ffffff94       32: jeq         r0, 0x88cd, 503
+    86dd ffffff94       37: jeq         r0, 0x88e3, 503
+    86dd ffffff94       42: jne         r0, 0x806, 115
+    86dd ffffff94      115: jne         r0, 0x800, 215
+    86dd ffffff94      215: jeq         r0, 0x86dd, 239
+    86dd ffffff94      239: ldb         r0, [20]
+      3a ffffff94      241: jeq         r0, 0x3a, 255
+      3a ffffff94      255: ldb         r0, [54]
+      86 ffffff94      257: li          r1, -84
+      86 ffffffac      259: jeq         r0, 0x85, 503
+      86 ffffffac      262: jne         r0, 0x88, 290
+      86 ffffffac      290: ldm         r0, m[14]
+      96 ffffffac      292: jne         r0, 0x96, 495
+      96 ffffffac      295: ldm         r0, m[15]
+     258 ffffffac      297: jgt         r0, 0x258, 495
+     258 ffffffac      302: li          r0, 0
+       0 ffffffac      303: jbsne       r0, 0xf, 495, 428e66343deb28a24b792e9086dd68
+       0 ffffffac      321: li          r0, 18
+      12 ffffffac      323: jbsne       r0, 0x26, 495, 00603afffe8000000000000002005efffe000265fe80000000000000408e66fffe343deb8600
+      12 ffffffac      364: li          r0, 58
+      3a ffffffac      366: jbsne       r0, 0x2, 495, 4000
+      3a ffffffac      371: ldh         r0, [60]
+     e10 ffffffac      373: jlt         r0, 0x258, 495
+     e10 ffffffac      378: li          r0, 62
+      3e ffffffac      380: jbsne       r0, 0x14, 495, 0000000000000000010100005e00026519050000
+      3e ffffffac      403: ldw         r0, [82]
+     e10 ffffffac      405: jlt         r0, 0x258, 495
+     e10 ffffffac      410: li          r0, 86
+      56 ffffffac      412: jbsne       r0, 0x24, 495, 2001486048600000000000000000884420014860486000000000000000008888030440c0
+      56 ffffffac      451: ldw         r0, [122]
+  278d00 ffffffac      453: jlt         r0, 0x258, 495
+  278d00 ffffffac      458: ldw         r0, [126]
+   93a80 ffffffac      460: jlt         r0, 0x258, 495
+   93a80 ffffffac      465: li          r0, 130
+      82 ffffffac      468: jbsne       r0, 0x14, 495, 000000002a0079e10abc0e000000000000000000
+      82 ffffffac      491: li          r1, -56
+      82 ffffffc8      493: jmp         503
+      82 ffffffc8      503: lddw        r0, [r1]
+      1b ffffffc8      504: add         r0, 1
+      1c ffffffc8      506: stdw        r0, [r1]
+      1c ffffffc8      507: jmp         DROP
       1c ffffffc8      510: DROP
 Packet dropped
 Data: 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001c0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002a
diff --git a/testdata/one_ra_with_counters_age_601.output b/testdata/one_ra_with_counters_age_601.output
index b272afd..1789ae0 100644
--- a/testdata/one_ra_with_counters_age_601.output
+++ b/testdata/one_ra_with_counters_age_601.output
@@ -1,36 +1,36 @@
       R0       R1       PC  Instruction
 -------------------------------------------------
-       0        0        0: li    r1, -4
-       0 fffffffc        2: lddw  r0, [r1]
-      29 fffffffc        3: add   r0, 1
-      2a fffffffc        5: stdw  r0, [r1]
-      2a fffffffc        6: ldh   r0, [12]
-    86dd fffffffc        8: li    r1, -104
-    86dd ffffff98       10: jlt   r0, 0x600, 503
-    86dd ffffff98       15: li    r1, -108
-    86dd ffffff94       17: jeq   r0, 0x88a2, 503
-    86dd ffffff94       22: jeq   r0, 0x88a4, 503
-    86dd ffffff94       27: jeq   r0, 0x88b8, 503
-    86dd ffffff94       32: jeq   r0, 0x88cd, 503
-    86dd ffffff94       37: jeq   r0, 0x88e3, 503
-    86dd ffffff94       42: jne   r0, 0x806, 115
-    86dd ffffff94      115: jne   r0, 0x800, 215
-    86dd ffffff94      215: jeq   r0, 0x86dd, 239
-    86dd ffffff94      239: ldb   r0, [20]
-      3a ffffff94      241: jeq   r0, 0x3a, 255
-      3a ffffff94      255: ldb   r0, [54]
-      86 ffffff94      257: li    r1, -84
-      86 ffffffac      259: jeq   r0, 0x85, 503
-      86 ffffffac      262: jne   r0, 0x88, 290
-      86 ffffffac      290: ldm   r0, m[14]
-      96 ffffffac      292: jne   r0, 0x96, 495
-      96 ffffffac      295: ldm   r0, m[15]
-     259 ffffffac      297: jgt   r0, 0x258, 495
-     259 ffffffac      495: li    r1, -28
-     259 ffffffe4      497: lddw  r0, [r1]
-       0 ffffffe4      498: add   r0, 1
-       1 ffffffe4      500: stdw  r0, [r1]
-       1 ffffffe4      501: jmp   PASS
+       0        0        0: li          r1, -4
+       0 fffffffc        2: lddw        r0, [r1]
+      29 fffffffc        3: add         r0, 1
+      2a fffffffc        5: stdw        r0, [r1]
+      2a fffffffc        6: ldh         r0, [12]
+    86dd fffffffc        8: li          r1, -104
+    86dd ffffff98       10: jlt         r0, 0x600, 503
+    86dd ffffff98       15: li          r1, -108
+    86dd ffffff94       17: jeq         r0, 0x88a2, 503
+    86dd ffffff94       22: jeq         r0, 0x88a4, 503
+    86dd ffffff94       27: jeq         r0, 0x88b8, 503
+    86dd ffffff94       32: jeq         r0, 0x88cd, 503
+    86dd ffffff94       37: jeq         r0, 0x88e3, 503
+    86dd ffffff94       42: jne         r0, 0x806, 115
+    86dd ffffff94      115: jne         r0, 0x800, 215
+    86dd ffffff94      215: jeq         r0, 0x86dd, 239
+    86dd ffffff94      239: ldb         r0, [20]
+      3a ffffff94      241: jeq         r0, 0x3a, 255
+      3a ffffff94      255: ldb         r0, [54]
+      86 ffffff94      257: li          r1, -84
+      86 ffffffac      259: jeq         r0, 0x85, 503
+      86 ffffffac      262: jne         r0, 0x88, 290
+      86 ffffffac      290: ldm         r0, m[14]
+      96 ffffffac      292: jne         r0, 0x96, 495
+      96 ffffffac      295: ldm         r0, m[15]
+     259 ffffffac      297: jgt         r0, 0x258, 495
+     259 ffffffac      495: li          r1, -28
+     259 ffffffe4      497: lddw        r0, [r1]
+       0 ffffffe4      498: add         r0, 1
+       1 ffffffe4      500: stdw        r0, [r1]
+       1 ffffffe4      501: jmp         PASS
        1 ffffffe4      509: PASS
 Packet passed
 Data: 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001b0000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000002a
diff --git a/apf_interpreter.c b/v4/apf_interpreter.c
similarity index 100%
rename from apf_interpreter.c
rename to v4/apf_interpreter.c
diff --git a/apf_interpreter.h b/v4/apf_interpreter.h
similarity index 100%
rename from apf_interpreter.h
rename to v4/apf_interpreter.h
diff --git a/v5/Android.bp b/v5/Android.bp
index 23bfa9c..19faf78 100644
--- a/v5/Android.bp
+++ b/v5/Android.bp
@@ -25,6 +25,7 @@
         "-Wall",
         "-Werror",
         "-Werror=implicit-fallthrough",
+        "-Werror=missing-prototypes",
         "-Werror=strict-prototypes",
         "-Wnullable-to-nonnull-conversion",
         "-Wsign-compare",
@@ -40,7 +41,33 @@
     defaults: ["apfv5_defaults"],
     srcs: [
         "apf_interpreter.c",
-        "test_buf_allocator.c"
+        "test_buf_allocator.c",
     ],
     sdk_version: "24",
 }
+
+sh_test_host {
+    name: "apf_assemble_test",
+    src: "apf_interpreter_assemble.sh",
+    filename: "apf_assemble_test.sh",
+    test_config: "apf_assemble_test.xml",
+    data: [
+        // the sources:
+        "apf_*.h",
+        "apf.h",
+        "apf_interpreter_source.c",
+        // the assembled output:
+        "apf_interpreter.c",
+    ],
+    target: {
+        darwin: {
+            enabled: false,
+        },
+        windows: {
+            enabled: false,
+        },
+    },
+    test_options: {
+        unit_test: true,
+    },
+}
diff --git a/v5/apf.h b/v5/apf.h
index b0b34af..d94dba6 100644
--- a/v5/apf.h
+++ b/v5/apf.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2023, The Android Open Source Project
+ * Copyright 2024, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,130 +17,160 @@
 #ifndef ANDROID_APF_APF_H
 #define ANDROID_APF_APF_H
 
-// A brief overview of APF:
-//
-// APF machine is composed of:
-//  1. A read-only program consisting of bytecodes as described below.
-//  2. Two 32-bit registers, called R0 and R1.
-//  3. Sixteen 32-bit temporary memory slots (cleared between packets).
-//  4. A read-only packet.
-// The program is executed by the interpreter below and parses the packet
-// to determine if the application processor (AP) should be woken up to
-// handle the packet or if can be dropped.
-//
-// APF bytecode description:
-//
-// The APF interpreter uses big-endian byte order for loads from the packet
-// and for storing immediates in instructions.
-//
-// Each instruction starts with a byte composed of:
-//  Top 5 bits form "opcode" field, see *_OPCODE defines below.
-//  Next 2 bits form "size field", which indicate the length of an immediate
-//  value which follows the first byte.  Values in this field:
-//                 0 => immediate value is 0 and no bytes follow.
-//                 1 => immediate value is 1 byte big.
-//                 2 => immediate value is 2 bytes big.
-//                 3 => immediate value is 4 bytes big.
-//  Bottom bit forms "register" field, which indicates which register this
-//  instruction operates on.
-//
-//  There are three main categories of instructions:
-//  Load instructions
-//    These instructions load byte(s) of the packet into a register.
-//    They load either 1, 2 or 4 bytes, as determined by the "opcode" field.
-//    They load into the register specified by the "register" field.
-//    The immediate value that follows the first byte of the instruction is
-//    the byte offset from the beginning of the packet to load from.
-//    There are "indexing" loads which add the value in R1 to the byte offset
-//    to load from. The "opcode" field determines which loads are "indexing".
-//  Arithmetic instructions
-//    These instructions perform simple operations, like addition, on register
-//    values. The result of these instructions is always written into R0. One
-//    argument of the arithmetic operation is R0's value. The other argument
-//    of the arithmetic operation is determined by the "register" field:
-//            If the "register" field is 0 then the immediate value following
-//            the first byte of the instruction is used as the other argument
-//            to the arithmetic operation.
-//            If the "register" field is 1 then R1's value is used as the other
-//            argument to the arithmetic operation.
-//  Conditional jump instructions
-//    These instructions compare register R0's value with another value, and if
-//    the comparison succeeds, jump (i.e. adjust the program counter). The
-//    immediate value that follows the first byte of the instruction
-//    represents the jump target offset, i.e. the value added to the program
-//    counter if the comparison succeeds. The other value compared is
-//    determined by the "register" field:
-//            If the "register" field is 0 then another immediate value
-//            follows the jump target offset. This immediate value is of the
-//            same size as the jump target offset, and represents the value
-//            to compare against.
-//            If the "register" field is 1 then register R1's value is
-//            compared against.
-//    The type of comparison (e.g. equal to, greater than etc) is determined
-//    by the "opcode" field. The comparison interprets both values being
-//    compared as unsigned values.
-//
-//  Miscellaneous details:
-//
-//  Pre-filled temporary memory slot values
-//    When the APF program begins execution, three of the sixteen memory slots
-//    are pre-filled by the interpreter with values that may be useful for
-//    programs:
-//      Slot #11 contains the size (in bytes) of the APF program.
-//      Slot #12 contains the total size of the APF buffer (program + data).
-//      Slot #13 is filled with the IPv4 header length. This value is calculated
-//               by loading the first byte of the IPv4 header and taking the
-//               bottom 4 bits and multiplying their value by 4. This value is
-//               set to zero if the first 4 bits after the link layer header are
-//               not 4, indicating not IPv4.
-//      Slot #14 is filled with size of the packet in bytes, including the
-//               link-layer header if any.
-//      Slot #15 is filled with the filter age in seconds. This is the number of
-//               seconds since the AP sent the program to the chipset. This may
-//               be used by filters that should have a particular lifetime. For
-//               example, it can be used to rate-limit particular packets to one
-//               every N seconds.
-//  Special jump targets:
-//    When an APF program executes a jump to the byte immediately after the last
-//      byte of the progam (i.e., one byte past the end of the program), this
-//      signals the program has completed and determined the packet should be
-//      passed to the AP.
-//    When an APF program executes a jump two bytes past the end of the program,
-//      this signals the program has completed and determined the packet should
-//      be dropped.
-//  Jump if byte sequence doesn't match:
-//    This is a special instruction to facilitate matching long sequences of
-//    bytes in the packet. Initially it is encoded like a conditional jump
-//    instruction with two exceptions:
-//      The first byte of the instruction is always followed by two immediate
-//        fields: The first immediate field is the jump target offset like other
-//        conditional jump instructions. The second immediate field specifies the
-//        number of bytes to compare.
-//      These two immediate fields are followed by a sequence of bytes. These
-//        bytes are compared with the bytes in the packet starting from the
-//        position specified by the value of the register specified by the
-//        "register" field of the instruction.
+/* A brief overview of APF:
+ *
+ * APF machine is composed of:
+ *  1. A read-only program consisting of bytecodes as described below.
+ *  2. Two 32-bit registers, called R0 and R1.
+ *  3. Sixteen 32-bit temporary memory slots (cleared between packets).
+ *  4. A read-only packet.
+ *  5. An optional read-write transmit buffer.
+ * The program is executed by the interpreter below and parses the packet
+ * to determine if the application processor (AP) should be woken up to
+ * handle the packet or if it can be dropped.  The program may also choose
+ * to allocate/transmit/deallocate the transmit buffer.
+ *
+ * APF bytecode description:
+ *
+ * The APF interpreter uses big-endian byte order for loads from the packet
+ * and for storing immediates in instructions.
+ *
+ * Each instruction starts with a byte composed of:
+ *  Top 5 bits form "opcode" field, see *_OPCODE defines below.
+ *  Next 2 bits form "size field", which indicates the length of an immediate
+ *  value which follows the first byte.  Values in this field:
+ *                 0 => immediate value is 0 and no bytes follow.
+ *                 1 => immediate value is 1 byte big.
+ *                 2 => immediate value is 2 bytes big.
+ *                 3 => immediate value is 4 bytes big.
+ *  Bottom bit forms "register" field, which (usually) indicates which register
+ *  this instruction operates on.
+ *
+ *  There are four main categories of instructions:
+ *  Load instructions
+ *    These instructions load byte(s) of the packet into a register.
+ *    They load either 1, 2 or 4 bytes, as determined by the "opcode" field.
+ *    They load into the register specified by the "register" field.
+ *    The immediate value that follows the first byte of the instruction is
+ *    the byte offset from the beginning of the packet to load from.
+ *    There are "indexing" loads which add the value in R1 to the byte offset
+ *    to load from. The "opcode" field determines which loads are "indexing".
+ *  Arithmetic instructions
+ *    These instructions perform simple operations, like addition, on register
+ *    values. The result of these instructions is always written into R0. One
+ *    argument of the arithmetic operation is R0's value. The other argument
+ *    of the arithmetic operation is determined by the "register" field:
+ *            If the "register" field is 0 then the immediate value following
+ *            the first byte of the instruction is used as the other argument
+ *            to the arithmetic operation.
+ *            If the "register" field is 1 then R1's value is used as the other
+ *            argument to the arithmetic operation.
+ *  Conditional jump instructions
+ *    These instructions compare register R0's value with another value, and if
+ *    the comparison succeeds, jump (i.e. adjust the program counter). The
+ *    immediate value that follows the first byte of the instruction
+ *    represents the jump target offset, i.e. the value added to the program
+ *    counter if the comparison succeeds. The other value compared is
+ *    determined by the "register" field:
+ *            If the "register" field is 0 then another immediate value
+ *            follows the jump target offset. This immediate value is of the
+ *            same size as the jump target offset, and represents the value
+ *            to compare against.
+ *            If the "register" field is 1 then register R1's value is
+ *            compared against.
+ *    The type of comparison (e.g. equal to, greater than etc) is determined
+ *    by the "opcode" field. The comparison interprets both values being
+ *    compared as unsigned values.
+ *  Miscellaneous instructions
+ *    Instructions for:
+ *      - allocating/transmitting/deallocating transmit buffer
+ *      - building the transmit packet (copying bytes into it)
+ *      - reading/writing the data section
+ *
+ *  Miscellaneous details:
+ *
+ *  Pre-filled temporary memory slot values
+ *    When the APF program begins execution, six of the sixteen memory slots
+ *    are pre-filled by the interpreter with values that may be useful for
+ *    programs:
+ *      #0 to #7 are zero initialized.
+ *      Slot #8  is initialized with apf version (on APF >4).
+ *      Slot #9  this is slot #15 with greater resolution (1/16384ths of a second)
+ *      Slot #10 starts at zero, implicitly used as tx buffer output pointer.
+ *      Slot #11 contains the size (in bytes) of the APF program.
+ *      Slot #12 contains the total size of the APF program + data.
+ *      Slot #13 is filled with the IPv4 header length. This value is calculated
+ *               by loading the first byte of the IPv4 header and taking the
+ *               bottom 4 bits and multiplying their value by 4. This value is
+ *               set to zero if the first 4 bits after the link layer header are
+ *               not 4, indicating not IPv4.
+ *      Slot #14 is filled with size of the packet in bytes, including the
+ *               ethernet link-layer header.
+ *      Slot #15 is filled with the filter age in seconds. This is the number of
+ *               seconds since the host installed the program. This may
+ *               be used by filters that should have a particular lifetime. For
+ *               example, it can be used to rate-limit particular packets to one
+ *               every N seconds.
+ *  Special jump targets:
+ *    When an APF program executes a jump to the byte immediately after the last
+ *      byte of the program (i.e., one byte past the end of the program), this
+ *      signals the program has completed and determined the packet should be
+ *      passed to the AP.
+ *    When an APF program executes a jump two bytes past the end of the program,
+ *      this signals the program has completed and determined the packet should
+ *      be dropped.
+ *  Jump if byte sequence doesn't match:
+ *    This is a special instruction to facilitate matching long sequences of
+ *    bytes in the packet. Initially it is encoded like a conditional jump
+ *    instruction with two exceptions:
+ *      The first byte of the instruction is always followed by two immediate
+ *        fields: The first immediate field is the jump target offset like other
+ *        conditional jump instructions. The second immediate field specifies the
+ *        number of bytes to compare.
+ *      These two immediate fields are followed by a sequence of bytes. These
+ *        bytes are compared with the bytes in the packet starting from the
+ *        position specified by the value of the register specified by the
+ *        "register" field of the instruction.
+ */
 
 // Number of temporary memory slots, see ldm/stm instructions.
 #define MEMORY_ITEMS 16
 // Upon program execution, some temporary memory slots are prefilled:
 
-// Offset inside the output buffer where the next byte of output packet should
-// be written to.
-#define MEMORY_OFFSET_OUTPUT_BUFFER_OFFSET 10
-#define MEMORY_OFFSET_PROGRAM_SIZE 11     // Size of program (in bytes)
-#define MEMORY_OFFSET_DATA_SIZE 12        // Total size of program + data
-#define MEMORY_OFFSET_IPV4_HEADER_SIZE 13 // 4*([APF_FRAME_HEADER_SIZE]&15)
-#define MEMORY_OFFSET_PACKET_SIZE 14      // Size of packet in bytes.
-#define MEMORY_OFFSET_FILTER_AGE 15       // Age since filter installed in seconds.
+typedef union {
+  struct {
+    u32 pad[8];               // 0..7
+    u32 apf_version;          // 8:  Initialized with apf_version()
+    u32 filter_age_16384ths;  // 9:  Age since filter installed in 1/16384 seconds.
+    u32 tx_buf_offset;        // 10: Offset in tx_buf where next byte will be written
+    u32 program_size;         // 11: Size of program (in bytes)
+    u32 ram_len;              // 12: Total size of program + data, ie. ram_len
+    u32 ipv4_header_size;     // 13: 4*([APF_FRAME_HEADER_SIZE]&15)
+    u32 packet_size;          // 14: Size of packet in bytes.
+    u32 filter_age;           // 15: Age since filter installed in seconds.
+  } named;
+  u32 slot[MEMORY_ITEMS];
+} memory_type;
 
-// Leave 0 opcode unused as it's a good indicator of accidental incorrect execution (e.g. data).
-#define LDB_OPCODE 1    // Load 1 byte from immediate offset, e.g. "ldb R0, [5]"
+/* ---------------------------------------------------------------------------------------------- */
+
+// Standard opcodes.
+
+/* Unconditionally pass (if R=0) or drop (if R=1) packet and optionally increment counter.
+ * An optional non-zero unsigned immediate value can be provided to encode the counter number.
+ * The counter is located (-4 * counter number) bytes from the end of the data region.
+ * It is a U32 big-endian value and is always incremented by 1.
+ * This is more or less equivalent to: lddw R0, -4*N; add R0, 1; stdw R0, -4*N; {pass,drop}
+ * e.g. "pass", "pass 1", "drop", "drop 1"
+ */
+#define PASSDROP_OPCODE 0
+
+#define LDB_OPCODE 1    // Load 1 byte  from immediate offset, e.g. "ldb R0, [5]"
 #define LDH_OPCODE 2    // Load 2 bytes from immediate offset, e.g. "ldh R0, [5]"
 #define LDW_OPCODE 3    // Load 4 bytes from immediate offset, e.g. "ldw R0, [5]"
-#define LDBX_OPCODE 4   // Load 1 byte from immediate offset plus register, e.g. "ldbx R0, [5+R0]"
-#define LDHX_OPCODE 5   // Load 2 byte from immediate offset plus register, e.g. "ldhx R0, [5+R0]"
-#define LDWX_OPCODE 6   // Load 4 byte from immediate offset plus register, e.g. "ldwx R0, [5+R0]"
+#define LDBX_OPCODE 4   // Load 1 byte  from immediate offset plus register, e.g. "ldbx R0, [5+R0]"
+#define LDHX_OPCODE 5   // Load 2 bytes from immediate offset plus register, e.g. "ldhx R0, [5+R0]"
+#define LDWX_OPCODE 6   // Load 4 bytes from immediate offset plus register, e.g. "ldwx R0, [5+R0]"
 #define ADD_OPCODE 7    // Add, e.g. "add R0,5"
 #define MUL_OPCODE 8    // Multiply, e.g. "mul R0,5"
 #define DIV_OPCODE 9    // Divide, e.g. "div R0,5"
@@ -154,19 +184,34 @@
 #define JGT_OPCODE 17   // Compare greater than and branch, e.g. "jgt R0,5,label"
 #define JLT_OPCODE 18   // Compare less than and branch, e.g. "jlt R0,5,label"
 #define JSET_OPCODE 19  // Compare any bits set and branch, e.g. "jset R0,5,label"
-#define JNEBS_OPCODE 20 // Compare not equal byte sequence, e.g. "jnebs R0,5,label,0x1122334455"
+#define JBSMATCH_OPCODE 20 // Compare byte sequence [R=0 not] equal, e.g. "jbsne R0,2,label,0x1122"
+                           // NOTE: Only APFv6+ implements R=1 'jbseq' version
 #define EXT_OPCODE 21   // Immediate value is one of *_EXT_OPCODE
-#define LDDW_OPCODE 22  // Load 4 bytes from data address (register + simm): "lddw R0, [5+R1]"
-#define STDW_OPCODE 23  // Store 4 bytes to data address (register + simm): "stdw R0, [5+R1]"
-#define WRITE_OPCODE 24 // Write 1, 2 or 4 bytes imm to the output buffer, e.g. "WRITE 5"
-// Copy the data from input packet or APF data region to output buffer. Register bit is
-// used to specify the source of data copy: R=0 means copy from packet, R=1 means copy
-// from APF data region. The source offset is encoded in the first imm and the copy length
-// is encoded in the second imm. "e.g. MEMCOPY(R=0), 5, 5"
-#define MEMCOPY_OPCODE 25
+#define LDDW_OPCODE 22  // Load 4 bytes from data address (register + signed imm): "lddw R0, [5+R1]"
+                        // LDDW/STDW in APFv6+ *mode* load/store from counter specified in imm.
+#define STDW_OPCODE 23  // Store 4 bytes to data address (register + signed imm): "stdw R0, [5+R1]"
 
-// Extended opcodes. These all have an opcode of EXT_OPCODE
-// and specify the actual opcode in the immediate field.
+/* Write 1, 2 or 4 byte immediate to the output buffer and auto-increment the output buffer pointer.
+ * Immediate length field specifies size of write.  R must be 0.  imm_len != 0.
+ * e.g. "write 5"
+ */
+#define WRITE_OPCODE 24
+
+/* Copy bytes from input packet/APF program/data region to output buffer and
+ * auto-increment the output buffer pointer.
+ * Register bit is used to specify the source of data copy.
+ * R=0 means copy from packet.
+ * R=1 means copy from APF program/data region.
+ * The source offset is stored in imm1, copy length is stored in u8 imm2.
+ * e.g. "pktcopy 0, 16" or "datacopy 0, 16"
+ */
+#define PKTDATACOPY_OPCODE 25
+
+/* ---------------------------------------------------------------------------------------------- */
+
+// Extended opcodes.
+// These all have an opcode of EXT_OPCODE and specify the actual opcode in the immediate field.
+
 #define LDM_EXT_OPCODE 0   // Load from temporary memory, e.g. "ldm R0,5"
   // Values 0-15 represent loading the different temporary memory slots.
 #define STM_EXT_OPCODE 16  // Store to temporary memory, e.g. "stm R0,5"
@@ -175,21 +220,82 @@
 #define NEG_EXT_OPCODE 33  // Negate, e.g. "neg R0"
 #define SWAP_EXT_OPCODE 34 // Swap, e.g. "swap R0,R1"
 #define MOV_EXT_OPCODE 35  // Move, e.g. "move R0,R1"
-#define ALLOC_EXT_OPCODE 36 // Allocate buffer, "e.g. ALLOC R0"
-#define TRANS_EXT_OPCODE 37 // Transmit buffer, "e.g. TRANS R0"
-#define EWRITE1_EXT_OPCODE 38 // Write 1 byte from register to the output buffer, e.g. "EWRITE1 R0"
-#define EWRITE2_EXT_OPCODE 39 // Write 2 bytes from register to the output buffer, e.g. "EWRITE2 R0"
-#define EWRITE4_EXT_OPCODE 40 // Write 4 bytes from register to the output buffer, e.g. "EWRITE4 R0"
-// Copy the data from input packet to output buffer. The source offset is encoded as [Rx + second imm].
-// The copy length is encoded in the third imm. "e.g. EPKTCOPY [R0 + 5], 5"
-#define EPKTCOPY 41
-// Copy the data from APF data region to output buffer. The source offset is encoded as [Rx + second imm].
-// The copy length is encoded in the third imm. "e.g. EDATACOPY [R0 + 5], 5"
-#define EDATACOPY 42
-//  It is executed as a jump, it tells how many bytes of the program regions
-//  are used to store the data and followed by the actual data bytes.
-// "e.g. data 5, abcde"
-#define DATA_EXT_OPCODE 43
+
+/* Allocate writable output buffer.
+ * R=0: register R0 specifies the length
+ * R=1: length provided in u16 imm2
+ * e.g. "allocate R0" or "allocate 123"
+ * On failure automatically executes 'pass 3'
+ */
+#define ALLOCATE_EXT_OPCODE 36
+/* Transmit and deallocate the buffer (transmission can be delayed until the program
+ * terminates).  Length of buffer is the output buffer pointer (0 means discard).
+ * R=1 iff udp style L4 checksum
+ * u8 imm2 - ip header offset from start of buffer (255 for non-ip packets)
+ * u8 imm3 - offset from start of buffer to store L4 checksum (255 for no L4 checksum)
+ * u8 imm4 - offset from start of buffer to begin L4 checksum calculation (present iff imm3 != 255)
+ * u16 imm5 - partial checksum value to include in L4 checksum (present iff imm3 != 255)
+ * "e.g. transmit"
+ */
+#define TRANSMIT_EXT_OPCODE 37
+/* Write 1, 2 or 4 byte value from register to the output buffer and auto-increment the
+ * output buffer pointer.
+ * e.g. "ewrite1 r0" or "ewrite2 r1"
+ */
+#define EWRITE1_EXT_OPCODE 38
+#define EWRITE2_EXT_OPCODE 39
+#define EWRITE4_EXT_OPCODE 40
+
+/* Copy bytes from input packet/APF program/data region to output buffer and
+ * auto-increment the output buffer pointer.
+ * Register bit is used to specify the source of data copy.
+ * R=0 means copy from packet.
+ * R=1 means copy from APF program/data region.
+ * The source offset is stored in R0, copy length is stored in u8 imm2 or R1.
+ * e.g. "epktcopy r0, 16", "edatacopy r0, 16", "epktcopy r0, r1", "edatacopy r0, r1"
+ */
+#define EPKTDATACOPYIMM_EXT_OPCODE 41
+#define EPKTDATACOPYR1_EXT_OPCODE 42
+/* Jumps if the UDP payload content (starting at R0) does [not] match one
+ * of the specified QNAMEs in question records, applying case insensitivity.
+ * SAFE version PASSES corrupt packets, while the other one DROPS.
+ * R=0/1 meaning 'does not match'/'matches'
+ * R0: Offset to UDP payload content
+ * imm1: Extended opcode
+ * imm2: Jump label offset
+ * imm3(u8): Question type (PTR/SRV/TXT/A/AAAA)
+ * imm4(bytes): null terminated list of null terminated LV-encoded QNAMEs
+ * e.g.: "jdnsqeq R0,label,0xc,\002aa\005local\0\0", "jdnsqne R0,label,0xc,\002aa\005local\0\0"
+ */
+#define JDNSQMATCH_EXT_OPCODE 43
+#define JDNSQMATCHSAFE_EXT_OPCODE 45
+/* Jumps if the UDP payload content (starting at R0) does [not] match one
+ * of the specified NAMEs in answers/authority/additional records, applying
+ * case insensitivity.
+ * SAFE version PASSES corrupt packets, while the other one DROPS.
+ * R=0/1 meaning 'does not match'/'matches'
+ * R0: Offset to UDP payload content
+ * imm1: Extended opcode
+ * imm2: Jump label offset
+ * imm3(bytes): null terminated list of null terminated LV-encoded NAMEs
+ * e.g.: "jdnsaeq R0,label,0xc,\002aa\005local\0\0", "jdnsane R0,label,0xc,\002aa\005local\0\0"
+ */
+#define JDNSAMATCH_EXT_OPCODE 44
+#define JDNSAMATCHSAFE_EXT_OPCODE 46
+
+/* Jump if register is [not] one of the list of values
+ * R bit - specifies the register (R0/R1) to test
+ * imm1: Extended opcode
+ * imm2: Jump label offset
+ * imm3(u8): top 5 bits - number of following u8/be16/be32 values - 1
+ *        middle 2 bits - 1..4 length of immediates
+ *        bottom 1 bit  - =0 jmp if in set, =1 if not in set
+ * imm4(imm3 * 1/2/3/4 bytes): the *UNIQUE* values to compare against
+ */
+#define JONEOF_EXT_OPCODE 47
+
+// This extended opcode is used to implement PKTDATACOPY_OPCODE
+#define PKTDATACOPYIMM_EXT_OPCODE 65536
 
 #define EXTRACT_OPCODE(i) (((i) >> 3) & 31)
 #define EXTRACT_REGISTER(i) ((i) & 1)
diff --git a/v5/apf_assemble_test.xml b/v5/apf_assemble_test.xml
new file mode 100644
index 0000000..5e3c00b
--- /dev/null
+++ b/v5/apf_assemble_test.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Copyright (C) 2024 The Android Open Source Project
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<configuration description="Config for running APF interpreter assembly test through atest">
+    <option name="test-suite-tag" value="apf_assemble_test" />
+    <option name="null-device" value="true" />
+    <test class="com.android.tradefed.testtype.binary.ExecutableHostTest" >
+        <option name="binary" value="apf_assemble_test.sh" />
+        <!-- Script assumes a relative path with the tests/ folders -->
+        <option name="relative-path-execution" value="true" />
+        <option name="per-binary-timeout" value="30s" />
+    </test>
+</configuration>
diff --git a/v5/apf_checksum.h b/v5/apf_checksum.h
new file mode 120000
index 0000000..71ae895
--- /dev/null
+++ b/v5/apf_checksum.h
@@ -0,0 +1 @@
+../apf_checksum.h
\ No newline at end of file
diff --git a/v5/apf_defs.h b/v5/apf_defs.h
new file mode 120000
index 0000000..7e1dfa0
--- /dev/null
+++ b/v5/apf_defs.h
@@ -0,0 +1 @@
+../apf_defs.h
\ No newline at end of file
diff --git a/v5/apf_dns.h b/v5/apf_dns.h
new file mode 120000
index 0000000..504778f
--- /dev/null
+++ b/v5/apf_dns.h
@@ -0,0 +1 @@
+../apf_dns.h
\ No newline at end of file
diff --git a/v5/apf_interpreter.c b/v5/apf_interpreter.c
index 6679c72..06af66d 100644
--- a/v5/apf_interpreter.c
+++ b/v5/apf_interpreter.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2023, The Android Open Source Project
+ * Copyright 2024, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,397 +16,1061 @@
 
 #include "apf_interpreter.h"
 
-// TODO: Remove the dependency of the standard library and make the interpreter self-contained.
-#include <string.h>// For memcmp
+#include <string.h>  /* For memcmp, memcpy, memset */
 
-#include "apf.h"
+#if __GNUC__ >= 7 || __clang__
+#define FALLTHROUGH __attribute__((fallthrough))
+#else
+#define FALLTHROUGH
+#endif
 
-// User hook for interpreter debug tracing.
+typedef enum { False, True } Boolean;
+
+/* Begin include of apf_defs.h */
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+
+typedef enum {
+  error_program = -2,
+  error_packet = -1,
+  nomatch = False,
+  match = True
+} match_result_type;
+
+#define ETH_P_IP	0x0800
+#define ETH_P_IPV6	0x86DD
+
+#define ETH_HLEN	14
+#define IPV4_HLEN	20
+#define IPV6_HLEN	40
+#define TCP_HLEN	20
+#define UDP_HLEN	8
+
+#define FUNC(x) x; x
+/* End include of apf_defs.h */
+/* Begin include of apf.h */
+/*
+ * Copyright 2024, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_APF_APF_H
+#define ANDROID_APF_APF_H
+
+/* A brief overview of APF:
+ *
+ * APF machine is composed of:
+ *  1. A read-only program consisting of bytecodes as described below.
+ *  2. Two 32-bit registers, called R0 and R1.
+ *  3. Sixteen 32-bit temporary memory slots (cleared between packets).
+ *  4. A read-only packet.
+ *  5. An optional read-write transmit buffer.
+ * The program is executed by the interpreter below and parses the packet
+ * to determine if the application processor (AP) should be woken up to
+ * handle the packet or if it can be dropped.  The program may also choose
+ * to allocate/transmit/deallocate the transmit buffer.
+ *
+ * APF bytecode description:
+ *
+ * The APF interpreter uses big-endian byte order for loads from the packet
+ * and for storing immediates in instructions.
+ *
+ * Each instruction starts with a byte composed of:
+ *  Top 5 bits form "opcode" field, see *_OPCODE defines below.
+ *  Next 2 bits form "size field", which indicates the length of an immediate
+ *  value which follows the first byte.  Values in this field:
+ *                 0 => immediate value is 0 and no bytes follow.
+ *                 1 => immediate value is 1 byte big.
+ *                 2 => immediate value is 2 bytes big.
+ *                 3 => immediate value is 4 bytes big.
+ *  Bottom bit forms "register" field, which (usually) indicates which register
+ *  this instruction operates on.
+ *
+ *  There are four main categories of instructions:
+ *  Load instructions
+ *    These instructions load byte(s) of the packet into a register.
+ *    They load either 1, 2 or 4 bytes, as determined by the "opcode" field.
+ *    They load into the register specified by the "register" field.
+ *    The immediate value that follows the first byte of the instruction is
+ *    the byte offset from the beginning of the packet to load from.
+ *    There are "indexing" loads which add the value in R1 to the byte offset
+ *    to load from. The "opcode" field determines which loads are "indexing".
+ *  Arithmetic instructions
+ *    These instructions perform simple operations, like addition, on register
+ *    values. The result of these instructions is always written into R0. One
+ *    argument of the arithmetic operation is R0's value. The other argument
+ *    of the arithmetic operation is determined by the "register" field:
+ *            If the "register" field is 0 then the immediate value following
+ *            the first byte of the instruction is used as the other argument
+ *            to the arithmetic operation.
+ *            If the "register" field is 1 then R1's value is used as the other
+ *            argument to the arithmetic operation.
+ *  Conditional jump instructions
+ *    These instructions compare register R0's value with another value, and if
+ *    the comparison succeeds, jump (i.e. adjust the program counter). The
+ *    immediate value that follows the first byte of the instruction
+ *    represents the jump target offset, i.e. the value added to the program
+ *    counter if the comparison succeeds. The other value compared is
+ *    determined by the "register" field:
+ *            If the "register" field is 0 then another immediate value
+ *            follows the jump target offset. This immediate value is of the
+ *            same size as the jump target offset, and represents the value
+ *            to compare against.
+ *            If the "register" field is 1 then register R1's value is
+ *            compared against.
+ *    The type of comparison (e.g. equal to, greater than etc) is determined
+ *    by the "opcode" field. The comparison interprets both values being
+ *    compared as unsigned values.
+ *  Miscellaneous instructions
+ *    Instructions for:
+ *      - allocating/transmitting/deallocating transmit buffer
+ *      - building the transmit packet (copying bytes into it)
+ *      - reading/writing the data section
+ *
+ *  Miscellaneous details:
+ *
+ *  Pre-filled temporary memory slot values
+ *    When the APF program begins execution, six of the sixteen memory slots
+ *    are pre-filled by the interpreter with values that may be useful for
+ *    programs:
+ *      #0 to #7 are zero initialized.
+ *      Slot #8  is initialized with apf version (on APF >4).
+ *      Slot #9  this is slot #15 with greater resolution (1/16384ths of a second)
+ *      Slot #10 starts at zero, implicitly used as tx buffer output pointer.
+ *      Slot #11 contains the size (in bytes) of the APF program.
+ *      Slot #12 contains the total size of the APF program + data.
+ *      Slot #13 is filled with the IPv4 header length. This value is calculated
+ *               by loading the first byte of the IPv4 header and taking the
+ *               bottom 4 bits and multiplying their value by 4. This value is
+ *               set to zero if the first 4 bits after the link layer header are
+ *               not 4, indicating not IPv4.
+ *      Slot #14 is filled with size of the packet in bytes, including the
+ *               ethernet link-layer header.
+ *      Slot #15 is filled with the filter age in seconds. This is the number of
+ *               seconds since the host installed the program. This may
+ *               be used by filters that should have a particular lifetime. For
+ *               example, it can be used to rate-limit particular packets to one
+ *               every N seconds.
+ *  Special jump targets:
+ *    When an APF program executes a jump to the byte immediately after the last
+ *      byte of the program (i.e., one byte past the end of the program), this
+ *      signals the program has completed and determined the packet should be
+ *      passed to the AP.
+ *    When an APF program executes a jump two bytes past the end of the program,
+ *      this signals the program has completed and determined the packet should
+ *      be dropped.
+ *  Jump if byte sequence doesn't match:
+ *    This is a special instruction to facilitate matching long sequences of
+ *    bytes in the packet. Initially it is encoded like a conditional jump
+ *    instruction with two exceptions:
+ *      The first byte of the instruction is always followed by two immediate
+ *        fields: The first immediate field is the jump target offset like other
+ *        conditional jump instructions. The second immediate field specifies the
+ *        number of bytes to compare.
+ *      These two immediate fields are followed by a sequence of bytes. These
+ *        bytes are compared with the bytes in the packet starting from the
+ *        position specified by the value of the register specified by the
+ *        "register" field of the instruction.
+ */
+
+/* Number of temporary memory slots, see ldm/stm instructions. */
+#define MEMORY_ITEMS 16
+/* Upon program execution, some temporary memory slots are prefilled: */
+
+typedef union {  /* The temporary memory slots, reachable either by field name or by index. */
+  struct {
+    u32 pad[8];               /* 0..7 */
+    u32 apf_version;          /* 8:  Initialized with apf_version() */
+    u32 filter_age_16384ths;  /* 9:  Age since filter installed in 1/16384 seconds. */
+    u32 tx_buf_offset;        /* 10: Offset in tx_buf where next byte will be written */
+    u32 program_size;         /* 11: Size of program (in bytes) */
+    u32 ram_len;              /* 12: Total size of program + data, ie. ram_len */
+    u32 ipv4_header_size;     /* 13: 4*(packet[ETH_HLEN]&15), only set for IPv4 packets */
+    u32 packet_size;          /* 14: Size of packet in bytes. */
+    u32 filter_age;           /* 15: Age since filter installed in seconds. */
+  } named;
+  u32 slot[MEMORY_ITEMS];     /* Indexed view sharing storage with the named fields above. */
+} memory_type;
+
+/* ---------------------------------------------------------------------------------------------- */
+
+/* Standard opcodes. */
+
+/* Unconditionally pass (if R=0) or drop (if R=1) packet and optionally increment counter.
+ * An optional non-zero unsigned immediate value can be provided to encode the counter number.
+ * The counter is located (-4 * counter number) bytes from the end of the data region.
+ * It is a U32 big-endian value and is always incremented by 1.
+ * This is more or less equivalent to: lddw R0, -4*N; add R0, 1; stdw R0, -4*N; {pass,drop}
+ * e.g. "pass", "pass 1", "drop", "drop 1"
+ */
+#define PASSDROP_OPCODE 0
+
+#define LDB_OPCODE 1    /* Load 1 byte  from immediate offset, e.g. "ldb R0, [5]" */
+#define LDH_OPCODE 2    /* Load 2 bytes from immediate offset, e.g. "ldh R0, [5]" */
+#define LDW_OPCODE 3    /* Load 4 bytes from immediate offset, e.g. "ldw R0, [5]" */
+#define LDBX_OPCODE 4   /* Load 1 byte  from immediate offset plus register, e.g. "ldbx R0, [5+R0]" */
+#define LDHX_OPCODE 5   /* Load 2 bytes from immediate offset plus register, e.g. "ldhx R0, [5+R0]" */
+#define LDWX_OPCODE 6   /* Load 4 bytes from immediate offset plus register, e.g. "ldwx R0, [5+R0]" */
+#define ADD_OPCODE 7    /* Add, e.g. "add R0,5" */
+#define MUL_OPCODE 8    /* Multiply, e.g. "mul R0,5" */
+#define DIV_OPCODE 9    /* Divide, e.g. "div R0,5" */
+#define AND_OPCODE 10   /* And, e.g. "and R0,5" */
+#define OR_OPCODE 11    /* Or, e.g. "or R0,5" */
+#define SH_OPCODE 12    /* Left shift, e.g. "sh R0, 5" or "sh R0, -5" (shifts right) */
+#define LI_OPCODE 13    /* Load signed immediate, e.g. "li R0,5" */
+#define JMP_OPCODE 14   /* Unconditional jump, e.g. "jmp label" */
+#define JEQ_OPCODE 15   /* Compare equal and branch, e.g. "jeq R0,5,label" */
+#define JNE_OPCODE 16   /* Compare not equal and branch, e.g. "jne R0,5,label" */
+#define JGT_OPCODE 17   /* Compare greater than and branch, e.g. "jgt R0,5,label" */
+#define JLT_OPCODE 18   /* Compare less than and branch, e.g. "jlt R0,5,label" */
+#define JSET_OPCODE 19  /* Compare any bits set and branch, e.g. "jset R0,5,label" */
+#define JBSMATCH_OPCODE 20 /* Compare byte sequence [R=0 not] equal, e.g. "jbsne R0,2,label,0x1122" */
+                           /* NOTE: Only APFv6+ implements R=1 'jbseq' version */
+#define EXT_OPCODE 21   /* Immediate value is one of *_EXT_OPCODE */
+#define LDDW_OPCODE 22  /* Load 4 bytes from data address (register + signed imm): "lddw R0, [5+R1]" */
+                        /* LDDW/STDW in APFv6+ *mode* load/store from counter specified in imm. */
+#define STDW_OPCODE 23  /* Store 4 bytes to data address (register + signed imm): "stdw R0, [5+R1]" */
+
+/* Write 1, 2 or 4 byte immediate to the output buffer and auto-increment the output buffer pointer.
+ * Immediate length field specifies size of write.  R must be 0.  imm_len != 0.
+ * e.g. "write 5"
+ */
+#define WRITE_OPCODE 24
+
+/* Copy bytes from input packet/APF program/data region to output buffer and
+ * auto-increment the output buffer pointer.
+ * Register bit is used to specify the source of data copy.
+ * R=0 means copy from packet.
+ * R=1 means copy from APF program/data region.
+ * The source offset is stored in imm1, copy length is stored in u8 imm2.
+ * e.g. "pktcopy 0, 16" or "datacopy 0, 16"
+ */
+#define PKTDATACOPY_OPCODE 25
+
+/* ---------------------------------------------------------------------------------------------- */
+
+/* Extended opcodes. */
+/* These all have an opcode of EXT_OPCODE and specify the actual opcode in the immediate field. */
+
+#define LDM_EXT_OPCODE 0   /* Load from temporary memory, e.g. "ldm R0,5" */
+  /* Values 0-15 represent loading the different temporary memory slots. */
+#define STM_EXT_OPCODE 16  /* Store to temporary memory, e.g. "stm R0,5" */
+  /* Values 16-31 represent storing to the different temporary memory slots. */
+#define NOT_EXT_OPCODE 32  /* Not, e.g. "not R0" */
+#define NEG_EXT_OPCODE 33  /* Negate, e.g. "neg R0" */
+#define SWAP_EXT_OPCODE 34 /* Swap, e.g. "swap R0,R1" */
+#define MOV_EXT_OPCODE 35  /* Move, e.g. "move R0,R1" */
+
+/* Allocate writable output buffer.
+ * R=0: register R0 specifies the length
+ * R=1: length provided in u16 imm2
+ * e.g. "allocate R0" or "allocate 123"
+ * On failure automatically executes 'pass 3'
+ */
+#define ALLOCATE_EXT_OPCODE 36
+/* Transmit and deallocate the buffer (transmission can be delayed until the program
+ * terminates).  Length of buffer is the output buffer pointer (0 means discard).
+ * R=1 iff udp style L4 checksum
+ * u8 imm2 - ip header offset from start of buffer (255 for non-ip packets)
+ * u8 imm3 - offset from start of buffer to store L4 checksum (255 for no L4 checksum)
+ * u8 imm4 - offset from start of buffer to begin L4 checksum calculation (present iff imm3 != 255)
+ * u16 imm5 - partial checksum value to include in L4 checksum (present iff imm3 != 255)
+ * e.g. "transmit"
+ */
+#define TRANSMIT_EXT_OPCODE 37
+/* Write 1, 2 or 4 byte value from register to the output buffer and auto-increment the
+ * output buffer pointer.
+ * e.g. "ewrite1 r0" or "ewrite2 r1"
+ */
+#define EWRITE1_EXT_OPCODE 38
+#define EWRITE2_EXT_OPCODE 39
+#define EWRITE4_EXT_OPCODE 40
+
+/* Copy bytes from input packet/APF program/data region to output buffer and
+ * auto-increment the output buffer pointer.
+ * Register bit is used to specify the source of data copy.
+ * R=0 means copy from packet.
+ * R=1 means copy from APF program/data region.
+ * The source offset is stored in R0, copy length is stored in u8 imm2 or R1.
+ * e.g. "epktcopy r0, 16", "edatacopy r0, 16", "epktcopy r0, r1", "edatacopy r0, r1"
+ */
+#define EPKTDATACOPYIMM_EXT_OPCODE 41
+#define EPKTDATACOPYR1_EXT_OPCODE 42
+/* Jumps if the UDP payload content (starting at R0) does [not] match one
+ * of the specified QNAMEs in question records, applying case insensitivity.
+ * SAFE version PASSES corrupt packets, while the other one DROPS.
+ * R=0/1 meaning 'does not match'/'matches'
+ * R0: Offset to UDP payload content
+ * imm1: Extended opcode
+ * imm2: Jump label offset
+ * imm3(u8): Question type (PTR/SRV/TXT/A/AAAA)
+ * imm4(bytes): null terminated list of null terminated LV-encoded QNAMEs
+ * e.g.: "jdnsqeq R0,label,0xc,\002aa\005local\0\0", "jdnsqne R0,label,0xc,\002aa\005local\0\0"
+ */
+#define JDNSQMATCH_EXT_OPCODE 43
+#define JDNSQMATCHSAFE_EXT_OPCODE 45
+/* Jumps if the UDP payload content (starting at R0) does [not] match one
+ * of the specified NAMEs in answers/authority/additional records, applying
+ * case insensitivity.
+ * SAFE version PASSES corrupt packets, while the other one DROPS.
+ * R=0/1 meaning 'does not match'/'matches'
+ * R0: Offset to UDP payload content
+ * imm1: Extended opcode
+ * imm2: Jump label offset
+ * imm3(bytes): null terminated list of null terminated LV-encoded NAMEs
+ * e.g.: "jdnsaeq R0,label,0xc,\002aa\005local\0\0", "jdnsane R0,label,0xc,\002aa\005local\0\0"
+ */
+#define JDNSAMATCH_EXT_OPCODE 44
+#define JDNSAMATCHSAFE_EXT_OPCODE 46
+
+/* Jump if register is [not] one of the list of values
+ * R bit - specifies the register (R0/R1) to test
+ * imm1: Extended opcode
+ * imm2: Jump label offset
+ * imm3(u8): top 5 bits - number of following u8/be16/be32 values - 1
+ *        middle 2 bits - 1..4 length of immediates
+ *        bottom 1 bit  - =0 jmp if in set, =1 if not in set
+ * imm4(imm3 * 1/2/3/4 bytes): the *UNIQUE* values to compare against
+ */
+#define JONEOF_EXT_OPCODE 47
+
+/* This extended opcode is used to implement PKTDATACOPY_OPCODE */
+#define PKTDATACOPYIMM_EXT_OPCODE 65536
+
+#define EXTRACT_OPCODE(i) (((i) >> 3) & 31)
+#define EXTRACT_REGISTER(i) ((i) & 1)
+#define EXTRACT_IMM_LENGTH(i) (((i) >> 1) & 3)
+
+#endif  /* ANDROID_APF_APF_H */
+/* End include of apf.h */
+/* Begin include of apf_utils.h */
+static u32 read_be16(const u8* buf) {  /* big-endian 16-bit load: buf[0] is the high byte */
+    return ((u32)buf[0] << 8) | buf[1];
+}
+
+static void store_be16(u8* const buf, const u16 v) {  /* big-endian 16-bit store */
+    buf[1] = (u8)(v & 0xFF);  /* low byte goes second */
+    buf[0] = (u8)(v >> 8);    /* high byte goes first */
+}
+
+static u8 uppercase(u8 c) {  /* maps 'a'..'z' to 'A'..'Z'; every other byte is returned unchanged */
+    return (c < 'a' || c > 'z') ? c : (u8)(c - ('a' - 'A'));
+}
+/* End include of apf_utils.h */
+/* Begin include of apf_dns.h */
+/**
+ * Compares a (Q)NAME starting at udp[*ofs] with the target name.
+ *
+ * @param needle - non-NULL - pointer to DNS encoded target name to match against.
+ *   example: [11]_googlecast[4]_tcp[5]local[0]  (where [11] is a byte with value 11)
+ *   A length byte of 0xFF stands for any single label; packet bytes are uppercased before compare.
+ * @param needle_bound - non-NULL - points at first invalid byte past needle.
+ * @param udp - non-NULL - pointer to the start of the UDP payload (DNS header).
+ * @param udp_len - length of the UDP payload.
+ * @param ofs - non-NULL - pointer to the offset of the beginning of the (Q)NAME.
+ *   On non-error return will be updated to point to the first unread offset,
+ *   ie. the next position after the (Q)NAME.
+ * @return 1 if matched, 0 if not matched, -1 if error in packet, -2 if error in program (needle overruns needle_bound).
+ */
+FUNC(match_result_type apf_internal_match_single_name(const u8* needle,
+                                    const u8* const needle_bound,
+                                    const u8* const udp,
+                                    const u32 udp_len,
+                                    u32* const ofs)) {
+    u32 first_unread_offset = *ofs;
+    Boolean is_qname_match = True;
+    int lvl;
+
+    /* DNS names are <= 255 characters including terminating 0, since >= 1 char + '.' per level => max. 127 levels */
+    for (lvl = 1; lvl <= 127; ++lvl) {
+        if (*ofs >= udp_len) return error_packet;
+        u8 v = udp[(*ofs)++];
+        if (v >= 0xC0) { /* RFC 1035 4.1.4 - handle message compression */
+            if (*ofs >= udp_len) return error_packet;
+            u8 w = udp[(*ofs)++];
+            if (*ofs > first_unread_offset) first_unread_offset = *ofs;
+            u32 new_ofs = (v - 0xC0) * 256u + w;
+            if (new_ofs >= *ofs) return error_packet;  /* RFC 1035 4.1.4 allows only backward pointers */
+            *ofs = new_ofs;
+        } else if (v > 63) {
+            return error_packet;  /* RFC 1035 2.3.4 - label size is 1..63. */
+        } else if (v) {
+            u8 label_size = v;
+            if (*ofs + label_size > udp_len) return error_packet;
+            if (needle >= needle_bound) return error_program;
+            if (is_qname_match) {
+                u8 len = *needle++;
+                if (len == label_size) {
+                    if (needle + label_size > needle_bound) return error_program;
+                    while (label_size--) {
+                        u8 w = udp[(*ofs)++];
+                        is_qname_match &= (uppercase(w) == *needle++);
+                    }
+                } else {
+                    if (len != 0xFF) is_qname_match = False;  /* 0xFF in the needle accepts any label */
+                    *ofs += label_size;
+                }
+            } else {
+                *ofs += label_size;  /* already a mismatch: only skip over the packet label */
+            }
+        } else { /* reached the end of the name */
+            if (first_unread_offset > *ofs) *ofs = first_unread_offset;
+            if (is_qname_match && needle >= needle_bound) return error_program;  /* no terminator in bounds */
+            return (is_qname_match && *needle == 0) ? match : nomatch;
+        }
+    }
+    return error_packet;  /* too many dns domain name levels */
+}
+
+/**
+ * Check if DNS packet contains any of the target names with the provided
+ * question_type.
+ *
+ * @param needles - non-NULL - pointer to DNS encoded target nameS to match against.
+ *   example: [3]foo[3]com[0][3]bar[3]net[0][0]  -- note ends with an extra NULL byte.
+ * @param needle_bound - non-NULL - points at first invalid byte past needles.
+ * @param udp - non-NULL - pointer to the start of the UDP payload (DNS header).
+ * @param udp_len - length of the UDP payload.
+ * @param question_type - question type to match against or -1 to match answers.
+ *
+ * @return 1 if matched, 0 if not matched, -1 if error in packet, -2 if error in program.
+ */
+FUNC(match_result_type apf_internal_match_names(const u8* needles,
+                              const u8* const needle_bound,
+                              const u8* const udp,
+                              const u32 udp_len,
+                              const int question_type)) {
+    if (udp_len < 12) return error_packet;  /* lack of dns header */
+
+    /* dns header: be16 tid, flags, num_{questions,answers,authority,additional} */
+    u32 num_questions = read_be16(udp + 4);
+    u32 num_answers = read_be16(udp + 6) + read_be16(udp + 8) + read_be16(udp + 10);
+
+    /* loop until we hit final needle, which is a null byte */
+    while (True) {
+        if (needles >= needle_bound) return error_program;
+        if (!*needles) return nomatch;  /* we've run out of needles without finding a match */
+        u32 ofs = 12;  /* dns header is 12 bytes */
+        u32 i;
+        /* match questions */
+        for (i = 0; i < num_questions; ++i) {
+            match_result_type m = apf_internal_match_single_name(needles, needle_bound, udp, udp_len, &ofs);
+            if (m < nomatch) return m;
+            if (ofs + 2 > udp_len) return error_packet;
+            int qtype = (int)read_be16(udp + ofs);
+            ofs += 4; /* skip be16 qtype & qclass */
+            if (question_type == -1) continue;
+            if (m == nomatch) continue;
+            if (qtype == 0xFF /* QTYPE_ANY */ || qtype == question_type) return match;
+        }
+        /* match answers */
+        if (question_type == -1) for (i = 0; i < num_answers; ++i) {
+            match_result_type m = apf_internal_match_single_name(needles, needle_bound, udp, udp_len, &ofs);
+            if (m < nomatch) return m;
+            ofs += 8; /* skip be16 type, class & be32 ttl */
+            if (ofs + 2 > udp_len) return error_packet;
+            ofs += 2 + read_be16(udp + ofs);  /* skip be16 rdata length field, plus length bytes */
+            if (m == match) return match;
+        }
+        /* move needles pointer to the next needle; the bound is re-checked after every */
+        /* step (0xFF marker included) so the loop condition never reads at needle_bound. */
+        do {
+            u8 len = *needles++;
+            if (len != 0xFF && len > 63) return error_program;
+            if (len != 0xFF) needles += len;  /* a 0xFF marker has no label bytes to skip */
+            if (needles >= needle_bound) return error_program;
+        } while (*needles);
+        needles++;  /* skip the NULL byte at the end of *a* DNS name */
+    }
+}
+/* End include of apf_dns.h */
+/* Begin include of apf_checksum.h */
+/**
+ * Add a buffer (max 128kB), read as big endian 16-bit words, on top of 'sum',
+ * then fold and negate it, producing a 16-bit result in [0..FFFE].
+ */
+FUNC(u16 apf_internal_calc_csum(u32 sum, const u8* const buf, const s32 len)) {
+    s32 pos = 0;
+    /* even offsets hold the high byte of a 16-bit word, odd offsets the low byte */
+    for (; pos < len; ++pos) sum += (pos & 1) ? (u32)buf[pos] : ((u32)buf[pos] << 8);
+    sum = (sum >> 16) + (sum & 0xFFFF);  /* max after this is 1FFFE */
+    const u16 folded = (u16)(sum + (sum >> 16));
+    return (u16)~folded;  /* assuming sum > 0 on input, this is in [0..FFFE] */
+}
+
+static u16 fix_udp_csum(u16 csum) {  /* UDP style: a computed checksum of 0 is sent as 0xFFFF */
+    return (csum == 0) ? 0xFFFF : csum;
+}
+
+/**
+ * Calculate and store packet checksums and return dscp.
+ *
+ * @param pkt - pointer to the very start of the to-be-transmitted packet,
+ *              ie. the start of the ethernet header (if one is present)
+ *     WARNING: at minimum 266 bytes of buffer pointed to by 'pkt' pointer
+ *              *MUST* be writable.
+ * (IPv4 header checksum is a 2 byte value, 10 bytes after ip_ofs,
+ * which has a maximum value of 254.  Thus 254[ip_ofs] + 10 + 2[u16] = 266)
+ *
+ * @param len - length of the packet (this may be < 266).
+ * @param ip_ofs - offset from beginning of pkt to IPv4 or IPv6 header:
+ *                 IP version detected based on top nibble of this byte,
+ *                 for IPv4 we will calculate and store IP header checksum,
+ *                 but only for the first 20 bytes of the header,
+ *                 prior to calling this the IPv4 header checksum field
+ *                 must be initialized to the partial checksum of the IPv4
+ *                 options (0 if none)
+ *                 255 means there is no IP header (for example ARP)
+ *                 DSCP will be retrieved from this IP header (0 if none).
+ * @param partial_csum - additional value to include in L4 checksum
+ * @param csum_start - offset from beginning of pkt to begin L4 checksum
+ *                     calculation (until end of pkt specified by len)
+ * @param csum_ofs - offset from beginning of pkt to store L4 checksum
+ *                   255 means do not calculate/store L4 checksum
+ * @param udp - True iff we should generate a UDP style L4 checksum (0 -> 0xFFFF)
+ *
+ * @return 6-bit DSCP value [0..63], garbage on parse error.
+ */
+FUNC(int apf_internal_csum_and_return_dscp(u8* const pkt, const s32 len, const u8 ip_ofs,
+  const u16 partial_csum, const u8 csum_start, const u8 csum_ofs, const Boolean udp)) {
+    if (csum_ofs != 255) {  /* 255 means: do not calculate/store an L4 checksum */
+        /* note that apf_internal_calc_csum() treats negative lengths as zero */
+        u16 l4_csum = apf_internal_calc_csum(partial_csum, pkt + csum_start, len - csum_start);
+        if (udp) l4_csum = fix_udp_csum(l4_csum);
+        store_be16(pkt + csum_ofs, l4_csum);
+    }
+    /* ip_ofs == 255 means there is no IP header, in which case DSCP is 0 */
+    if (ip_ofs == 255) return 0;
+    u8* const iph = pkt + ip_ofs;
+    const u8 ip_version = iph[0] >> 4;
+    if (ip_version == 4) {
+        store_be16(iph + 10, apf_internal_calc_csum(0, iph, IPV4_HLEN));
+        return iph[1] >> 2;  /* DSCP */
+    }
+    if (ip_version == 6) return (read_be16(iph) >> 6) & 0x3F;  /* DSCP */
+    return 0;  /* neither IPv4 nor IPv6 */
+}
+/* End include of apf_checksum.h */
+
+/* User hook for interpreter debug tracing. */
 #ifdef APF_TRACE_HOOK
-extern void APF_TRACE_HOOK(uint32_t pc, const uint32_t* regs, const uint8_t* program,
-                           uint32_t program_len, const uint8_t *packet, uint32_t packet_len,
-                           const uint32_t* memory, uint32_t ram_len);
+extern void APF_TRACE_HOOK(u32 pc, const u32* regs, const u8* program,
+                           u32 program_len, const u8 *packet, u32 packet_len,
+                           const u32* memory, u32 ram_len);
 #else
 #define APF_TRACE_HOOK(pc, regs, program, program_len, packet, packet_len, memory, memory_len) \
     do { /* nop*/                                                                              \
     } while (0)
 #endif
 
-// Frame header size should be 14
-#define APF_FRAME_HEADER_SIZE 14
-// Return code indicating "packet" should accepted.
+/* Return code indicating "packet" should be accepted. */
 #define PASS_PACKET 1
-// Return code indicating "packet" should be dropped.
+/* Return code indicating "packet" should be dropped. */
 #define DROP_PACKET 0
-// Verify an internal condition and accept packet if it fails.
+/* Verify an internal condition and accept packet if it fails. */
 #define ASSERT_RETURN(c) if (!(c)) return PASS_PACKET
-// If "c" is of an unsigned type, generate a compile warning that gets promoted to an error.
-// This makes bounds checking simpler because ">= 0" can be avoided. Otherwise adding
-// superfluous ">= 0" with unsigned expressions generates compile warnings.
-#define ENFORCE_UNSIGNED(c) ((c)==(uint32_t)(c))
+/* If "c" is of an unsigned type, generate a compile warning that gets promoted to an error. */
+/* This makes bounds checking simpler because ">= 0" can be avoided. Otherwise adding */
+/* superfluous ">= 0" with unsigned expressions generates compile warnings. */
+#define ENFORCE_UNSIGNED(c) ((c)==(u32)(c))
 
-uint32_t apf_version(void) {
-    return 20231214;
+u32 apf_version(void) {  /* Returns the APF version constant; memory slot #8 is documented as holding it. */
+    return 20240315;
 }
 
-int apf_run(void* ctx, uint8_t* const program, const uint32_t program_len,
-            const uint32_t ram_len, const uint8_t* const packet,
-            const uint32_t packet_len, const uint32_t filter_age_16384ths) {
-// Is offset within program bounds?
-#define IN_PROGRAM_BOUNDS(p) (ENFORCE_UNSIGNED(p) && (p) < program_len)
-// Is offset within ram bounds?
-#define IN_RAM_BOUNDS(p) (ENFORCE_UNSIGNED(p) && (p) < ram_len)
-// Is offset within packet bounds?
-#define IN_PACKET_BOUNDS(p) (ENFORCE_UNSIGNED(p) && (p) < packet_len)
-// Is access to offset |p| length |size| within data bounds?
+typedef struct {  /* Interpreter state bundle handed to the helper functions that take apf_context*. */
+    void *caller_ctx;  /* Passed in to interpreter, passed through to alloc/transmit. */
+    u8* tx_buf;        /* The output buffer pointer */
+    u32 tx_buf_len;    /* The length of the output buffer */
+    u8* program;       /* Pointer to program/data buffer */
+    u32 program_len;   /* Length of the program */
+    u32 ram_len;       /* Length of the entire apf program/data region */
+    const u8* packet;  /* Pointer to input packet buffer */
+    u32 packet_len;    /* Length of the input packet buffer */
+    u8 v6;             /* Set to 1 by first jmpdata (APFv6+) instruction */
+    u32 pc;            /* Program counter: index into program[] of the next byte to decode. */
+    u32 R[2];          /* Register values: R[0] is R0, R[1] is R1. */
+    memory_type mem;   /* Memory slot values. */
+} apf_context;
+
+FUNC(int apf_internal_do_transmit_buffer(apf_context* ctx, u32 pkt_len, u8 dscp)) {
+    const int status = apf_transmit_buffer(ctx->caller_ctx, ctx->tx_buf, pkt_len, dscp);
+    ctx->tx_buf_len = 0;  /* after the call the context no longer references the buffer */
+    ctx->tx_buf = NULL;
+    return status;
+}
+
+static int do_discard_buffer(apf_context* ctx) {  /* hands the tx buffer back with length 0 and clears it from ctx */
+    return apf_internal_do_transmit_buffer(ctx, 0 /* pkt_len */, 0 /* dscp */);
+}
+
+/* Decode a big-endian immediate of 'length' bytes at ctx->pc, advancing ctx->pc past it. */
+/* No range checking is done, but note that program is at least 20 bytes shorter than ram, */
+/* so first few immediates can always be safely decoded without exceeding ram buffer. */
+static u32 decode_imm(apf_context* ctx, u32 length) {
+    u32 value = 0;
+    while (length--) value = (value << 8) | ctx->program[ctx->pc++];
+    return value;
+}
+
+#define DECODE_U8() (ctx->program[ctx->pc++])
+
+static u16 decode_be16(apf_context* ctx) {
+    /* Two program bytes at ctx->pc, most significant first; ctx->pc ends up advanced by 2. */
+    const u8 hi = ctx->program[ctx->pc++];
+    const u8 lo = ctx->program[ctx->pc++];
+    return (u16)((hi << 8) | lo);
+}
+
+static int do_apf_run(apf_context* ctx) {
+/* Is offset within ram bounds? */
+#define IN_RAM_BOUNDS(p) (ENFORCE_UNSIGNED(p) && (p) < ctx->ram_len)
+/* Is offset within packet bounds? */
+#define IN_PACKET_BOUNDS(p) (ENFORCE_UNSIGNED(p) && (p) < ctx->packet_len)
+/* Is access to offset |p| length |size| within data bounds? */
 #define IN_DATA_BOUNDS(p, size) (ENFORCE_UNSIGNED(p) && \
                                  ENFORCE_UNSIGNED(size) && \
-                                 (p) + (size) <= ram_len && \
-                                 (p) >= program_len && \
-                                 (p) + (size) >= (p))  // catch wraparounds
-// Accept packet if not within program bounds
-#define ASSERT_IN_PROGRAM_BOUNDS(p) ASSERT_RETURN(IN_PROGRAM_BOUNDS(p))
-// Accept packet if not within ram bounds
+                                 (p) + (size) <= ctx->ram_len && \
+                                 (p) + (size) >= (p))  /* catch wraparounds */
+/* Accept packet if not within ram bounds */
 #define ASSERT_IN_RAM_BOUNDS(p) ASSERT_RETURN(IN_RAM_BOUNDS(p))
-// Accept packet if not within packet bounds
+/* Accept packet if not within packet bounds */
 #define ASSERT_IN_PACKET_BOUNDS(p) ASSERT_RETURN(IN_PACKET_BOUNDS(p))
-// Accept packet if not within data bounds
+/* Accept packet if not within data bounds */
 #define ASSERT_IN_DATA_BOUNDS(p, size) ASSERT_RETURN(IN_DATA_BOUNDS(p, size))
 
-  // Program counter.
-  uint32_t pc = 0;
-// Accept packet if not within program or not ahead of program counter
-#define ASSERT_FORWARD_IN_PROGRAM(p) ASSERT_RETURN(IN_PROGRAM_BOUNDS(p) && (p) >= pc)
-  // Memory slot values.
-  uint32_t memory[MEMORY_ITEMS] = {};
-  // Fill in pre-filled memory slot values.
-  memory[MEMORY_OFFSET_OUTPUT_BUFFER_OFFSET] = 0;
-  memory[MEMORY_OFFSET_PROGRAM_SIZE] = program_len;
-  memory[MEMORY_OFFSET_DATA_SIZE] = ram_len;
-  memory[MEMORY_OFFSET_PACKET_SIZE] = packet_len;
-  memory[MEMORY_OFFSET_FILTER_AGE] = filter_age_16384ths >> 14;
-  ASSERT_IN_PACKET_BOUNDS(APF_FRAME_HEADER_SIZE);
-  // Only populate if IP version is IPv4.
-  if ((packet[APF_FRAME_HEADER_SIZE] & 0xf0) == 0x40) {
-      memory[MEMORY_OFFSET_IPV4_HEADER_SIZE] = (packet[APF_FRAME_HEADER_SIZE] & 15) * 4;
-  }
-  // Register values.
-  uint32_t registers[2] = {};
-  // Count of instructions remaining to execute. This is done to ensure an
-  // upper bound on execution time. It should never be hit and is only for
-  // safety. Initialize to the number of bytes in the program which is an
-  // upper bound on the number of instructions in the program.
-  uint32_t instructions_remaining = program_len;
+    /* Counters start at end of RAM and count *backwards* so this array takes negative integers. */
+    u32 *counter = (u32*)(ctx->program + ctx->ram_len);
 
-  // The output buffer pointer
-  uint8_t* allocated_buffer = NULL;
-  // The length of the output buffer
-  uint32_t allocated_buffer_len = 0;
-// Is access to offset |p| length |size| within output buffer bounds?
+    ASSERT_IN_PACKET_BOUNDS(ETH_HLEN);
+    /* Only populate if IP version is IPv4. */
+    if ((ctx->packet[ETH_HLEN] & 0xf0) == 0x40) {
+        ctx->mem.named.ipv4_header_size = (ctx->packet[ETH_HLEN] & 15) * 4;
+    }
+    /* Count of instructions remaining to execute. This is done to ensure an */
+    /* upper bound on execution time. It should never be hit and is only for */
+    /* safety. Initialize to the number of bytes in the program which is an */
+    /* upper bound on the number of instructions in the program. */
+    u32 instructions_remaining = ctx->program_len;
+
+/* Is access to offset |p| length |size| within output buffer bounds? */
 #define IN_OUTPUT_BOUNDS(p, size) (ENFORCE_UNSIGNED(p) && \
                                  ENFORCE_UNSIGNED(size) && \
-                                 (p) + (size) <= allocated_buffer_len && \
+                                 (p) + (size) <= ctx->tx_buf_len && \
                                  (p) + (size) >= (p))
-// Accept packet if not write within allocated output buffer
+/* Accept packet if write is not within allocated output buffer */
 #define ASSERT_IN_OUTPUT_BOUNDS(p, size) ASSERT_RETURN(IN_OUTPUT_BOUNDS(p, size))
 
-// Decode the imm length.
-#define DECODE_IMM(value, length)                                              \
-    do {                                                                       \
-        uint32_t i;                                                            \
-        for (i = 0; i < (length) && pc < program_len; i++)                     \
-            value = (value << 8) | program[pc++];                              \
-    } while (0)
+    do {
+        APF_TRACE_HOOK(ctx->pc, ctx->R, ctx->program, ctx->program_len,
+                       ctx->packet, ctx->packet_len, ctx->mem.slot, ctx->ram_len);
+        if (ctx->pc == ctx->program_len + 1) return DROP_PACKET;
+        if (ctx->pc >= ctx->program_len) return PASS_PACKET;
 
-  do {
-      APF_TRACE_HOOK(pc, registers, program, program_len, packet, packet_len, memory, ram_len);
-      if (pc == program_len) {
-          return PASS_PACKET;
-      } else if (pc == (program_len + 1)) {
-          return DROP_PACKET;
-      }
-      ASSERT_IN_PROGRAM_BOUNDS(pc);
-      const uint8_t bytecode = program[pc++];
-      const uint32_t opcode = EXTRACT_OPCODE(bytecode);
-      const uint32_t reg_num = EXTRACT_REGISTER(bytecode);
-#define REG (registers[reg_num])
-#define OTHER_REG (registers[reg_num ^ 1])
-      // All instructions have immediate fields, so load them now.
-      const uint32_t len_field = EXTRACT_IMM_LENGTH(bytecode);
-      uint32_t imm = 0;
-      int32_t signed_imm = 0;
-      if (len_field != 0) {
-          const uint32_t imm_len = 1 << (len_field - 1);
-          ASSERT_FORWARD_IN_PROGRAM(pc + imm_len - 1);
-          DECODE_IMM(imm, imm_len);
-          // Sign extend imm into signed_imm.
-          signed_imm = (int32_t) (imm << ((4 - imm_len) * 8));
-          signed_imm >>= (4 - imm_len) * 8;
-      }
+        const u8 bytecode = ctx->program[ctx->pc++];
+        const u32 opcode = EXTRACT_OPCODE(bytecode);
+        const u32 reg_num = EXTRACT_REGISTER(bytecode);
+#define REG (ctx->R[reg_num])
+#define OTHER_REG (ctx->R[reg_num ^ 1])
+        /* All instructions have immediate fields, so load them now. */
+        const u32 len_field = EXTRACT_IMM_LENGTH(bytecode);
+        u32 imm = 0;
+        s32 signed_imm = 0;
+        if (len_field != 0) {
+            const u32 imm_len = 1 << (len_field - 1);
+            imm = decode_imm(ctx, imm_len); /* 1st imm, at worst bytes 1-4 past opcode/program_len */
+            /* Sign extend imm into signed_imm. */
+            signed_imm = (s32)(imm << ((4 - imm_len) * 8));
+            signed_imm >>= (4 - imm_len) * 8;
+        }
 
-      switch (opcode) {
+        /* See comment at ADD_OPCODE for the reason for ARITH_REG/arith_imm/arith_signed_imm. */
+#define ARITH_REG (ctx->R[reg_num & ctx->v6])
+        u32 arith_imm = (ctx->v6) ? (len_field ? imm : OTHER_REG) : (reg_num ? ctx->R[1] : imm);
+        s32 arith_signed_imm = (ctx->v6) ? (len_field ? signed_imm : (s32)OTHER_REG) : (reg_num ? (s32)ctx->R[1] : signed_imm);
+
+        u32 pktcopy_src_offset = 0;  /* used for various pktdatacopy opcodes */
+        switch (opcode) {
+          case PASSDROP_OPCODE: {  /* APFv6+ */
+            if (len_field > 2) return PASS_PACKET;  /* max 64K counters (ie. imm < 64K) */
+            if (imm) {
+                if (4 * imm > ctx->ram_len) return PASS_PACKET;
+                counter[-(s32)imm]++;
+            }
+            return reg_num ? DROP_PACKET : PASS_PACKET;
+          }
           case LDB_OPCODE:
           case LDH_OPCODE:
           case LDW_OPCODE:
           case LDBX_OPCODE:
           case LDHX_OPCODE:
           case LDWX_OPCODE: {
-              uint32_t offs = imm;
-              if (opcode >= LDBX_OPCODE) {
-                  // Note: this can overflow and actually decrease offs.
-                  offs += registers[1];
-              }
-              ASSERT_IN_PACKET_BOUNDS(offs);
-              uint32_t load_size = 0;
-              switch (opcode) {
-                  case LDB_OPCODE:
-                  case LDBX_OPCODE:
-                    load_size = 1;
-                    break;
-                  case LDH_OPCODE:
-                  case LDHX_OPCODE:
-                    load_size = 2;
-                    break;
-                  case LDW_OPCODE:
-                  case LDWX_OPCODE:
-                    load_size = 4;
-                    break;
-                  // Immediately enclosing switch statement guarantees
-                  // opcode cannot be any other value.
-              }
-              const uint32_t end_offs = offs + (load_size - 1);
-              // Catch overflow/wrap-around.
-              ASSERT_RETURN(end_offs >= offs);
-              ASSERT_IN_PACKET_BOUNDS(end_offs);
-              uint32_t val = 0;
-              while (load_size--)
-                  val = (val << 8) | packet[offs++];
-              REG = val;
-              break;
+            u32 offs = imm;
+            /* Note: this can overflow and actually decrease offs. */
+            if (opcode >= LDBX_OPCODE) offs += ctx->R[1];
+            ASSERT_IN_PACKET_BOUNDS(offs);
+            u32 load_size = 0;
+            switch (opcode) {
+              case LDB_OPCODE:
+              case LDBX_OPCODE:
+                load_size = 1;
+                break;
+              case LDH_OPCODE:
+              case LDHX_OPCODE:
+                load_size = 2;
+                break;
+              case LDW_OPCODE:
+              case LDWX_OPCODE:
+                load_size = 4;
+                break;
+              /* Immediately enclosing switch statement guarantees */
+              /* opcode cannot be any other value. */
+            }
+            const u32 end_offs = offs + (load_size - 1);
+            /* Catch overflow/wrap-around. */
+            ASSERT_RETURN(end_offs >= offs);
+            ASSERT_IN_PACKET_BOUNDS(end_offs);
+            u32 val = 0;
+            while (load_size--) val = (val << 8) | ctx->packet[offs++];
+            REG = val;
+            break;
           }
           case JMP_OPCODE:
-              // This can jump backwards. Infinite looping prevented by instructions_remaining.
-              pc += imm;
-              break;
+            if (reg_num && !ctx->v6) {  /* APFv6+ */
+                /* First invocation of APFv6 jmpdata instruction */
+                counter[-1] = 0x12345678;  /* endianness marker */
+                counter[-2]++;  /* total packets ++ */
+                ctx->v6 = (u8)True;
+            }
+            /* This can jump backwards. Infinite looping prevented by instructions_remaining. */
+            ctx->pc += imm;
+            break;
           case JEQ_OPCODE:
           case JNE_OPCODE:
           case JGT_OPCODE:
           case JLT_OPCODE:
-          case JSET_OPCODE:
-          case JNEBS_OPCODE: {
-              // Load second immediate field.
-              uint32_t cmp_imm = 0;
-              if (reg_num == 1) {
-                  cmp_imm = registers[1];
-              } else if (len_field != 0) {
-                  uint32_t cmp_imm_len = 1 << (len_field - 1);
-                  ASSERT_FORWARD_IN_PROGRAM(pc + cmp_imm_len - 1);
-                  DECODE_IMM(cmp_imm, cmp_imm_len);
-              }
-              switch (opcode) {
-                  case JEQ_OPCODE:
-                      if (registers[0] == cmp_imm)
-                          pc += imm;
-                      break;
-                  case JNE_OPCODE:
-                      if (registers[0] != cmp_imm)
-                          pc += imm;
-                      break;
-                  case JGT_OPCODE:
-                      if (registers[0] > cmp_imm)
-                          pc += imm;
-                      break;
-                  case JLT_OPCODE:
-                      if (registers[0] < cmp_imm)
-                          pc += imm;
-                      break;
-                  case JSET_OPCODE:
-                      if (registers[0] & cmp_imm)
-                          pc += imm;
-                      break;
-                  case JNEBS_OPCODE: {
-                      // cmp_imm is size in bytes of data to compare.
-                      // pc is offset of program bytes to compare.
-                      // imm is jump target offset.
-                      // REG is offset of packet bytes to compare.
-                      ASSERT_FORWARD_IN_PROGRAM(pc + cmp_imm - 1);
-                      ASSERT_IN_PACKET_BOUNDS(REG);
-                      const uint32_t last_packet_offs = REG + cmp_imm - 1;
-                      ASSERT_RETURN(last_packet_offs >= REG);
-                      ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
-                      if (memcmp(program + pc, packet + REG, cmp_imm))
-                          pc += imm;
-                      // skip past comparison bytes
-                      pc += cmp_imm;
-                      break;
-                  }
-              }
-              break;
+          case JSET_OPCODE: {
+            /* with len_field == 0, we have imm == 0 and thus a jmp +0, ie. a no-op */
+            if (len_field == 0) break;
+            /* Load second immediate field. */
+            u32 cmp_imm = 0;
+            if (reg_num == 1) {
+                cmp_imm = ctx->R[1];
+            } else {
+                u32 cmp_imm_len = 1 << (len_field - 1);
+                cmp_imm = decode_imm(ctx, cmp_imm_len); /* 2nd imm, at worst 8 bytes past prog_len */
+            }
+            switch (opcode) {
+              case JEQ_OPCODE:  if (ctx->R[0] == cmp_imm) ctx->pc += imm; break;
+              case JNE_OPCODE:  if (ctx->R[0] != cmp_imm) ctx->pc += imm; break;
+              case JGT_OPCODE:  if (ctx->R[0] >  cmp_imm) ctx->pc += imm; break;
+              case JLT_OPCODE:  if (ctx->R[0] <  cmp_imm) ctx->pc += imm; break;
+              case JSET_OPCODE: if (ctx->R[0] &  cmp_imm) ctx->pc += imm; break;
+            }
+            break;
           }
-          case ADD_OPCODE:
-              registers[0] += reg_num ? registers[1] : imm;
-              break;
-          case MUL_OPCODE:
-              registers[0] *= reg_num ? registers[1] : imm;
-              break;
-          case DIV_OPCODE: {
-              const uint32_t div_operand = reg_num ? registers[1] : imm;
-              ASSERT_RETURN(div_operand);
-              registers[0] /= div_operand;
-              break;
+          case JBSMATCH_OPCODE: {
+            /* with len_field == 0, we have imm == cmp_imm == 0 and thus a jmp +0, ie. a no-op */
+            if (len_field == 0) break;
+            /* Load second immediate field. */
+            u32 cmp_imm_len = 1 << (len_field - 1);
+            u32 cmp_imm = decode_imm(ctx, cmp_imm_len); /* 2nd imm, at worst 8 bytes past prog_len */
+            /* cmp_imm is size in bytes of data to compare. */
+            /* pc is offset of program bytes to compare. */
+            /* imm is jump target offset. */
+            /* R0 is offset of packet bytes to compare. */
+            if (cmp_imm > 0xFFFF) return PASS_PACKET;
+            Boolean do_jump = !reg_num;
+            /* pc < program_len < ram_len < 2GiB, thus pc + cmp_imm cannot wrap */
+            if (!IN_RAM_BOUNDS(ctx->pc + cmp_imm - 1)) return PASS_PACKET;
+            ASSERT_IN_PACKET_BOUNDS(ctx->R[0]);
+            const u32 last_packet_offs = ctx->R[0] + cmp_imm - 1;
+            ASSERT_RETURN(last_packet_offs >= ctx->R[0]);
+            ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
+            do_jump ^= !memcmp(ctx->program + ctx->pc, ctx->packet + ctx->R[0], cmp_imm);
+            /* skip past comparison bytes */
+            ctx->pc += cmp_imm;
+            if (do_jump) ctx->pc += imm;
+            break;
           }
-          case AND_OPCODE:
-              registers[0] &= reg_num ? registers[1] : imm;
-              break;
-          case OR_OPCODE:
-              registers[0] |= reg_num ? registers[1] : imm;
-              break;
-          case SH_OPCODE: {
-              const int32_t shift_val = reg_num ? (int32_t)registers[1] : signed_imm;
-              if (shift_val > 0)
-                  registers[0] <<= shift_val;
-              else
-                  registers[0] >>= -shift_val;
-              break;
+          /* There is a difference in APFv4 and APFv6 arithmetic behaviour! */
+          /* APFv4:  R[0] op= Rbit ? R[1] : imm;  (and it thus doesn't make sense to have R=1 && len_field>0) */
+          /* APFv6+: REG  op= len_field ? imm : OTHER_REG;  (note: this is *DIFFERENT* with R=1 len_field==0) */
+          /* Furthermore APFv4 uses unsigned imm (except SH), while APFv6 uses signed_imm for ADD/AND/SH. */
+          case ADD_OPCODE: ARITH_REG += (ctx->v6) ? (u32)arith_signed_imm : arith_imm; break;
+          case MUL_OPCODE: ARITH_REG *= arith_imm; break;
+          case AND_OPCODE: ARITH_REG &= (ctx->v6) ? (u32)arith_signed_imm : arith_imm; break;
+          case OR_OPCODE:  ARITH_REG |= arith_imm; break;
+          case DIV_OPCODE: {  /* see above comment! */
+            const u32 div_operand = arith_imm;
+            ASSERT_RETURN(div_operand);
+            ARITH_REG /= div_operand;
+            break;
+          }
+          case SH_OPCODE: {  /* see above comment! */
+            if (arith_signed_imm >= 0)
+                ARITH_REG <<= arith_signed_imm;
+            else
+                ARITH_REG >>= -arith_signed_imm;
+            break;
           }
           case LI_OPCODE:
-              REG = (uint32_t) signed_imm;
-              break;
+            REG = (u32)signed_imm;
+            break;
+          case PKTDATACOPY_OPCODE:
+            pktcopy_src_offset = imm;
+            imm = PKTDATACOPYIMM_EXT_OPCODE;
+            FALLTHROUGH;
           case EXT_OPCODE:
-              if (
-// If LDM_EXT_OPCODE is 0 and imm is compared with it, a compiler error will result,
-// instead just enforce that imm is unsigned (so it's always greater or equal to 0).
-#if LDM_EXT_OPCODE == 0
-                  ENFORCE_UNSIGNED(imm) &&
-#else
-                  imm >= LDM_EXT_OPCODE &&
-#endif
-                  imm < (LDM_EXT_OPCODE + MEMORY_ITEMS)) {
-                REG = memory[imm - LDM_EXT_OPCODE];
-              } else if (imm >= STM_EXT_OPCODE && imm < (STM_EXT_OPCODE + MEMORY_ITEMS)) {
-                memory[imm - STM_EXT_OPCODE] = REG;
-              } else switch (imm) {
-                  case NOT_EXT_OPCODE:
-                    REG = ~REG;
-                    break;
-                  case NEG_EXT_OPCODE:
-                    REG = -REG;
-                    break;
-                  case SWAP_EXT_OPCODE: {
-                    uint32_t tmp = REG;
-                    REG = OTHER_REG;
-                    OTHER_REG = tmp;
-                    break;
-                  }
-                  case MOV_EXT_OPCODE:
-                    REG = OTHER_REG;
-                    break;
-                  case ALLOC_EXT_OPCODE:
-                    ASSERT_RETURN(allocated_buffer == NULL);
-                    allocated_buffer_len = REG;
-                    allocated_buffer =
-                        apf_allocate_buffer(ctx, allocated_buffer_len);
-                    ASSERT_RETURN(allocated_buffer != NULL);
-                    memory[MEMORY_OFFSET_OUTPUT_BUFFER_OFFSET] = 0;
-                    break;
-                  case TRANS_EXT_OPCODE:
-                    ASSERT_RETURN(allocated_buffer != NULL);
-                    uint32_t pkt_len = memory[MEMORY_OFFSET_OUTPUT_BUFFER_OFFSET];
-                    // If pkt_len > allocate_buffer_len, it means sth. wrong
-                    // happened and the allocated_buffer should be deallocated.
-                    if (pkt_len > allocated_buffer_len) {
-                        apf_transmit_buffer(
-                            ctx,
-                            allocated_buffer,
-                            0 /* len */,
-                            0 /* dscp */);
-                        return PASS_PACKET;
-                    }
-                    // TODO: calculate packet checksum and get dscp
-                    apf_transmit_buffer(
-                        ctx,
-                        allocated_buffer,
-                        pkt_len,
-                        0 /* dscp */);
-                    allocated_buffer = NULL;
-                    break;
-                  case DATA_EXT_OPCODE: {
-                    ASSERT_FORWARD_IN_PROGRAM(pc + 1);
-                    uint32_t skip_len = 0;
-                    DECODE_IMM(skip_len, 2);
-                    ASSERT_FORWARD_IN_PROGRAM(pc + skip_len - 1);
-                    pc += skip_len;
-                    break;
-                  }
-                  // Unknown extended opcode
-                  default:
-                    // Bail out
+            if (/* imm >= LDM_EXT_OPCODE &&  -- but note imm is u32 and LDM_EXT_OPCODE is 0 */
+                imm < (LDM_EXT_OPCODE + MEMORY_ITEMS)) {
+                REG = ctx->mem.slot[imm - LDM_EXT_OPCODE];
+            } else if (imm >= STM_EXT_OPCODE && imm < (STM_EXT_OPCODE + MEMORY_ITEMS)) {
+                ctx->mem.slot[imm - STM_EXT_OPCODE] = REG;
+            } else switch (imm) {
+              case NOT_EXT_OPCODE: REG = ~REG;      break;
+              case NEG_EXT_OPCODE: REG = -REG;      break;
+              case MOV_EXT_OPCODE: REG = OTHER_REG; break;
+              case SWAP_EXT_OPCODE: {
+                u32 tmp = REG;
+                REG = OTHER_REG;
+                OTHER_REG = tmp;
+                break;
+              }
+              case ALLOCATE_EXT_OPCODE:
+                ASSERT_RETURN(ctx->tx_buf == NULL);
+                if (reg_num == 0) {
+                    ctx->tx_buf_len = REG;
+                } else {
+                    ctx->tx_buf_len = decode_be16(ctx); /* 2nd imm, at worst 6 B past prog_len */
+                }
+                /* checksumming functions require a minimum 266 byte buffer for correctness */
+                if (ctx->tx_buf_len < 266) ctx->tx_buf_len = 266;
+                ctx->tx_buf = apf_allocate_buffer(ctx->caller_ctx, ctx->tx_buf_len);
+                if (!ctx->tx_buf) {  /* allocate failure */
+                    ctx->tx_buf_len = 0;
+                    counter[-3]++;
                     return PASS_PACKET;
+                }
+                memset(ctx->tx_buf, 0, ctx->tx_buf_len);
+                ctx->mem.named.tx_buf_offset = 0;
+                break;
+              case TRANSMIT_EXT_OPCODE:
+                ASSERT_RETURN(ctx->tx_buf);
+                u32 pkt_len = ctx->mem.named.tx_buf_offset;
+                /* If pkt_len > tx_buf_len, something went wrong */
+                /* and the tx_buf should be deallocated. */
+                if (pkt_len > ctx->tx_buf_len) {
+                    do_discard_buffer(ctx);
+                    return PASS_PACKET;
+                }
+                /* tx_buf_len cannot be large because we'd run out of RAM, */
+                /* so the above unsigned comparison effectively guarantees casting pkt_len */
+                /* to a signed value does not result in it going negative. */
+                u8 ip_ofs = DECODE_U8();              /* 2nd imm, at worst 5 B past prog_len */
+                u8 csum_ofs = DECODE_U8();            /* 3rd imm, at worst 6 B past prog_len */
+                u8 csum_start = 0;
+                u16 partial_csum = 0;
+                if (csum_ofs < 255) {
+                    csum_start = DECODE_U8();         /* 4th imm, at worst 7 B past prog_len */
+                    partial_csum = decode_be16(ctx);  /* 5th imm, at worst 9 B past prog_len */
+                }
+                int dscp = apf_internal_csum_and_return_dscp(ctx->tx_buf, (s32)pkt_len, ip_ofs,
+                                                partial_csum, csum_start, csum_ofs,
+                                                (Boolean)reg_num);
+                int ret = apf_internal_do_transmit_buffer(ctx, pkt_len, dscp);
+                if (ret) { counter[-4]++; return PASS_PACKET; } /* transmit failure */
+                break;
+              case EPKTDATACOPYIMM_EXT_OPCODE:  /* 41 */
+              case EPKTDATACOPYR1_EXT_OPCODE:   /* 42 */
+                pktcopy_src_offset = ctx->R[0];
+                FALLTHROUGH;
+              case PKTDATACOPYIMM_EXT_OPCODE: { /* 65536 */
+                u32 copy_len = ctx->R[1];
+                if (imm != EPKTDATACOPYR1_EXT_OPCODE) {
+                    copy_len = DECODE_U8();  /* 2nd imm, at worst 8 bytes past prog_len */
+                }
+                ASSERT_RETURN(ctx->tx_buf);
+                u32 dst_offs = ctx->mem.named.tx_buf_offset;
+                ASSERT_IN_OUTPUT_BOUNDS(dst_offs, copy_len);
+                if (reg_num == 0) {  /* copy from packet */
+                    ASSERT_IN_PACKET_BOUNDS(pktcopy_src_offset);
+                    const u32 last_packet_offs = pktcopy_src_offset + copy_len - 1;
+                    ASSERT_RETURN(last_packet_offs >= pktcopy_src_offset);
+                    ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
+                    memcpy(ctx->tx_buf + dst_offs, ctx->packet + pktcopy_src_offset, copy_len);
+                } else {  /* copy from data */
+                    ASSERT_IN_RAM_BOUNDS(pktcopy_src_offset + copy_len - 1);
+                    memcpy(ctx->tx_buf + dst_offs, ctx->program + pktcopy_src_offset, copy_len);
+                }
+                dst_offs += copy_len;
+                ctx->mem.named.tx_buf_offset = dst_offs;
+                break;
               }
-              break;
-          case LDDW_OPCODE: {
-              uint32_t offs = OTHER_REG + (uint32_t) signed_imm;
-              uint32_t size = 4;
-              uint32_t val = 0;
-              // Negative offsets wrap around the end of the address space.
-              // This allows us to efficiently access the end of the
-              // address space with one-byte immediates without using %=.
-              if (offs & 0x80000000) {
-                  offs = ram_len + offs;  // unsigned overflow intended
+              case JDNSQMATCH_EXT_OPCODE:       /* 43 */
+              case JDNSAMATCH_EXT_OPCODE:       /* 44 */
+              case JDNSQMATCHSAFE_EXT_OPCODE:   /* 45 */
+              case JDNSAMATCHSAFE_EXT_OPCODE: { /* 46 */
+                const u32 imm_len = 1 << (len_field - 1); /* EXT_OPCODE, thus len_field > 0 */
+                u32 jump_offs = decode_imm(ctx, imm_len); /* 2nd imm, at worst 8 B past prog_len */
+                int qtype = -1;
+                if (imm & 1) { /* JDNSQMATCH & JDNSQMATCHSAFE are *odd* extended opcodes */
+                    qtype = DECODE_U8();  /* 3rd imm, at worst 9 bytes past prog_len */
+                }
+                u32 udp_payload_offset = ctx->R[0];
+                match_result_type match_rst = apf_internal_match_names(ctx->program + ctx->pc,
+                                                          ctx->program + ctx->program_len,
+                                                          ctx->packet + udp_payload_offset,
+                                                          ctx->packet_len - udp_payload_offset,
+                                                          qtype);
+                if (match_rst == error_program) return PASS_PACKET;
+                if (match_rst == error_packet) {
+                    counter[-5]++; /* increment error dns packet counter */
+                    return (imm >= JDNSQMATCHSAFE_EXT_OPCODE) ? PASS_PACKET : DROP_PACKET;
+                }
+                while (ctx->pc + 1 < ctx->program_len &&
+                       (ctx->program[ctx->pc] || ctx->program[ctx->pc + 1])) {
+                    ctx->pc++;
+                }
+                ctx->pc += 2;  /* skip the final double 0 needle end */
+                /* relies on reg_num in {0,1} and match_rst being {False=0, True=1} */
+                if (!(reg_num ^ (u32)match_rst)) ctx->pc += jump_offs;
+                break;
               }
-              ASSERT_IN_DATA_BOUNDS(offs, size);
-              while (size--)
-                  val = (val << 8) | program[offs++];
-              REG = val;
-              break;
+              case EWRITE1_EXT_OPCODE:
+              case EWRITE2_EXT_OPCODE:
+              case EWRITE4_EXT_OPCODE: {
+                ASSERT_RETURN(ctx->tx_buf);
+                const u32 write_len = 1 << (imm - EWRITE1_EXT_OPCODE);
+                ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
+                u32 i;
+                for (i = 0; i < write_len; ++i) {
+                    ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
+                        (u8)(REG >> (write_len - 1 - i) * 8);
+                }
+                break;
+              }
+              case JONEOF_EXT_OPCODE: {
+                const u32 imm_len = 1 << (len_field - 1); /* ext opcode len_field guaranteed > 0 */
+                u32 jump_offs = decode_imm(ctx, imm_len); /* 2nd imm, at worst 8 B past prog_len */
+                u8 imm3 = DECODE_U8();  /* 3rd imm, at worst 9 bytes past prog_len */
+                Boolean jmp = imm3 & 1;  /* =0 jmp on match, =1 jmp on no match */
+                u8 len = ((imm3 >> 1) & 3) + 1;  /* size [1..4] in bytes of an element */
+                u8 cnt = (imm3 >> 3) + 1;  /* number [1..32] of elements in set */
+                if (ctx->pc + cnt * len > ctx->program_len) return PASS_PACKET;
+                while (cnt--) {
+                    u32 v = 0;
+                    int i;
+                    for (i = 0; i < len; ++i) v = (v << 8) | DECODE_U8();
+                    if (REG == v) jmp ^= True;
+                }
+                if (jmp) ctx->pc += jump_offs;
+                return PASS_PACKET;
+              }
+              default:  /* Unknown extended opcode */
+                return PASS_PACKET;  /* Bail out */
+            }
+            break;
+          case LDDW_OPCODE:
+          case STDW_OPCODE:
+            if (ctx->v6) {
+                if (!imm) return PASS_PACKET;
+                if (imm > 0xFFFF) return PASS_PACKET;
+                if (imm * 4 > ctx->ram_len) return PASS_PACKET;
+                if (opcode == LDDW_OPCODE) {
+                    REG = counter[-(s32)imm];
+                } else {
+                    counter[-(s32)imm] = REG;
+                }
+            } else {
+                u32 offs = OTHER_REG + (u32)signed_imm;
+                /* Negative offsets wrap around the end of the address space. */
+                /* This allows us to efficiently access the end of the */
+                /* address space with one-byte immediates without using %=. */
+                if (offs & 0x80000000) offs += ctx->ram_len;  /* unsigned overflow intended */
+                u32 size = 4;
+                ASSERT_IN_DATA_BOUNDS(offs, size);
+                if (opcode == LDDW_OPCODE) {
+                    u32 val = 0;
+                    while (size--) val = (val << 8) | ctx->program[offs++];
+                    REG = val;
+                } else {
+                    u32 val = REG;
+                    while (size--) {
+                        ctx->program[offs++] = (val >> 24);
+                        val <<= 8;
+                    }
+                }
+            }
+            break;
+          case WRITE_OPCODE: {
+            ASSERT_RETURN(ctx->tx_buf);
+            ASSERT_RETURN(len_field);
+            const u32 write_len = 1 << (len_field - 1);
+            ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
+            u32 i;
+            for (i = 0; i < write_len; ++i) {
+                ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
+                    (u8)(imm >> (write_len - 1 - i) * 8);
+            }
+            break;
           }
-          case STDW_OPCODE: {
-              uint32_t offs = OTHER_REG + (uint32_t) signed_imm;
-              uint32_t size = 4;
-              uint32_t val = REG;
-              // Negative offsets wrap around the end of the address space.
-              // This allows us to efficiently access the end of the
-              // address space with one-byte immediates without using %=.
-              if (offs & 0x80000000) {
-                  offs = ram_len + offs;  // unsigned overflow intended
-              }
-              ASSERT_IN_DATA_BOUNDS(offs, size);
-              while (size--) {
-                  program[offs++] = (val >> 24);
-                  val <<= 8;
-              }
-              break;
-          }
-          case MEMCOPY_OPCODE: {
-              ASSERT_RETURN(allocated_buffer != NULL);
-              ASSERT_RETURN(len_field > 0);
-              uint32_t src_offs = imm;
-              uint32_t copy_len = 0;
-              DECODE_IMM(copy_len, 1);
-              uint32_t dst_offs = memory[MEMORY_OFFSET_OUTPUT_BUFFER_OFFSET];
-              ASSERT_IN_OUTPUT_BOUNDS(dst_offs, copy_len);
-              // reg_num == 0 copy from packet, reg_num == 1 copy from data.
-              if (reg_num == 0) {
-                  ASSERT_IN_PACKET_BOUNDS(src_offs);
-                  const uint32_t last_packet_offs = src_offs + copy_len - 1;
-                  ASSERT_RETURN(last_packet_offs >= src_offs);
-                  ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
-                  memmove(allocated_buffer + dst_offs, packet + src_offs,
-                          copy_len);
-              } else {
-                  ASSERT_IN_RAM_BOUNDS(src_offs + copy_len - 1);
-                  memmove(allocated_buffer + dst_offs, program + src_offs,
-                          copy_len);
-              }
-              dst_offs += copy_len;
-              memory[MEMORY_OFFSET_OUTPUT_BUFFER_OFFSET] = dst_offs;
-              break;
-          }
-          // Unknown opcode
-          default:
-              // Bail out
-              return PASS_PACKET;
-      }
-  } while (instructions_remaining--);
-  return PASS_PACKET;
+          default:  /* Unknown opcode */
+            return PASS_PACKET;  /* Bail out */
+        }
+    } while (instructions_remaining--);
+    return PASS_PACKET;
+}
+
+int apf_run(void* ctx, u32* const program, const u32 program_len,
+            const u32 ram_len, const u8* const packet,
+            const u32 packet_len, const u32 filter_age_16384ths) {  /* public entry point */
+    /* Due to direct 32-bit read/write access to counters at end of ram */
+    /* APFv6 interpreter requires program & ram_len to be 4 byte aligned. */
+    if (3 & (uintptr_t)program) return PASS_PACKET;  /* misaligned program pointer: pass */
+    if (3 & ram_len) return PASS_PACKET;  /* ram_len not a multiple of 4: pass */
+
+    /* We rely on ram_len + 65536 not overflowing, so require ram_len < 2GiB */
+    /* Similarly LDDW/STDW have special meaning for negative ram offsets. */
+    /* We also don't want garbage like program_len == 0xFFFFFFFF */
+    if ((program_len | ram_len) >> 31) return PASS_PACKET;  /* bit 31 set in either length */
+
+    /* APFv6 requires at least 5 u32 counters at the end of ram, this makes counter[-5]++ valid */
+    /* This cannot wrap due to previous check. */
+    if (program_len + 20 > ram_len) return PASS_PACKET;  /* 20 == 5 counters * 4 bytes */
+
+    apf_context apf_ctx = { 0 };  /* zero-init: pc, R[], v6 and tx_buf all start as 0/NULL */
+    apf_ctx.caller_ctx = ctx;  /* opaque; handed to apf_allocate_buffer() by the interpreter */
+    apf_ctx.program = (u8*)program;  /* interpreter addresses program/ram bytewise */
+    apf_ctx.program_len = program_len;
+    apf_ctx.ram_len = ram_len;
+    apf_ctx.packet = packet;
+    apf_ctx.packet_len = packet_len;
+    /* Fill in pre-filled memory slot values. */
+    apf_ctx.mem.named.program_size = program_len;
+    apf_ctx.mem.named.ram_len = ram_len;
+    apf_ctx.mem.named.packet_size = packet_len;
+    apf_ctx.mem.named.apf_version = apf_version();
+    apf_ctx.mem.named.filter_age = filter_age_16384ths >> 14;  /* 16384 == 1 << 14: whole units */
+    apf_ctx.mem.named.filter_age_16384ths = filter_age_16384ths;
+
+    int ret = do_apf_run(&apf_ctx);  /* execute the program against the packet */
+    if (apf_ctx.tx_buf) do_discard_buffer(&apf_ctx);  /* tx buffer still held: discard it */
+    return ret;  /* verdict exactly as produced by do_apf_run() */
 }
diff --git a/v5/apf_interpreter.h b/v5/apf_interpreter.h
index 6ef4cc9..fe3e7af 100644
--- a/v5/apf_interpreter.h
+++ b/v5/apf_interpreter.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2023, The Android Open Source Project
+ * Copyright 2024, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -33,12 +33,15 @@
  * Allocates a buffer for the APF program to build a reply packet.
  *
  * Unless in a critical low memory state, the firmware must allow allocating at
- * least one 1500 byte buffer for every call to apf_run(). The interpreter will
+ * least one 1514 byte buffer for every call to apf_run(). The interpreter will
  * have at most one active allocation at any given time, and will always either
  * transmit or deallocate the buffer before apf_run() returns.
  *
  * It is OK if the firmware decides to limit allocations to at most one per
- * apf_run() invocation.
+ * apf_run() invocation. This allows the firmware to delay transmitting
+ * the buffer until after apf_run() has returned (by keeping track of whether
+ * a buffer was allocated/deallocated/scheduled for transmit) and may
+ * allow the use of a single statically allocated 1514+ byte buffer.
  *
  * The firmware MAY choose to allocate a larger buffer than requested, and
  * give the apf_interpreter a pointer to the middle of the buffer. This will
@@ -49,7 +52,7 @@
  * @param size - the minimum size of buffer to allocate
  * @return the pointer to the allocated region. The function can return NULL to
  *         indicate allocation failure, for example if too many buffers are
- *         pending transmit. Returning NULL will most likely result in the
+ *         pending transmit. Returning NULL will most likely result in
  *         apf_run() returning PASS.
  */
 uint8_t* apf_allocate_buffer(void* ctx, uint32_t size);
@@ -62,27 +65,27 @@
  *
  * The content of the buffer between [ptr, ptr + len) are the bytes to be
  * transmitted, starting from the ethernet header and not including any
- * CRC bytes at the end.
+ * ethernet CRC bytes at the end.
  *
  * The firmware is expected to make its best effort to transmit. If it
  * exhausts retries, or if there is no channel for too long and the transmit
  * queue is full, then it is OK for the packet to be dropped. The firmware should
- * prefer to fail allocation if transmit is likely to fail.
+ * prefer to fail allocation if it can predict transmit will fail.
  *
- * apf_transmit_buffer() should be asynchronous, which means the actual packet
+ * apf_transmit_buffer() may be asynchronous, which means the actual packet
  * transmission can happen sometime after the function returns.
  *
  * @param ctx - unmodified ctx pointer passed into apf_run().
  * @param ptr - pointer to the transmit buffer, must have been previously
- *             returned by apf_allocate_buffer and not deallocated.
+ *              returned by apf_allocate_buffer() and not deallocated.
  * @param len - the number of bytes to be transmitted (possibly less than
  *              the allocated buffer), 0 means don't transmit the buffer
- *              but only deallocate it
- * @param dscp - the upper 6 bits of the TOS field in the IPv4 header or traffic
- *             class field in the IPv6 header.
+ *              but only deallocate it.
+ * @param dscp - value in [0..63] - the upper 6 bits of the TOS field in
+ *               the IPv4 header or traffic class field in the IPv6 header.
  * @return non-zero if the firmware *knows* the transmit will fail, zero if
- *         the firmware thinks the transmit will succeed. Returning an error
- *         will likely result in apf_run() returning PASS.
+ *         the transmit succeeded or the firmware thinks it will succeed.
+ *         Returning an error will likely result in apf_run() returning PASS.
  */
 int apf_transmit_buffer(void* ctx, uint8_t* ptr, uint32_t len, uint8_t dscp);
 
@@ -90,7 +93,7 @@
  * Runs an APF program over a packet.
  *
  * The return value of apf_run indicates whether the packet should
- * be passed or dropped. As a part of apf_run execution, the APF
+ * be passed or dropped. As a part of apf_run() execution, the APF
  * program can call apf_allocate_buffer()/apf_transmit_buffer() to construct
  * a reply packet and transmit it.
  *
@@ -104,25 +107,54 @@
  *        +--------------------+------------------------+
  *
  * @param ctx - pointer to any additional context required for allocation and transmit.
-                may be null if no such context is required. this is opaque to
-                the interpreter and will be passed through unmodified
-                to apf_allocate_buffer() and apf_transmit_buffer() calls.
+ *              May be NULL if no such context is required. This is opaque to
+ *              the interpreter and will be passed through unmodified
+ *              to apf_allocate_buffer() and apf_transmit_buffer() calls.
  * @param program - the program bytecode, followed by the writable data region.
+ *                  Note: this *MUST* be a 4 byte aligned region.
  * @param program_len - the length in bytes of the read-only portion of the APF
  *                    buffer pointed to by {@code program}.
+ *                    This is determined by the size of the loaded APF program.
  * @param ram_len - total length of the APF buffer pointed to by
  *                  {@code program}, including the read-only bytecode
  *                  portion and the read-write data portion.
+ *                  This is expected to be a constant which doesn't change
+ *                  value even when a new APF program is loaded.
+ *                  Note: this *MUST* be a multiple of 4.
  * @param packet - the packet bytes, starting from the ethernet header.
  * @param packet_len - the length of {@code packet} in bytes, not
  *                     including trailers/CRC.
  * @param filter_age_16384ths - the number of 1/16384 seconds since the filter
  *                     was programmed.
  *
- * @return non-zero if packet should be passed, zero if packet should
- *                  be dropped.
+ * @return non-zero if packet should be passed,
+ *         zero if packet should be dropped.
+ *
+ * NOTE: How to calculate filter_age_16384ths:
+ *
+ * - if you have a u64 clock source counting nanoseconds:
+ *     u64 nanoseconds = current_nanosecond_time_u64() - filter_installation_nanosecond_time_u64;
+ *     u32 filter_age_16384ths = (u32)((nanoseconds << 5) / 1953125);
+ *
+ * - if you have a u64 clock source counting microseconds:
+ *     u64 microseconds = current_microsecond_time_u64() - filter_installation_microsecond_time_u64;
+ *     u32 filter_age_16384ths = (u32)((microseconds << 8) / 15625);
+ *
+ * - if you have a u64 clock source counting milliseconds:
+ *     u64 milliseconds = current_millisecond_time_u64() - filter_installation_millisecond_time_u64;
+ *     u32 filter_age_16384ths = (u32)((milliseconds << 11) / 125);
+ *
+ * - if you have a u32 clock source counting milliseconds and cannot use 64-bit arithmetic:
+ *     u32 milliseconds = current_millisecond_time_u32() - filter_installation_millisecond_time_u32;
+ *     u32 filter_age_16384ths = ((((((milliseconds << 4) / 5) << 2) / 5) << 2) / 5) << 3;
+ *   or the less precise:
+ *     u32 filter_age_16384ths = ((milliseconds << 4) / 125) << 7;
+ *
+ * - if you have a u32 clock source counting seconds:
+ *     u32 seconds = current_second_time_u32() - filter_installation_second_time_u32;
+ *     u32 filter_age_16384ths = seconds << 14;
  */
-int apf_run(void* ctx, uint8_t* const program, const uint32_t program_len,
+int apf_run(void* ctx, uint32_t* const program, const uint32_t program_len,
             const uint32_t ram_len, const uint8_t* const packet,
             const uint32_t packet_len, const uint32_t filter_age_16384ths);
 
@@ -130,4 +162,4 @@
 }
 #endif
 
-#endif  // APF_INTERPRETER_V5_H_
+#endif  /* APF_INTERPRETER_V5_H_ */
diff --git a/v5/apf_interpreter_assemble.sh b/v5/apf_interpreter_assemble.sh
new file mode 100755
index 0000000..6650bf0
--- /dev/null
+++ b/v5/apf_interpreter_assemble.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+set -e
+set -u
+
+rename() {
+  sed -r 's@(^|[^A-Za-z0-9_])'"$1"'([^A-Za-z0-9_]|$)@\1'"$2"'\2@g;'
+}
+
+apf_internal_function() {
+  rename "$1" "apf_internal_$1"
+}
+
+do_assemble() {
+  local -r RE_INCLUDE='^#include "([a-z_]+[.]h)"$'
+  local -r RE_UNDEF='^#undef ([_A-Za-z0-9]+)$'
+  local -r RE_DEFINE='^#define ([_A-Za-z0-9]+) (.*)$'
+
+  local line
+  while IFS='' read -r line; do
+    if [[ "${line}" =~ ${RE_INCLUDE} ]]; then
+      local include_name="${BASH_REMATCH[1]}"
+      case "${include_name}" in
+        apf_interpreter.h)
+          echo "#include \"${BASH_REMATCH[1]}\""
+          ;;
+        *)
+          echo "/* Begin include of ${include_name} */"
+          cat "${include_name}"
+          echo "/* End include of ${include_name} */"
+          ;;
+      esac
+    elif [[ "${line}" =~ ${RE_UNDEF} ]]; then
+      case "${BASH_REMATCH[1]}" in
+        bool|true|false) : ;;
+        *) echo "${line}" ;;
+      esac
+    elif [[ "${line}" =~ ${RE_DEFINE} ]]; then
+      case "${BASH_REMATCH[1]}" in
+        bool|true|false) : ;;
+        *) echo "${line}" ;;
+      esac
+    else
+      echo "${line}"
+    fi
+  done < apf_interpreter_source.c \
+  | sed -r 's@(^|[^:])//(.*)$@\1/*\2 */@;'\
+  | rename bool Boolean \
+  | rename true True \
+  | rename false False \
+  | apf_internal_function match_single_name \
+  | apf_internal_function match_names \
+  | apf_internal_function calc_csum \
+  | apf_internal_function csum_and_return_dscp \
+  | apf_internal_function do_transmit_buffer
+  # The above sed converts // comments into /* */ comments for c89,
+  # and converts bool/true/false into Boolean/True/False
+  # and converts non-static functions to have an apf_internal_ prefix
+}
+
+do_test() {
+  diff -q <(do_assemble) apf_interpreter.c
+}
+
+main() {
+  cd "${0%/*}"
+
+  local -r me="${0##*/}"
+  case "${me}" in
+    apf_interpreter_assemble.sh)
+      do_assemble > apf_interpreter.c
+      ;;
+    apf_assemble_test.sh)
+      do_test
+      ;;
+    *)
+      echo "Unknown $0" 1>&2
+      return 1
+      ;;
+  esac
+}
+
+main "$@"; exit
diff --git a/v5/apf_interpreter_source.c b/v5/apf_interpreter_source.c
new file mode 100644
index 0000000..9740a18
--- /dev/null
+++ b/v5/apf_interpreter_source.c
@@ -0,0 +1,546 @@
+/*
+ * Copyright 2024, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "apf_interpreter.h"
+
+#include <string.h>  // For memcmp, memcpy, memset
+
+#if __GNUC__ >= 7 || __clang__
+#define FALLTHROUGH __attribute__((fallthrough))
+#else
+#define FALLTHROUGH
+#endif
+
+#undef bool
+#undef true
+#undef false
+typedef enum { False, True } Boolean;
+#define bool Boolean
+#define true True
+#define false False
+
+#include "apf_defs.h"
+#include "apf.h"
+#include "apf_utils.h"
+#include "apf_dns.h"
+#include "apf_checksum.h"
+
+// User hook for interpreter debug tracing.
+#ifdef APF_TRACE_HOOK
+extern void APF_TRACE_HOOK(u32 pc, const u32* regs, const u8* program,
+                           u32 program_len, const u8 *packet, u32 packet_len,
+                           const u32* memory, u32 ram_len);
+#else
+#define APF_TRACE_HOOK(pc, regs, program, program_len, packet, packet_len, memory, memory_len) \
+    do { /* nop*/                                                                              \
+    } while (0)
+#endif
+
+// Return code indicating "packet" should be accepted.
+#define PASS_PACKET 1
+// Return code indicating "packet" should be dropped.
+#define DROP_PACKET 0
+// Verify an internal condition and accept packet if it fails.
+#define ASSERT_RETURN(c) if (!(c)) return PASS_PACKET
+// If "c" is of an unsigned type, generate a compile warning that gets promoted to an error.
+// This makes bounds checking simpler because ">= 0" can be avoided. Otherwise adding
+// superfluous ">= 0" with unsigned expressions generates compile warnings.
+#define ENFORCE_UNSIGNED(c) ((c)==(u32)(c))
+
+u32 apf_version(void) {
+    return 20240315;
+}
+
+typedef struct {
+    void *caller_ctx;  // Passed in to interpreter, passed through to alloc/transmit.
+    u8* tx_buf;        // The output buffer pointer
+    u32 tx_buf_len;    // The length of the output buffer
+    u8* program;       // Pointer to program/data buffer
+    u32 program_len;   // Length of the program
+    u32 ram_len;       // Length of the entire apf program/data region
+    const u8* packet;  // Pointer to input packet buffer
+    u32 packet_len;    // Length of the input packet buffer
+    u8 v6;             // Set to 1 by first jmpdata (APFv6+) instruction
+    u32 pc;            // Program counter.
+    u32 R[2];          // Register values.
+    memory_type mem;   // Memory slot values.
+} apf_context;
+
+FUNC(int do_transmit_buffer(apf_context* ctx, u32 pkt_len, u8 dscp)) {
+    int ret = apf_transmit_buffer(ctx->caller_ctx, ctx->tx_buf, pkt_len, dscp);
+    ctx->tx_buf = NULL;
+    ctx->tx_buf_len = 0;
+    return ret;
+}
+
+static int do_discard_buffer(apf_context* ctx) {
+    return do_transmit_buffer(ctx, 0 /* pkt_len */, 0 /* dscp */);
+}
+
+// Decode the imm length, does not do range checking.
+// But note that program is at least 20 bytes shorter than ram, so first few
+// immediates can always be safely decoded without exceeding ram buffer.
+static u32 decode_imm(apf_context* ctx, u32 length) {
+    u32 i, v = 0;
+    for (i = 0; i < length; ++i) v = (v << 8) | ctx->program[ctx->pc++];
+    return v;
+}
+
+#define DECODE_U8() (ctx->program[ctx->pc++])
+
+static u16 decode_be16(apf_context* ctx) {
+    u16 v = ctx->program[ctx->pc++];
+    v <<= 8;
+    v |= ctx->program[ctx->pc++];
+    return v;
+}
+
+static int do_apf_run(apf_context* ctx) {
+// Is offset within ram bounds?
+#define IN_RAM_BOUNDS(p) (ENFORCE_UNSIGNED(p) && (p) < ctx->ram_len)
+// Is offset within packet bounds?
+#define IN_PACKET_BOUNDS(p) (ENFORCE_UNSIGNED(p) && (p) < ctx->packet_len)
+// Is access to offset |p| length |size| within data bounds?
+#define IN_DATA_BOUNDS(p, size) (ENFORCE_UNSIGNED(p) && \
+                                 ENFORCE_UNSIGNED(size) && \
+                                 (p) + (size) <= ctx->ram_len && \
+                                 (p) + (size) >= (p))  // catch wraparounds
+// Accept packet if not within ram bounds
+#define ASSERT_IN_RAM_BOUNDS(p) ASSERT_RETURN(IN_RAM_BOUNDS(p))
+// Accept packet if not within packet bounds
+#define ASSERT_IN_PACKET_BOUNDS(p) ASSERT_RETURN(IN_PACKET_BOUNDS(p))
+// Accept packet if not within data bounds
+#define ASSERT_IN_DATA_BOUNDS(p, size) ASSERT_RETURN(IN_DATA_BOUNDS(p, size))
+
+    // Counters start at end of RAM and count *backwards* so this array takes negative integers.
+    u32 *counter = (u32*)(ctx->program + ctx->ram_len);
+
+    ASSERT_IN_PACKET_BOUNDS(ETH_HLEN);
+    // Only populate if IP version is IPv4.
+    if ((ctx->packet[ETH_HLEN] & 0xf0) == 0x40) {
+        ctx->mem.named.ipv4_header_size = (ctx->packet[ETH_HLEN] & 15) * 4;
+    }
+    // Count of instructions remaining to execute. This is done to ensure an
+    // upper bound on execution time. It should never be hit and is only for
+    // safety. Initialize to the number of bytes in the program which is an
+    // upper bound on the number of instructions in the program.
+    u32 instructions_remaining = ctx->program_len;
+
+// Is access to offset |p| length |size| within output buffer bounds?
+#define IN_OUTPUT_BOUNDS(p, size) (ENFORCE_UNSIGNED(p) && \
+                                 ENFORCE_UNSIGNED(size) && \
+                                 (p) + (size) <= ctx->tx_buf_len && \
+                                 (p) + (size) >= (p))
+// Accept packet if the write is not within the allocated output buffer
+#define ASSERT_IN_OUTPUT_BOUNDS(p, size) ASSERT_RETURN(IN_OUTPUT_BOUNDS(p, size))
+
+    do {
+        APF_TRACE_HOOK(ctx->pc, ctx->R, ctx->program, ctx->program_len,
+                       ctx->packet, ctx->packet_len, ctx->mem.slot, ctx->ram_len);
+        if (ctx->pc == ctx->program_len + 1) return DROP_PACKET;
+        if (ctx->pc >= ctx->program_len) return PASS_PACKET;
+
+        const u8 bytecode = ctx->program[ctx->pc++];
+        const u32 opcode = EXTRACT_OPCODE(bytecode);
+        const u32 reg_num = EXTRACT_REGISTER(bytecode);
+#define REG (ctx->R[reg_num])
+#define OTHER_REG (ctx->R[reg_num ^ 1])
+        // All instructions have immediate fields, so load them now.
+        const u32 len_field = EXTRACT_IMM_LENGTH(bytecode);
+        u32 imm = 0;
+        s32 signed_imm = 0;
+        if (len_field != 0) {
+            const u32 imm_len = 1 << (len_field - 1);
+            imm = decode_imm(ctx, imm_len); // 1st imm, at worst bytes 1-4 past opcode/program_len
+            // Sign extend imm into signed_imm.
+            signed_imm = (s32)(imm << ((4 - imm_len) * 8));
+            signed_imm >>= (4 - imm_len) * 8;
+        }
+
+        // See comment at ADD_OPCODE for the reason for ARITH_REG/arith_imm/arith_signed_imm.
+#define ARITH_REG (ctx->R[reg_num & ctx->v6])
+        u32 arith_imm = (ctx->v6) ? (len_field ? imm : OTHER_REG) : (reg_num ? ctx->R[1] : imm);
+        s32 arith_signed_imm = (ctx->v6) ? (len_field ? signed_imm : (s32)OTHER_REG) : (reg_num ? (s32)ctx->R[1] : signed_imm);
+
+        u32 pktcopy_src_offset = 0;  // used for various pktdatacopy opcodes
+        switch (opcode) {
+          case PASSDROP_OPCODE: {  // APFv6+
+            if (len_field > 2) return PASS_PACKET;  // max 64K counters (ie. imm < 64K)
+            if (imm) {
+                if (4 * imm > ctx->ram_len) return PASS_PACKET;
+                counter[-(s32)imm]++;
+            }
+            return reg_num ? DROP_PACKET : PASS_PACKET;
+          }
+          case LDB_OPCODE:
+          case LDH_OPCODE:
+          case LDW_OPCODE:
+          case LDBX_OPCODE:
+          case LDHX_OPCODE:
+          case LDWX_OPCODE: {
+            u32 offs = imm;
+            // Note: this can overflow and actually decrease offs.
+            if (opcode >= LDBX_OPCODE) offs += ctx->R[1];
+            ASSERT_IN_PACKET_BOUNDS(offs);
+            u32 load_size = 0;
+            switch (opcode) {
+              case LDB_OPCODE:
+              case LDBX_OPCODE:
+                load_size = 1;
+                break;
+              case LDH_OPCODE:
+              case LDHX_OPCODE:
+                load_size = 2;
+                break;
+              case LDW_OPCODE:
+              case LDWX_OPCODE:
+                load_size = 4;
+                break;
+              // Immediately enclosing switch statement guarantees
+              // opcode cannot be any other value.
+            }
+            const u32 end_offs = offs + (load_size - 1);
+            // Catch overflow/wrap-around.
+            ASSERT_RETURN(end_offs >= offs);
+            ASSERT_IN_PACKET_BOUNDS(end_offs);
+            u32 val = 0;
+            while (load_size--) val = (val << 8) | ctx->packet[offs++];
+            REG = val;
+            break;
+          }
+          case JMP_OPCODE:
+            if (reg_num && !ctx->v6) {  // APFv6+
+                // First invocation of APFv6 jmpdata instruction
+                counter[-1] = 0x12345678;  // endianness marker
+                counter[-2]++;  // total packets ++
+                ctx->v6 = (u8)true;
+            }
+            // This can jump backwards. Infinite looping prevented by instructions_remaining.
+            ctx->pc += imm;
+            break;
+          case JEQ_OPCODE:
+          case JNE_OPCODE:
+          case JGT_OPCODE:
+          case JLT_OPCODE:
+          case JSET_OPCODE: {
+            // with len_field == 0, we have imm == 0 and thus a jmp +0, ie. a no-op
+            if (len_field == 0) break;
+            // Load second immediate field.
+            u32 cmp_imm = 0;
+            if (reg_num == 1) {
+                cmp_imm = ctx->R[1];
+            } else {
+                u32 cmp_imm_len = 1 << (len_field - 1);
+                cmp_imm = decode_imm(ctx, cmp_imm_len); // 2nd imm, at worst 8 bytes past prog_len
+            }
+            switch (opcode) {
+              case JEQ_OPCODE:  if (ctx->R[0] == cmp_imm) ctx->pc += imm; break;
+              case JNE_OPCODE:  if (ctx->R[0] != cmp_imm) ctx->pc += imm; break;
+              case JGT_OPCODE:  if (ctx->R[0] >  cmp_imm) ctx->pc += imm; break;
+              case JLT_OPCODE:  if (ctx->R[0] <  cmp_imm) ctx->pc += imm; break;
+              case JSET_OPCODE: if (ctx->R[0] &  cmp_imm) ctx->pc += imm; break;
+            }
+            break;
+          }
+          case JBSMATCH_OPCODE: {
+            // with len_field == 0, we have imm == cmp_imm == 0 and thus a jmp +0, ie. a no-op
+            if (len_field == 0) break;
+            // Load second immediate field.
+            u32 cmp_imm_len = 1 << (len_field - 1);
+            u32 cmp_imm = decode_imm(ctx, cmp_imm_len); // 2nd imm, at worst 8 bytes past prog_len
+            // cmp_imm is size in bytes of data to compare.
+            // pc is offset of program bytes to compare.
+            // imm is jump target offset.
+            // R0 is offset of packet bytes to compare.
+            if (cmp_imm > 0xFFFF) return PASS_PACKET;
+            bool do_jump = !reg_num;
+            // pc < program_len < ram_len < 2GiB, thus pc + cmp_imm cannot wrap
+            if (!IN_RAM_BOUNDS(ctx->pc + cmp_imm - 1)) return PASS_PACKET;
+            ASSERT_IN_PACKET_BOUNDS(ctx->R[0]);
+            const u32 last_packet_offs = ctx->R[0] + cmp_imm - 1;
+            ASSERT_RETURN(last_packet_offs >= ctx->R[0]);
+            ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
+            do_jump ^= !memcmp(ctx->program + ctx->pc, ctx->packet + ctx->R[0], cmp_imm);
+            // skip past comparison bytes
+            ctx->pc += cmp_imm;
+            if (do_jump) ctx->pc += imm;
+            break;
+          }
+          // There is a difference in APFv4 and APFv6 arithmetic behaviour!
+          // APFv4:  R[0] op= Rbit ? R[1] : imm;  (and it thus doesn't make sense to have R=1 && len_field>0)
+          // APFv6+: REG  op= len_field ? imm : OTHER_REG;  (note: this is *DIFFERENT* with R=1 len_field==0)
+          // Furthermore APFv4 uses unsigned imm (except SH), while APFv6 uses signed_imm for ADD/AND/SH.
+          case ADD_OPCODE: ARITH_REG += (ctx->v6) ? (u32)arith_signed_imm : arith_imm; break;
+          case MUL_OPCODE: ARITH_REG *= arith_imm; break;
+          case AND_OPCODE: ARITH_REG &= (ctx->v6) ? (u32)arith_signed_imm : arith_imm; break;
+          case OR_OPCODE:  ARITH_REG |= arith_imm; break;
+          case DIV_OPCODE: {  // see above comment!
+            const u32 div_operand = arith_imm;
+            ASSERT_RETURN(div_operand);
+            ARITH_REG /= div_operand;
+            break;
+          }
+          case SH_OPCODE: {  // see above comment!
+            if (arith_signed_imm >= 0)
+                ARITH_REG <<= arith_signed_imm;
+            else
+                ARITH_REG >>= -arith_signed_imm;
+            break;
+          }
+          case LI_OPCODE:
+            REG = (u32)signed_imm;
+            break;
+          case PKTDATACOPY_OPCODE:
+            pktcopy_src_offset = imm;
+            imm = PKTDATACOPYIMM_EXT_OPCODE;
+            FALLTHROUGH;
+          case EXT_OPCODE:
+            if (// imm >= LDM_EXT_OPCODE &&  -- but note imm is u32 and LDM_EXT_OPCODE is 0
+                imm < (LDM_EXT_OPCODE + MEMORY_ITEMS)) {
+                REG = ctx->mem.slot[imm - LDM_EXT_OPCODE];
+            } else if (imm >= STM_EXT_OPCODE && imm < (STM_EXT_OPCODE + MEMORY_ITEMS)) {
+                ctx->mem.slot[imm - STM_EXT_OPCODE] = REG;
+            } else switch (imm) {
+              case NOT_EXT_OPCODE: REG = ~REG;      break;
+              case NEG_EXT_OPCODE: REG = -REG;      break;
+              case MOV_EXT_OPCODE: REG = OTHER_REG; break;
+              case SWAP_EXT_OPCODE: {
+                u32 tmp = REG;
+                REG = OTHER_REG;
+                OTHER_REG = tmp;
+                break;
+              }
+              case ALLOCATE_EXT_OPCODE:
+                ASSERT_RETURN(ctx->tx_buf == NULL);
+                if (reg_num == 0) {
+                    ctx->tx_buf_len = REG;
+                } else {
+                    ctx->tx_buf_len = decode_be16(ctx); // 2nd imm, at worst 6 B past prog_len
+                }
+                // checksumming functions require a minimum 266 byte buffer for correctness
+                if (ctx->tx_buf_len < 266) ctx->tx_buf_len = 266;
+                ctx->tx_buf = apf_allocate_buffer(ctx->caller_ctx, ctx->tx_buf_len);
+                if (!ctx->tx_buf) {  // allocate failure
+                    ctx->tx_buf_len = 0;
+                    counter[-3]++;
+                    return PASS_PACKET;
+                }
+                memset(ctx->tx_buf, 0, ctx->tx_buf_len);
+                ctx->mem.named.tx_buf_offset = 0;
+                break;
+              case TRANSMIT_EXT_OPCODE:
+                ASSERT_RETURN(ctx->tx_buf);
+                u32 pkt_len = ctx->mem.named.tx_buf_offset;
+                // If pkt_len > allocate_buffer_len, it means something went wrong
+                // and the tx_buf should be deallocated.
+                if (pkt_len > ctx->tx_buf_len) {
+                    do_discard_buffer(ctx);
+                    return PASS_PACKET;
+                }
+                // tx_buf_len cannot be large because we'd run out of RAM,
+                // so the above unsigned comparison effectively guarantees casting pkt_len
+                // to a signed value does not result in it going negative.
+                u8 ip_ofs = DECODE_U8();              // 2nd imm, at worst 5 B past prog_len
+                u8 csum_ofs = DECODE_U8();            // 3rd imm, at worst 6 B past prog_len
+                u8 csum_start = 0;
+                u16 partial_csum = 0;
+                if (csum_ofs < 255) {
+                    csum_start = DECODE_U8();         // 4th imm, at worst 7 B past prog_len
+                    partial_csum = decode_be16(ctx);  // 5th imm, at worst 9 B past prog_len
+                }
+                int dscp = csum_and_return_dscp(ctx->tx_buf, (s32)pkt_len, ip_ofs,
+                                                partial_csum, csum_start, csum_ofs,
+                                                (bool)reg_num);
+                int ret = do_transmit_buffer(ctx, pkt_len, dscp);
+                if (ret) { counter[-4]++; return PASS_PACKET; } // transmit failure
+                break;
+              case EPKTDATACOPYIMM_EXT_OPCODE:  // 41
+              case EPKTDATACOPYR1_EXT_OPCODE:   // 42
+                pktcopy_src_offset = ctx->R[0];
+                FALLTHROUGH;
+              case PKTDATACOPYIMM_EXT_OPCODE: { // 65536
+                // Append copy_len bytes from packet (R=0) or program/data ram (R=1) to tx_buf.
+                u32 copy_len = ctx->R[1];
+                if (imm != EPKTDATACOPYR1_EXT_OPCODE) copy_len = DECODE_U8();  // 2nd imm, at worst 8 bytes past prog_len
+                ASSERT_RETURN(ctx->tx_buf);
+                u32 dst_offs = ctx->mem.named.tx_buf_offset;
+                ASSERT_IN_OUTPUT_BOUNDS(dst_offs, copy_len);
+                if (reg_num == 0) {  // copy from packet
+                    ASSERT_IN_PACKET_BOUNDS(pktcopy_src_offset);
+                    const u32 last_packet_offs = pktcopy_src_offset + copy_len - 1;
+                    ASSERT_RETURN(last_packet_offs >= pktcopy_src_offset);
+                    ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
+                    memcpy(ctx->tx_buf + dst_offs, ctx->packet + pktcopy_src_offset, copy_len);
+                } else {  // copy from data
+                    // A huge source offset (eg. R0 near 2^32) must not wrap back into ram bounds.
+                    ASSERT_RETURN(pktcopy_src_offset + copy_len >= pktcopy_src_offset);
+                    ASSERT_IN_RAM_BOUNDS(pktcopy_src_offset + copy_len - 1);
+                    memcpy(ctx->tx_buf + dst_offs, ctx->program + pktcopy_src_offset, copy_len);
+                }
+                ctx->mem.named.tx_buf_offset = dst_offs + copy_len;
+                break;
+              }
+              case JDNSQMATCH_EXT_OPCODE:       // 43
+              case JDNSAMATCH_EXT_OPCODE:       // 44
+              case JDNSQMATCHSAFE_EXT_OPCODE:   // 45
+              case JDNSAMATCHSAFE_EXT_OPCODE: { // 46
+                const u32 imm_len = 1 << (len_field - 1); // EXT_OPCODE, thus len_field > 0
+                u32 jump_offs = decode_imm(ctx, imm_len); // 2nd imm, at worst 8 B past prog_len
+                int qtype = -1;
+                if (imm & 1) { // JDNSQMATCH & JDNSQMATCHSAFE are *odd* extended opcodes
+                    qtype = DECODE_U8();  // 3rd imm, at worst 9 bytes past prog_len
+                }
+                u32 udp_payload_offset = ctx->R[0];
+                match_result_type match_rst = match_names(ctx->program + ctx->pc,
+                                                          ctx->program + ctx->program_len,
+                                                          ctx->packet + udp_payload_offset,
+                                                          ctx->packet_len - udp_payload_offset,
+                                                          qtype);
+                if (match_rst == error_program) return PASS_PACKET;
+                if (match_rst == error_packet) {
+                    counter[-5]++; // increment error dns packet counter
+                    return (imm >= JDNSQMATCHSAFE_EXT_OPCODE) ? PASS_PACKET : DROP_PACKET;
+                }
+                while (ctx->pc + 1 < ctx->program_len &&
+                       (ctx->program[ctx->pc] || ctx->program[ctx->pc + 1])) {
+                    ctx->pc++;
+                }
+                ctx->pc += 2;  // skip the final double 0 needle end
+                // relies on reg_num in {0,1} and match_rst being {false=0, true=1}
+                if (!(reg_num ^ (u32)match_rst)) ctx->pc += jump_offs;
+                break;
+              }
+              case EWRITE1_EXT_OPCODE:
+              case EWRITE2_EXT_OPCODE:
+              case EWRITE4_EXT_OPCODE: {
+                ASSERT_RETURN(ctx->tx_buf);
+                const u32 write_len = 1 << (imm - EWRITE1_EXT_OPCODE);
+                ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
+                u32 i;
+                for (i = 0; i < write_len; ++i) {
+                    ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
+                        (u8)(REG >> (write_len - 1 - i) * 8);
+                }
+                break;
+              }
+              case JONEOF_EXT_OPCODE: {
+                const u32 imm_len = 1 << (len_field - 1); // ext opcode len_field guaranteed > 0
+                u32 jump_offs = decode_imm(ctx, imm_len); // 2nd imm, at worst 8 B past prog_len
+                u8 imm3 = DECODE_U8();  // 3rd imm, at worst 9 bytes past prog_len
+                bool jmp = imm3 & 1;  // =0 jmp on match, =1 jmp on no match
+                u8 len = ((imm3 >> 1) & 3) + 1;  // size [1..4] in bytes of an element
+                u8 cnt = (imm3 >> 3) + 1;  // number [1..32] of elements in set
+                if (ctx->pc + cnt * len > ctx->program_len) return PASS_PACKET;
+                while (cnt--) {
+                    u32 v = 0;
+                    int i;
+                    for (i = 0; i < len; ++i) v = (v << 8) | DECODE_U8();
+                    if (REG == v) jmp ^= true;
+                }
+                if (jmp) ctx->pc += jump_offs;
+                break;  // keep executing at the (possibly adjusted) pc, like the other jump opcodes
+              }
+              default:  // Unknown extended opcode
+                return PASS_PACKET;  // Bail out
+            }
+            break;
+          case LDDW_OPCODE:
+          case STDW_OPCODE:
+            if (ctx->v6) {
+                if (!imm) return PASS_PACKET;
+                if (imm > 0xFFFF) return PASS_PACKET;
+                if (imm * 4 > ctx->ram_len) return PASS_PACKET;
+                if (opcode == LDDW_OPCODE) {
+                    REG = counter[-(s32)imm];
+                } else {
+                    counter[-(s32)imm] = REG;
+                }
+            } else {
+                u32 offs = OTHER_REG + (u32)signed_imm;
+                // Negative offsets wrap around the end of the address space.
+                // This allows us to efficiently access the end of the
+                // address space with one-byte immediates without using %=.
+                if (offs & 0x80000000) offs += ctx->ram_len;  // unsigned overflow intended
+                u32 size = 4;
+                ASSERT_IN_DATA_BOUNDS(offs, size);
+                if (opcode == LDDW_OPCODE) {
+                    u32 val = 0;
+                    while (size--) val = (val << 8) | ctx->program[offs++];
+                    REG = val;
+                } else {
+                    u32 val = REG;
+                    while (size--) {
+                        ctx->program[offs++] = (val >> 24);
+                        val <<= 8;
+                    }
+                }
+            }
+            break;
+          case WRITE_OPCODE: {
+            ASSERT_RETURN(ctx->tx_buf);
+            ASSERT_RETURN(len_field);
+            const u32 write_len = 1 << (len_field - 1);
+            ASSERT_IN_OUTPUT_BOUNDS(ctx->mem.named.tx_buf_offset, write_len);
+            u32 i;
+            for (i = 0; i < write_len; ++i) {
+                ctx->tx_buf[ctx->mem.named.tx_buf_offset++] =
+                    (u8)(imm >> (write_len - 1 - i) * 8);
+            }
+            break;
+          }
+          default:  // Unknown opcode
+            return PASS_PACKET;  // Bail out
+        }
+    } while (instructions_remaining--);
+    return PASS_PACKET;
+}
+
+int apf_run(void* ctx, u32* const program, const u32 program_len,
+            const u32 ram_len, const u8* const packet,
+            const u32 packet_len, const u32 filter_age_16384ths) {
+    // Due to direct 32-bit read/write access to counters at end of ram
+    // APFv6 interpreter requires program & ram_len to be 4 byte aligned.
+    if (3 & (uintptr_t)program) return PASS_PACKET;
+    if (3 & ram_len) return PASS_PACKET;
+
+    // We rely on ram_len + 65536 not overflowing, so require ram_len < 2GiB
+    // Similarly LDDW/STDW have special meaning for negative ram offsets.
+    // We also don't want garbage like program_len == 0xFFFFFFFF
+    if ((program_len | ram_len) >> 31) return PASS_PACKET;
+
+    // APFv6 requires at least 5 u32 counters at the end of ram, this makes counter[-5]++ valid
+    // This cannot wrap due to previous check.
+    if (program_len + 20 > ram_len) return PASS_PACKET;
+
+    apf_context apf_ctx = { 0 };
+    apf_ctx.caller_ctx = ctx;
+    apf_ctx.program = (u8*)program;
+    apf_ctx.program_len = program_len;
+    apf_ctx.ram_len = ram_len;
+    apf_ctx.packet = packet;
+    apf_ctx.packet_len = packet_len;
+    // Fill in pre-filled memory slot values.
+    apf_ctx.mem.named.program_size = program_len;
+    apf_ctx.mem.named.ram_len = ram_len;
+    apf_ctx.mem.named.packet_size = packet_len;
+    apf_ctx.mem.named.apf_version = apf_version();
+    apf_ctx.mem.named.filter_age = filter_age_16384ths >> 14;
+    apf_ctx.mem.named.filter_age_16384ths = filter_age_16384ths;
+
+    int ret = do_apf_run(&apf_ctx);
+    if (apf_ctx.tx_buf) do_discard_buffer(&apf_ctx);
+    return ret;
+}
diff --git a/v5/apf_utils.h b/v5/apf_utils.h
new file mode 120000
index 0000000..1ac8063
--- /dev/null
+++ b/v5/apf_utils.h
@@ -0,0 +1 @@
+../apf_utils.h
\ No newline at end of file
diff --git a/v5/test_buf_allocator.c b/v5/test_buf_allocator.c
index a889dea..e30815e 100644
--- a/v5/test_buf_allocator.c
+++ b/v5/test_buf_allocator.c
@@ -19,8 +19,7 @@
 #include "apf_interpreter.h"
 #include "test_buf_allocator.h"
 
-uint8_t apf_test_buffer[APF_TX_BUFFER_SIZE];
-uint8_t apf_test_tx_packet[APF_TX_BUFFER_SIZE];
+uint8_t apf_test_buffer[sizeof(apf_test_buffer)];
 uint32_t apf_test_tx_packet_len;
 uint8_t apf_test_tx_dscp;
 
@@ -30,10 +29,9 @@
  * Clean up the apf_test_buffer and return the pointer to beginning of the buffer region.
  */
 uint8_t* apf_allocate_buffer(__attribute__ ((unused)) void* ctx, uint32_t size) {
-  if (size > APF_TX_BUFFER_SIZE) {
+  if (size > sizeof(apf_test_buffer)) {
     return NULL;
   }
-  memset(apf_test_buffer, 0, APF_TX_BUFFER_SIZE * sizeof(apf_test_buffer[0]));
   return apf_test_buffer;
 }
 
@@ -44,8 +42,9 @@
  */
 int apf_transmit_buffer(__attribute__((unused)) void* ctx, uint8_t* ptr,
                         uint32_t len, uint8_t dscp) {
+  if (len && len < ETH_HLEN) return -1;
+  if (ptr != apf_test_buffer) return -1;
   apf_test_tx_packet_len = len;
   apf_test_tx_dscp = dscp;
-  memcpy(apf_test_tx_packet, ptr, (size_t) len);
   return 0;
 }
diff --git a/v5/test_buf_allocator.h b/v5/test_buf_allocator.h
index 3916cf8..c5fa5c4 100644
--- a/v5/test_buf_allocator.h
+++ b/v5/test_buf_allocator.h
@@ -14,15 +14,13 @@
  * limitations under the License.
  */
 
-#include <stdint.h>
-
 #ifndef TEST_BUF_ALLOCATOR
 #define TEST_BUF_ALLOCATOR
 
-#define APF_TX_BUFFER_SIZE 1500
+#include <stdint.h>
+#include <linux/if_ether.h>
 
-extern uint8_t apf_test_buffer[APF_TX_BUFFER_SIZE];
-extern uint8_t apf_test_tx_packet[APF_TX_BUFFER_SIZE];
+extern uint8_t apf_test_buffer[1514];
 extern uint32_t apf_test_tx_packet_len;
 extern uint8_t apf_test_tx_dscp;
 
diff --git a/v6/apf_interpreter.c b/v6/apf_interpreter.c
new file mode 100644
index 0000000..ef7a6b8
--- /dev/null
+++ b/v6/apf_interpreter.c
@@ -0,0 +1,2 @@
+APFv6 is not yet finalized.
+There is a beta version available at v5/
diff --git a/v6/apf_interpreter.h b/v6/apf_interpreter.h
new file mode 100644
index 0000000..ef7a6b8
--- /dev/null
+++ b/v6/apf_interpreter.h
@@ -0,0 +1,2 @@
+APFv6 is not yet finalized.
+There is a beta version available at v5/