Merge master@5406228 into git_qt-dev-plus-aosp.
am: 3cd28db90a

Change-Id: I6556ed4a19f2ff18ae4c0b8de93c37209a4e1e60
diff --git a/gen_constants-inl.h b/gen_constants-inl.h
index 752badc..0ea710d 100644
--- a/gen_constants-inl.h
+++ b/gen_constants-inl.h
@@ -5,9 +5,11 @@
 #include <fcntl.h>
 #include <linux/fs.h>
 #include <linux/mman.h>
+#include <linux/net.h>
 #include <linux/prctl.h>
 #include <linux/sched.h>
 #include <linux/serial.h>
+#include <linux/sockios.h>
 #include <linux/termios.h>
 #include <stddef.h>
 #include <signal.h>
diff --git a/tools/Android.bp b/tools/Android.bp
new file mode 100644
index 0000000..5ff3462
--- /dev/null
+++ b/tools/Android.bp
@@ -0,0 +1,78 @@
+// Copyright (C) 2019 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+python_binary_host {
+    name: "minijail_compile_seccomp_policy",
+    main: "compile_seccomp_policy.py",
+    srcs: [
+        "arch.py",
+        "bpf.py",
+        "compile_seccomp_policy.py",
+        "compiler.py",
+        "parser.py",
+    ],
+    version: {
+        py2: {
+            enabled: false,
+        },
+        py3: {
+            enabled: true,
+        },
+    },
+}
+
+python_test_host {
+    name: "minijail_parser_unittest",
+    main: "parser_unittest.py",
+    srcs: [
+        "arch.py",
+        "bpf.py",
+        "parser.py",
+        "parser_unittest.py",
+    ],
+    data: [
+        "testdata/arch_64.json",
+    ],
+    version: {
+        py2: {
+            enabled: false,
+        },
+        py3: {
+            enabled: true,
+        },
+    },
+}
+
+python_test_host {
+    name: "minijail_compiler_unittest",
+    main: "compiler_unittest.py",
+    srcs: [
+        "arch.py",
+        "bpf.py",
+        "compiler.py",
+        "compiler_unittest.py",
+        "parser.py",
+    ],
+    data: [
+        "testdata/arch_64.json",
+    ],
+    version: {
+        py2: {
+            enabled: false,
+        },
+        py3: {
+            enabled: true,
+        },
+    },
+}
diff --git a/tools/arch.py b/tools/arch.py
index 6f2dfb2..ac2f32b 100644
--- a/tools/arch.py
+++ b/tools/arch.py
@@ -21,9 +21,10 @@
 
 
 class Arch(
-        collections.namedtuple(
-            'Arch',
-            ['arch_nr', 'arch_name', 'bits', 'syscalls', 'constants'])):
+        collections.namedtuple('Arch', [
+            'arch_nr', 'arch_name', 'bits', 'syscalls', 'constants',
+            'syscall_groups'
+        ])):
     """Holds architecture-specific information."""
 
     def truncate_word(self, value):
@@ -51,4 +52,5 @@
                 bits=constants['bits'],
                 syscalls=constants['syscalls'],
                 constants=constants['constants'],
+                syscall_groups=constants.get('syscall_groups', {}),
             )
diff --git a/tools/bpf.py b/tools/bpf.py
index e89e93f..75db502 100644
--- a/tools/bpf.py
+++ b/tools/bpf.py
@@ -172,6 +172,8 @@
         self._instructions = instructions
 
     def accept(self, visitor):
+        if visitor.visited(self):
+            return
         visitor.visit(self)
 
     @property
@@ -251,6 +253,8 @@
         self.next_block = next_block
 
     def accept(self, visitor):
+        if visitor.visited(self):
+            return
         self.next_block.accept(visitor)
         visitor.visit(self)
 
@@ -274,6 +278,8 @@
         return False
 
     def accept(self, visitor):
+        if visitor.visited(self):
+            return
         self.jt.accept(visitor)
         self.jf.accept(visitor)
         visitor.visit(self)
@@ -299,6 +305,8 @@
         self.jf = jf
 
     def accept(self, visitor):
+        if visitor.visited(self):
+            return
         self.jt.accept(visitor)
         self.jf.accept(visitor)
         visitor.visit(self)
@@ -344,6 +352,8 @@
         self.value = value
 
     def accept(self, visitor):
+        if visitor.visited(self):
+            return
         self.jt.accept(visitor)
         self.jf.accept(visitor)
         visitor.visit(self)
@@ -352,6 +362,15 @@
 class AbstractVisitor(abc.ABC):
     """An abstract visitor."""
 
+    def __init__(self):
+        self._visited = set()
+
+    def visited(self, block):
+        if id(block) in self._visited:
+            return True
+        self._visited.add(id(block))
+        return False
+
     def process(self, block):
         block.accept(self)
         return block
@@ -437,6 +456,7 @@
     """A visitor that copies Blocks."""
 
     def __init__(self):
+        super().__init__()
         self._mapping = {}
 
     def process(self, block):
@@ -445,54 +465,44 @@
         return self._mapping[id(block)]
 
     def visitKillProcess(self, block):
-        if id(block) in self._mapping:
-            return
+        assert id(block) not in self._mapping
         self._mapping[id(block)] = KillProcess()
 
     def visitKillThread(self, block):
-        if id(block) in self._mapping:
-            return
+        assert id(block) not in self._mapping
         self._mapping[id(block)] = KillThread()
 
     def visitTrap(self, block):
-        if id(block) in self._mapping:
-            return
+        assert id(block) not in self._mapping
         self._mapping[id(block)] = Trap()
 
     def visitReturnErrno(self, block):
-        if id(block) in self._mapping:
-            return
+        assert id(block) not in self._mapping
         self._mapping[id(block)] = ReturnErrno(block.errno)
 
     def visitTrace(self, block):
-        if id(block) in self._mapping:
-            return
+        assert id(block) not in self._mapping
         self._mapping[id(block)] = Trace()
 
     def visitLog(self, block):
-        if id(block) in self._mapping:
-            return
+        assert id(block) not in self._mapping
         self._mapping[id(block)] = Log()
 
     def visitAllow(self, block):
-        if id(block) in self._mapping:
-            return
+        assert id(block) not in self._mapping
         self._mapping[id(block)] = Allow()
 
     def visitBasicBlock(self, block):
-        if id(block) in self._mapping:
-            return
+        assert id(block) not in self._mapping
         self._mapping[id(block)] = BasicBlock(block.instructions)
 
     def visitValidateArch(self, block):
-        if id(block) in self._mapping:
-            return
+        assert id(block) not in self._mapping
         self._mapping[id(block)] = ValidateArch(
             block.arch, self._mapping[id(block.next_block)])
 
     def visitSyscallEntry(self, block):
-        if id(block) in self._mapping:
-            return
+        assert id(block) not in self._mapping
         self._mapping[id(block)] = SyscallEntry(
             block.syscall_number,
             self._mapping[id(block.jt)],
@@ -500,15 +510,13 @@
             op=block.op)
 
     def visitWideAtom(self, block):
-        if id(block) in self._mapping:
-            return
+        assert id(block) not in self._mapping
         self._mapping[id(block)] = WideAtom(
             block.arg_offset, block.op, block.value, self._mapping[id(
                 block.jt)], self._mapping[id(block.jf)])
 
     def visitAtom(self, block):
-        if id(block) in self._mapping:
-            return
+        assert id(block) not in self._mapping
         self._mapping[id(block)] = Atom(block.arg_index, block.op, block.value,
                                         self._mapping[id(block.jt)],
                                         self._mapping[id(block.jf)])
@@ -522,8 +530,7 @@
         self._bits = arch.bits
 
     def visitAtom(self, block):
-        if id(block) in self._mapping:
-            return
+        assert id(block) not in self._mapping
 
         lo = block.value & 0xFFFFFFFF
         hi = (block.value >> 32) & 0xFFFFFFFF
@@ -582,6 +589,7 @@
     """A visitor that flattens a DAG of Block objects."""
 
     def __init__(self, *, arch, kill_action):
+        self._visited = set()
         self._kill_action = kill_action
         self._instructions = []
         self._arch = arch
@@ -621,9 +629,14 @@
             SockFilter(BPF_JMP | BPF_JA, 0, 0, jf_distance),
         ]
 
+    def visited(self, block):
+        if id(block) in self._visited:
+            return True
+        self._visited.add(id(block))
+        return False
+
     def visit(self, block):
-        if id(block) in self._offsets:
-            return
+        assert id(block) not in self._offsets
 
         if isinstance(block, BasicBlock):
             instructions = block.instructions
@@ -651,3 +664,30 @@
         self._instructions = instructions + self._instructions
         self._offsets[id(block)] = -len(self._instructions)
         return
+
+
+class ArgFilterForwardingVisitor:
+    """A visitor that forwards visitation to all arg filters."""
+
+    def __init__(self, visitor):
+        self._visited = set()
+        self.visitor = visitor
+
+    def visited(self, block):
+        if id(block) in self._visited:
+            return True
+        self._visited.add(id(block))
+        return False
+
+    def visit(self, block):
+        # All arg filters are BasicBlocks.
+        if not isinstance(block, BasicBlock):
+            return
+        # But the ALLOW, KILL_PROCESS, TRAP, etc. actions are too and we don't
+        # want to visit them just yet.
+        if (isinstance(block, KillProcess) or isinstance(block, KillThread)
+                or isinstance(block, Trap) or isinstance(block, ReturnErrno)
+                or isinstance(block, Trace) or isinstance(block, Log)
+                or isinstance(block, Allow)):
+            return
+        block.accept(self.visitor)
diff --git a/tools/compile_seccomp_policy.py b/tools/compile_seccomp_policy.py
new file mode 100755
index 0000000..4278f90
--- /dev/null
+++ b/tools/compile_seccomp_policy.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Helper tool to compile a BPF program from a Minijail seccomp filter.
+
+This script will take a Minijail seccomp policy file and compile it into a
+BPF program suitable for use with Minijail in the current architecture.
+"""
+
+from __future__ import print_function
+
+import argparse
+import sys
+
+import arch
+import bpf
+import compiler
+import parser
+
+
+def parse_args(argv):
+    """Return the parsed CLI arguments for this tool."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        '--optimization-strategy',
+        default=compiler.OptimizationStrategy.BST,
+        type=compiler.OptimizationStrategy,
+        choices=list(compiler.OptimizationStrategy))
+    parser.add_argument('--include-depth-limit', default=10, type=int)
+    parser.add_argument('--arch-json', default='constants.json')
+    parser.add_argument(
+        '--default-action',
+        type=str,
+        help=('Use the specified default action, overriding any @default '
+              'action found in the .policy files. '
+              'This allows the use of permissive actions (allow, log, trace) '
+              'since it is not valid to specify a permissive action in '
+              '.policy files. This is useful for debugging.'))
+    parser.add_argument(
+        '--use-kill-process',
+        action='store_true',
+        help=('Use SECCOMP_RET_KILL_PROCESS instead of '
+              'SECCOMP_RET_KILL_THREAD (requires Linux v4.14+).'))
+    parser.add_argument(
+        'policy', help='The seccomp policy.', type=argparse.FileType('r'))
+    parser.add_argument(
+        'output', help='The BPF program.', type=argparse.FileType('wb'))
+    return parser.parse_args(argv)
+
+
+def main(argv):
+    """Compile the policy named in |argv| into the output file; returns 0."""
+    opts = parse_args(argv)
+    parsed_arch = arch.Arch.load_from_json(opts.arch_json)
+    policy_compiler = compiler.PolicyCompiler(parsed_arch)
+    if opts.use_kill_process:
+        kill_action = bpf.KillProcess()
+    else:
+        kill_action = bpf.KillThread()
+    override_default_action = None
+    if opts.default_action:
+        parser_state = parser.ParserState('<memory>')
+        parser_state.set_line(opts.default_action)
+        override_default_action = parser.PolicyParser(
+            parsed_arch, kill_action=kill_action).parse_action(
+                parser_state.tokenize())
+    with opts.output as outf:
+        outf.write(
+            policy_compiler.compile_file(
+                opts.policy.name,
+                optimization_strategy=opts.optimization_strategy,
+                kill_action=kill_action,
+                include_depth_limit=opts.include_depth_limit,
+                override_default_action=override_default_action).opcodes)
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv[1:]))
diff --git a/tools/compiler.py b/tools/compiler.py
index 96800f1..4e3881a 100644
--- a/tools/compiler.py
+++ b/tools/compiler.py
@@ -18,10 +18,27 @@
 
 from __future__ import print_function
 
+import enum
+
 import bpf
 import parser  # pylint: disable=wrong-import-order
 
 
+class OptimizationStrategy(enum.Enum):
+    """The available optimization strategies."""
+
+    # Generate a linear chain of syscall number checks. Works best for policies
+    # with very few syscalls.
+    LINEAR = 'linear'
+
+    # Generate a binary search tree for the syscalls. Works best for policies
+    # with a lot of syscalls, where no one syscall dominates.
+    BST = 'bst'
+
+    def __str__(self):
+        return self.value
+
+
 class SyscallPolicyEntry:
     """The parsed version of a seccomp policy line."""
 
@@ -34,10 +51,9 @@
 
     def __repr__(self):
         return ('SyscallPolicyEntry<name: %s, number: %d, '
-                'frequency: %d, filter: %r>') % (self.name, self.number,
-                                                 self.frequency,
-                                                 self.filter.instructions
-                                                 if self.filter else None)
+                'frequency: %d, filter: %r>') % (
+                    self.name, self.number, self.frequency,
+                    self.filter.instructions if self.filter else None)
 
     def simulate(self, arch, syscall_number, *args):
         """Simulate the policy with the given arguments."""
@@ -47,12 +63,243 @@
                             *args)
 
 
+class SyscallPolicyRange:
+    """A contiguous range of SyscallPolicyEntries that have the same action."""
+
+    def __init__(self, *entries):
+        self.numbers = (entries[0].number, entries[-1].number + 1)
+        self.frequency = sum(e.frequency for e in entries)
+        self.accumulated = 0
+        self.filter = entries[0].filter
+
+    def __repr__(self):
+        return 'SyscallPolicyRange<numbers: %r, frequency: %d, filter: %r>' % (
+            self.numbers, self.frequency,
+            self.filter.instructions if self.filter else None)
+
+    def simulate(self, arch, syscall_number, *args):
+        """Simulate the policy with the given arguments."""
+        if not self.filter:
+            return (0, 'ALLOW')
+        return self.filter.simulate(arch, syscall_number, *args)
+
+
+def _convert_to_ranges(entries):
+    entries = list(sorted(entries, key=lambda r: r.number))
+    lower = 0
+    while lower < len(entries):
+        upper = lower + 1
+        while upper < len(entries):
+            if entries[upper - 1].filter != entries[upper].filter:
+                break
+            if entries[upper - 1].number + 1 != entries[upper].number:
+                break
+            upper += 1
+        yield SyscallPolicyRange(*entries[lower:upper])
+        lower = upper
+
+
+def _compile_single_range(entry,
+                          accept_action,
+                          reject_action,
+                          lower_bound=0,
+                          upper_bound=1e99):
+    action = accept_action
+    if entry.filter:
+        action = entry.filter
+    if entry.numbers[1] - entry.numbers[0] == 1:
+        # Single syscall.
+        # Accept if |X == nr|.
+        return (1,
+                bpf.SyscallEntry(
+                    entry.numbers[0], action, reject_action, op=bpf.BPF_JEQ))
+    elif entry.numbers[0] == lower_bound:
+        # Syscall range aligned with the lower bound.
+        # Accept if |X < nr[1]|.
+        return (1,
+                bpf.SyscallEntry(
+                    entry.numbers[1], reject_action, action, op=bpf.BPF_JGE))
+    elif entry.numbers[1] == upper_bound:
+        # Syscall range aligned with the upper bound.
+        # Accept if |X >= nr[0]|.
+        return (1,
+                bpf.SyscallEntry(
+                    entry.numbers[0], action, reject_action, op=bpf.BPF_JGE))
+    # Syscall range in the middle.
+    # Accept if |nr[0] <= X < nr[1]|.
+    upper_entry = bpf.SyscallEntry(
+        entry.numbers[1], reject_action, action, op=bpf.BPF_JGE)
+    return (2,
+            bpf.SyscallEntry(
+                entry.numbers[0], upper_entry, reject_action, op=bpf.BPF_JGE))
+
+
+def _compile_ranges_linear(ranges, accept_action, reject_action):
+    # Compiles the list of ranges into a simple linear list of comparisons. In
+    # order to make the generated code a bit more efficient, we sort the
+    # ranges by frequency, so that the most frequently-called syscalls appear
+    # earlier in the chain.
+    cost = 0
+    accumulated_frequencies = 0
+    next_action = reject_action
+    for entry in sorted(ranges, key=lambda r: r.frequency):
+        current_cost, next_action = _compile_single_range(
+            entry, accept_action, next_action)
+        accumulated_frequencies += entry.frequency
+        cost += accumulated_frequencies * current_cost
+    return (cost, next_action)
+
+
+def _compile_entries_linear(entries, accept_action, reject_action):
+    return _compile_ranges_linear(
+        _convert_to_ranges(entries), accept_action, reject_action)[1]
+
+
+def _compile_entries_bst(entries, accept_action, reject_action):
+    # Instead of generating a linear list of comparisons, this method generates
+    # a binary search tree, where some of the leaves can be linear chains of
+    # comparisons.
+    #
+    # Even though we are going to perform a binary search over the syscall
+    # number, we would still like to rotate some of the internal nodes of the
+    # binary search tree so that more frequently-used syscalls can be accessed
+    # more cheaply (i.e. fewer internal nodes need to be traversed to reach
+    # them).
+    #
+    # This uses Dynamic Programming to generate all possible BSTs efficiently
+    # (in O(n^3)) so that we can get the absolute minimum-cost tree that matches
+    # all syscall entries. It does so by considering all of the O(n^2) possible
+    # sub-intervals, and for each one of those try all of the O(n) partitions of
+    # that sub-interval. At each step, it considers putting the remaining
+    # entries in a linear comparison chain as well as another BST, and chooses
+    # the option that minimizes the total overall cost.
+    #
+    # Between every pair of non-contiguous allowed syscalls, there are two
+    # locally optimal options as to where to set the partition for the
+    # subsequent ranges: aligned to the end of the left subrange or to the
+    # beginning of the right subrange. The fact that these two options have
+    # slightly different costs, combined with the possibility of a subtree to
+    # use the linear chain strategy (which has a completely different cost
+    # model), causes the target cost function that we are trying to optimize to
+    # not be unimodal / convex. This unfortunately means that more clever
+    # techniques like using ternary search (which would reduce the overall
+    # complexity to O(n^2 log n)) do not work in all cases.
+    ranges = list(_convert_to_ranges(entries))
+
+    accumulated = 0
+    for entry in ranges:
+        accumulated += entry.frequency
+        entry.accumulated = accumulated
+
+    # Memoization cache to build the DP table top-down, which is easier to
+    # understand.
+    memoized_costs = {}
+
+    def _generate_syscall_bst(ranges, indices, bounds=(0, 2**64 - 1)):
+        assert bounds[0] <= ranges[indices[0]].numbers[0], (indices, bounds)
+        assert ranges[indices[1] - 1].numbers[1] <= bounds[1], (indices,
+                                                                bounds)
+
+        if bounds in memoized_costs:
+            return memoized_costs[bounds]
+        if indices[1] - indices[0] == 1:
+            if bounds == ranges[indices[0]].numbers:
+                # If bounds are tight around the syscall, it costs nothing.
+                memoized_costs[bounds] = (0, ranges[indices[0]].filter
+                                          or accept_action)
+                return memoized_costs[bounds]
+            result = _compile_single_range(ranges[indices[0]], accept_action,
+                                           reject_action)
+            memoized_costs[bounds] = (result[0] * ranges[indices[0]].frequency,
+                                      result[1])
+            return memoized_costs[bounds]
+
+        # Try the linear model first and use that as the best estimate so far.
+        best_cost = _compile_ranges_linear(ranges[slice(*indices)],
+                                           accept_action, reject_action)
+
+        # Now recursively go through all possible partitions of the interval
+        # currently being considered.
+        previous_accumulated = ranges[indices[0]].accumulated - ranges[
+            indices[0]].frequency
+        bst_comparison_cost = (
+            ranges[indices[1] - 1].accumulated - previous_accumulated)
+        for i, entry in enumerate(ranges[slice(*indices)]):
+            candidates = [entry.numbers[0]]
+            if i:
+                candidates.append(ranges[i - 1 + indices[0]].numbers[1])
+            for cutoff_bound in candidates:
+                if not bounds[0] < cutoff_bound < bounds[1]:
+                    continue
+                if not indices[0] < i + indices[0] < indices[1]:
+                    continue
+                left_subtree = _generate_syscall_bst(
+                    ranges, (indices[0], i + indices[0]),
+                    (bounds[0], cutoff_bound))
+                right_subtree = _generate_syscall_bst(
+                    ranges, (i + indices[0], indices[1]),
+                    (cutoff_bound, bounds[1]))
+                best_cost = min(
+                    best_cost,
+                    (bst_comparison_cost + left_subtree[0] + right_subtree[0],
+                     bpf.SyscallEntry(
+                         cutoff_bound,
+                         right_subtree[1],
+                         left_subtree[1],
+                         op=bpf.BPF_JGE)))
+
+        memoized_costs[bounds] = best_cost
+        return memoized_costs[bounds]
+
+    return _generate_syscall_bst(ranges, (0, len(ranges)))[1]
+
+
 class PolicyCompiler:
     """A parser for the Minijail seccomp policy file format."""
 
     def __init__(self, arch):
         self._arch = arch
 
+    def compile_file(self,
+                     policy_filename,
+                     *,
+                     optimization_strategy,
+                     kill_action,
+                     include_depth_limit=10,
+                     override_default_action=None):
+        """Return a compiled BPF program from the provided policy file."""
+        policy_parser = parser.PolicyParser(
+            self._arch,
+            kill_action=kill_action,
+            include_depth_limit=include_depth_limit,
+            override_default_action=override_default_action)
+        parsed_policy = policy_parser.parse_file(policy_filename)
+        entries = [
+            self.compile_filter_statement(
+                filter_statement, kill_action=kill_action)
+            for filter_statement in parsed_policy.filter_statements
+        ]
+
+        visitor = bpf.FlatteningVisitor(
+            arch=self._arch, kill_action=kill_action)
+        accept_action = bpf.Allow()
+        reject_action = parsed_policy.default_action
+        if entries:
+            if optimization_strategy == OptimizationStrategy.BST:
+                next_action = _compile_entries_bst(entries, accept_action,
+                                                   reject_action)
+            else:
+                next_action = _compile_entries_linear(entries, accept_action,
+                                                      reject_action)
+            next_action.accept(bpf.ArgFilterForwardingVisitor(visitor))
+            reject_action.accept(visitor)
+            accept_action.accept(visitor)
+            bpf.ValidateArch(next_action).accept(visitor)
+        else:
+            reject_action.accept(visitor)
+            bpf.ValidateArch(reject_action).accept(visitor)
+        return visitor.result
+
     def compile_filter_statement(self, filter_statement, *, kill_action):
         """Compile one parser.FilterStatement into BPF."""
         policy_entry = SyscallPolicyEntry(filter_statement.syscall.name,
diff --git a/tools/compiler_unittest.py b/tools/compiler_unittest.py
index ba66e62..ae4c1e5 100755
--- a/tools/compiler_unittest.py
+++ b/tools/compiler_unittest.py
@@ -19,6 +19,8 @@
 from __future__ import print_function
 
 import os
+import random
+import shutil
 import tempfile
 import unittest
 
@@ -231,6 +233,14 @@
             block.simulate(self.arch.arch_nr, self.arch.syscalls['read'], 1,
                            1)[1], 'ALLOW')
 
+    def test_trap(self):
+        """Accept lines that trap unconditionally."""
+        block = self._compile('read: trap')
+
+        self.assertEqual(
+            block.simulate(self.arch.arch_nr, self.arch.syscalls['read'],
+                           0)[1], 'TRAP')
+
     def test_ret_errno(self):
         """Accept lines that return errno."""
         block = self._compile('read : arg0 == 0 || arg0 == 1 ; return 1')
@@ -253,6 +263,22 @@
             block.simulate(self.arch.arch_nr, self.arch.syscalls['read'],
                            0)[1:], ('ERRNO', 1))
 
+    def test_trace(self):
+        """Accept lines that trace unconditionally."""
+        block = self._compile('read: trace')
+
+        self.assertEqual(
+            block.simulate(self.arch.arch_nr, self.arch.syscalls['read'],
+                           0)[1], 'TRACE')
+
+    def test_log(self):
+        """Accept lines that log unconditionally."""
+        block = self._compile('read: log')
+
+        self.assertEqual(
+            block.simulate(self.arch.arch_nr, self.arch.syscalls['read'],
+                           0)[1], 'LOG')
+
     def test_mmap_write_xor_exec(self):
         """Accept the idiomatic filter for mmap."""
         block = self._compile(
@@ -272,5 +298,202 @@
                     'ALLOW')
 
 
+class CompileFileTests(unittest.TestCase):
+    """Tests for PolicyCompiler.compile_file."""
+
+    def setUp(self):
+        self.arch = ARCH_64
+        self.compiler = compiler.PolicyCompiler(self.arch)
+        self.tempdir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.tempdir)
+
+    def _write_file(self, filename, contents):
+        """Helper to write out a file for testing."""
+        path = os.path.join(self.tempdir, filename)
+        with open(path, 'w') as outf:
+            outf.write(contents)
+        return path
+
+    def test_compile(self):
+        """Ensure the LINEAR strategy favors frequent 'close' over 'read'."""
+        self._write_file(
+            'test.frequency', """
+            read: 1
+            close: 10
+        """)
+        path = self._write_file(
+            'test.policy', """
+            @frequency ./test.frequency
+            read: 1
+            close: 1
+        """)
+
+        program = self.compiler.compile_file(
+            path,
+            optimization_strategy=compiler.OptimizationStrategy.LINEAR,
+            kill_action=bpf.KillProcess())
+        self.assertGreater(
+            bpf.simulate(program.instructions, self.arch.arch_nr,
+                         self.arch.syscalls['read'], 0)[0],
+            bpf.simulate(program.instructions, self.arch.arch_nr,
+                         self.arch.syscalls['close'], 0)[0],
+        )
+
+    def test_compile_bst(self):
+        """Ensure every strategy favors 'close' over 'read' and allows both."""
+        self._write_file(
+            'test.frequency', """
+            read: 1
+            close: 10
+        """)
+        path = self._write_file(
+            'test.policy', """
+            @frequency ./test.frequency
+            read: 1
+            close: 1
+        """)
+
+        for strategy in list(compiler.OptimizationStrategy):
+            program = self.compiler.compile_file(
+                path,
+                optimization_strategy=strategy,
+                kill_action=bpf.KillProcess())
+            self.assertGreater(
+                bpf.simulate(program.instructions, self.arch.arch_nr,
+                             self.arch.syscalls['read'], 0)[0],
+                bpf.simulate(program.instructions, self.arch.arch_nr,
+                             self.arch.syscalls['close'], 0)[0],
+            )
+            self.assertEqual(
+                bpf.simulate(program.instructions, self.arch.arch_nr,
+                             self.arch.syscalls['read'], 0)[1], 'ALLOW')
+            self.assertEqual(
+                bpf.simulate(program.instructions, self.arch.arch_nr,
+                             self.arch.syscalls['close'], 0)[1], 'ALLOW')
+
+    def test_compile_empty_file(self):
+        """Accept empty files."""
+        path = self._write_file(
+            'test.policy', """
+            @default kill-thread
+        """)
+
+        for strategy in list(compiler.OptimizationStrategy):
+            program = self.compiler.compile_file(
+                path,
+                optimization_strategy=strategy,
+                kill_action=bpf.KillProcess())
+            self.assertEqual(
+                bpf.simulate(program.instructions, self.arch.arch_nr,
+                             self.arch.syscalls['read'], 0)[1], 'KILL_THREAD')
+
+    def test_compile_simulate(self):
+        """Ensure policy reflects script by testing some random scripts."""
+        iterations = 5
+        for i in range(iterations):
+            num_entries = 64 * (i + 1) // iterations
+            syscalls = dict(
+                zip(
+                    random.sample(list(self.arch.syscalls), num_entries),
+                    (random.randint(1, 1024) for _ in range(num_entries)),
+                ))
+
+            frequency_contents = '\n'.join(
+                '%s: %d' % s for s in syscalls.items())
+            policy_contents = '@frequency ./test.frequency\n' + '\n'.join(
+                '%s: 1' % s[0] for s in syscalls.items())
+
+            self._write_file('test.frequency', frequency_contents)
+            path = self._write_file('test.policy', policy_contents)
+
+            for strategy in list(compiler.OptimizationStrategy):
+                program = self.compiler.compile_file(
+                    path,
+                    optimization_strategy=strategy,
+                    kill_action=bpf.KillProcess())
+                for name, number in self.arch.syscalls.items():
+                    expected_result = ('ALLOW'
+                                       if name in syscalls else 'KILL_PROCESS')
+                    self.assertEqual(
+                        bpf.simulate(program.instructions, self.arch.arch_nr,
+                                     number, 0)[1], expected_result,
+                        ('syscall name: %s, syscall number: %d, '
+                         'strategy: %s, policy:\n%s') %
+                        (name, number, strategy, policy_contents))
+
+    @unittest.skipIf(not int(os.getenv('SLOW_TESTS', '0')), 'slow')
+    def test_compile_huge_policy(self):
+        """Ensure jumps while compiling a huge policy are still valid."""
+        # Given that the BST strategy is O(n^3), don't choose a crazy large
+        # value, but it still needs to be around 128 so that we exercise the
+        # codegen paths that depend on the length of the jump.
+        #
+        # Immediate jump offsets in BPF comparison instructions are limited to
+        # 256 instructions, so given that every syscall filter consists of a
+        # load and jump instructions, with 128 syscalls there will be at least
+        # one jump that's further than 256 instructions.
+        num_entries = 128
+        syscalls = dict(
+            random.sample(list(self.arch.syscalls.items()), num_entries))
+        # Here we force every single filter to be distinct. Otherwise the
+        # codegen layer will coalesce filters that compile to the same
+        # instructions.
+        policy_contents = '\n'.join(
+            '%s: arg0 == %d' % s for s in syscalls.items())
+
+        path = self._write_file('test.policy', policy_contents)
+
+        program = self.compiler.compile_file(
+            path,
+            optimization_strategy=compiler.OptimizationStrategy.BST,
+            kill_action=bpf.KillProcess())
+        for name, number in self.arch.syscalls.items():
+            expected_result = 'ALLOW' if name in syscalls else 'KILL_PROCESS'
+            self.assertEqual(
+                bpf.simulate(program.instructions, self.arch.arch_nr,
+                             self.arch.syscalls[name], number)[1],
+                expected_result)
+            self.assertEqual(
+                bpf.simulate(program.instructions, self.arch.arch_nr,
+                             self.arch.syscalls[name], number + 1)[1],
+                'KILL_PROCESS')
+
+    def test_compile_huge_filter(self):
+        """Ensure filters with many OR-ed clauses compile efficiently."""
+        # This is intended to force cases where the AST visitation would result
+        # in a combinatorial explosion of calls to Block.accept(). An optimized
+        # implementation should be O(n).
+        num_entries = 128
+        syscalls = {}
+        # Here we force every single filter to be distinct. Otherwise the
+        # codegen layer will coalesce filters that compile to the same
+        # instructions.
+        policy_contents = []
+        for name in random.sample(list(self.arch.syscalls), num_entries):
+            values = random.sample(range(1024), num_entries)
+            syscalls[name] = values
+            policy_contents.append(
+                '%s: %s' % (name, ' || '.join('arg0 == %d' % value
+                                              for value in values)))
+
+        path = self._write_file('test.policy', '\n'.join(policy_contents))
+
+        program = self.compiler.compile_file(
+            path,
+            optimization_strategy=compiler.OptimizationStrategy.LINEAR,
+            kill_action=bpf.KillProcess())
+        for name, values in syscalls.items():
+            self.assertEqual(
+                bpf.simulate(program.instructions,
+                             self.arch.arch_nr, self.arch.syscalls[name],
+                             random.choice(values))[1], 'ALLOW')
+            self.assertEqual(
+                bpf.simulate(program.instructions, self.arch.arch_nr,
+                             self.arch.syscalls[name], 1025)[1],
+                'KILL_PROCESS')
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/tools/parser.py b/tools/parser.py
index 41d2f52..f3c5331 100644
--- a/tools/parser.py
+++ b/tools/parser.py
@@ -27,13 +27,14 @@
 
 import bpf
 
-Token = collections.namedtuple('token',
-                               ['type', 'value', 'filename', 'line', 'column'])
+Token = collections.namedtuple(
+    'token', ['type', 'value', 'filename', 'line', 'line_number', 'column'])
 
 # A regex that can tokenize a Minijail policy file line.
 _TOKEN_SPECIFICATION = (
     ('COMMENT', r'#.*$'),
     ('WHITESPACE', r'\s+'),
+    ('CONTINUATION', r'\\$'),
     ('DEFAULT', r'@default'),
     ('INCLUDE', r'@include'),
     ('FREQUENCY', r'@frequency'),
@@ -57,7 +58,7 @@
     ('ARGUMENT', r'arg[0-9]+'),
     ('RETURN', r'return'),
     ('ACTION', r'allow|kill-process|kill-thread|kill|trap|trace|log'),
-    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9@]*'),
+    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
 )
 _TOKEN_RE = re.compile('|'.join(
     r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
@@ -67,8 +68,16 @@
     """An exception that is raised when parsing fails."""
 
     # pylint: disable=too-many-arguments
-    def __init__(self, message, filename, line, line_number=1, token=None):
+    def __init__(self,
+                 message,
+                 filename,
+                 *,
+                 line='',
+                 line_number=1,
+                 token=None):
         if token:
+            line = token.line
+            line_number = token.line_number
             column = token.column
             length = len(token.value)
         else:
@@ -105,42 +114,58 @@
         """Return the current line number being processed."""
         return self._line_number
 
-    def set_line(self, line):
-        """Update the current line being processed."""
-        self._line = line
-        self._line_number += 1
-
     def error(self, message, token=None):
         """Raise a ParserException with the provided message."""
-        raise ParseException(message, self.filename, self.line,
-                             self.line_number, token)
+        raise ParseException(
+            message,
+            self.filename,
+            line=self._line,
+            line_number=self._line_number,
+            token=token)
 
-    def tokenize(self):
+    def tokenize(self, lines):
         """Return a list of tokens for the current line."""
         tokens = []
 
-        last_end = 0
-        for token in _TOKEN_RE.finditer(self.line):
-            if token.start() != last_end:
+        for line_number, line in enumerate(lines):
+            self._line_number = line_number + 1
+            self._line = line.rstrip('\r\n')
+
+            last_end = 0
+            for token in _TOKEN_RE.finditer(self._line):
+                if token.start() != last_end:
+                    self.error(
+                        'invalid token',
+                        token=Token('INVALID',
+                                    self._line[last_end:token.start()],
+                                    self.filename, self._line,
+                                    self._line_number, last_end))
+                last_end = token.end()
+
+                # Omit whitespace, comments, and line continuations now to
+                # avoid sprinkling this logic elsewhere.
+                if token.lastgroup in ('WHITESPACE', 'COMMENT',
+                                       'CONTINUATION'):
+                    continue
+                tokens.append(
+                    Token(token.lastgroup, token.group(), self.filename,
+                          self._line, self._line_number, token.start()))
+            if last_end != len(self._line):
                 self.error(
                     'invalid token',
-                    token=Token('INVALID', self.line[last_end:token.start()],
-                                self.filename, self.line_number, last_end))
-            last_end = token.end()
+                    token=Token('INVALID', self._line[last_end:],
+                                self.filename, self._line, self._line_number,
+                                last_end))
 
-            # Omit whitespace and comments now to avoid sprinkling this logic
-            # elsewhere.
-            if token.lastgroup in ('WHITESPACE', 'COMMENT'):
+            if self._line.endswith('\\'):
+                # This line is not finished yet.
                 continue
-            tokens.append(
-                Token(token.lastgroup, token.group(), self.filename,
-                      self.line_number, token.start()))
-        if last_end != len(self.line):
-            self.error(
-                'invalid token',
-                token=Token('INVALID', self.line[last_end:], self.filename,
-                            self.line_number, last_end))
-        return tokens
+
+            if tokens:
+                # Return a copy of the token list so that the caller can be free
+                # to modify it.
+                yield tokens[::]
+            tokens.clear()
 
 
 Atom = collections.namedtuple('Atom', ['argument_index', 'op', 'value'])
@@ -182,11 +207,17 @@
 class PolicyParser:
     """A parser for the Minijail seccomp policy file format."""
 
-    def __init__(self, arch, *, kill_action, include_depth_limit=10):
+    def __init__(self,
+                 arch,
+                 *,
+                 kill_action,
+                 include_depth_limit=10,
+                 override_default_action=None):
         self._parser_states = [ParserState("<memory>")]
         self._kill_action = kill_action
         self._include_depth_limit = include_depth_limit
         self._default_action = self._kill_action
+        self._override_default_action = override_default_action
         self._frequency_mapping = collections.defaultdict(int)
         self._arch = arch
 
@@ -360,7 +391,7 @@
     #        | 'log'
     #        | 'return' , single-constant
     #        ;
-    def _parse_action(self, tokens):
+    def parse_action(self, tokens):
         if not tokens:
             self._parser_state.error('missing action')
         action_token = tokens.pop(0)
@@ -400,12 +431,12 @@
             argument_expression = self.parse_argument_expression(tokens)
             if tokens and tokens[0].type == 'SEMICOLON':
                 tokens.pop(0)
-                action = self._parse_action(tokens)
+                action = self.parse_action(tokens)
             else:
                 action = bpf.Allow()
             return Filter(argument_expression, action)
         else:
-            return Filter(None, self._parse_action(tokens))
+            return Filter(None, self.parse_action(tokens))
 
     # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
     #        | single-filter
@@ -481,7 +512,7 @@
         return metadata
 
     # syscall-descriptor = syscall-name , [ metadata ]
-    #                    | libc-function , [ metadata ]
+    #                    | syscall-group-name , [ metadata ]
     #                    ;
     def _parse_syscall_descriptor(self, tokens):
         if not tokens:
@@ -490,11 +521,28 @@
         if syscall_descriptor.type != 'IDENTIFIER':
             self._parser_state.error(
                 'invalid syscall descriptor', token=syscall_descriptor)
-        # TODO(lhchavez): Support libc function names.
         if tokens and tokens[0].type == 'LBRACKET':
             metadata = self._parse_metadata(tokens)
             if 'arch' in metadata and self._arch.arch_name not in metadata['arch']:
                 return ()
+        if '@' in syscall_descriptor.value:
+            # This is a syscall group.
+            subtokens = syscall_descriptor.value.split('@')
+            if len(subtokens) != 2:
+                self._parser_state.error(
+                    'invalid syscall group name', token=syscall_descriptor)
+            syscall_group_name, syscall_namespace_name = subtokens
+            if syscall_namespace_name not in self._arch.syscall_groups:
+                self._parser_state.error(
+                    'nonexistent syscall group namespace',
+                    token=syscall_descriptor)
+            syscall_namespace = self._arch.syscall_groups[
+                syscall_namespace_name]
+            if syscall_group_name not in syscall_namespace:
+                self._parser_state.error(
+                    'nonexistent syscall group', token=syscall_descriptor)
+            return (Syscall(name, self._arch.syscalls[name])
+                    for name in syscall_namespace[syscall_group_name])
         if syscall_descriptor.value not in self._arch.syscalls:
             self._parser_state.error(
                 'nonexistent syscall', token=syscall_descriptor)
@@ -563,13 +611,7 @@
         try:
             frequency_mapping = collections.defaultdict(int)
             with open(filename) as frequency_file:
-                for line in frequency_file:
-                    self._parser_state.set_line(line.rstrip())
-                    tokens = self._parser_state.tokenize()
-
-                    if not tokens:
-                        continue
-
+                for tokens in self._parser_state.tokenize(frequency_file):
                     syscall_numbers = self._parse_syscall_descriptor(tokens)
                     if not tokens:
                         self._parser_state.error('missing colon')
@@ -636,14 +678,7 @@
         try:
             statements = []
             with open(filename) as policy_file:
-                for line in policy_file:
-                    self._parser_state.set_line(line.rstrip())
-                    tokens = self._parser_state.tokenize()
-
-                    if not tokens:
-                        # Allow empty lines.
-                        continue
-
+                for tokens in self._parser_state.tokenize(policy_file):
                     if tokens[0].type == 'INCLUDE':
                         statements.extend(
                             self._parse_include_statement(tokens))
@@ -676,8 +711,10 @@
         try:
             statements = [x for x in self._parse_policy_file(filename)]
         except RecursionError:
-            raise ParseException('recursion limit exceeded', filename,
-                                 self._parser_states[-1].line)
+            raise ParseException(
+                'recursion limit exceeded',
+                filename,
+                line=self._parser_states[-1].line)
 
         # Collapse statements into a single syscall-to-filter-list.
         syscall_filter_mapping = {}
@@ -691,6 +728,7 @@
                             []))
                     syscall_filter_mapping[syscall] = filter_statements[-1]
                 syscall_filter_mapping[syscall].filters.extend(filters)
+        default_action = self._override_default_action or self._default_action
         for filter_statement in filter_statements:
             unconditional_actions_suffix = list(
                 itertools.dropwhile(lambda filt: filt.expression is not None,
@@ -700,12 +738,14 @@
                 # to add another one.
                 continue
             if len(unconditional_actions_suffix) > 1:
-                raise ParseException(('Syscall %s (number %d) already had '
-                                      'an unconditional action applied') %
-                                     (filter_statement.syscall.name,
-                                      filter_statement.syscall.number),
-                                     filename, self._parser_states[-1].line)
+                raise ParseException(
+                    ('Syscall %s (number %d) already had '
+                     'an unconditional action applied') %
+                    (filter_statement.syscall.name,
+                     filter_statement.syscall.number),
+                    filename,
+                    line=self._parser_states[-1].line)
             assert not unconditional_actions_suffix
             filter_statement.filters.append(
-                Filter(expression=None, action=self._default_action))
-        return ParsedPolicy(self._default_action, filter_statements)
+                Filter(expression=None, action=default_action))
+        return ParsedPolicy(default_action, filter_statements)
diff --git a/tools/parser_unittest.py b/tools/parser_unittest.py
index 4fba590..e9f0ce2 100755
--- a/tools/parser_unittest.py
+++ b/tools/parser_unittest.py
@@ -40,8 +40,7 @@
     @staticmethod
     def _tokenize(line):
         parser_state = parser.ParserState('<memory>')
-        parser_state.set_line(line)
-        return parser_state.tokenize()
+        return list(parser_state.tokenize([line]))[0]
 
     def test_tokenize(self):
         """Accept valid tokens."""
@@ -105,8 +104,7 @@
 
     def _tokenize(self, line):
         # pylint: disable=protected-access
-        self.parser._parser_state.set_line(line)
-        return self.parser._parser_state.tokenize()
+        return list(self.parser._parser_state.tokenize([line]))[0]
 
     def test_parse_constant_unsigned(self):
         """Accept reasonably-sized unsigned constants."""
@@ -232,10 +230,15 @@
     def test_parse_empty_constant(self):
         """Reject parsing nothing."""
         with self.assertRaisesRegex(parser.ParseException, 'empty constant'):
-            self.parser.parse_value(self._tokenize(''))
+            self.parser.parse_value([])
         with self.assertRaisesRegex(parser.ParseException, 'empty constant'):
             self.parser.parse_value(self._tokenize('0|'))
 
+    def test_parse_invalid_constant(self):
+        """Reject parsing invalid constants."""
+        with self.assertRaisesRegex(parser.ParseException, 'invalid constant'):
+            self.parser.parse_value(self._tokenize('foo'))
+
 
 class ParseFilterExpressionTests(unittest.TestCase):
     """Tests for PolicyParser.parse_argument_expression."""
@@ -247,8 +250,7 @@
 
     def _tokenize(self, line):
         # pylint: disable=protected-access
-        self.parser._parser_state.set_line(line)
-        return self.parser._parser_state.tokenize()
+        return list(self.parser._parser_state.tokenize([line]))[0]
 
     def test_parse_argument_expression(self):
         """Accept valid argument expressions."""
@@ -287,6 +289,16 @@
             self.parser.parse_argument_expression(
                 self._tokenize('arg0 = 0xffff'))
 
+    def test_parse_missing_operator(self):
+        """Reject missing operator."""
+        with self.assertRaisesRegex(parser.ParseException, 'missing operator'):
+            self.parser.parse_argument_expression(self._tokenize('arg0'))
+
+    def test_parse_missing_operand(self):
+        """Reject missing operand."""
+        with self.assertRaisesRegex(parser.ParseException, 'empty constant'):
+            self.parser.parse_argument_expression(self._tokenize('arg0 =='))
+
 
 class ParseFilterTests(unittest.TestCase):
     """Tests for PolicyParser.parse_filter."""
@@ -298,8 +310,7 @@
 
     def _tokenize(self, line):
         # pylint: disable=protected-access
-        self.parser._parser_state.set_line(line)
-        return self.parser._parser_state.tokenize()
+        return list(self.parser._parser_state.tokenize([line]))[0]
 
     def test_parse_filter(self):
         """Accept valid filters."""
@@ -378,8 +389,7 @@
 
     def _tokenize(self, line):
         # pylint: disable=protected-access
-        self.parser._parser_state.set_line(line)
-        return self.parser._parser_state.tokenize()
+        return list(self.parser._parser_state.tokenize([line]))[0]
 
     def test_parse_filter_statement(self):
         """Accept valid filter statements."""
@@ -398,6 +408,24 @@
             ), [
                 parser.Filter([[parser.Atom(0, '==', 0)]], bpf.Allow()),
             ]))
+        self.assertEqual(
+            self.parser.parse_filter_statement(
+                self._tokenize('io@libc: arg0 == 0')),
+            parser.ParsedFilterStatement((
+                parser.Syscall('read', 0),
+                parser.Syscall('write', 1),
+            ), [
+                parser.Filter([[parser.Atom(0, '==', 0)]], bpf.Allow()),
+            ]))
+        self.assertEqual(
+            self.parser.parse_filter_statement(
+                self._tokenize('file-io@systemd: arg0 == 0')),
+            parser.ParsedFilterStatement((
+                parser.Syscall('read', 0),
+                parser.Syscall('write', 1),
+            ), [
+                parser.Filter([[parser.Atom(0, '==', 0)]], bpf.Allow()),
+            ]))
 
     def test_parse_metadata(self):
         """Accept valid filter statements with metadata."""
@@ -418,6 +446,11 @@
     def test_parse_unclosed_brace(self):
         """Reject unclosed brace."""
         with self.assertRaisesRegex(parser.ParseException, 'unclosed brace'):
+            self.parser.parse_filter(self._tokenize('{ allow'))
+
+    def test_parse_invalid_syscall_group(self):
+        """Reject invalid syscall groups."""
+        with self.assertRaisesRegex(parser.ParseException, 'unclosed brace'):
             self.parser.parse_filter_statement(
                 self._tokenize('{ read, write: arg0 == 0'))
 
@@ -484,6 +517,35 @@
                         ]),
                 ]))
 
+    def test_parse_multiline(self):
+        """Allow simple multi-line policy files."""
+        path = self._write_file(
+            'test.policy', """
+            # Comment.
+            read: \\
+                allow
+            write: allow
+        """)
+
+        self.assertEqual(
+            self.parser.parse_file(path),
+            parser.ParsedPolicy(
+                default_action=bpf.KillProcess(),
+                filter_statements=[
+                    parser.FilterStatement(
+                        syscall=parser.Syscall('read', 0),
+                        frequency=1,
+                        filters=[
+                            parser.Filter(None, bpf.Allow()),
+                        ]),
+                    parser.FilterStatement(
+                        syscall=parser.Syscall('write', 1),
+                        frequency=1,
+                        filters=[
+                            parser.Filter(None, bpf.Allow()),
+                        ]),
+                ]))
+
     def test_parse_default(self):
         """Allow defining a default action."""
         path = self._write_file(
@@ -544,6 +606,28 @@
                         ]),
                 ]))
 
+    def test_parse_other_arch(self):
+        """Allow entries that only target another architecture."""
+        path = self._write_file(
+            'test.policy', """
+            # Comment.
+            read[arch=nonexistent]: allow
+            write: allow
+        """)
+
+        self.assertEqual(
+            self.parser.parse_file(path),
+            parser.ParsedPolicy(
+                default_action=bpf.KillProcess(),
+                filter_statements=[
+                    parser.FilterStatement(
+                        syscall=parser.Syscall('write', 1),
+                        frequency=1,
+                        filters=[
+                            parser.Filter(None, bpf.Allow()),
+                        ]),
+                ]))
+
     def test_parse_include(self):
         """Allow including policy files."""
         path = self._write_file(
@@ -582,6 +666,40 @@
                         ]),
                 ]))
 
+    def test_parse_invalid_include(self):
+        """Reject including invalid policy files."""
+        with self.assertRaisesRegex(parser.ParseException,
+                                    r'empty include path'):
+            path = self._write_file(
+                'test.policy', """
+                @include
+            """)
+            self.parser.parse_file(path)
+
+        with self.assertRaisesRegex(parser.ParseException,
+                                    r'invalid include path'):
+            path = self._write_file(
+                'test.policy', """
+                @include arg0
+            """)
+            self.parser.parse_file(path)
+
+        with self.assertRaisesRegex(parser.ParseException,
+                                    r'@include statement nested too deep'):
+            path = self._write_file(
+                'test.policy', """
+                @include ./test.policy
+            """)
+            self.parser.parse_file(path)
+
+        with self.assertRaisesRegex(parser.ParseException,
+                                    r'Could not @include .*'):
+            path = self._write_file(
+                'test.policy', """
+                @include ./nonexistent.policy
+            """)
+            self.parser.parse_file(path)
+
     def test_parse_frequency(self):
         """Allow including frequency files."""
         self._write_file(
@@ -608,6 +726,65 @@
                         ]),
                 ]))
 
+    def test_parse_invalid_frequency(self):
+        """Reject including invalid frequency files."""
+        path = self._write_file('test.policy',
+                                """@frequency ./test.frequency""")
+
+        with self.assertRaisesRegex(parser.ParseException, r'missing colon'):
+            self._write_file('test.frequency', """
+                read
+            """)
+            self.parser.parse_file(path)
+
+        with self.assertRaisesRegex(parser.ParseException, r'invalid colon'):
+            self._write_file('test.frequency', """
+                read foo
+            """)
+            self.parser.parse_file(path)
+
+        with self.assertRaisesRegex(parser.ParseException, r'missing number'):
+            self._write_file('test.frequency', """
+                read:
+            """)
+            self.parser.parse_file(path)
+
+        with self.assertRaisesRegex(parser.ParseException, r'invalid number'):
+            self._write_file('test.frequency', """
+                read: foo
+            """)
+            self.parser.parse_file(path)
+
+        with self.assertRaisesRegex(parser.ParseException, r'invalid number'):
+            self._write_file('test.frequency', """
+                read: -1
+            """)
+            self.parser.parse_file(path)
+
+        with self.assertRaisesRegex(parser.ParseException,
+                                    r'empty frequency path'):
+            path = self._write_file(
+                'test.policy', """
+                @frequency
+            """)
+            self.parser.parse_file(path)
+
+        with self.assertRaisesRegex(parser.ParseException,
+                                    r'invalid frequency path'):
+            path = self._write_file(
+                'test.policy', """
+                @frequency arg0
+            """)
+            self.parser.parse_file(path)
+
+        with self.assertRaisesRegex(parser.ParseException,
+                                    r'Could not open frequency file.*'):
+            path = self._write_file(
+                'test.policy', """
+                @frequency ./nonexistent.frequency
+            """)
+            self.parser.parse_file(path)
+
     def test_parse_multiple_unconditional(self):
         """Reject actions after an unconditional action."""
         path = self._write_file(
diff --git a/tools/testdata/arch_64.json b/tools/testdata/arch_64.json
index bd3e2f4..1286ee4 100644
--- a/tools/testdata/arch_64.json
+++ b/tools/testdata/arch_64.json
@@ -6,12 +6,278 @@
     "read": 0,
     "write": 1,
     "open": 2,
-    "close": 3
+    "close": 3,
+    "syscall_4": 4,
+    "syscall_5": 5,
+    "syscall_6": 6,
+    "syscall_7": 7,
+    "syscall_8": 8,
+    "syscall_9": 9,
+    "syscall_10": 10,
+    "syscall_11": 11,
+    "syscall_12": 12,
+    "syscall_13": 13,
+    "syscall_14": 14,
+    "syscall_15": 15,
+    "syscall_16": 16,
+    "syscall_17": 17,
+    "syscall_18": 18,
+    "syscall_19": 19,
+    "syscall_20": 20,
+    "syscall_21": 21,
+    "syscall_22": 22,
+    "syscall_23": 23,
+    "syscall_24": 24,
+    "syscall_25": 25,
+    "syscall_26": 26,
+    "syscall_27": 27,
+    "syscall_28": 28,
+    "syscall_29": 29,
+    "syscall_30": 30,
+    "syscall_31": 31,
+    "syscall_32": 32,
+    "syscall_33": 33,
+    "syscall_34": 34,
+    "syscall_35": 35,
+    "syscall_36": 36,
+    "syscall_37": 37,
+    "syscall_38": 38,
+    "syscall_39": 39,
+    "syscall_40": 40,
+    "syscall_41": 41,
+    "syscall_42": 42,
+    "syscall_43": 43,
+    "syscall_44": 44,
+    "syscall_45": 45,
+    "syscall_46": 46,
+    "syscall_47": 47,
+    "syscall_48": 48,
+    "syscall_49": 49,
+    "syscall_50": 50,
+    "syscall_51": 51,
+    "syscall_52": 52,
+    "syscall_53": 53,
+    "syscall_54": 54,
+    "syscall_55": 55,
+    "syscall_56": 56,
+    "syscall_57": 57,
+    "syscall_58": 58,
+    "syscall_59": 59,
+    "syscall_60": 60,
+    "syscall_61": 61,
+    "syscall_62": 62,
+    "syscall_63": 63,
+    "syscall_64": 64,
+    "syscall_65": 65,
+    "syscall_66": 66,
+    "syscall_67": 67,
+    "syscall_68": 68,
+    "syscall_69": 69,
+    "syscall_70": 70,
+    "syscall_71": 71,
+    "syscall_72": 72,
+    "syscall_73": 73,
+    "syscall_74": 74,
+    "syscall_75": 75,
+    "syscall_76": 76,
+    "syscall_77": 77,
+    "syscall_78": 78,
+    "syscall_79": 79,
+    "syscall_80": 80,
+    "syscall_81": 81,
+    "syscall_82": 82,
+    "syscall_83": 83,
+    "syscall_84": 84,
+    "syscall_85": 85,
+    "syscall_86": 86,
+    "syscall_87": 87,
+    "syscall_88": 88,
+    "syscall_89": 89,
+    "syscall_90": 90,
+    "syscall_91": 91,
+    "syscall_92": 92,
+    "syscall_93": 93,
+    "syscall_94": 94,
+    "syscall_95": 95,
+    "syscall_96": 96,
+    "syscall_97": 97,
+    "syscall_98": 98,
+    "syscall_99": 99,
+    "syscall_100": 100,
+    "syscall_101": 101,
+    "syscall_102": 102,
+    "syscall_103": 103,
+    "syscall_104": 104,
+    "syscall_105": 105,
+    "syscall_106": 106,
+    "syscall_107": 107,
+    "syscall_108": 108,
+    "syscall_109": 109,
+    "syscall_110": 110,
+    "syscall_111": 111,
+    "syscall_112": 112,
+    "syscall_113": 113,
+    "syscall_114": 114,
+    "syscall_115": 115,
+    "syscall_116": 116,
+    "syscall_117": 117,
+    "syscall_118": 118,
+    "syscall_119": 119,
+    "syscall_120": 120,
+    "syscall_121": 121,
+    "syscall_122": 122,
+    "syscall_123": 123,
+    "syscall_124": 124,
+    "syscall_125": 125,
+    "syscall_126": 126,
+    "syscall_127": 127,
+    "syscall_128": 128,
+    "syscall_129": 129,
+    "syscall_130": 130,
+    "syscall_131": 131,
+    "syscall_132": 132,
+    "syscall_133": 133,
+    "syscall_134": 134,
+    "syscall_135": 135,
+    "syscall_136": 136,
+    "syscall_137": 137,
+    "syscall_138": 138,
+    "syscall_139": 139,
+    "syscall_140": 140,
+    "syscall_141": 141,
+    "syscall_142": 142,
+    "syscall_143": 143,
+    "syscall_144": 144,
+    "syscall_145": 145,
+    "syscall_146": 146,
+    "syscall_147": 147,
+    "syscall_148": 148,
+    "syscall_149": 149,
+    "syscall_150": 150,
+    "syscall_151": 151,
+    "syscall_152": 152,
+    "syscall_153": 153,
+    "syscall_154": 154,
+    "syscall_155": 155,
+    "syscall_156": 156,
+    "syscall_157": 157,
+    "syscall_158": 158,
+    "syscall_159": 159,
+    "syscall_160": 160,
+    "syscall_161": 161,
+    "syscall_162": 162,
+    "syscall_163": 163,
+    "syscall_164": 164,
+    "syscall_165": 165,
+    "syscall_166": 166,
+    "syscall_167": 167,
+    "syscall_168": 168,
+    "syscall_169": 169,
+    "syscall_170": 170,
+    "syscall_171": 171,
+    "syscall_172": 172,
+    "syscall_173": 173,
+    "syscall_174": 174,
+    "syscall_175": 175,
+    "syscall_176": 176,
+    "syscall_177": 177,
+    "syscall_178": 178,
+    "syscall_179": 179,
+    "syscall_180": 180,
+    "syscall_181": 181,
+    "syscall_182": 182,
+    "syscall_183": 183,
+    "syscall_184": 184,
+    "syscall_185": 185,
+    "syscall_186": 186,
+    "syscall_187": 187,
+    "syscall_188": 188,
+    "syscall_189": 189,
+    "syscall_190": 190,
+    "syscall_191": 191,
+    "syscall_192": 192,
+    "syscall_193": 193,
+    "syscall_194": 194,
+    "syscall_195": 195,
+    "syscall_196": 196,
+    "syscall_197": 197,
+    "syscall_198": 198,
+    "syscall_199": 199,
+    "syscall_200": 200,
+    "syscall_201": 201,
+    "syscall_202": 202,
+    "syscall_203": 203,
+    "syscall_204": 204,
+    "syscall_205": 205,
+    "syscall_206": 206,
+    "syscall_207": 207,
+    "syscall_208": 208,
+    "syscall_209": 209,
+    "syscall_210": 210,
+    "syscall_211": 211,
+    "syscall_212": 212,
+    "syscall_213": 213,
+    "syscall_214": 214,
+    "syscall_215": 215,
+    "syscall_216": 216,
+    "syscall_217": 217,
+    "syscall_218": 218,
+    "syscall_219": 219,
+    "syscall_220": 220,
+    "syscall_221": 221,
+    "syscall_222": 222,
+    "syscall_223": 223,
+    "syscall_224": 224,
+    "syscall_225": 225,
+    "syscall_226": 226,
+    "syscall_227": 227,
+    "syscall_228": 228,
+    "syscall_229": 229,
+    "syscall_230": 230,
+    "syscall_231": 231,
+    "syscall_232": 232,
+    "syscall_233": 233,
+    "syscall_234": 234,
+    "syscall_235": 235,
+    "syscall_236": 236,
+    "syscall_237": 237,
+    "syscall_238": 238,
+    "syscall_239": 239,
+    "syscall_240": 240,
+    "syscall_241": 241,
+    "syscall_242": 242,
+    "syscall_243": 243,
+    "syscall_244": 244,
+    "syscall_245": 245,
+    "syscall_246": 246,
+    "syscall_247": 247,
+    "syscall_248": 248,
+    "syscall_249": 249,
+    "syscall_250": 250,
+    "syscall_251": 251,
+    "syscall_252": 252,
+    "syscall_253": 253,
+    "syscall_254": 254,
+    "syscall_255": 255
   },
   "constants": {
     "ENOSYS": 38,
     "O_RDONLY": 0,
     "PROT_WRITE": 2,
     "PROT_EXEC": 4
+  },
+  "syscall_groups": {
+    "libc": {
+      "io": [
+        "read",
+        "write"
+      ]
+    },
+    "systemd": {
+      "file-io": [
+        "read",
+        "write"
+      ]
+    }
   }
 }