syscall_filter: Add a small operand optimization

Since all <, <=, >, >= operands are unsigned, when the immediate fits in
32 bits (which should be the vast majority of the time), we can omit one
of the comparisons that would normally occur. So,

 arg1 >= K

would be roughly translated to

 if (hi(arg1) > hi(K)) jump NEXT;
 if (hi(arg1) == hi(K) && lo(arg1) >= lo(K)) jump NEXT;
 jump KILL;

If the first check (|hi(arg1) > hi(K)|) fails, we then evaluate the
whole second expression. When |hi(K) == 0|, the only value of |hi(arg1)|
for which the first check can fail is |hi(arg1) == 0|, so we don't need
to evaluate |hi(arg1) == hi(K)| at all: we already know it is true. In
other words,

 // given that |hi(K) == 0|,
 if (hi(arg1) > 0) jump NEXT;
 // if the code gets here, |hi(arg1) == 0|.
 if (lo(arg1) >= lo(K)) jump NEXT;
 jump KILL;
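
At the instruction level (pseudo-assembly, not exact disassembler
output; "fall" means fall through to the next instruction), the >=
comparison shrinks from four opcodes to three:

 ; general case, 4 instructions:
 jgt #hi(K), jt, fall     ; hi(arg1) >  hi(K) -> match
 jeq #hi(K), fall, jf     ; hi(arg1) != hi(K) -> no match
 ld  M[0]                 ; swap in |lo(arg1)|
 jge #lo(K), jt, jf

 ; |hi(K) == 0| case, 3 instructions:
 jgt #0, jt, fall         ; hi(arg1) > 0 -> match
 ld  M[0]                 ; swap in |lo(arg1)|
 jge #lo(K), jt, jf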

The case for > is identical, and </<= are lowered to >=/> with the jump
targets swapped, since cBPF only supports the two greater-than
comparisons, which concludes the proof of correctness for this
optimization.
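
As a sketch, the operator lowering in bpf_arg_comp looks roughly like
this (|JUMP(label_id)| stands for the match target and |NEXT| for
fall-through; treat the exact shape as illustrative):

 switch (op) {
 case GT: /* a > b */
 	curr_block += bpf_comp_jgt(curr_block, c, JUMP(label_id), NEXT);
 	break;
 case GE: /* a >= b */
 	curr_block += bpf_comp_jge(curr_block, c, JUMP(label_id), NEXT);
 	break;
 case LT: /* a < b == !(a >= b): same jge, jump targets swapped. */
 	curr_block += bpf_comp_jge(curr_block, c, NEXT, JUMP(label_id));
 	break;
 case LE: /* a <= b == !(a > b): same jgt, jump targets swapped. */
 	curr_block += bpf_comp_jgt(curr_block, c, NEXT, JUMP(label_id));
 	break;
 }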

This saves one opcode per comparison: on 64 bits, a gt/ge comparison
goes from 8 instructions down to 7 (see BPF_ARG_SHORT_GT_GE_COMP_LEN
below).

Bug: 111726641
Test: make tests
Test: echo 'read: arg1 <= 0xbadc0ffee0ddf00d' | \
      ./parse_seccomp_policy --dump - | \
      ./libseccomp/tools/scmp_bpf_disasm
Test: echo 'read: arg1 <= 0xff' | ./parse_seccomp_policy --dump - | \
      ./libseccomp/tools/scmp_bpf_disasm

Change-Id: Ia00362ce92ff5e858c7366dab013e2db88c09818
diff --git a/bpf.c b/bpf.c
index f4b980b..3697f1c 100644
--- a/bpf.c
+++ b/bpf.c
@@ -121,8 +121,15 @@
 	struct sock_filter *curr_block = filter;
 
 	/* bpf_load_arg leaves |hi| in A. */
-	curr_block += bpf_comp_jgt32(curr_block, hi, SKIPN(3) + jt, NEXT);
-	curr_block += bpf_comp_jeq32(curr_block, hi, NEXT, SKIPN(2) + jf);
+	if (hi == 0) {
+		curr_block +=
+		    bpf_comp_jgt32(curr_block, hi, SKIPN(2) + jt, NEXT);
+	} else {
+		curr_block +=
+		    bpf_comp_jgt32(curr_block, hi, SKIPN(3) + jt, NEXT);
+		curr_block +=
+		    bpf_comp_jeq32(curr_block, hi, NEXT, SKIPN(2) + jf);
+	}
 	set_bpf_stmt(curr_block++, BPF_LD + BPF_MEM, 0); /* swap in |lo| */
 	curr_block += bpf_comp_jgt32(curr_block, lo, jt, jf);
 
@@ -138,8 +145,15 @@
 	struct sock_filter *curr_block = filter;
 
 	/* bpf_load_arg leaves |hi| in A. */
-	curr_block += bpf_comp_jgt32(curr_block, hi, SKIPN(3) + jt, NEXT);
-	curr_block += bpf_comp_jeq32(curr_block, hi, NEXT, SKIPN(2) + jf);
+	if (hi == 0) {
+		curr_block +=
+		    bpf_comp_jgt32(curr_block, hi, SKIPN(2) + jt, NEXT);
+	} else {
+		curr_block +=
+		    bpf_comp_jgt32(curr_block, hi, SKIPN(3) + jt, NEXT);
+		curr_block +=
+		    bpf_comp_jeq32(curr_block, hi, NEXT, SKIPN(2) + jf);
+	}
 	set_bpf_stmt(curr_block++, BPF_LD + BPF_MEM, 0); /* swap in |lo| */
 	curr_block += bpf_comp_jge32(curr_block, lo, jt, jf);
 
@@ -190,7 +204,7 @@
 	return bpf_comp_jset(filter, negative_mask, jf, jt);
 }
 
-static size_t bpf_arg_comp_len(int op)
+static size_t bpf_arg_comp_len(int op, unsigned long c attribute_unused)
 {
 	/* The comparisons that use gt/ge internally may have extra opcodes. */
 	switch (op) {
@@ -198,6 +212,13 @@
 	case LE:
 	case GT:
 	case GE:
+#if defined(BITS64)
+		/*
+		 * |c| can only have a high 32-bit part when running on 64 bits.
+		 */
+		if ((c >> 32) == 0)
+			return BPF_ARG_SHORT_GT_GE_COMP_LEN + 1;
+#endif
 		return BPF_ARG_GT_GE_COMP_LEN + 1;
 	default:
 		return BPF_ARG_COMP_LEN + 1;
@@ -207,7 +228,7 @@
 size_t bpf_arg_comp(struct sock_filter **pfilter, int op, int argidx,
 		    unsigned long c, unsigned int label_id)
 {
-	size_t filter_len = bpf_arg_comp_len(op);
+	size_t filter_len = bpf_arg_comp_len(op, c);
 	struct sock_filter *filter =
 	    calloc(filter_len, sizeof(struct sock_filter));
 	struct sock_filter *curr_block = filter;
diff --git a/bpf.h b/bpf.h
index 9c78cb3..db66223 100644
--- a/bpf.h
+++ b/bpf.h
@@ -68,10 +68,12 @@
  * On 32 bits, comparisons take 2 instructions: 1 for loading the argument,
  * 1 for the actual comparison.
  */
-#define BPF_LOAD_ARG_LEN	1U
-#define BPF_COMP_LEN		1U
-#define BPF_GT_GE_COMP_LEN	1U
+#define BPF_LOAD_ARG_LEN		1U
+#define BPF_COMP_LEN			1U
+#define BPF_SHORT_GT_GE_COMP_LEN	1U
+#define BPF_GT_GE_COMP_LEN		1U
 #define BPF_ARG_COMP_LEN (BPF_LOAD_ARG_LEN + BPF_COMP_LEN)
+#define BPF_ARG_SHORT_GT_GE_COMP_LEN (BPF_LOAD_ARG_LEN + BPF_SHORT_GT_GE_COMP_LEN)
 #define BPF_ARG_GT_GE_COMP_LEN (BPF_LOAD_ARG_LEN + BPF_GT_GE_COMP_LEN)
 
 #define bpf_comp_jeq bpf_comp_jeq32
@@ -86,10 +88,12 @@
  * On 64 bits, comparisons take 7-8 instructions: 4 for loading the argument,
  * and 3-4 for the actual comparison.
  */
-#define BPF_LOAD_ARG_LEN	4U
-#define BPF_COMP_LEN		3U
-#define BPF_GT_GE_COMP_LEN	4U
+#define BPF_LOAD_ARG_LEN		4U
+#define BPF_COMP_LEN			3U
+#define BPF_SHORT_GT_GE_COMP_LEN	3U
+#define BPF_GT_GE_COMP_LEN		4U
 #define BPF_ARG_COMP_LEN (BPF_LOAD_ARG_LEN + BPF_COMP_LEN)
+#define BPF_ARG_SHORT_GT_GE_COMP_LEN (BPF_LOAD_ARG_LEN + BPF_SHORT_GT_GE_COMP_LEN)
 #define BPF_ARG_GT_GE_COMP_LEN (BPF_LOAD_ARG_LEN + BPF_GT_GE_COMP_LEN)
 
 #define bpf_comp_jeq bpf_comp_jeq64
diff --git a/syscall_filter_unittest.cc b/syscall_filter_unittest.cc
index 36f7f1f..dc0b8be 100644
--- a/syscall_filter_unittest.cc
+++ b/syscall_filter_unittest.cc
@@ -410,13 +410,56 @@
   free_block_list(block);
 }
 
-TEST_F(ArgFilterTest, arg0_gt_ge_comparisons) {
+TEST_F(ArgFilterTest, arg0_short_gt_ge_comparisons) {
   for (const char* fragment :
        {"arg1 < 0xff", "arg1 <= 0xff", "arg1 > 0xff", "arg1 >= 0xff"}) {
     struct filter_block* block =
         compile_policy_line(&state_, nr_, fragment, id_, &labels_, NO_LOGGING);
 
     ASSERT_NE(block, nullptr);
+    size_t exp_total_len = 1 + (BPF_ARG_SHORT_GT_GE_COMP_LEN + 1) + 2 + 1 + 2;
+    EXPECT_EQ(block->total_len, exp_total_len);
+
+    // First block is a label.
+    struct filter_block* curr_block = block;
+    ASSERT_NE(curr_block, nullptr);
+    EXPECT_EQ(curr_block->len, 1U);
+    EXPECT_LBL(curr_block->instrs);
+
+    // Second block is a short gt/ge comparison.
+    curr_block = curr_block->next;
+    EXPECT_SHORT_GT_GE_COMP(curr_block);
+
+    // Third block is a jump and a label (end of AND group).
+    curr_block = curr_block->next;
+    ASSERT_NE(curr_block, nullptr);
+    EXPECT_GROUP_END(curr_block);
+
+    // Fourth block is SECCOMP_RET_KILL.
+    curr_block = curr_block->next;
+    ASSERT_NE(curr_block, nullptr);
+    EXPECT_KILL(curr_block);
+
+    // Fifth block is "SUCCESS" label and SECCOMP_RET_ALLOW.
+    curr_block = curr_block->next;
+    ASSERT_NE(curr_block, nullptr);
+    EXPECT_ALLOW(curr_block);
+
+    EXPECT_EQ(curr_block->next, nullptr);
+
+    free_block_list(block);
+  }
+}
+
+#if defined(BITS64)
+TEST_F(ArgFilterTest, arg0_long_gt_ge_comparisons) {
+  for (const char* fragment :
+       {"arg1 < 0xbadc0ffee0ddf00d", "arg1 <= 0xbadc0ffee0ddf00d",
+        "arg1 > 0xbadc0ffee0ddf00d", "arg1 >= 0xbadc0ffee0ddf00d"}) {
+    struct filter_block* block =
+        compile_policy_line(&state_, nr_, fragment, id_, &labels_, NO_LOGGING);
+
+    ASSERT_NE(block, nullptr);
     size_t exp_total_len = 1 + (BPF_ARG_GT_GE_COMP_LEN + 1) + 2 + 1 + 2;
     EXPECT_EQ(block->total_len, exp_total_len);
 
@@ -450,6 +493,7 @@
     free_block_list(block);
   }
 }
+#endif
 
 TEST_F(ArgFilterTest, arg0_mask) {
   const char *fragment = "arg1 & O_RDWR";
diff --git a/syscall_filter_unittest_macros.h b/syscall_filter_unittest_macros.h
index 8c3d234..85cedea 100644
--- a/syscall_filter_unittest_macros.h
+++ b/syscall_filter_unittest_macros.h
@@ -33,6 +33,12 @@
 	EXPECT_EQ((_block)->instrs->code, BPF_LD+BPF_W+BPF_ABS);	\
 } while (0)
 
+#define EXPECT_SHORT_GT_GE_COMP(_block) \
+do {	\
+	EXPECT_EQ((_block)->len, BPF_ARG_SHORT_GT_GE_COMP_LEN + 1);	\
+	EXPECT_EQ((_block)->instrs->code, BPF_LD+BPF_W+BPF_ABS);	\
+} while (0)
+
 #define EXPECT_LBL(_block) \
 do {	\
 	EXPECT_TRUE((_block)->code == (BPF_JMP+BPF_JA));	\