Snap for 5735642 from 52f6adabb6f44761cc23a226cc396bd5c664641c to sdk-release
Change-Id: Id6fd2c3ae12f9877e043e4559e92fee6f77f734b
diff --git a/Makefile b/Makefile
index b2c5569..4bfa820 100644
--- a/Makefile
+++ b/Makefile
@@ -46,8 +46,10 @@
GTEST_CXXFLAGS := -std=gnu++14
GTEST_LIBS := gtest.a
else
-GTEST_CXXFLAGS := $(shell gtest-config --cxxflags)
-GTEST_LIBS := $(shell gtest-config --libs)
+GTEST_CXXFLAGS := $(shell gtest-config --cxxflags 2>/dev/null || \
+ echo "-pthread")
+GTEST_LIBS := $(shell gtest-config --libs 2>/dev/null || \
+ echo "-lgtest -pthread -lpthread")
endif
CORE_OBJECT_FILES := libminijail.o syscall_filter.o signal_handler.o \
diff --git a/bpf.h b/bpf.h
index db66223..9404c94 100644
--- a/bpf.h
+++ b/bpf.h
@@ -48,6 +48,7 @@
#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */
#define SECCOMP_RET_TRAP 0x00030000U /* return SIGSYS */
#define SECCOMP_RET_ERRNO 0x00050000U /* return -1 and set errno */
+#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */
#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
#define SECCOMP_RET_DATA 0x0000ffffU /* mask for return value */
@@ -172,6 +173,9 @@
set_bpf_stmt((_block), BPF_RET+BPF_K, \
SECCOMP_RET_ERRNO | ((_errno) & SECCOMP_RET_DATA))
+#define set_bpf_ret_log(_block) \
+ set_bpf_stmt((_block), BPF_RET+BPF_K, SECCOMP_RET_LOG)
+
#define set_bpf_ret_allow(_block) \
set_bpf_stmt((_block), BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
diff --git a/examples/cat.policy b/examples/cat.policy
index dea3dd3..25b4c29 100644
--- a/examples/cat.policy
+++ b/examples/cat.policy
@@ -5,10 +5,12 @@
read: 1
write: 1
+restart_syscall: 1
rt_sigreturn: 1
exit_group: 1
open: 1
+openat: 1
close: 1
fstat: 1
# Enforce W^X.
diff --git a/libminijail.c b/libminijail.c
index a5e4073..43de3c4 100644
--- a/libminijail.c
+++ b/libminijail.c
@@ -382,6 +382,17 @@
die("minijail_set_seccomp_filter_tsync() must be called "
"before minijail_parse_seccomp_filters()");
}
+
+ if (j->flags.seccomp_filter_logging && !seccomp_ret_log_available()) {
+ /*
+ * If SECCOMP_RET_LOG is not available, we don't want to use
+ * SECCOMP_RET_TRAP to both kill the entire process and report
+ * failing syscalls, since it will be brittle. Just bail.
+ */
+ die("SECCOMP_RET_LOG not available, cannot use logging with "
+ "thread sync at the same time");
+ }
+
j->flags.seccomp_filter_tsync = 1;
}
@@ -391,11 +402,23 @@
die("minijail_log_seccomp_filter_failures() must be called "
"before minijail_parse_seccomp_filters()");
}
-#ifdef ALLOW_DEBUG_LOGGING
- j->flags.seccomp_filter_logging = 1;
-#else
- warn("non-debug build: ignoring request to enable seccomp logging");
-#endif
+
+ if (j->flags.seccomp_filter_tsync && !seccomp_ret_log_available()) {
+ /*
+ * If SECCOMP_RET_LOG is not available, we don't want to use
+ * SECCOMP_RET_TRAP to both kill the entire process and report
+ * failing syscalls, since it will be brittle. Just bail.
+ */
+ die("SECCOMP_RET_LOG not available, cannot use thread sync with "
+ "logging at the same time");
+ }
+
+ if (debug_logging_allowed()) {
+ j->flags.seccomp_filter_logging = 1;
+ } else {
+ warn("non-debug build: ignoring request to enable seccomp "
+ "logging");
+ }
}
void API minijail_use_caps(struct minijail *j, uint64_t capmask)
@@ -932,12 +955,17 @@
}
static int set_seccomp_filters_internal(struct minijail *j,
- struct sock_fprog *filter, bool owned)
+ const struct sock_fprog *filter,
+ bool owned)
{
struct sock_fprog *fprog;
if (owned) {
- fprog = filter;
+ /*
+ * If |owned| is true, it's OK to cast away the const-ness since
+ * we'll own the pointer going forward.
+ */
+ fprog = (struct sock_fprog *)filter;
} else {
fprog = malloc(sizeof(struct sock_fprog));
if (!fprog)
@@ -962,45 +990,48 @@
return 0;
}
-void API minijail_set_seccomp_filters(struct minijail *j,
- const struct sock_fprog *filter)
-{
- if (!seccomp_should_use_filters(j))
- return;
-
- if (j->flags.seccomp_filter_logging) {
- die("minijail_log_seccomp_filter_failures() is incompatible "
- "with minijail_set_seccomp_filters()");
- }
-
- /*
- * set_seccomp_filters_internal() can only fail with ENOMEM.
- * Furthermore, since we won't own the incoming filter, it will not be
- * modified.
- */
- if (set_seccomp_filters_internal(j, (struct sock_fprog *)filter,
- false) < 0) {
- die("failed to copy seccomp filter");
- }
-}
-
static int parse_seccomp_filters(struct minijail *j, const char *filename,
FILE *policy_file)
{
struct sock_fprog *fprog = malloc(sizeof(struct sock_fprog));
if (!fprog)
return -ENOMEM;
- int use_ret_trap =
- j->flags.seccomp_filter_tsync || j->flags.seccomp_filter_logging;
- int allow_logging = j->flags.seccomp_filter_logging;
- if (compile_filter(filename, policy_file, fprog, use_ret_trap,
- allow_logging)) {
+ struct filter_options filteropts;
+
+ /*
+ * Figure out filter options.
+ * Allow logging?
+ */
+ filteropts.allow_logging =
+ debug_logging_allowed() && j->flags.seccomp_filter_logging;
+
+ /* What to do on a blocked system call? */
+ if (filteropts.allow_logging) {
+ if (seccomp_ret_log_available())
+ filteropts.action = ACTION_RET_LOG;
+ else
+ filteropts.action = ACTION_RET_TRAP;
+ } else {
+ if (j->flags.seccomp_filter_tsync)
+ filteropts.action = ACTION_RET_TRAP;
+ else
+ filteropts.action = ACTION_RET_KILL;
+ }
+
+ /*
+ * If SECCOMP_RET_LOG is not available, need to allow extra syscalls
+ * for logging.
+ */
+ filteropts.allow_syscalls_for_logging =
+ filteropts.allow_logging && !seccomp_ret_log_available();
+
+ if (compile_filter(filename, policy_file, fprog, &filteropts)) {
free(fprog);
return -1;
}
- return set_seccomp_filters_internal(j, fprog, true);
+ return set_seccomp_filters_internal(j, fprog, true /* owned */);
}
void API minijail_parse_seccomp_filters(struct minijail *j, const char *path)
@@ -1048,6 +1079,27 @@
fclose(file);
}
+void API minijail_set_seccomp_filters(struct minijail *j,
+ const struct sock_fprog *filter)
+{
+ if (!seccomp_should_use_filters(j))
+ return;
+
+ if (j->flags.seccomp_filter_logging) {
+ die("minijail_log_seccomp_filter_failures() is incompatible "
+ "with minijail_set_seccomp_filters()");
+ }
+
+ /*
+ * set_seccomp_filters_internal() can only fail with ENOMEM.
+ * Furthermore, since we won't own the incoming filter, it will not be
+ * modified.
+ */
+ if (set_seccomp_filters_internal(j, filter, false /* owned */) < 0) {
+ die("failed to set seccomp filter");
+ }
+}
+
int API minijail_use_alt_syscall(struct minijail *j, const char *table)
{
j->alt_syscall_table = strdup(table);
@@ -1959,13 +2011,16 @@
if (j->flags.seccomp_filter) {
if (j->flags.seccomp_filter_logging) {
- /*
- * If logging seccomp filter failures,
- * install the SIGSYS handler first.
- */
- if (install_sigsys_handler())
- pdie("failed to install SIGSYS handler");
warn("logging seccomp filter failures");
+ if (!seccomp_ret_log_available()) {
+ /*
+ * If SECCOMP_RET_LOG is not available,
+ * install the SIGSYS handler first.
+ */
+ if (install_sigsys_handler())
+ pdie(
+ "failed to install SIGSYS handler");
+ }
} else if (j->flags.seccomp_filter_tsync) {
/*
* If setting thread sync,
@@ -2745,7 +2800,7 @@
if (use_preload) {
free(oldenv_copy);
}
- die("failed to fork child");
+ pdie("failed to fork child");
}
if (child_pid) {
diff --git a/libminijail_unittest.cc b/libminijail_unittest.cc
index d6896bf..115b3ae 100644
--- a/libminijail_unittest.cc
+++ b/libminijail_unittest.cc
@@ -355,9 +355,9 @@
EXPECT_EQ(mj_run_ret, 0);
read_ret = read(child_stdout, buf, sizeof(buf));
- EXPECT_GE(read_ret, (int)testvar_len);
+ EXPECT_EQ(read_ret, (int)testvar_len + 2);
- EXPECT_EQ("|test\n", std::string(buf));
+ EXPECT_EQ("|test\n", std::string(buf, 0, testvar_len + 2));
EXPECT_EQ(waitpid(pid, &status, 0), pid);
ASSERT_TRUE(WIFEXITED(status));
@@ -667,6 +667,38 @@
minijail_destroy(j);
}
+TEST_F(NamespaceTest, test_bind_ro_with_pivot_root) {
+ if (!userns_supported_) {
+ SUCCEED();
+ return;
+ }
+ struct minijail *j = minijail_new();
+ minijail_namespace_cgroups(j);
+ minijail_namespace_net(j);
+ minijail_namespace_pids(j);
+ minijail_namespace_user(j);
+ minijail_namespace_uts(j);
+ minijail_namespace_vfs(j);
+
+ minijail_use_caps(j, 0);
+ minijail_no_new_privs(j);
+ minijail_namespace_user_disable_setgroups(j);
+
+ minijail_run_as_init(j);
+ minijail_enter_pivot_root(j, "/tmp");
+
+ // /dev/shm is usually mounted nosuid,nodev.
+ minijail_bind(j, "/dev/shm", "/", false);
+
+ pid_t p = minijail_fork(j);
+ if (p) {
+ EXPECT_EQ(0, minijail_wait(j));
+ minijail_destroy(j);
+ return;
+ }
+ exit(0);
+}
+
TEST_F(NamespaceTest, test_namespaces) {
constexpr char teststr[] = "test\n";
diff --git a/minijail0.1 b/minijail0.1
index 0fbf38e..d23f14a 100644
--- a/minijail0.1
+++ b/minijail0.1
@@ -18,6 +18,8 @@
If \fIdest\fR is not specified, it will default to \fIsrc\fR.
If the destination does not exist, it will be created as a file or directory
based on the \fIsrc\fR type (including missing parent directories).
+To create a writable bind-mount set \fIwritable\fR to \fB1\fR. If not specified
+it will default to \fB0\fR (read-only).
.TP
\fB-B <mask>\fR
Skip setting securebits in \fImask\fR when restricting capabilities (\fB-c\fR).
@@ -155,9 +157,13 @@
namespace independent.
.TP
\fB-L\fR
-Report blocked syscalls to syslog when using seccomp filter. This option will
-force certain syscalls to be allowed in order to achieve this, depending on the
-system.
+Report blocked syscalls when using a seccomp filter. On kernels with support for
+SECCOMP_RET_LOG, every blocked syscall will be reported through the audit
+subsystem (see \fBseccomp\fR(2) for more details on SECCOMP_RET_LOG
+availability.) On all other kernels, the first failing syscall will be logged to
+syslog. This latter case will also force certain syscalls to be allowed in order
+to write to syslog. Note: this option is disabled and ignored for release
+builds.
.TP
\fB-m[<uid> <loweruid> <count>[,<uid> <loweruid> <count>]]\fR
Set the uid mapping of a user namespace (implies \fB-pU\fR). Same arguments as
@@ -343,4 +349,4 @@
Copyright \(co 2011 The Chromium OS Authors
License BSD-like.
.SH "SEE ALSO"
-\fBlibminijail.h\fR \fBminijail0\fR(5)
+\fBlibminijail.h\fR \fBminijail0\fR(5) \fBseccomp\fR(2)
diff --git a/minijail0_cli.c b/minijail0_cli.c
index 807e567..1b92b09 100644
--- a/minijail0_cli.c
+++ b/minijail0_cli.c
@@ -139,9 +139,16 @@
}
if (dest == NULL || dest[0] == '\0')
dest = src;
- if (flags == NULL || flags[0] == '\0')
- flags = "0";
- if (minijail_bind(j, src, dest, atoi(flags))) {
+ int writable;
+ if (flags == NULL || flags[0] == '\0' || !strcmp(flags, "0"))
+ writable = 0;
+ else if (!strcmp(flags, "1"))
+ writable = 1;
+ else {
+ fprintf(stderr, "Bad value for <writable>: %s\n", flags);
+ exit(1);
+ }
+ if (minijail_bind(j, src, dest, writable)) {
fprintf(stderr, "minijail_bind failed.\n");
exit(1);
}
@@ -498,8 +505,9 @@
" -K: Do not change share mode of any existing mounts.\n"
" -K<mode>: Mark all existing mounts as <mode> instead of MS_PRIVATE.\n"
" -l: Enter new IPC namespace.\n"
- " -L: Report blocked syscalls to syslog when using seccomp filter.\n"
- " Forces the following syscalls to be allowed:\n"
+ " -L: Report blocked syscalls when using seccomp filter.\n"
+ " If the kernel does not support SECCOMP_RET_LOG,\n"
+ " forces the following syscalls to be allowed:\n"
" ", progn);
/* clang-format on */
for (i = 0; i < log_syscalls_len; i++)
diff --git a/minijail0_cli_unittest.cc b/minijail0_cli_unittest.cc
index a00541a..0d6a07d 100644
--- a/minijail0_cli_unittest.cc
+++ b/minijail0_cli_unittest.cc
@@ -405,6 +405,10 @@
// Missing mount namespace/etc...
argv = {"-b", "/", "/bin/sh"};
ASSERT_EXIT(parse_args_(argv), testing::ExitedWithCode(1), "");
+
+ // Bad value for <writable>.
+ argv = {"-b", "/,,writable", "/bin/sh"};
+ ASSERT_EXIT(parse_args_(argv), testing::ExitedWithCode(1), "");
}
// Valid calls to the mount option.
diff --git a/parse_seccomp_policy.cc b/parse_seccomp_policy.cc
index c563a4a..38fcbee 100644
--- a/parse_seccomp_policy.cc
+++ b/parse_seccomp_policy.cc
@@ -78,8 +78,14 @@
if (!f)
pdie("fopen(%s) failed", argv[1]);
+ struct filter_options fopts {
+ .action = ACTION_RET_KILL,
+ .allow_logging = 0,
+ .allow_syscalls_for_logging = 0,
+ };
+
struct sock_fprog fp;
- int res = compile_filter(argv[1], f, &fp, 0, 0);
+ int res = compile_filter(argv[1], f, &fp, &fopts);
fclose(f);
if (res != 0)
die("compile_filter failed");
diff --git a/syscall_filter.c b/syscall_filter.c
index 049797a..3b78f97 100644
--- a/syscall_filter.c
+++ b/syscall_filter.c
@@ -137,6 +137,13 @@
append_filter_block(head, filter, ONE_INSTR);
}
+void append_ret_log(struct filter_block *head)
+{
+ struct sock_filter *filter = new_instr_buf(ONE_INSTR);
+ set_bpf_ret_log(filter);
+ append_filter_block(head, filter, ONE_INSTR);
+}
+
void append_allow_syscall(struct filter_block *head, int nr)
{
struct sock_filter *filter = new_instr_buf(ALLOW_SYSCALL_LEN);
@@ -269,7 +276,7 @@
}
int compile_errno(struct parser_state *state, struct filter_block *head,
- char *ret_errno, int use_ret_trap)
+ char *ret_errno, enum block_action action)
{
char *errno_ptr = NULL;
@@ -292,10 +299,17 @@
append_ret_errno(head, errno_val);
} else {
- if (!use_ret_trap)
+ switch (action) {
+ case ACTION_RET_KILL:
append_ret_kill(head);
- else
+ break;
+ case ACTION_RET_TRAP:
append_ret_trap(head);
+ break;
+ case ACTION_RET_LOG:
+ compiler_warn(state, "invalid action: ACTION_RET_LOG");
+ return -1;
+ }
}
return 0;
}
@@ -304,7 +318,7 @@
const char *policy_line,
unsigned int entry_lbl_id,
struct bpf_labels *labels,
- int use_ret_trap)
+ enum block_action action)
{
/*
* |policy_line| should be an expression of the form:
@@ -368,7 +382,7 @@
/* Checks whether we're unconditionally blocking this syscall. */
if (strncmp(line, "return", strlen("return")) == 0) {
- if (compile_errno(state, head, line, use_ret_trap) < 0) {
+ if (compile_errno(state, head, line, action) < 0) {
free_block_list(head);
free(line);
return NULL;
@@ -423,16 +437,23 @@
* otherwise just kill the task.
*/
if (ret_errno) {
- if (compile_errno(state, head, ret_errno, use_ret_trap) < 0) {
+ if (compile_errno(state, head, ret_errno, action) < 0) {
free_block_list(head);
free(line);
return NULL;
}
} else {
- if (!use_ret_trap)
+ switch (action) {
+ case ACTION_RET_KILL:
append_ret_kill(head);
- else
+ break;
+ case ACTION_RET_TRAP:
append_ret_trap(head);
+ break;
+ case ACTION_RET_LOG:
+ append_ret_log(head);
+ break;
+ }
}
/*
@@ -535,12 +556,13 @@
memcpy(&line[ret + 1], next_line, next_ret + 1);
free(next_line);
*lineptr = line;
- return ret;
+ return *n - 1;
}
int compile_file(const char *filename, FILE *policy_file,
struct filter_block *head, struct filter_block **arg_blocks,
- struct bpf_labels *labels, int use_ret_trap, int allow_logging,
+ struct bpf_labels *labels,
+ const struct filter_options *filteropts,
unsigned int include_level)
{
/* clang-format off */
@@ -594,8 +616,7 @@
goto free_line;
}
if (compile_file(filename, included_file, head,
- arg_blocks, labels, use_ret_trap,
- allow_logging,
+ arg_blocks, labels, filteropts,
include_level + 1) == -1) {
compiler_warn(&state, "'@include %s' failed",
filename);
@@ -631,7 +652,7 @@
if (nr < 0) {
compiler_warn(&state, "nonexistent syscall '%s'",
syscall_name);
- if (allow_logging) {
+ if (filteropts->allow_logging) {
/*
* If we're logging failures, assume we're in a
* debugging case and continue.
@@ -669,14 +690,16 @@
append_filter_block(head, nr_comp, ALLOW_SYSCALL_LEN);
/* Build the arg filter block. */
- struct filter_block *block = compile_policy_line(
- &state, nr, policy_line, id, labels, use_ret_trap);
+ struct filter_block *block =
+ compile_policy_line(&state, nr, policy_line, id,
+ labels, filteropts->action);
if (!block) {
if (*arg_blocks) {
free_block_list(*arg_blocks);
*arg_blocks = NULL;
}
+ warn("could not allocate filter block");
ret = -1;
goto free_line;
}
@@ -695,6 +718,7 @@
free_block_list(*arg_blocks);
*arg_blocks = NULL;
}
+ warn("getmultiline() failed");
ret = -1;
}
@@ -704,7 +728,8 @@
}
int compile_filter(const char *filename, FILE *initial_file,
- struct sock_fprog *prog, int use_ret_trap, int allow_logging)
+ struct sock_fprog *prog,
+ const struct filter_options *filteropts)
{
int ret = 0;
struct bpf_labels labels;
@@ -728,26 +753,48 @@
len = bpf_load_syscall_nr(load_nr);
append_filter_block(head, load_nr, len);
- /* If logging failures, allow the necessary syscalls first. */
- if (allow_logging)
+ /*
+ * On kernels without SECCOMP_RET_LOG, Minijail can attempt to write the
+ * first failing syscall to syslog(3). In order for syslog(3) to work,
+ * some syscalls need to be unconditionally allowed.
+ */
+ if (filteropts->allow_syscalls_for_logging)
allow_logging_syscalls(head);
if (compile_file(filename, initial_file, head, &arg_blocks, &labels,
- use_ret_trap, allow_logging,
- 0 /* include_level */) != 0) {
+ filteropts, 0 /* include_level */) != 0) {
warn("compile_filter: compile_file() failed");
ret = -1;
goto free_filter;
}
/*
- * If none of the syscalls match, either fall through to KILL,
- * or return TRAP.
+ * If none of the syscalls match, either fall through to LOG, TRAP, or
+ * KILL.
*/
- if (!use_ret_trap)
+ switch (filteropts->action) {
+ case ACTION_RET_KILL:
append_ret_kill(head);
- else
+ break;
+ case ACTION_RET_TRAP:
append_ret_trap(head);
+ break;
+ case ACTION_RET_LOG:
+ if (filteropts->allow_logging) {
+ append_ret_log(head);
+ } else {
+ warn("compile_filter: cannot use RET_LOG without "
+ "allowing logging");
+ ret = -1;
+ goto free_filter;
+ }
+ break;
+ default:
+ warn("compile_filter: invalid log action %d",
+ filteropts->action);
+ ret = -1;
+ goto free_filter;
+ }
/* Allocate the final buffer, now that we know its size. */
size_t final_filter_len =
diff --git a/syscall_filter.h b/syscall_filter.h
index 737ef49..019f3f0 100644
--- a/syscall_filter.h
+++ b/syscall_filter.h
@@ -29,20 +29,31 @@
size_t line_number;
};
+enum block_action { ACTION_RET_KILL = 0, ACTION_RET_TRAP, ACTION_RET_LOG };
+
+struct filter_options {
+ enum block_action action;
+ int allow_logging;
+ int allow_syscalls_for_logging;
+};
+
struct bpf_labels;
struct filter_block *compile_policy_line(struct parser_state *state, int nr,
const char *policy_line,
unsigned int label_id,
struct bpf_labels *labels,
- int do_ret_trap);
+ enum block_action action);
+
int compile_file(const char *filename, FILE *policy_file,
struct filter_block *head, struct filter_block **arg_blocks,
- struct bpf_labels *labels, int use_ret_trap, int allow_logging,
+ struct bpf_labels *labels,
+ const struct filter_options *filteropts,
unsigned int include_level);
+
int compile_filter(const char *filename, FILE *policy_file,
- struct sock_fprog *prog, int do_ret_trap,
- int add_logging_syscalls);
+ struct sock_fprog *prog,
+ const struct filter_options *filteropts);
struct filter_block *new_filter_block(void);
int flatten_block_list(struct filter_block *head, struct sock_filter *filter,
diff --git a/syscall_filter_unittest.cc b/syscall_filter_unittest.cc
index aca5f54..95b38f4 100644
--- a/syscall_filter_unittest.cc
+++ b/syscall_filter_unittest.cc
@@ -38,18 +38,23 @@
};
enum use_logging {
- NO_LOGGING = 0,
- USE_LOGGING = 1,
+ NO_LOGGING = 0,
+ USE_SIGSYS_LOGGING = 1,
+ USE_RET_LOG_LOGGING = 2,
};
int test_compile_filter(
std::string filename,
FILE* policy_file,
struct sock_fprog* prog,
- enum ret_trap do_ret_trap = USE_RET_KILL,
- enum use_logging add_logging_syscalls = NO_LOGGING) {
- return compile_filter(filename.c_str(), policy_file, prog, do_ret_trap,
- add_logging_syscalls);
+ enum block_action action = ACTION_RET_KILL,
+ enum use_logging allow_logging = NO_LOGGING) {
+ struct filter_options filteropts {
+ .action = action,
+ .allow_logging = allow_logging != NO_LOGGING,
+ .allow_syscalls_for_logging = allow_logging == USE_SIGSYS_LOGGING,
+ };
+ return compile_filter(filename.c_str(), policy_file, prog, &filteropts);
}
int test_compile_file(
@@ -58,11 +63,16 @@
struct filter_block* head,
struct filter_block** arg_blocks,
struct bpf_labels* labels,
- enum ret_trap use_ret_trap = USE_RET_KILL,
+ enum block_action action = ACTION_RET_KILL,
enum use_logging allow_logging = NO_LOGGING,
unsigned int include_level = 0) {
+ struct filter_options filteropts {
+ .action = action,
+ .allow_logging = allow_logging != NO_LOGGING,
+ .allow_syscalls_for_logging = allow_logging == USE_SIGSYS_LOGGING,
+ };
return compile_file(filename.c_str(), policy_file, head, arg_blocks, labels,
- use_ret_trap, allow_logging, include_level);
+ &filteropts, include_level);
}
struct filter_block* test_compile_policy_line(
@@ -71,9 +81,9 @@
std::string policy_line,
unsigned int label_id,
struct bpf_labels* labels,
- enum ret_trap do_ret_trap = USE_RET_KILL) {
- return compile_policy_line(state, nr, policy_line.c_str(), label_id, labels,
- do_ret_trap);
+ enum block_action action = ACTION_RET_KILL) {
+ return compile_policy_line(state, nr, policy_line.c_str(), label_id,
+ labels, action);
}
} // namespace
@@ -522,6 +532,86 @@
free_block_list(block);
}
+TEST_F(ArgFilterTest, arg0_equals_trap) {
+ std::string fragment = "arg0 == 0";
+
+ struct filter_block* block = test_compile_policy_line(
+ &state_, nr_, fragment, id_, &labels_, ACTION_RET_TRAP);
+
+ ASSERT_NE(block, nullptr);
+ size_t exp_total_len = 1 + (BPF_ARG_COMP_LEN + 1) + 2 + 1 + 2;
+ EXPECT_EQ(block->total_len, exp_total_len);
+
+ /* First block is a label. */
+ struct filter_block* curr_block = block;
+ ASSERT_NE(curr_block, nullptr);
+ EXPECT_EQ(curr_block->len, 1U);
+ EXPECT_LBL(curr_block->instrs);
+
+ /* Second block is a comparison. */
+ curr_block = curr_block->next;
+ EXPECT_COMP(curr_block);
+
+ /* Third block is a jump and a label (end of AND group). */
+ curr_block = curr_block->next;
+ ASSERT_NE(curr_block, nullptr);
+ EXPECT_GROUP_END(curr_block);
+
+ /* Fourth block is SECCOMP_RET_TRAP. */
+ curr_block = curr_block->next;
+ ASSERT_NE(curr_block, nullptr);
+ EXPECT_TRAP(curr_block);
+
+ /* Fifth block is "SUCCESS" label and SECCOMP_RET_ALLOW. */
+ curr_block = curr_block->next;
+ ASSERT_NE(curr_block, nullptr);
+ EXPECT_ALLOW(curr_block);
+
+ EXPECT_EQ(curr_block->next, nullptr);
+
+ free_block_list(block);
+}
+
+TEST_F(ArgFilterTest, arg0_equals_log) {
+ std::string fragment = "arg0 == 0";
+
+ struct filter_block* block = test_compile_policy_line(
+ &state_, nr_, fragment, id_, &labels_, ACTION_RET_LOG);
+
+ ASSERT_NE(block, nullptr);
+ size_t exp_total_len = 1 + (BPF_ARG_COMP_LEN + 1) + 2 + 1 + 2;
+ EXPECT_EQ(block->total_len, exp_total_len);
+
+ /* First block is a label. */
+ struct filter_block* curr_block = block;
+ ASSERT_NE(curr_block, nullptr);
+ EXPECT_EQ(curr_block->len, 1U);
+ EXPECT_LBL(curr_block->instrs);
+
+ /* Second block is a comparison. */
+ curr_block = curr_block->next;
+ EXPECT_COMP(curr_block);
+
+ /* Third block is a jump and a label (end of AND group). */
+ curr_block = curr_block->next;
+ ASSERT_NE(curr_block, nullptr);
+ EXPECT_GROUP_END(curr_block);
+
+ /* Fourth block is SECCOMP_RET_LOG. */
+ curr_block = curr_block->next;
+ ASSERT_NE(curr_block, nullptr);
+ EXPECT_LOG(curr_block);
+
+ /* Fifth block is "SUCCESS" label and SECCOMP_RET_ALLOW. */
+ curr_block = curr_block->next;
+ ASSERT_NE(curr_block, nullptr);
+ EXPECT_ALLOW(curr_block);
+
+ EXPECT_EQ(curr_block->next, nullptr);
+
+ free_block_list(block);
+}
+
TEST_F(ArgFilterTest, arg0_short_gt_ge_comparisons) {
for (std::string fragment :
{"arg1 < 0xff", "arg1 <= 0xff", "arg1 > 0xff", "arg1 >= 0xff"}) {
@@ -928,7 +1018,7 @@
struct filter_block* block =
test_compile_policy_line(&state_, nr_, fragment, id_, &labels_,
- USE_RET_TRAP);
+ ACTION_RET_TRAP);
ASSERT_NE(block, nullptr);
size_t exp_total_len = 1 + (BPF_ARG_COMP_LEN + 1) + 2 + 1 + 2;
@@ -1012,7 +1102,7 @@
struct filter_block* block =
test_compile_policy_line(&state_, nr_, fragment, id_, &labels_,
- USE_RET_TRAP);
+ ACTION_RET_TRAP);
ASSERT_NE(block, nullptr);
size_t exp_total_len = 1 + (BPF_ARG_COMP_LEN + 1) + 2 + 1 + 2;
EXPECT_EQ(block->total_len, exp_total_len);
@@ -1264,7 +1354,7 @@
TEST_F(FileTest, multiline) {
std::string policy =
"read:\\\n1\n"
- "openat:arg0 in\\\n5";
+ "openat:arg0 \\\nin\\\n \\\n5";
const int LABEL_ID = 0;
@@ -1344,7 +1434,8 @@
FILE* policy_file = write_policy_to_pipe(policy);
ASSERT_NE(policy_file, nullptr);
- int res = test_compile_filter("policy", policy_file, &actual, USE_RET_TRAP);
+ int res =
+ test_compile_filter("policy", policy_file, &actual, ACTION_RET_TRAP);
fclose(policy_file);
/*
@@ -1371,6 +1462,66 @@
free(actual.filter);
}
+TEST(FilterTest, seccomp_mode1_log) {
+ struct sock_fprog actual;
+ std::string policy =
+ "read: 1\n"
+ "write: 1\n"
+ "rt_sigreturn: 1\n"
+ "exit: 1\n";
+
+ FILE* policy_file = write_policy_to_pipe(policy);
+ ASSERT_NE(policy_file, nullptr);
+
+ int res = test_compile_filter("policy", policy_file, &actual, ACTION_RET_LOG,
+ USE_RET_LOG_LOGGING);
+ fclose(policy_file);
+
+ /*
+ * Checks return value, filter length, and that the filter
+ * validates arch, loads syscall number, and
+ * only allows expected syscalls.
+ */
+ ASSERT_EQ(res, 0);
+ EXPECT_EQ(actual.len, 13);
+ EXPECT_ARCH_VALIDATION(actual.filter);
+ EXPECT_EQ_STMT(actual.filter + ARCH_VALIDATION_LEN,
+ BPF_LD+BPF_W+BPF_ABS, syscall_nr);
+ EXPECT_ALLOW_SYSCALL(actual.filter + ARCH_VALIDATION_LEN + 1,
+ __NR_read);
+ EXPECT_ALLOW_SYSCALL(actual.filter + ARCH_VALIDATION_LEN + 3,
+ __NR_write);
+ EXPECT_ALLOW_SYSCALL(actual.filter + ARCH_VALIDATION_LEN + 5,
+ __NR_rt_sigreturn);
+ EXPECT_ALLOW_SYSCALL(actual.filter + ARCH_VALIDATION_LEN + 7,
+ __NR_exit);
+ EXPECT_EQ_STMT(actual.filter + ARCH_VALIDATION_LEN + 9, BPF_RET+BPF_K,
+ SECCOMP_RET_LOG);
+
+ free(actual.filter);
+}
+
+TEST(FilterTest, seccomp_mode1_log_fails) {
+ struct sock_fprog actual;
+ std::string policy =
+ "read: 1\n"
+ "write: 1\n"
+ "rt_sigreturn: 1\n"
+ "exit: 1\n";
+
+ FILE* policy_file = write_policy_to_pipe(policy);
+ ASSERT_NE(policy_file, nullptr);
+
+ int res = test_compile_filter("policy", policy_file, &actual, ACTION_RET_LOG,
+ NO_LOGGING);
+ fclose(policy_file);
+
+ /*
+ * ACTION_RET_LOG should never be used without allowing logging.
+ */
+ ASSERT_EQ(res, -1);
+}
+
TEST(FilterTest, seccomp_read_write) {
struct sock_fprog actual;
std::string policy =
@@ -1511,8 +1662,8 @@
FILE* policy_file = write_policy_to_pipe(policy);
ASSERT_NE(policy_file, nullptr);
- int res = test_compile_filter("policy", policy_file, &actual, USE_RET_TRAP,
- USE_LOGGING);
+ int res = test_compile_filter("policy", policy_file, &actual, ACTION_RET_TRAP,
+ USE_SIGSYS_LOGGING);
fclose(policy_file);
size_t i;
@@ -1558,8 +1709,8 @@
FILE* policy_file = write_policy_to_pipe(policy);
ASSERT_NE(policy_file, nullptr);
- int res = test_compile_filter("policy", policy_file, &actual, USE_RET_KILL,
- USE_LOGGING);
+ int res = test_compile_filter("policy", policy_file, &actual, ACTION_RET_KILL,
+ USE_SIGSYS_LOGGING);
fclose(policy_file);
size_t i;
@@ -1567,7 +1718,7 @@
/*
* Checks return value, filter length, and that the filter
* validates arch, loads syscall number, only allows expected syscalls,
- * and returns TRAP on failure.
+ * and kills on failure.
* NOTE(jorgelo): the filter is longer since we add the syscalls needed
* for logging.
*/
@@ -1712,7 +1863,7 @@
FILE* file_plain = write_policy_to_pipe(policy_plain);
ASSERT_NE(file_plain, nullptr);
int res_plain = test_compile_filter("policy", file_plain, &compiled_plain,
- USE_RET_KILL);
+ ACTION_RET_KILL);
fclose(file_plain);
std::string policy_with_include =
@@ -1720,9 +1871,8 @@
FILE* file_with_include = write_policy_to_pipe(policy_with_include);
ASSERT_NE(file_with_include, nullptr);
- int res_with_include =
- test_compile_filter("policy", file_with_include, &compiled_with_include,
- USE_RET_KILL);
+ int res_with_include = test_compile_filter(
+ "policy", file_with_include, &compiled_with_include, ACTION_RET_KILL);
fclose(file_with_include);
/*
diff --git a/syscall_filter_unittest_macros.h b/syscall_filter_unittest_macros.h
index 4923fc4..3972cb4 100644
--- a/syscall_filter_unittest_macros.h
+++ b/syscall_filter_unittest_macros.h
@@ -3,6 +3,11 @@
* found in the LICENSE file.
*/
+#ifndef SYSCALL_FILTER_UNITTEST_MACROS_H
+#define SYSCALL_FILTER_UNITTEST_MACROS_H
+
+#include "bpf.h"
+
/* BPF testing macros. */
#define EXPECT_EQ_BLOCK(_block, _code, _k, _jt, _jf) \
do { \
@@ -76,6 +81,13 @@
BPF_RET+BPF_K, SECCOMP_RET_TRAP); \
} while (0)
+#define EXPECT_LOG(_block) \
+do { \
+ EXPECT_EQ((_block)->len, 1U); \
+ EXPECT_EQ_STMT((_block)->instrs, \
+ BPF_RET+BPF_K, SECCOMP_RET_LOG); \
+} while (0)
+
#define EXPECT_ALLOW(_block) \
do { \
EXPECT_EQ((_block)->len, 2U); \
@@ -107,3 +119,5 @@
EXPECT_EQ_BLOCK(&(_filter)[1], \
BPF_JMP+BPF_JA, (_id), (_jt), (_jf)); \
} while (0)
+
+#endif // SYSCALL_FILTER_UNITTEST_MACROS_H
diff --git a/system.c b/system.c
index eb37fdd..2e5c232 100644
--- a/system.c
+++ b/system.c
@@ -3,7 +3,6 @@
* found in the LICENSE file.
*/
-#define _GNU_SOURCE
#include "system.h"
#include <errno.h>
@@ -13,9 +12,9 @@
#include <pwd.h>
#include <stdbool.h>
#include <stdio.h>
-#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
+#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
@@ -26,6 +25,35 @@
#include "util.h"
+/* Old libc versions might not define all constants that we need. */
+#ifndef ST_RDONLY
+#define ST_RDONLY 0x0001
+#endif
+#ifndef ST_NOSUID
+#define ST_NOSUID 0x0002
+#endif
+#ifndef ST_NODEV
+#define ST_NODEV 0x0004
+#endif
+#ifndef ST_NOEXEC
+#define ST_NOEXEC 0x0008
+#endif
+#ifndef ST_SYNCHRONOUS
+#define ST_SYNCHRONOUS 0x0010
+#endif
+#ifndef ST_MANDLOCK
+#define ST_MANDLOCK 0x0040
+#endif
+#ifndef ST_NOATIME
+#define ST_NOATIME 0x0400
+#endif
+#ifndef ST_NODIRATIME
+#define ST_NODIRATIME 0x0800
+#endif
+#ifndef ST_RELATIME
+#define ST_RELATIME 0x1000
+#endif
+
/*
* SECBIT_NO_CAP_AMBIENT_RAISE was added in kernel 4.3, so fill in the
* definition if the securebits header doesn't provide it.
@@ -236,56 +264,23 @@
int write_pid_to_path(pid_t pid, const char *path)
{
- char *temp_path;
- int ret;
+ FILE *fp = fopen(path, "we");
- if (asprintf(&temp_path, "%s.XXXXXX", path) < 0) {
- warn("asprintf(temp_path) failed");
- return -ENOMEM;
- }
- int fd = mkstemp(temp_path);
- if (fd < 0) {
- pwarn("mkstemp(%s) failed", temp_path);
- ret = -errno;
- goto done;
- }
-
- FILE *fp = fdopen(fd, "we");
if (!fp) {
- pwarn("fdopen(fd, \"we\") failed");
- ret = -errno;
- close(fd);
- goto unlink_temp;
+ pwarn("failed to open '%s'", path);
+ return -errno;
}
if (fprintf(fp, "%d\n", (int)pid) < 0) {
/* fprintf(3) does not set errno on failure. */
warn("fprintf(%s) failed", path);
- ret = -1;
- fclose(fp);
- goto unlink_temp;
+ return -1;
}
if (fclose(fp)) {
pwarn("fclose(%s) failed", path);
- ret = -errno;
- /* Best effort. */
- close(fd);
- goto unlink_temp;
+ return -errno;
}
- if (rename(temp_path, path) < 0) {
- pwarn("rename(%s, %s) failed", temp_path, path);
- ret = -errno;
- goto unlink_temp;
- }
- ret = 0;
- goto done;
-
-unlink_temp:
- /* Best effort. */
- unlink(temp_path);
-done:
- free(temp_path);
- return ret;
+ return 0;
}
/*
@@ -331,6 +326,28 @@
}
/*
+ * get_mount_flags_for_directory: Returns the mount flags for the given
+ * directory.
+ */
+int get_mount_flags_for_directory(const char *path, unsigned long *mnt_flags)
+{
+ int rc;
+ struct statvfs stvfs_buf;
+
+ if (!mnt_flags)
+ return 0;
+
+ rc = statvfs(path, &stvfs_buf);
+ if (rc) {
+ rc = errno;
+ pwarn("failed to look up mount flags: source=%s", path);
+ return -rc;
+ }
+ *mnt_flags = vfs_flags_to_mount_flags(stvfs_buf.f_flag);
+ return 0;
+}
+
+/*
* setup_mount_destination: Ensures the mount target exists.
* Creates it if needed and possible.
*/
@@ -343,7 +360,7 @@
rc = stat(dest, &st_buf);
if (rc == 0) /* destination exists */
- return 0;
+ return get_mount_flags_for_directory(source, mnt_flags);
/*
* Try to create the destination.
@@ -377,19 +394,6 @@
(!bind && (S_ISBLK(st_buf.st_mode) ||
S_ISCHR(st_buf.st_mode)));
- /* If bind mounting, also grab the mount flags of the source. */
- if (bind && mnt_flags) {
- struct statvfs stvfs_buf;
- rc = statvfs(source, &stvfs_buf);
- if (rc) {
- rc = errno;
- pwarn(
- "failed to look up mount flags: source=%s",
- source);
- return -rc;
- }
- *mnt_flags = stvfs_buf.f_flag;
- }
} else {
/* The source is a relative path -- assume it's a pseudo fs. */
@@ -427,7 +431,35 @@
pwarn("chown(%s, %u, %u) failed", dest, uid, gid);
return -rc;
}
- return 0;
+ return get_mount_flags_for_directory(source, mnt_flags);
+}
+
+/*
+ * vfs_flags_to_mount_flags: Converts the given flags returned by statvfs to
+ * flags that can be used by mount().
+ */
+unsigned long vfs_flags_to_mount_flags(unsigned long vfs_flags)
+{
+ unsigned int i;
+ unsigned long mount_flags = 0;
+
+ static struct {
+ unsigned long mount_flag;
+ unsigned long vfs_flag;
+ } const flag_translation_table[] = {
+ {MS_NOSUID, ST_NOSUID}, {MS_NODEV, ST_NODEV},
+ {MS_NOEXEC, ST_NOEXEC}, {MS_SYNCHRONOUS, ST_SYNCHRONOUS},
+ {MS_MANDLOCK, ST_MANDLOCK}, {MS_NOATIME, ST_NOATIME},
+ {MS_NODIRATIME, ST_NODIRATIME}, {MS_RELATIME, ST_RELATIME},
+ };
+
+ for (i = 0; i < ARRAY_SIZE(flag_translation_table); i++) {
+ if (vfs_flags & flag_translation_table[i].vfs_flag) {
+ mount_flags |= flag_translation_table[i].mount_flag;
+ }
+ }
+
+ return mount_flags;
}
/*
@@ -499,3 +531,61 @@
*gid = pgr->gr_gid;
return 0;
}
+
+static int seccomp_action_is_available(const char *wanted)
+{
+ if (is_android()) {
+ /*
+ * Accessing |actions_avail| is generating SELinux denials, so
+ * skip for now.
+ * TODO(crbug.com/978022, jorgelo): Remove once the denial is
+ * fixed.
+ */
+ return 0;
+ }
+ const char actions_avail_path[] =
+ "/proc/sys/kernel/seccomp/actions_avail";
+ FILE *f = fopen(actions_avail_path, "re");
+
+ if (!f) {
+ pwarn("fopen(%s) failed", actions_avail_path);
+ return 0;
+ }
+
+ char *actions_avail = NULL;
+ size_t buf_size = 0;
+ if (getline(&actions_avail, &buf_size, f) < 0) {
+ pwarn("getline() failed");
+ free(actions_avail);
+ return 0;
+ }
+
+ /*
+ * This is just substring search, which means that partial matches will
+ * match too (e.g. "action" would match "longaction"). There are no
+ * seccomp actions which include other actions though, so we're good for
+ * now. Eventually we might want to split the string by spaces.
+ */
+ return strstr(actions_avail, wanted) != NULL;
+}
+
+int seccomp_ret_log_available(void)
+{
+ static int ret_log_available = -1;
+
+ if (ret_log_available == -1)
+ ret_log_available = seccomp_action_is_available("log");
+
+ return ret_log_available;
+}
+
+int seccomp_ret_kill_process_available(void)
+{
+ static int ret_kill_process_available = -1;
+
+ if (ret_kill_process_available == -1)
+ ret_kill_process_available =
+ seccomp_action_is_available("kill_process");
+
+ return ret_kill_process_available;
+}
diff --git a/system.h b/system.h
index cd1a98a..f332952 100644
--- a/system.h
+++ b/system.h
@@ -54,12 +54,19 @@
int mkdir_p(const char *path, mode_t mode, bool isdir);
+int get_mount_flags_for_directory(const char *path, unsigned long *mnt_flags);
+
int setup_mount_destination(const char *source, const char *dest, uid_t uid,
uid_t gid, bool bind, unsigned long *mnt_flags);
+unsigned long vfs_flags_to_mount_flags(unsigned long vfs_flags);
+
int lookup_user(const char *user, uid_t *uid, gid_t *gid);
int lookup_group(const char *group, gid_t *gid);
+int seccomp_ret_log_available(void);
+int seccomp_ret_kill_process_available(void);
+
#ifdef __cplusplus
}; /* extern "C" */
#endif
diff --git a/system_unittest.cc b/system_unittest.cc
index dea3556..07a343c 100644
--- a/system_unittest.cc
+++ b/system_unittest.cc
@@ -203,7 +203,7 @@
TemporaryFile tmp;
ASSERT_TRUE(tmp.is_valid());
- ASSERT_EQ(0, write_pid_to_path(1234, tmp.path.c_str()));
+ EXPECT_EQ(0, write_pid_to_path(1234, tmp.path.c_str()));
FILE *fp = fopen(tmp.path.c_str(), "re");
EXPECT_NE(nullptr, fp);
char data[6] = {};
@@ -273,7 +273,7 @@
std::string proc = dir.path + "/proc";
EXPECT_EQ(0, setup_mount_destination("/proc", proc.c_str(), -1, -1, true,
&mount_flags));
- EXPECT_EQ(stvfs_buf.f_flag, mount_flags);
+ EXPECT_EQ(vfs_flags_to_mount_flags(stvfs_buf.f_flag), mount_flags);
EXPECT_EQ(0, rmdir(proc.c_str()));
// Same thing holds for children of a mount.
@@ -281,7 +281,7 @@
std::string proc_self = dir.path + "/proc_self";
EXPECT_EQ(0, setup_mount_destination("/proc/self", proc_self.c_str(), -1, -1,
true, &mount_flags));
- EXPECT_EQ(stvfs_buf.f_flag, mount_flags);
+ EXPECT_EQ(vfs_flags_to_mount_flags(stvfs_buf.f_flag), mount_flags);
EXPECT_EQ(0, rmdir(proc_self.c_str()));
}
@@ -362,3 +362,8 @@
// We check it's a directory by deleting it as such.
EXPECT_EQ(0, rmdir(child_dev.c_str()));
}
+
+TEST(seccomp_actions_available, smoke) {
+ seccomp_ret_log_available();
+ seccomp_ret_kill_process_available();
+}
diff --git a/util.h b/util.h
index 02e7c23..a71fd29 100644
--- a/util.h
+++ b/util.h
@@ -134,6 +134,14 @@
return compiled_with_asan() || &__asan_init != 0 || &__hwasan_init != 0;
}
+static inline bool debug_logging_allowed(void) {
+#if defined(ALLOW_DEBUG_LOGGING)
+ return true;
+#else
+ return false;
+#endif
+}
+
int lookup_syscall(const char *name);
const char *lookup_syscall_name(int nr);