| // Copyright 2023 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // Implementation file for the sandbox2::MonitorBase class. |
| |
| #include "sandboxed_api/sandbox2/monitor_base.h" |
| |
| #include <pthread.h> |
| #include <sched.h> |
| #include <signal.h> |
| #include <sys/resource.h> |
| #include <syscall.h> |
| |
| #include <cerrno> |
| #include <cstdint> |
| #include <cstdio> |
| #include <iomanip> |
| #include <memory> |
| #include <optional> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "absl/cleanup/cleanup.h" |
| #include "absl/flags/declare.h" |
| #include "absl/flags/flag.h" |
| #include "absl/log/check.h" |
| #include "absl/log/log.h" |
| #include "absl/log/vlog_is_on.h" |
| #include "absl/status/status.h" |
| #include "absl/status/statusor.h" |
| #include "absl/strings/match.h" |
| #include "absl/strings/str_cat.h" |
| #include "absl/strings/string_view.h" |
| #include "absl/synchronization/notification.h" |
| #include "absl/time/time.h" |
| #include "sandboxed_api/sandbox2/client.h" |
| #include "sandboxed_api/sandbox2/comms.h" |
| #include "sandboxed_api/sandbox2/executor.h" |
| #include "sandboxed_api/sandbox2/forkserver.pb.h" |
| #include "sandboxed_api/sandbox2/limits.h" |
| #include "sandboxed_api/sandbox2/mounts.h" |
| #include "sandboxed_api/sandbox2/namespace.h" |
| #include "sandboxed_api/sandbox2/network_proxy/client.h" |
| #include "sandboxed_api/sandbox2/network_proxy/server.h" |
| #include "sandboxed_api/sandbox2/notify.h" |
| #include "sandboxed_api/sandbox2/policy.h" |
| #include "sandboxed_api/sandbox2/result.h" |
| #include "sandboxed_api/sandbox2/stack_trace.h" |
| #include "sandboxed_api/sandbox2/syscall.h" |
| #include "sandboxed_api/sandbox2/util.h" |
| #include "sandboxed_api/util/file_helpers.h" |
| #include "sandboxed_api/util/strerror.h" |
| #include "sandboxed_api/util/temp_file.h" |
| #include "sandboxed_api/util/thread.h" |
| |
// Boolean flag, default true. Per its help text it controls reporting of
// sandboxee deaths caused by signals; it is not read in this part of the file.
ABSL_FLAG(bool, sandbox2_report_on_sandboxee_signal, true,
          "Report sandbox2 sandboxee deaths caused by signals");

// Boolean flag, default true. Per its help text it controls reporting of
// sandboxee timeouts; it is not read in this part of the file.
ABSL_FLAG(bool, sandbox2_report_on_sandboxee_timeout, true,
          "Report sandbox2 sandboxee timeouts");

// Flags defined in another translation unit. Of the two, only
// sandbox2_danger_danger_permit_all_and_log is read in the visible code: the
// MonitorBase constructor treats a non-empty value as the path of a log file
// to open in append mode.
ABSL_DECLARE_FLAG(bool, sandbox2_danger_danger_permit_all);
ABSL_DECLARE_FLAG(std::string, sandbox2_danger_danger_permit_all_and_log);
| |
| namespace sandbox2 { |
| namespace { |
| |
| void MaybeEnableTomoyoLsmWorkaround(Mounts& mounts, std::string& comms_fd_dev) { |
| static auto tomoyo_active = []() -> bool { |
| std::string lsm_list; |
| if (auto status = sapi::file::GetContents( |
| "/sys/kernel/security/lsm", &lsm_list, sapi::file::Defaults()); |
| !status.ok() && !absl::IsNotFound(status)) { |
| VLOG(1) << "Checking active LSMs failed: " << status.message() << ": " |
| << sapi::StrError(errno); |
| return false; |
| } |
| return absl::StrContains(lsm_list, "tomoyo"); |
| }(); |
| |
| if (!tomoyo_active) { |
| return; |
| } |
| VLOG(1) << "Tomoyo LSM active, enabling workaround"; |
| |
| if (mounts.ResolvePath("/dev").ok() || mounts.ResolvePath("/dev/fd").ok()) { |
| // Avoid shadowing /dev/fd/1022 below if /dev or /dev/fd is already mapped. |
| VLOG(1) << "Parent dir already mapped, skipping"; |
| return; |
| } |
| |
| auto temp_file = sapi::CreateNamedTempFileAndClose("/tmp/"); |
| if (!temp_file.ok()) { |
| LOG(WARNING) << "Failed to create empty temp file: " << temp_file.status(); |
| return; |
| } |
| comms_fd_dev = std::move(*temp_file); |
| |
| // Ignore errors here, as the file itself might already be mapped. |
| if (auto status = mounts.AddFileAt( |
| comms_fd_dev, absl::StrCat("/dev/fd/", Comms::kSandbox2TargetExecFD), |
| false); |
| !status.ok()) { |
| VLOG(1) << "Mapping comms FD: %s" << status.message(); |
| } |
| } |
| |
| void LogContainer(const std::vector<std::string>& container) { |
| for (size_t i = 0; i < container.size(); ++i) { |
| LOG(INFO) << "[" << std::setfill('0') << std::setw(4) << i |
| << "]=" << container[i]; |
| } |
| } |
| |
| } // namespace |
| |
| MonitorBase::MonitorBase(Executor* executor, Policy* policy, Notify* notify) |
| : executor_(executor), |
| policy_(policy), |
| notify_(notify), |
| // NOLINTNEXTLINE clang-diagnostic-deprecated-declarations |
| comms_(executor_->ipc()->comms()), |
| ipc_(executor_->ipc()), |
| uses_custom_forkserver_(executor_->fork_client_ != nullptr) { |
| wait_for_execveat_ = executor->enable_sandboxing_pre_execve_; |
| // It's a pre-connected Comms channel, no need to accept new connection. |
| CHECK(comms_->IsConnected()); |
| std::string path = |
| absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all_and_log); |
| if (!path.empty()) { |
| log_file_ = std::fopen(path.c_str(), "a+"); |
| PCHECK(log_file_ != nullptr) << "Failed to open log file '" << path << "'"; |
| } |
| |
| if (auto& ns = policy_->namespace_; ns) { |
| // Check for the Tomoyo LSM, which is active by default in several common |
| // distribution kernels (esp. Debian). |
| MaybeEnableTomoyoLsmWorkaround(ns->mounts(), comms_fd_dev_); |
| } |
| } |
| |
| MonitorBase::~MonitorBase() { |
| if (!comms_fd_dev_.empty()) { |
| std::remove(comms_fd_dev_.c_str()); |
| } |
| if (log_file_) { |
| std::fclose(log_file_); |
| } |
| if (network_proxy_thread_.IsJoinable()) { |
| network_proxy_thread_.Join(); |
| } |
| } |
| |
| void MonitorBase::OnDone() { |
| if (done_notification_.HasBeenNotified()) { |
| return; |
| } |
| |
| notify_->EventFinished(result_); |
| ipc_->InternalCleanupFdMap(); |
| done_notification_.Notify(); |
| } |
| |
| void MonitorBase::Launch() { |
| |
| absl::Cleanup process_cleanup = [this] { |
| if (process_.init_pid > 0) { |
| kill(process_.init_pid, SIGKILL); |
| } else if (process_.main_pid > 0) { |
| kill(process_.main_pid, SIGKILL); |
| } |
| }; |
| absl::Cleanup monitor_done = [this] { OnDone(); }; |
| |
| const Namespace* ns = policy_->GetNamespaceOrNull(); |
| if (VLOG_IS_ON(1) && ns != nullptr) { |
| std::vector<std::string> outside_entries; |
| std::vector<std::string> inside_entries; |
| ns->mounts().RecursivelyListMounts( |
| /*outside_entries=*/&outside_entries, |
| /*inside_entries=*/&inside_entries); |
| VLOG(1) << "Outside entries mapped to chroot:"; |
| LogContainer(outside_entries); |
| VLOG(1) << "Inside entries as they appear in chroot:"; |
| LogContainer(inside_entries); |
| } |
| |
| // Don't trace the child: it will allow to use 'strace -f' with the whole |
| // sandbox master/monitor, which ptrace_attach'es to the child. |
| int clone_flags = CLONE_UNTRACED; |
| |
| if (policy_->allowed_hosts_) { |
| EnableNetworkProxyServer(); |
| } |
| |
| // Get PID of the sandboxee. |
| bool should_have_init = ns && (ns->clone_flags() & CLONE_NEWPID); |
| absl::StatusOr<SandboxeeProcess> process = executor_->StartSubProcess( |
| clone_flags, ns, policy_->allow_speculation_, type_); |
| |
| if (!process.ok()) { |
| LOG(ERROR) << "Starting sandboxed subprocess failed: " << process.status(); |
| SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_SUBPROCESS); |
| return; |
| } |
| |
| process_ = *std::move(process); |
| |
| if (process_.main_pid <= 0 || (should_have_init && process_.init_pid <= 0)) { |
| SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_SUBPROCESS); |
| return; |
| } |
| |
| if (!notify_->EventStarted(process_.main_pid, comms_)) { |
| SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_NOTIFY); |
| return; |
| } |
| if (!InitSendIPC()) { |
| SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_IPC); |
| return; |
| } |
| if (!InitSendCwd()) { |
| SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_CWD); |
| return; |
| } |
| if (!InitSendPolicy()) { |
| SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_POLICY); |
| return; |
| } |
| if (!WaitForSandboxReady()) { |
| SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_WAIT); |
| return; |
| } |
| if (!InitApplyLimits()) { |
| SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_LIMITS); |
| return; |
| } |
| std::move(process_cleanup).Cancel(); |
| |
| RunInternal(); |
| std::move(monitor_done).Cancel(); |
| } |
| |
| absl::StatusOr<Result> MonitorBase::AwaitResultWithTimeout( |
| absl::Duration timeout) { |
| auto done = done_notification_.WaitForNotificationWithTimeout(timeout); |
| if (!done) { |
| return absl::DeadlineExceededError("Sandbox did not finish within timeout"); |
| } |
| |
| Join(); |
| return result_; |
| } |
| |
| void MonitorBase::SetExitStatusCode(Result::StatusEnum final_status, |
| uintptr_t reason_code) { |
| CHECK(result_.final_status() == Result::UNSET); |
| result_.SetExitStatusCode(final_status, reason_code); |
| } |
| |
| absl::Status MonitorBase::SendPolicy(const std::vector<sock_filter>& policy) { |
| if (!comms_->SendBytes(reinterpret_cast<const uint8_t*>(policy.data()), |
| policy.size() * sizeof(sock_filter))) { |
| return absl::InternalError("Error while sending policy via comms"); |
| } |
| return absl::OkStatus(); |
| } |
| |
| bool MonitorBase::InitSendPolicy() { |
| bool user_notif = type_ == FORKSERVER_MONITOR_UNOTIFY; |
| auto policy = |
| policy_->GetPolicy(user_notif, executor_->enable_sandboxing_pre_execve_); |
| absl::Status status = SendPolicy(std::move(policy)); |
| if (!status.ok()) { |
| LOG(ERROR) << "Couldn't send policy: " << status; |
| return false; |
| } |
| return true; |
| } |
| |
| bool MonitorBase::InitSendCwd() { |
| if (!comms_->SendString(executor_->cwd_)) { |
| PLOG(ERROR) << "Couldn't send cwd"; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool MonitorBase::InitApplyLimit(pid_t pid, int resource, |
| const rlimit64& rlim) const { |
| using RlimitResource = __rlimit_resource; |
| |
| rlimit64 curr_limit; |
| if (prlimit64(pid, static_cast<RlimitResource>(resource), nullptr, |
| &curr_limit) == -1) { |
| PLOG(ERROR) << "prlimit64(" << pid << ", " << util::GetRlimitName(resource) |
| << ")"; |
| } else if (rlim.rlim_cur > curr_limit.rlim_max) { |
| // In such case, don't update the limits, as it will fail. Just stick to the |
| // current ones (which are already lower than intended). |
| LOG(ERROR) << util::GetRlimitName(resource) |
| << ": new.current > current.max (" << rlim.rlim_cur << " > " |
| << curr_limit.rlim_max << "), skipping"; |
| return true; |
| } |
| |
| if (prlimit64(pid, static_cast<RlimitResource>(resource), &rlim, nullptr) == |
| -1) { |
| PLOG(ERROR) << "prlimit64(" << pid << ", " << util::GetRlimitName(resource) |
| << ", " << rlim.rlim_cur << ")"; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool MonitorBase::InitApplyLimits() { |
| Limits* limits = executor_->limits(); |
| return InitApplyLimit(process_.main_pid, RLIMIT_AS, limits->rlimit_as()) && |
| InitApplyLimit(process_.main_pid, RLIMIT_CPU, limits->rlimit_cpu()) && |
| InitApplyLimit(process_.main_pid, RLIMIT_FSIZE, |
| limits->rlimit_fsize()) && |
| InitApplyLimit(process_.main_pid, RLIMIT_NOFILE, |
| limits->rlimit_nofile()) && |
| InitApplyLimit(process_.main_pid, RLIMIT_CORE, limits->rlimit_core()); |
| } |
| |
| bool MonitorBase::InitSendIPC() { return ipc_->SendFdsOverComms(); } |
| |
| bool MonitorBase::WaitForSandboxReady() { |
| uint32_t message; |
| if (!comms_->RecvUint32(&message)) { |
| LOG(ERROR) << "Couldn't receive 'Client::kClient2SandboxReady' message"; |
| return false; |
| } |
| if (message != Client::kClient2SandboxReady) { |
| LOG(ERROR) << "Received " << message << " != Client::kClient2SandboxReady (" |
| << Client::kClient2SandboxReady << ")"; |
| return false; |
| } |
| return true; |
| } |
| |
| void MonitorBase::LogSyscallViolation(const Syscall& syscall) const { |
| // Do not unwind libunwind. |
| if (executor_->libunwind_sbox_for_pid_ != 0) { |
| LOG(ERROR) << "Sandbox violation during execution of libunwind: " |
| << syscall.GetDescription(); |
| return; |
| } |
| |
| // So, this is an invalid syscall. Will be killed by seccomp-bpf policies as |
| // well, but we should be on a safe side here as well. |
| LOG(ERROR) << "SANDBOX VIOLATION : PID: " << syscall.pid() << ", PROG: '" |
| << util::GetProgName(syscall.pid()) |
| << "' : " << syscall.GetDescription(); |
| if (VLOG_IS_ON(1)) { |
| VLOG(1) << "Cmdline: " << util::GetCmdLine(syscall.pid()); |
| VLOG(1) << "Task Name: " << util::GetProcStatusLine(syscall.pid(), "Name"); |
| VLOG(1) << "Tgid: " << util::GetProcStatusLine(syscall.pid(), "Tgid"); |
| } |
| |
| LogSyscallViolationExplanation(syscall); |
| } |
| |
| void MonitorBase::LogSyscallViolationExplanation(const Syscall& syscall) const { |
| const uintptr_t syscall_nr = syscall.nr(); |
| const uintptr_t arg0 = syscall.args()[0]; |
| |
| // This follows policy in Policy::GetDefaultPolicy - keep it in sync. |
| if (syscall.arch() != Syscall::GetHostArch()) { |
| LOG(ERROR) |
| << "This is a violation because the syscall was issued because the" |
| << " sandboxee and executor architectures are different."; |
| return; |
| } |
| if (syscall_nr == __NR_ptrace) { |
| LOG(ERROR) |
| << "This is a violation because the ptrace syscall would be unsafe in" |
| << " sandbox2, so it has been blocked."; |
| return; |
| } |
| if (syscall_nr == __NR_bpf) { |
| LOG(ERROR) |
| << "This is a violation because the bpf syscall would be risky in" |
| << " a sandbox, so it has been blocked."; |
| return; |
| } |
| if (syscall_nr == __NR_clone && ((arg0 & CLONE_UNTRACED) != 0)) { |
| LOG(ERROR) << "This is a violation because calling clone with CLONE_UNTRACE" |
| << " would be unsafe in sandbox2, so it has been blocked."; |
| return; |
| } |
| } |
| |
| bool MonitorBase::StackTraceCollectionPossible() const { |
| // Only get the stacktrace if we are not in the libunwind sandbox (avoid |
| // recursion). |
| if (executor_->libunwind_recursion_depth() <= 1) { |
| return true; |
| } |
| LOG(ERROR) << "Cannot collect stack trace. Unwind pid " |
| << executor_->libunwind_sbox_for_pid_ << ", namespace " |
| << policy_->GetNamespaceOrNull(); |
| return false; |
| } |
| |
| void MonitorBase::EnableNetworkProxyServer() { |
| int fd = ipc_->ReceiveFd(NetworkProxyClient::kFDName); |
| |
| network_proxy_server_ = std::make_unique<NetworkProxyServer>( |
| fd, &policy_->allowed_hosts_.value(), |
| [this] { NotifyNetworkViolation(); }); |
| |
| network_proxy_thread_ = |
| sapi::Thread(network_proxy_server_.get(), &NetworkProxyServer::Run, |
| "NetworkProxyServer"); |
| } |
| |
| bool MonitorBase::ShouldCollectStackTrace(Result::StatusEnum status) const { |
| if (!StackTraceCollectionPossible()) { |
| return false; |
| } |
| switch (status) { |
| case Result::EXTERNAL_KILL: |
| return policy_->collect_stacktrace_on_kill_; |
| case Result::TIMEOUT: |
| return policy_->collect_stacktrace_on_timeout_; |
| case Result::SIGNALED: |
| return policy_->collect_stacktrace_on_signal_; |
| case Result::VIOLATION: |
| return policy_->collect_stacktrace_on_violation_; |
| case Result::OK: |
| return policy_->collect_stacktrace_on_exit_; |
| default: |
| return false; |
| } |
| } |
| |
| absl::StatusOr<std::vector<std::string>> MonitorBase::GetStackTrace( |
| const Regs* regs) { |
| return sandbox2::GetStackTrace(regs, policy_->GetNamespaceOrNull(), |
| uses_custom_forkserver_, |
| executor_->libunwind_recursion_depth() + 1); |
| } |
| |
| absl::StatusOr<std::vector<std::string>> MonitorBase::GetAndLogStackTrace( |
| const Regs* regs) { |
| auto stack_trace = GetStackTrace(regs); |
| if (!stack_trace.ok()) { |
| return stack_trace.status(); |
| } |
| |
| LOG(INFO) << "Stack trace: ["; |
| for (const auto& frame : CompactStackTrace(*stack_trace)) { |
| LOG(INFO) << " " << frame; |
| } |
| LOG(INFO) << "]"; |
| |
| return stack_trace; |
| } |
| } // namespace sandbox2 |