More robust handling of iptables-restore process termination
Bug: 32323979
Test: unit tests pass
Test: bullhead builds and boots
Change-Id: Ib3ea4221b1b2025a0a236f2607db29e1cd30ffa9
diff --git a/server/Controllers.cpp b/server/Controllers.cpp
index 1a618c1..0584d09 100644
--- a/server/Controllers.cpp
+++ b/server/Controllers.cpp
@@ -129,7 +129,6 @@
Controllers::Controllers() : clatdCtrl(&netCtrl) {
InterfaceController::initializeAll();
- IptablesRestoreController::installSignalHandler(&iptablesRestoreCtrl);
}
void Controllers::initIptablesRules() {
diff --git a/server/IptablesRestoreController.cpp b/server/IptablesRestoreController.cpp
index bedb0a4..45f9ab6 100644
--- a/server/IptablesRestoreController.cpp
+++ b/server/IptablesRestoreController.cpp
@@ -21,6 +21,7 @@
#include <sys/wait.h>
#include <unistd.h>
+#define LOG_TAG "IptablesRestoreController"
#include <android-base/logging.h>
#include <android-base/file.h>
@@ -55,13 +56,55 @@
close(pollFds[STDERR_IDX].fd);
}
+ bool outputReady() {
+ struct pollfd pollfd = { .fd = stdIn, .events = POLLOUT };
+ int ret = poll(&pollfd, 1, 0);
+ if (ret == -1) {
+ ALOGE("outputReady poll failed: %s", strerror(errno));
+ return false;
+ }
+ return (ret == 1) && !(pollfd.revents & POLLERR);
+ }
+
+ void stop() {
+ if (processTerminated) return;
+
+ // This can be called by drainAndWaitForAck (after a POLLHUP) or by sendCommand (if the
+ // process was killed by something else on the system). In both cases, it's safe to send the
+ // PID a SIGTERM, because the PID continues to exist until its parent (i.e., us) calls
+ // waitpid on it, so there's no risk that the PID is reused.
+ int err = kill(pid, SIGTERM);
+ if (err) {
+ err = errno;
+ }
+
+ if (err == ESRCH) {
+ // This means that someone else inside netd but outside this class called waitpid(),
+ // which is a programming error. There's no point in calling waitpid() here since we
+ // know that the process is gone.
+ ALOGE("iptables child process %d unexpectedly disappeared", pid);
+ processTerminated = true;
+ return;
+ }
+
+ if (err) {
+ ALOGE("Error killing iptables child process %d: %s", pid, strerror(err));
+ }
+
+ if (waitpid(pid, nullptr, 0) == -1) {
+ ALOGE("Error waiting for iptables child process %d: %s", pid, strerror(errno));
+ }
+
+ processTerminated = true;
+ }
+
const pid_t pid;
const int stdIn;
struct pollfd pollFds[2];
std::string errBuf;
- bool processTerminated;
+ std::atomic_bool processTerminated;
static constexpr size_t STDOUT_IDX = 0;
static constexpr size_t STDERR_IDX = 1;
@@ -152,76 +195,6 @@
return new IptablesProcess(child_pid, stdin_pipe[1], stdout_pipe[0], stderr_pipe[0]);
}
-void sigchldHandler(int /* signal_number */, siginfo_t *siginfo, void* /* context */) {
- // Save and restore errno to prevent threads from spuriously seeing
- // incorrect errors due to errors from this signal handler.
- int saved_errno = errno;
-
- // Notify the IptablesRestoreController so that it can try to recover. Log
- // relevant information if it's one of the process we care about. netd
- // forks other processes as well, so there's no need to spam the logs
- // every time one of those dies.
- const pid_t child_pid = siginfo->si_pid;
- const IptablesRestoreController::IptablesProcessType process =
- sInstance->notifyChildTermination(child_pid);
-
- if (process != IptablesRestoreController::INVALID_PROCESS) {
- // This should return immediately because we've been informed that
- // |child_pid| just exited.
- pid_t wait_result = waitpid(child_pid, nullptr, WNOHANG);
- if (wait_result < 0) {
- PLOG(WARNING) << "waitpid for child " << child_pid << " unexpectedly failed";
- }
-
- if (siginfo->si_code == CLD_EXITED) {
- LOG(WARNING) << "iptables[6]-restore process exited (pid=" << child_pid
- << ") exit_status=" << siginfo->si_status
- << " type=" << process;
- } else {
- LOG(WARNING) << "iptables[6]-restore process was signalled (pid=" << child_pid
- << ") signal=" << siginfo->si_status
- << " type=" << process;
- }
- }
-
- errno = saved_errno;
-}
-
-/* static */
-void IptablesRestoreController::installSignalHandler(IptablesRestoreController *singleton) {
- if (singleton == nullptr) {
- LOG(ERROR) << "installSignalHandler: singleton == nullptr";
- }
-
- sInstance = singleton;
-
- struct sigaction sa = {};
- sa.sa_flags = SA_SIGINFO;
- sa.sa_sigaction = sigchldHandler;
- const int err = sigaction(SIGCHLD, &sa, nullptr);
- if (err < 0) {
- PLOG(ERROR) << "Unable to set SIGCHLD handler.";
- }
-}
-
-IptablesRestoreController::IptablesProcessType
-IptablesRestoreController::notifyChildTermination(pid_t pid) {
- // We minimize the amount of work that we do from the signal handler, given
- // that this can be called at any arbitrary point of time.
-
- if (mIpRestore != nullptr && mIpRestore->pid == pid) {
- mIpRestore->processTerminated = true;
- return IPTABLES_PROCESS;
- }
-
- if (mIp6Restore != nullptr && mIp6Restore->pid == pid) {
- mIp6Restore->processTerminated = true;
- return IP6TABLES_PROCESS;
- }
-
- return INVALID_PROCESS;
-}
-
// TODO: Return -errno on failure instead of -1.
// TODO: Maybe we should keep a rotating buffer of the last N commands
// so that they can be dumped on dumpsys.
@@ -230,6 +203,7 @@
std::unique_ptr<IptablesProcess> *process =
(type == IPTABLES_PROCESS) ? &mIpRestore : &mIp6Restore;
+
// We might need to fork a new process if we haven't forked one yet, or
// if the forked process terminated.
//
@@ -237,7 +211,13 @@
// recover from a child death. If the child dies at some later point during
// the execution of this method, we will receive an EPIPE and return an
// error. The command will then need to be retried at a higher level.
- if (process->get() == nullptr || (*process)->processTerminated) {
+ IptablesProcess *existingProcess = process->get();
+ if (existingProcess != nullptr && !existingProcess->outputReady()) {
+ existingProcess->stop();
+ existingProcess = nullptr;
+ }
+
+ if (existingProcess == nullptr) {
// Fork a new iptables[6]-restore process.
IptablesProcess *newProcess = IptablesRestoreController::forkAndExec(type);
if (newProcess == nullptr) {
@@ -259,6 +239,7 @@
fixedCommand.data(),
fixedCommand.length())) {
PLOG(ERROR) << "Unable to send command";
+ return -1;
}
if (!android::base::WriteFully((*process)->stdIn, PING, PING_SIZE)) {
@@ -267,7 +248,7 @@
}
if (!drainAndWaitForAck(*process)) {
- LOG(ERROR) << "Timed out waiting for response from iptables process: " << (*process)->pid;
+ // drainAndWaitForAck has already logged an error.
return -1;
}
@@ -324,7 +305,8 @@
}
// We've timed out, which means something has gone wrong - we know that stdout should have
- // become available to read with the ACK message.
+ // become available to read with the ACK message, or that stderr should have been available
+ // to read with an error message.
if (numEvents == 0) {
continue;
}
@@ -361,9 +343,22 @@
IptablesRestoreController::maybeLogStderr(process, buffer, size);
}
}
+ if (pollfd.revents & POLLHUP) {
+ // The pipe was closed. This likely means the subprocess is exiting, since
+ // iptables-restore only closes stdin on error.
+ process->stop();
+ break;
+ }
}
}
+ if (!receivedAck) {
+ if (process->processTerminated)
+ ALOGE("iptables-restore process %d terminated", process->pid);
+ else
+ ALOGE("Timed out waiting for response from iptables process %d", process->pid);
+ }
+
return receivedAck;
}
@@ -379,3 +374,7 @@
}
return res;
}
+
+int IptablesRestoreController::getIpRestorePid(const IptablesProcessType type) {
+ return type == IPTABLES_PROCESS ? mIpRestore->pid : mIp6Restore->pid;
+}
diff --git a/server/IptablesRestoreController.h b/server/IptablesRestoreController.h
index 8f2dc30..4279fac 100644
--- a/server/IptablesRestoreController.h
+++ b/server/IptablesRestoreController.h
@@ -31,10 +31,6 @@
// to get an instance of this class.
IptablesRestoreController();
- // Called precisely once in netd's lifetime, when the singleton
- // Controllers object is created.
- static void installSignalHandler(IptablesRestoreController *singleton);
-
// Execute |commands| on the given |target|.
int execute(const IptablesTarget target, const std::string& commands);
@@ -50,6 +46,10 @@
virtual ~IptablesRestoreController();
+protected:
+ friend class IptablesRestoreControllerTest;
+ pid_t getIpRestorePid(const IptablesProcessType type);
+
private:
static IptablesProcess* forkAndExec(const IptablesProcessType type);
@@ -62,11 +62,11 @@
static void maybeLogStderr(const std::unique_ptr<IptablesProcess> &process,
const char* buf, const ssize_t numBytes);
- std::unique_ptr<IptablesProcess> mIpRestore;
- std::unique_ptr<IptablesProcess> mIp6Restore;
-
// Guards calls to execute().
std::mutex mLock;
+
+ std::unique_ptr<IptablesProcess> mIpRestore;
+ std::unique_ptr<IptablesProcess> mIp6Restore;
};
#endif // NETD_SERVER_IPTABLES_RESTORE_CONTROLLER_H
diff --git a/server/IptablesRestoreControllerTest.cpp b/server/IptablesRestoreControllerTest.cpp
index 3176500..c302b52 100644
--- a/server/IptablesRestoreControllerTest.cpp
+++ b/server/IptablesRestoreControllerTest.cpp
@@ -15,32 +15,99 @@
*/
#include <string>
+#include <fcntl.h>
+#include <signal.h>
#include <gtest/gtest.h>
+#define LOG_TAG "IptablesRestoreControllerTest"
+#include <cutils/log.h>
+#include <android-base/stringprintf.h>
+
#include "IptablesRestoreController.h"
#include "NetdConstants.h"
-class IptablesRestoreControllerHolder {
-public:
- IptablesRestoreControllerHolder() {
- IptablesRestoreController::installSignalHandler(&ctrl);
- }
+using android::base::StringPrintf;
- IptablesRestoreController ctrl;
+class IptablesRestoreControllerTest : public ::testing::Test {
+public:
+ IptablesRestoreController con;
+
+ pid_t getIpRestorePid(const IptablesRestoreController::IptablesProcessType type) {
+ return con.getIpRestorePid(type);
+ };
+
+ void expectNoIptablesRestoreProcess(pid_t pid) {
+ // We can't readlink /proc/PID/exe, because zombie processes don't have it.
+ // Parse /proc/PID/stat instead.
+ std::string statPath = StringPrintf("/proc/%d/stat", pid);
+ int fd = open(statPath.c_str(), O_RDONLY);
+ if (fd == -1) {
+ // ENOENT means the process is gone (expected).
+ ASSERT_EQ(errno, ENOENT)
+ << "Unexpected error opening " << statPath << ": " << strerror(errno);
+ return;
+ }
+
+ // If the PID exists, it's possible (though very unlikely) that the PID was reused. Check the
+ // binary name as well, to ensure the test isn't flaky.
+ char statBuf[1024];
+ ASSERT_NE(-1, read(fd, statBuf, sizeof(statBuf)))
+ << "Could not read from " << statPath << ": " << strerror(errno);
+ close(fd);
+
+ std::string statString(statBuf);
+ EXPECT_FALSE(statString.find("iptables-restor") || statString.find("ip6tables-resto"))
+ << "Previous iptables-restore pid " << pid << " still alive: " << statString;
+ }
};
-IptablesRestoreControllerHolder holder;
-
-TEST(IptablesRestoreControllerTest, TestBasicCommand) {
- IptablesRestoreController& con = holder.ctrl;
- EXPECT_EQ(0, con.execute(IptablesTarget::V4, "#Test\n"));
+TEST_F(IptablesRestoreControllerTest, TestBasicCommand) {
EXPECT_EQ(0, con.execute(IptablesTarget::V4V6, "#Test\n"));
+
+ pid_t pid4 = getIpRestorePid(IptablesRestoreController::IPTABLES_PROCESS);
+ pid_t pid6 = getIpRestorePid(IptablesRestoreController::IP6TABLES_PROCESS);
+
EXPECT_EQ(0, con.execute(IptablesTarget::V6, "#Test\n"));
+ EXPECT_EQ(0, con.execute(IptablesTarget::V4, "#Test\n"));
+
+ // Check the PIDs are the same as they were before. If they're not, the child processes were
+ // restarted, which causes a 30-60ms delay.
+ EXPECT_EQ(pid4, getIpRestorePid(IptablesRestoreController::IPTABLES_PROCESS));
+ EXPECT_EQ(pid6, getIpRestorePid(IptablesRestoreController::IP6TABLES_PROCESS));
}
-TEST(IptablesRestoreControllerTest, TestRestartOnMalformedCommand) {
- IptablesRestoreController& con = holder.ctrl;
- EXPECT_EQ(-1, con.execute(IptablesTarget::V4V6, "malformed command\n"));
+TEST_F(IptablesRestoreControllerTest, TestRestartOnMalformedCommand) {
+ for (int i = 0; i < 50; i++) {
+ IptablesTarget target = (IptablesTarget) (i % 3);
+ ASSERT_EQ(-1, con.execute(target, "malformed command\n")) <<
+ "Malformed command did not fail at iteration " << i;
+ ASSERT_EQ(0, con.execute(target, "#Test\n")) <<
+ "No-op command did not succeed at iteration " << i;
+ }
+}
+
+TEST_F(IptablesRestoreControllerTest, TestRestartOnProcessDeath) {
+ // Run a command to ensure that the processes are running.
EXPECT_EQ(0, con.execute(IptablesTarget::V4V6, "#Test\n"));
+
+ pid_t pid4 = getIpRestorePid(IptablesRestoreController::IPTABLES_PROCESS);
+ pid_t pid6 = getIpRestorePid(IptablesRestoreController::IP6TABLES_PROCESS);
+
+ ASSERT_EQ(0, kill(pid4, 0)) << "iptables-restore pid " << pid4 << " does not exist";
+ ASSERT_EQ(0, kill(pid6, 0)) << "ip6tables-restore pid " << pid6 << " does not exist";
+ ASSERT_EQ(0, kill(pid4, SIGTERM)) << "Failed to send SIGTERM to iptables-restore pid " << pid4;
+ ASSERT_EQ(0, kill(pid6, SIGTERM)) << "Failed to send SIGTERM to ip6tables-restore pid " << pid6;
+
+ // Wait 100ms for processes to terminate.
+ TEMP_FAILURE_RETRY(usleep(100 * 1000));
+
+ // Ensure that running a new command properly restarts the processes.
+ EXPECT_EQ(0, con.execute(IptablesTarget::V4V6, "#Test\n"));
+ EXPECT_NE(pid4, getIpRestorePid(IptablesRestoreController::IPTABLES_PROCESS));
+ EXPECT_NE(pid6, getIpRestorePid(IptablesRestoreController::IP6TABLES_PROCESS));
+
+ // Check there are no zombies.
+ expectNoIptablesRestoreProcess(pid4);
+ expectNoIptablesRestoreProcess(pid6);
}