blob: 66f6dd3dc283b7e616fdf44dae6a085507a8b1c1 [file] [log] [blame]
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "createns"
#include <log/log.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sched.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <limits>
#include <string>
#include <vector>
// Directory prefix (including the trailing slash) under which the namespace
// mount point and its companion ".pid" file are created.
static constexpr char kNamespacePath[] = "/data/vendor/var/run/netns/";
// Handle to the calling process' network namespace; used as the source of the
// bind mount that keeps the namespace reachable by name.
static constexpr char kProcNsNet[] = "/proc/self/ns/net";
// Minimal owning wrapper around a file descriptor. The descriptor passed to
// the constructor is closed when the wrapper is destroyed, unless it is -1.
// Copying is disabled so that a descriptor has exactly one owner.
class Fd {
public:
    explicit Fd(int fd) : mFd(fd) {}

    ~Fd() {
        if (mFd == -1) {
            // Nothing was ever owned (e.g. a failed open()).
            return;
        }
        ::close(mFd);
        mFd = -1;
    }

    Fd(const Fd&) = delete;
    Fd& operator=(const Fd&) = delete;

    // Returns the wrapped descriptor without giving up ownership.
    int get() const { return mFd; }

private:
    int mFd;
};
// Logs the expected command line, using |program| as the executable name.
static void usage(const char* program) {
    ALOGE("%s <namespace>", program);
}
// Deletes |file| with unlink(2).
// Returns true when the file was removed; otherwise logs the errno description
// and returns false.
static bool removeFile(const char* file) {
    const bool unlinked = (::unlink(file) != -1);
    if (!unlinked) {
        ALOGE("Failed to unlink file '%s': %s", file, strerror(errno));
    }
    return unlinked;
}
// Builds the absolute path of the mount point for the namespace called |name|
// by appending |name| to kNamespacePath.
//
// Returns the resulting path, or an empty string (after logging the reason)
// if |name| is null, empty, or too long to be appended.
static std::string getNamespacePath(const char* name) {
    if (name == nullptr || name[0] == '\0') {
        ALOGE("Must provide a namespace argument that is not empty");
        return std::string();
    }

    std::string path(kNamespacePath);
    const size_t nameLength = strlen(name);
    // Reject a name that std::string could not hold together with the prefix
    // instead of letting append() throw.
    if (nameLength > path.max_size() - path.size()) {
        ALOGE("Namespace argument too long");
        return std::string();
    }

    path.append(name, nameLength);
    return path;
}
// Writes |pid| as decimal text to the file "<namespace path>.pid" for the
// namespace called |name|.
//
// The file is created with O_EXCL, so the call fails if a pid file for this
// namespace already exists. If the pid cannot be formatted or written, the
// newly created file is removed again.
//
// Returns true when the complete pid string was written, false otherwise (the
// reason is logged).
static bool writeNamespacePid(const char* name, pid_t pid) {
    std::string path = getNamespacePath(name);
    if (path.empty()) {
        return false;
    }
    path += ".pid";

    Fd fd(::open(path.c_str(),
                 O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC,
                 S_IRUSR | S_IRGRP | S_IROTH));
    if (fd.get() == -1) {
        ALOGE("Unable to create file '%s': %s", path.c_str(), strerror(errno));
        return false;
    }

    // In order to safely print a pid_t we use int64_t with a known format
    // specifier. Ensure that a pid_t will fit in an int64_t. According to
    // POSIX pid_t is signed.
    static_assert(sizeof(pid_t) <= sizeof(int64_t),
                  "pid_t is larger than int64_t");
    char pidString[32];
    const int printed = snprintf(pidString,
                                 sizeof(pidString),
                                 "%" PRId64,
                                 static_cast<int64_t>(pid));
    // A return value >= the buffer size means the output was truncated.
    if (printed <= 0 || static_cast<size_t>(printed) >= sizeof(pidString)) {
        ALOGE("Unabled to created PID string for writing");
        removeFile(path.c_str());
        return false;
    }

    // write() may accept fewer bytes than requested, so keep going until the
    // whole string is out. Progress is tracked against what is still pending,
    // not against the original length, so the loop ends after a partial write.
    const char* toPrint = pidString;
    size_t remaining = static_cast<size_t>(printed);
    while (remaining > 0) {
        const ssize_t result = ::write(fd.get(), toPrint, remaining);
        if (result < 0) {
            if (errno == EINTR) {
                // Interrupted before anything was written, just retry.
                continue;
            }
            ALOGE("Unable to write pid to file %s: %s",
                  path.c_str(), strerror(errno));
            removeFile(path.c_str());
            return false;
        }
        remaining -= static_cast<size_t>(result);
        toPrint += result;
    }
    return true;
}
// Forks a detached daemon process following the steps in man (7) daemon.
//
// |fd| must be a writable descriptor (the caller passes the write end of a
// pipe). The daemon writes its own pid to it as the raw bytes of a pid_t and
// then closes it.
//
// Return value, depending on which process returns:
//   - calling process: the pid of the intermediate child (> 0), or -1 with
//     errno set if the first fork failed.
//   - daemon process: 0.
// The intermediate child never returns from this function, it exits.
static pid_t daemonize(int fd) {
    // (1) Close all files: nothing is closed here, |fd| has to stay open so
    //     that the daemon can report its pid
    // (2) Reset all signal handlers to default, they already are
    // (3) Reset the signal mask, we never changed it
    // (4) Sanitize environment block, we didn't change the environment
    // (5) Call fork
    pid_t pid = ::fork();
    if (pid == -1) {
        // Keep errno intact for the caller, logging may overwrite it.
        const int forkErrno = errno;
        ALOGE("Unable to fork daemon process: %s", strerror(forkErrno));
        errno = forkErrno;
        return -1;
    }
    if (pid != 0) {
        // In the parent, nothing more to do
        return pid;
    }

    // (6) Acquire a new session to detach from terminal
    if (::setsid() == -1) {
        ALOGE("Failed to create new session: %s", strerror(errno));
    }

    // (7) Fork again to avoid the daemon being attached to a terminal again
    pid = ::fork();
    if (pid == -1) {
        // No daemon was created; exit with a failure status instead of
        // pretending that everything went fine.
        ALOGE("Unable to fork daemon process a second time: %s",
              strerror(errno));
        exit(1);
    }
    if (pid != 0) {
        // (8) This is the first child, needs to call exit
        exit(0);
    }

    // (9) Connect /dev/null to stdin, stdout, stderr
    ::close(STDIN_FILENO);
    ::close(STDOUT_FILENO);
    ::close(STDERR_FILENO);
    // open always returns the lowest available fd, so with 0, 1 and 2 just
    // closed we can simply open them again in the correct order.
    if (::open("/dev/null", O_RDONLY) == -1) {
        ALOGE("Unable to open /dev/null as stdin");
    }
    if (::open("/dev/null", O_WRONLY) == -1) {
        ALOGE("Unable to open /dev/null as stdout");
    }
    if (::open("/dev/null", O_WRONLY) == -1) {
        ALOGE("Unable to open /dev/null as stderr");
    }

    // (10) Reset umask to zero
    ::umask(0);

    // (11) Change directory to root (/)
    if (::chdir("/") != 0) {
        ALOGE("Failed to set working directory to root: %s", strerror(errno));
    }

    // (12) Report the pid of the daemon through |fd|. The process on the
    // other end is the one that records it, which lets it guarantee that the
    // pid is known before it exits.
    pid = ::getpid();
    if (::write(fd, &pid, sizeof(pid)) != sizeof(pid)) {
        ALOGE("Unable to write pid to pipe: %s", strerror(errno));
        ::close(fd);
        exit(1);
    }
    ::close(fd);

    // (13) Drop privileges, doing this causes problems for execns when it's
    // trying to open the proc/ns/net file of this process so we can't do that.
    // (14) Notify the starting process that the daemon is running, this is done
    // in step (12) above.
    // (15) Exit starting process happens in main where it returns.
    return 0;
}
// Creates a new network namespace named by argv[1].
//
// Steps:
//   1. Move this process into a fresh network namespace (unshare).
//   2. Create an empty file at the namespace path and bind mount
//      /proc/self/ns/net onto it so the namespace can be reached by name.
//   3. Fork a daemon that stays in the namespace and does nothing but pause.
//   4. Read the daemon's pid from a pipe and store it in "<path>.pid".
//
// Returns 0 on success and 1 on any failure (the reason is logged).
int main(int argc, char* argv[]) {
    if (argc != 2) {
        usage(argv[0]);
        return 1;
    }

    int fds[2];
    if (::pipe2(fds, O_CLOEXEC) != 0) {
        ALOGE("Failed to create pipe: %s", strerror(errno));
        return 1;
    }
    Fd readPipe(fds[0]);

    {
        // The write end lives in its own scope so that this process closes
        // it before reading from the pipe. Otherwise the read below could
        // never see end-of-file and would block forever if the daemon dies
        // before reporting its pid.
        Fd writePipe(fds[1]);

        if (::unshare(CLONE_NEWNET) != 0) {
            ALOGE("Failed to create network namespace '%s': %s",
                  argv[1],
                  strerror(errno));
            return 1;
        }

        const std::string path = getNamespacePath(argv[1]);
        if (path.empty()) {
            return 1;
        }
        {
            // Open and then immediately close the fd, all we need is for the
            // file to exist so that it can serve as a mount point.
            Fd fd(::open(path.c_str(),
                         O_CREAT | O_EXCL | O_RDONLY | O_CLOEXEC,
                         0));
            if (fd.get() == -1) {
                ALOGE("Failed to open file %s: %s",
                      path.c_str(),
                      strerror(errno));
                return 1;
            }
        }
        if (::mount(kProcNsNet, path.c_str(), nullptr, MS_BIND, nullptr) != 0) {
            ALOGE("Failed to bind %s to %s: %s",
                  kProcNsNet,
                  path.c_str(),
                  strerror(errno));
            // Clean up on failure
            removeFile(path.c_str());
            return 1;
        }

        // At this point we fork. This way we keep a process in the namespace
        // alive without this command being blocking. This is valuable because
        // it allows us to write the pid to a file before we exit. That way we
        // can guarantee that after this command completes there is a pid to be
        // read, there is no asynchronous behavior going on.
        const pid_t pid = daemonize(writePipe.get());
        if (pid == 0) {
            // In the daemon, stay alive forever to keep the namespace around.
            // This never leaves the enclosing scope.
            for (;;) {
                pause();
            }
        }
        if (pid == -1) {
            // No child exists, so nobody would ever write to the pipe.
            ALOGE("Failed to fork daemon process: %s", strerror(errno));
            return 1;
        }
    }

    // In the parent, read the pid of the daemon from the pipe and write it to
    // a file.
    pid_t child = 0;
    ssize_t bytesRead;
    do {
        bytesRead = ::read(readPipe.get(), &child, sizeof(child));
    } while (bytesRead == -1 && errno == EINTR);
    if (bytesRead == -1) {
        ALOGE("Failed to read child PID from pipe: %s", strerror(errno));
        return 1;
    }
    if (static_cast<size_t>(bytesRead) != sizeof(child)) {
        // End-of-file or a short read: every writer went away without sending
        // a complete pid. errno is meaningless here so it is not logged.
        ALOGE("Daemon exited before reporting its PID");
        return 1;
    }

    if (!writeNamespacePid(argv[1], child)) {
        return 1;
    }
    return 0;
}