blob: 4d4e84c3fcea09564083ad2788c5f2646509d4e3 [file] [log] [blame]
/*
* Copyright (C) 2021 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "interceptor.h"

#include <dlfcn.h>
#include <fcntl.h>
#include <spawn.h>
#include <unistd.h>

#include <algorithm>
#include <array>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <functional>
#include <initializer_list>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <memory>
#include <optional>
#include <regex>
#include <sstream>
#include <string>
#include <string_view>
#include <system_error>
#include <type_traits>
#include <utility>
#include <vector>

#include <android-base/unique_fd.h>
#include <google/protobuf/util/delimited_message_util.h>

#include "interceptor_utils.h"
namespace fs = std::filesystem;
// UTILITY function declarations
// Process applicable calls (i.e. programs that we might be able to handle).
// Returns the rewritten command when the call was transformed; returns an
// empty optional when the call is not handled or was left unchanged, in which
// case the caller falls back to the original arguments.
// `should_recurse` is only written when the call was actually analyzed.
static std::optional<interceptor::Command> process_command(const char* filename, char* const argv[],
char* const envp[],
bool* should_recurse);
// Log the command if logging is enabled, i.e. if the command's environment
// names a log file via kEnvCommandLog.
static void log(const interceptor::Command&);
// Callable that performs the real call (e.g. the original execve or
// posix_spawn) with the given program, arguments and environment.
using executor_t = std::function<int(const char* filename, char* const argv[], char* const envp[])>;
// Execute the potentially modified command through `executor` and return
// whatever `executor` returns.
static int execute(const char* filename, char* const argv[], char* const envp[],
const executor_t& executor);
// OVERLOADS for LD_PRELOAD USE
// Intercept execve calls, for that capture the original execve call
static auto const old_execve = reinterpret_cast<decltype(execve)*>(dlsym(RTLD_NEXT, "execve"));
extern "C" {
int execve(const char* filename, char* const argv[], char* const envp[]) {
return execute(filename, argv, envp,
[&](const char* file, char* const arguments[], char* const environment[]) {
return old_execve(file, arguments, environment);
});
}
} // extern "C"
// Intercept posix_spawn calls, for that capture the original posix_spawn call
static auto const old_posix_spawn =
reinterpret_cast<decltype(posix_spawn)*>(dlsym(RTLD_NEXT, "posix_spawn"));
extern "C" {
int posix_spawn(pid_t* pid, const char* filename, const posix_spawn_file_actions_t* file_actions,
const posix_spawnattr_t* attrp, char* const argv[], char* const envp[]) {
return execute(filename, argv, envp,
[&](const char* file, char* const arguments[], char* const environment[]) {
return old_posix_spawn(pid, file, file_actions, attrp, arguments, environment);
});
}
} // extern "C"
// LIBRARY IMPLEMENTATION
namespace interceptor {
// Builds a Command from the raw arguments of an exec-style call.
//
// program: path of the program to run; must not be null.
// argv:    null-terminated argument list; may be null (treated as empty).
// envp:    null-terminated list of "KEY=VALUE" entries; may be null (treated
//          as empty). Entries without '=' are skipped.
//
// The current working directory of the calling process is recorded as the
// command's current directory.
static Command instantiate_command(const char* program, char* const argv[], char* const envp[]) {
  Command result;
  result.set_program(program);
  result.set_current_directory(fs::current_path().string());

  // Linux accepts a null argv/envp for execve and treats it as an empty list,
  // so do not dereference them blindly.
  if (argv != nullptr) {
    for (auto current_argument = argv; *current_argument != nullptr; ++current_argument) {
      result.add_arguments(*current_argument);
    }
  }

  if (envp != nullptr) {
    auto& environment = *result.mutable_environment_variables();
    for (auto current_env_var = envp; *current_env_var != nullptr; ++current_env_var) {
      const std::string_view entry(*current_env_var);
      const auto separator = entry.find('=');
      if (separator == std::string_view::npos) {
        continue;
      }
      environment[std::string(entry.substr(0, separator))] =
          std::string(entry.substr(separator + 1));
    }
  }
  return result;
}
// Looks up `key` in the environment recorded in `command`.
// Returns the variable's value, or an empty optional if it is not set.
static std::optional<std::string> command_getenv(const Command& command, const char* key) {
  const auto& variables = command.environment_variables();
  const auto found = variables.find(key);
  if (found == variables.cend()) {
    return std::nullopt;
  }
  return found->second;
}
// Determines the ROOT_DIR for `command` from its kEnvRootDirectory
// environment variable.
// Returns the directory with a trailing separator, or an empty optional if
// the variable is not set (nothing can be made relative then).
static std::optional<fs::path> command_get_root_directory(const Command& command) {
  if (const auto root = command_getenv(command, kEnvRootDirectory)) {
    // appending an empty component makes the path end in a separator
    return fs::path(*root) / "";
  }
  // there is no ROOT_DIR that we can use to make calls relative
  return {};
}
// Rewrites the program and all arguments of `command` so that every
// occurrence of `root_directory` is replaced by the path of the root relative
// to the current working directory.
// Returns false (leaving `command` untouched) if the relative path itself
// still contains `root_directory`, true otherwise.
static bool default_make_relative(Command* command, const fs::path& root_directory) {
  // path from the current working directory to ROOT_DIR, with trailing slash
  fs::path relative_root = fs::relative(root_directory) / "";
  if (relative_root == "./") {
    // already in ROOT_DIR: the prefix is simply dropped
    relative_root.clear();
  }

  const std::string& absolute_prefix = root_directory.native();
  const std::string& relative_prefix = relative_root.native();

  // TODO: This is generally bad as this means we can't make anything relative.
  // This happens if the out dir is outside of the root.
  if (relative_prefix.find(absolute_prefix) != std::string::npos) {
    return false;
  }

  // swap every occurrence of the absolute prefix for the relative one,
  // resuming the search right behind each replacement
  const auto rewrite = [&](std::string& text) {
    for (size_t pos = text.find(absolute_prefix, 0); pos != std::string::npos;
         pos = text.find(absolute_prefix, pos + relative_prefix.length())) {
      text.replace(pos, absolute_prefix.length(), relative_prefix);
    }
  };

  rewrite(*command->mutable_program());
  for (std::string& argument : *command->mutable_arguments()) {
    rewrite(argument);
  }
  return true;
}
// Result of analyzing a command: the inputs and the outputs that an analyzer
// determined for it (see Analyzer::determine_inputs_outputs).
struct InputsOutputs {
Inputs inputs;
Outputs outputs;
};
// Base class of all command analyzers and, at the same time, the generic
// fallback analyzer: it reports no inputs/outputs and rewrites nothing.
class Analyzer {
 public:
  // Selects the analyzer matching `command`, see the `analyzers` table.
  static std::unique_ptr<Analyzer> get(const interceptor::Command&);

  virtual ~Analyzer() = default;

  // Stores the result of determine_inputs_outputs() in `command`.
  void set_inputs_outputs(Command* command) const;

  // Rewrites paths in the command relative to the given root directory.
  // Returns whether the command was changed.
  virtual bool make_relative(Command*, const fs::path&) const {
    return false;
  }

  // Whether the interceptor should stay active for the command's children.
  virtual bool should_recurse(Command*) const {
    return true;
  }

  // Replaces the command by a fake execution.
  // Returns whether the command was changed.
  virtual bool make_fake(Command*) const {
    return false;
  }

  // Name recorded as the analyzer of the command.
  virtual std::string name() const {
    return "Generic";
  }

 protected:
  // Analyzer specific detection of inputs and outputs; none by default.
  virtual InputsOutputs determine_inputs_outputs(const Command&) const {
    return {};
  }
};
// Records the inputs and outputs determined by the concrete analyzer in
// `command`. Terminates the process with exit code 1 if any recorded input is
// not a regular file.
void Analyzer::set_inputs_outputs(Command* command) const {
  const InputsOutputs detected = determine_inputs_outputs(*command);
  *command->mutable_inputs() = {detected.inputs.begin(), detected.inputs.end()};
  *command->mutable_outputs() = {detected.outputs.begin(), detected.outputs.end()};

  const auto& recorded_inputs = command->inputs();
  const auto missing =
      std::find_if_not(recorded_inputs.begin(), recorded_inputs.end(),
                       [](const auto& input) { return fs::is_regular_file(input); });
  if (missing != recorded_inputs.end()) {
    std::cerr << "missing input: " << *missing << "\n" << *command << "\n";
    exit(1);
  }
}
// Rewrites `command` into a shell invocation that merely creates empty files
// for all of its outputs.
// Returns 0 (false) if the command has no outputs and was left untouched,
// 1 (true) if it was rewritten.
int default_fake(Command* command) {
  const auto& outputs = command->outputs();
  if (outputs.empty()) {
    return false;
  }

  // truncate makes sure we leave an empty file even if the output existed from
  // an earlier run.
  // NOTE(review): the output paths are joined into the shell line unquoted, so
  // paths containing whitespace or shell metacharacters would be misparsed.
  std::string shell_line = "truncate -s 0";
  for (const auto& output : outputs) {
    shell_line += ' ';
    shell_line += output;
  }

  // replace the original invocation by: /bin/sh -c "<shell_line>"
  command->set_program("/bin/sh");
  command->clear_arguments();
  command->add_arguments("/bin/sh");
  command->add_arguments("-c");
  command->add_arguments(shell_line);
  return true;
}
// Analyzer for compiler and linker style command lines.
class CompileLinkerAnalyzer : public Analyzer {
  std::string name() const final {
    return "CompilerLinker";
  }

  // Scans the command line: the argument following "-o" and the suffix of a
  // "-Wp,-MMD,<file>" argument are outputs; every argument that is neither a
  // flag nor the value of one of the known two-part options is an input.
  // Invocations reading from /dev/null or stdin ("-") yield an empty result.
  InputsOutputs determine_inputs_outputs(const Command& command) const final {
    // options whose value is passed as the following, separate argument
    static constexpr std::array kSkipNextArguments{
        "-isystem", "-I", "-L", "-m", "-soname", "-z",
    };
    static constexpr std::string_view kOutputOption = "-Wp,-MMD,";

    const auto& arguments = command.arguments();
    InputsOutputs collected;
    bool expect_output = false;
    bool expect_option_value = false;

    // arguments[0] is the program itself, hence start at index 1
    for (int index = 1; index < arguments.size(); ++index) {
      const std::string& current = arguments[index];

      if (current == "-o") {
        expect_output = true;
        continue;
      }
      if (expect_output) {
        collected.outputs.push_back(current);
        expect_output = false;
        continue;
      }

      // the dependency file named in the preprocessor option is an output too
      if (current.compare(0, kOutputOption.size(), kOutputOption) == 0) {
        collected.outputs.push_back(current.substr(kOutputOption.size()));
      }

      if (expect_option_value) {
        expect_option_value = false;
        continue;
      }
      const bool takes_value =
          std::any_of(kSkipNextArguments.cbegin(), kSkipNextArguments.cend(),
                      [&current](const char* option) { return current == option; });
      if (takes_value) {
        expect_option_value = true;
      }

      // ignore test compilations
      if (current == "/dev/null" || current == "-") {
        return {};
      }

      // ignore flags
      if (current[0] == '-') {
        continue;
      }

      collected.inputs.push_back(current);
    }
    return collected;
  }

  bool make_relative(Command* command, const fs::path& root_directory) const final {
    return default_make_relative(command, root_directory);
  }

  // do not recurse the interceptor into the subprocesses of compilers/linkers;
  // otherwise we will trace (and get confused by) ld.lld/cc1-plus and friends.
  bool should_recurse(Command*) const final {
    return false;
  }

  bool make_fake(Command* command) const final {
    return default_fake(command);
  }
};
// Analyzer for archiver command lines of the form
//   <program> <flags> <output> [<input>...]
class ArchiverAnalyzer : public Analyzer {
  std::string name() const final {
    return "Archiver";
  }

  // Reports nothing for command lines with fewer than three arguments.
  InputsOutputs determine_inputs_outputs(const Command& command) const final {
    const auto& arguments = command.arguments();
    InputsOutputs collected;
    if (arguments.size() >= 3) {
      // arguments[0] is the program itself and arguments[1] holds the archiver
      // flags; arguments[2] is the output, everything after it is an input
      collected.outputs.push_back(arguments[2]);
      collected.inputs.insert(collected.inputs.cend(), std::next(arguments.cbegin(), 3),
                              arguments.cend());
    }
    return collected;
  }

  bool make_relative(Command* command, const fs::path& root_directory) const final {
    return default_make_relative(command, root_directory);
  }

  bool should_recurse(Command*) const final {
    return false;
  }

  bool make_fake(Command* command) const final {
    return default_fake(command);
  }
};
// Analyzer for invocations of the fixdep helper.
class FixdepAnalyzer : public Analyzer {
  std::string name() const final {
    return "Fixdep";
  }

  // Reports arguments[1] as both input and output and arguments[2] as an
  // additional input; reports nothing if fewer than three arguments are given.
  InputsOutputs determine_inputs_outputs(const Command& command) const final {
    const auto& arguments = command.arguments();
    InputsOutputs collected;
    if (arguments.size() >= 3) {
      // fixdep reads the object file and the .d file and outputs to the .d file
      collected.outputs.push_back(arguments[1]);
      collected.inputs.push_back(arguments[1]);
      collected.inputs.push_back(arguments[2]);
    }
    return collected;
  }

  bool make_fake(Command* command) const final {
    return default_fake(command);
  }
};
static const std::initializer_list<
std::pair<std::regex, std::function<std::unique_ptr<Analyzer>()>>>
analyzers{
{
std::regex("^(.*/)?(clang|clang\\+\\+|gcc|g\\+\\+|ld(\\.lld)?|llvm-strip)$"),
[]() { return std::make_unique<CompileLinkerAnalyzer>(); },
},
{
std::regex("^(.*/)?(llvm-)?ar$"),
[]() { return std::make_unique<ArchiverAnalyzer>(); },
},
{
std::regex("^scripts/basic/fixdep$"),
[]() { return std::make_unique<FixdepAnalyzer>(); },
},
};
// Returns the analyzer responsible for `command`: the first entry of
// `analyzers` whose regex matches arguments[0], or the generic Analyzer if
// none matches or the command has no arguments at all.
std::unique_ptr<Analyzer> Analyzer::get(const Command& command) {
  const auto& arguments = command.arguments();
  // an empty argument list is legal (argv[0] may be null), so there might be
  // nothing to match against
  if (arguments.empty()) {
    return std::make_unique<Analyzer>();
  }

  const std::string& invoked_as = arguments[0];
  for (const auto& [regex, analyzer_factory] : analyzers) {
    if (std::regex_match(invoked_as, regex)) {
      return analyzer_factory();
    }
  }
  return std::make_unique<Analyzer>();
}
} // namespace interceptor
/// UTILITY FUNCTIONS

// Analyzes an intercepted call, logs it and possibly rewrites it.
//
// filename, argv, envp: the arguments of the intercepted exec-style call.
// should_recurse:       out parameter; set to the analyzer's verdict on
//                       whether child processes should be intercepted as well.
//                       Left untouched if the call is not handled at all.
//
// Returns the rewritten command if the call was transformed (made relative
// and/or replaced by a fake); returns an empty optional if the call is not
// handled or needs no change, in which case the caller should pass on the
// original arguments.
static std::optional<interceptor::Command> process_command(const char* filename, char* const argv[],
                                                           char* const envp[],
                                                           bool* should_recurse) {
  // Calls with missing pieces cannot be analyzed; leave it to the original
  // handler to accept or reject them.
  if (filename == nullptr || argv == nullptr || argv[0] == nullptr || envp == nullptr) {
    return {};
  }

  // First, try to find out whether we at all can handle this command. If not,
  // simply return and fall back to the original handler. The non-throwing
  // overload is used on purpose: an inaccessible path must not raise an
  // exception through the intercepted C entry points.
  std::error_code status_error;
  if (!fs::is_regular_file(filename, status_error)) {
    return {};
  }

  // Ok, we can handle that one, let's transform it.
  auto command = interceptor::instantiate_command(filename, argv, envp);
  const auto analyzer = interceptor::Analyzer::get(command);
  command.set_analyzer(analyzer->name());

  bool transformed = false;

  auto root_directory = command_get_root_directory(command);
  if (root_directory.has_value()) {
    command.set_current_directory(fs::relative(command.current_directory(), *root_directory));

    // rewrite all command line arguments (including the program itself) to use
    // paths relative to ROOT_DIR. This is essential for reproducible builds and
    // furthermore necessary to produce cache hits in RBE.
    if (command_getenv(command, kEnvMakeRelative)) {
      transformed |= analyzer->make_relative(&command, *root_directory);
    }
  }

  analyzer->set_inputs_outputs(&command);
  *should_recurse = analyzer->should_recurse(&command);

  log(command);

  // now that we have logged the command away, we can entirely rewrite it, or
  // whatever the analyzer thinks a good fake execution looks like.
  if (command_getenv(command, kEnvFake)) {
    transformed |= analyzer->make_fake(&command);
  }

  if (transformed) {
    return command;
  }
  return {};
}
// Appends `command` as a length-delimited interceptor::Message to the log
// file named by the command's kEnvCommandLog environment variable. Does
// nothing if that variable is not set. The environment variables of the
// command are not part of the logged message.
//
// Failures to open or write the log file are reported on stderr; they are
// never fatal.
static void log(const interceptor::Command& command) {
  const auto log_path = command_getenv(command, kEnvCommandLog);
  if (!log_path) {
    return;
  }

  interceptor::Message message;
  *message.mutable_command() = command;
  message.mutable_command()->clear_environment_variables();

  std::ostringstream os;
  google::protobuf::util::SerializeDelimitedToOstream(message, &os);
  const std::string serialized = os.str();

  // The file is opened without O_CREAT, so it has to exist already.
  android::base::unique_fd fd(TEMP_FAILURE_RETRY(open(log_path->c_str(), O_WRONLY | O_APPEND)));
  if (!fd.ok()) {
    std::cerr << "Could not open " << std::quoted(*log_path) << ": " << strerror(errno) << "\n";
    return;
  }

  // Keep the result signed: comparing the ssize_t directly against size()
  // would turn the error value -1 into a huge unsigned number and hide it.
  const ssize_t written =
      TEMP_FAILURE_RETRY(write(fd.get(), serialized.data(), serialized.size()));
  if (written < 0) {
    std::cerr << "Could not write " << std::quoted(*log_path) << ": " << strerror(errno) << "\n";
    return;
  }
  if (static_cast<size_t>(written) != serialized.size()) {
    // errno is meaningless for a short write, so do not print it
    std::cerr << "Could not write " << std::quoted(*log_path) << ": short write\n";
    return;
  }
}
// Converts the arguments of `command` into a null-terminated array as
// expected by the exec family of functions.
//
// The returned pointers refer to the strings owned by `command`; they are
// only valid as long as `command` is alive and its arguments are unmodified.
static std::vector<const char*> argv_vector(const interceptor::Command& command) {
  std::vector<const char*> result;
  result.reserve(command.arguments().size() + 1);
  for (const auto& arg : command.arguments()) {
    result.push_back(arg.c_str());
  }
  // the terminator has to be a real element: writing to a slot that was only
  // reserve()d is out of bounds
  result.push_back(nullptr);
  return result;
}
// Neutralizes every "LD_PRELOAD=..." entry of `envp` by turning it into an
// empty string, so that it no longer names LD_PRELOAD.
//
// envp: null-terminated list of environment entries; may be null, in which
//       case nothing happens.
//
// NOTE(review): the entries are modified in place, i.e. in memory owned by
// the caller. If `envp` is the caller's live environment, the change stays
// visible there after this call — confirm that this is intended.
static void disable_ld_preload(char* const envp[]) {
  if (envp == nullptr) {
    return;
  }
  constexpr std::string_view kPreloadPrefix = "LD_PRELOAD=";
  for (auto current_env_var = envp; *current_env_var != nullptr; ++current_env_var) {
    char* const entry = *current_env_var;
    if (std::string_view(entry).rfind(kPreloadPrefix, 0) == 0) {
      // blank the entry by terminating the string at its first character
      entry[0] = '\0';
    }
  }
}
// Runs an intercepted call: lets process_command() analyze (and possibly
// rewrite) it and then hands either the rewritten or the original call to
// `executor`. Returns whatever `executor` returns.
int execute(const char* filename, char* const argv[], char* const envp[],
            const executor_t& executor) {
  bool recurse_into_children = true;
  const auto rewritten = process_command(filename, argv, envp, &recurse_into_children);

  // the analyzer asked not to intercept whatever this command spawns
  if (!recurse_into_children) {
    disable_ld_preload(envp);
  }

  // nothing was rewritten: fall back to the original call
  if (!rewritten.has_value()) {
    return executor(filename, argv, envp);
  }

  // pass down the transformed command to the executor; the vector has to stay
  // alive until the executor is done with it
  const auto rewritten_argv = argv_vector(*rewritten);
  return executor(rewritten->program().c_str(), const_cast<char**>(rewritten_argv.data()), envp);
}