| /* |
| * Copyright (C) 2021 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "interceptor.h" |
| |
| #include <dlfcn.h> |
| #include <fcntl.h> |
| #include <spawn.h> |
| #include <unistd.h> |
| |
| #include <algorithm> |
| #include <array> |
| #include <filesystem> |
| #include <fstream> |
| #include <functional> |
| #include <initializer_list> |
| #include <iomanip> |
| #include <iostream> |
| #include <iterator> |
| #include <memory> |
| #include <optional> |
| #include <regex> |
| #include <sstream> |
| #include <string> |
| #include <string_view> |
| #include <type_traits> |
| #include <utility> |
| |
| #include <android-base/unique_fd.h> |
| |
| #include <google/protobuf/util/delimited_message_util.h> |
| |
| #include "interceptor_utils.h" |
| |
| namespace fs = std::filesystem; |
| |
| // UTILITY function declarations |
| |
| // process applicable calls (i.e. programs that we might be able to handle) |
| static std::optional<interceptor::Command> process_command(const char* filename, char* const argv[], |
| char* const envp[], |
| bool* should_recurse); |
| |
| // log command if logging is enabled |
| static void log(const interceptor::Command&); |
| |
| // execute potentially modified command |
| using executor_t = std::function<int(const char* filename, char* const argv[], char* const envp[])>; |
| static int execute(const char* filename, char* const argv[], char* const envp[], |
| const executor_t& executor); |
| |
| // OVERLOADS for LD_PRELOAD USE |
| |
| // Intercept execve calls, for that capture the original execve call |
| static auto const old_execve = reinterpret_cast<decltype(execve)*>(dlsym(RTLD_NEXT, "execve")); |
| |
| extern "C" { |
| int execve(const char* filename, char* const argv[], char* const envp[]) { |
| return execute(filename, argv, envp, |
| [&](const char* file, char* const arguments[], char* const environment[]) { |
| return old_execve(file, arguments, environment); |
| }); |
| } |
| } // extern "C" |
| |
| // Intercept posix_spawn calls, for that capture the original posix_spawn call |
| static auto const old_posix_spawn = |
| reinterpret_cast<decltype(posix_spawn)*>(dlsym(RTLD_NEXT, "posix_spawn")); |
| |
| extern "C" { |
| int posix_spawn(pid_t* pid, const char* filename, const posix_spawn_file_actions_t* file_actions, |
| const posix_spawnattr_t* attrp, char* const argv[], char* const envp[]) { |
| return execute(filename, argv, envp, |
| [&](const char* file, char* const arguments[], char* const environment[]) { |
| return old_posix_spawn(pid, file, file_actions, attrp, arguments, environment); |
| }); |
| } |
| } // extern "C" |
| |
| // LIBRARY IMPLEMENTATION |
| |
| namespace interceptor { |
| |
| static Command instantiate_command(const char* program, char* const argv[], char* const envp[]) { |
| Command result; |
| result.set_program(program); |
| result.set_current_directory(fs::current_path()); |
| |
| for (auto current_argument = argv; *current_argument; ++current_argument) { |
| result.add_arguments(*current_argument); |
| } |
| |
| for (auto current_env_var = envp; *current_env_var; ++current_env_var) { |
| const std::string s(*current_env_var); |
| const auto pos = s.find('='); |
| if (pos == std::string::npos) { |
| continue; |
| } |
| |
| (*result.mutable_environment_variables())[s.substr(0, pos)] = s.substr(pos + 1); |
| } |
| |
| return result; |
| } |
| |
| static std::optional<std::string> command_getenv(const Command& command, const char* key) { |
| const auto& env = command.environment_variables(); |
| if (const auto iter = env.find(key); iter != env.cend()) { |
| return {iter->second}; |
| } |
| return {}; |
| } |
| |
| static std::optional<fs::path> command_get_root_directory(const Command& command) { |
| // determine the ROOT_DIR |
| fs::path root_directory; |
| if (const auto root = command_getenv(command, kEnvRootDirectory)) { |
| return fs::path(*root) / ""; |
| } |
| // there is no ROOT_DIR that we can use to make calls relative |
| return {}; |
| } |
| |
| static bool default_make_relative(Command* command, const fs::path& root_directory) { |
| // determine the relative path to ROOT_DIR from the current working dir |
| auto relative_root = fs::relative(root_directory) / ""; |
| if (relative_root == "./") { |
| relative_root.clear(); |
| } |
| |
| // TODO: This is generally bad as this means we can't make anything relative. |
| // This happens if the out dir is outside of the root. |
| if (relative_root.native().find(root_directory) != std::string::npos) { |
| return false; |
| } |
| |
| // replacement functor |
| const auto replace_all = [&](auto& str) { |
| size_t start = 0; |
| auto pos = std::string::npos; |
| while ((pos = str.find(root_directory, start)) != std::string::npos) { |
| str.replace(pos, root_directory.native().length(), relative_root); |
| start = pos + relative_root.native().length(); |
| } |
| }; |
| |
| // now go and replace everything |
| replace_all(*command->mutable_program()); |
| std::for_each(command->mutable_arguments()->begin(), command->mutable_arguments()->end(), replace_all); |
| |
| return true; |
| } |
| |
| struct InputsOutputs { |
| Inputs inputs; |
| Outputs outputs; |
| }; |
| |
| class Analyzer { |
| public: |
| static std::unique_ptr<Analyzer> get(const interceptor::Command&); |
| virtual ~Analyzer() = default; |
| |
| void set_inputs_outputs(Command* command) const; |
| virtual bool make_relative(Command*, const fs::path&) const { return false; } |
| virtual bool should_recurse(Command*) const { return true; } |
| virtual bool make_fake(Command*) const { return false; } |
| |
| virtual std::string name() const { return "Generic"; }; |
| |
| protected: |
| virtual InputsOutputs determine_inputs_outputs(const Command&) const { return {}; } |
| }; |
| |
| void Analyzer::set_inputs_outputs(Command* command) const { |
| auto [inputs, outputs] = determine_inputs_outputs(*command); |
| |
| *command->mutable_inputs() = {inputs.begin(), inputs.end()}; |
| *command->mutable_outputs() = {outputs.begin(), outputs.end()}; |
| |
| for (const auto& input : command->inputs()) { |
| if (!fs::is_regular_file(input)) { |
| std::cerr << "missing input: " << input << "\n" << *command << "\n"; |
| exit(1); |
| } |
| } |
| } |
| |
| int default_fake(Command* command) { |
| if (command->outputs().empty()) { |
| return false; |
| } |
| |
| // rewrite the command to just produce empty files for any output |
| command->set_program("/bin/sh"); |
| command->clear_arguments(); |
| command->add_arguments("/bin/sh"); |
| command->add_arguments("-c"); |
| |
| // truncate makes sure we leave an empty file even if the output existed from |
| // an earlier run. |
| std::ostringstream command_line("truncate -s 0", std::ios_base::ate); |
| for (const auto& output : command->outputs()) { |
| command_line << ' ' << output; |
| } |
| command->add_arguments(command_line.str()); |
| |
| return true; |
| } |
| |
| class CompileLinkerAnalyzer : public Analyzer { |
| std::string name() const final { return "CompilerLinker"; }; |
| |
| InputsOutputs determine_inputs_outputs(const Command& command) const final { |
| static constexpr std::array kSkipNextArguments{ |
| "-isystem", "-I", "-L", "-m", "-soname", "-z", |
| }; |
| static constexpr std::string_view kOutputOption = "-Wp,-MMD,"; |
| |
| InputsOutputs result; |
| bool next_is_out = false; |
| bool skip_next = false; |
| // skip arguments[0] as this is the program itself |
| for (auto it = command.arguments().cbegin() + 1; it != command.arguments().cend(); ++it) { |
| const auto& argument = *it; |
| if (argument == "-o") { |
| next_is_out = true; |
| continue; |
| } |
| if (next_is_out) { |
| result.outputs.push_back(argument); |
| next_is_out = false; |
| continue; |
| } |
| if (argument.rfind(kOutputOption, 0) == 0) { |
| result.outputs.push_back(argument.substr(kOutputOption.size())); |
| } |
| if (skip_next) { |
| skip_next = false; |
| continue; |
| } |
| if (std::find(kSkipNextArguments.cbegin(), kSkipNextArguments.cend(), argument) != |
| kSkipNextArguments.cend()) { |
| skip_next = true; |
| } |
| // ignore test compilations |
| if (argument == "/dev/null" || argument == "-") { |
| return {}; |
| } |
| if (argument[0] == '-') { // ignore flags |
| continue; |
| } |
| result.inputs.push_back(argument); |
| } |
| |
| return result; |
| } |
| |
| bool make_relative(Command* command, const fs::path& root_directory) const final { |
| return default_make_relative(command, root_directory); |
| } |
| |
| // do not recurse the interceptor into the subprocesses of compilers/linkers; |
| // otherwise we will trace (and get confused by) ld.lld/cc1-plus and friends. |
| bool should_recurse(Command*) const final { return false; } |
| bool make_fake(Command* command) const final { return default_fake(command); } |
| }; |
| |
| class ArchiverAnalyzer : public Analyzer { |
| std::string name() const final { return "Archiver"; }; |
| |
| InputsOutputs determine_inputs_outputs(const Command& command) const final { |
| InputsOutputs result; |
| |
| const auto& arguments = command.arguments(); |
| |
| if (arguments.size() < 3) { |
| return result; |
| } |
| // skip arguments[0] as this is the program itself |
| // skip arguments[1] are the archiver flags |
| // arguments[2] is the output |
| result.outputs.push_back(arguments[2]); |
| // arguments[3:] are the inputs |
| result.inputs.insert(result.inputs.cend(), arguments.cbegin() + 3, arguments.cend()); |
| return result; |
| } |
| |
| bool make_relative(Command* command, const fs::path& root_directory) const final { |
| return default_make_relative(command, root_directory); |
| } |
| bool should_recurse(Command*) const final { return false; } |
| bool make_fake(Command* command) const final { return default_fake(command); } |
| }; |
| |
| class FixdepAnalyzer : public Analyzer { |
| std::string name() const final { return "Fixdep"; }; |
| |
| InputsOutputs determine_inputs_outputs(const Command& command) const final { |
| InputsOutputs result; |
| const auto& arguments = command.arguments(); |
| |
| if (arguments.size() < 3) { |
| return result; |
| } |
| |
| // fixdep reads the object file and the .d file and outputs to the .d file |
| result.outputs.push_back(arguments[1]); |
| result.inputs.push_back(arguments[1]); |
| result.inputs.push_back(arguments[2]); |
| |
| return result; |
| }; |
| |
| bool make_fake(Command* command) const final { return default_fake(command); } |
| }; |
| |
| static const std::initializer_list< |
| std::pair<std::regex, std::function<std::unique_ptr<Analyzer>()>>> |
| analyzers{ |
| { |
| std::regex("^(.*/)?(clang|clang\\+\\+|gcc|g\\+\\+|ld(\\.lld)?|llvm-strip)$"), |
| []() { return std::make_unique<CompileLinkerAnalyzer>(); }, |
| }, |
| { |
| std::regex("^(.*/)?(llvm-)?ar$"), |
| []() { return std::make_unique<ArchiverAnalyzer>(); }, |
| }, |
| { |
| std::regex("^scripts/basic/fixdep$"), |
| []() { return std::make_unique<FixdepAnalyzer>(); }, |
| }, |
| }; |
| |
| std::unique_ptr<Analyzer> Analyzer::get(const Command& command) { |
| for (const auto& [regex, analyzer_factory] : analyzers) { |
| if (std::regex_match(command.arguments()[0], regex)) { |
| return analyzer_factory(); |
| } |
| } |
| return std::make_unique<Analyzer>(); |
| } |
| |
| } // namespace interceptor |
| |
| /// UTILITY FUNCTIONS |
| |
| static std::optional<interceptor::Command> process_command(const char* filename, char* const argv[], |
| char* const envp[], |
| bool* should_recurse) { |
| // First, try to find out whether we at all can handle this command. If not, |
| // simply return and fall back to the original handler. |
| |
| if (!fs::is_regular_file(filename)) { |
| return {}; |
| } |
| |
| // Ok, we can handle that one, let's transform it. |
| |
| auto command = interceptor::instantiate_command(filename, argv, envp); |
| |
| const auto analyzer = interceptor::Analyzer::get(command); |
| command.set_analyzer(analyzer->name()); |
| |
| bool transformed = false; |
| |
| auto root_directory = command_get_root_directory(command); |
| |
| if (root_directory.has_value()) { |
| command.set_current_directory(fs::relative(command.current_directory(), *root_directory)); |
| |
| // rewrite all command line arguments (including the program itself) to use |
| // paths relative to ROOT_DIR. This is essential for reproducible builds and |
| // furthermore necessary to produce cache hits in RBE. |
| if (command_getenv(command, kEnvMakeRelative)) { |
| transformed |= analyzer->make_relative(&command, *root_directory); |
| } |
| } |
| |
| analyzer->set_inputs_outputs(&command); |
| *should_recurse = analyzer->should_recurse(&command); |
| |
| log(command); |
| |
| // now that we have logged the command away, we can entirely rewrite it, or |
| // whatever the analyzer thinks a good fake execution looks like. |
| if (command_getenv(command, kEnvFake)) { |
| transformed |= analyzer->make_fake(&command); |
| } |
| |
| if (transformed) { |
| return command; |
| } |
| return {}; |
| } |
| |
| static void log(const interceptor::Command& command) { |
| if (const auto log = command_getenv(command, kEnvCommandLog)) { |
| interceptor::Message message; |
| *message.mutable_command() = command; |
| message.mutable_command()->clear_environment_variables(); |
| |
| std::ostringstream os; |
| google::protobuf::util::SerializeDelimitedToOstream(message, &os); |
| |
| const auto& str = os.str(); |
| android::base::unique_fd fd(TEMP_FAILURE_RETRY(open(log->c_str(), O_WRONLY | O_APPEND))); |
| if (!fd.ok()) { |
| std::cerr << "Could not open " << std::quoted(*log) << ": " << strerror(errno); |
| return; |
| } |
| |
| if (TEMP_FAILURE_RETRY(write(fd, str.data(), str.size())) < str.size()) { |
| std::cerr << "Could not write " << std::quoted(*log) << ": " << strerror(errno); |
| return; |
| } |
| } |
| } |
| |
| static std::vector<const char*> argv_vector(const interceptor::Command& command) { |
| std::vector<const char*> result; |
| result.reserve(command.arguments().size() + 1); |
| result[command.arguments().size()] = nullptr; |
| for (const auto& arg : command.arguments()) { |
| result.push_back(arg.c_str()); |
| } |
| return result; |
| } |
| |
// Neutralizes every "LD_PRELOAD=..." entry of `envp` by turning it into an
// empty string in place. Note that this writes into the strings handed in by
// the caller. A NULL `envp` is accepted and left alone.
static void disable_ld_preload(char* const envp[]) {
  if (envp == nullptr) {
    return;
  }

  constexpr std::string_view kPreloadPrefix = "LD_PRELOAD=";
  for (auto current_env_var = envp; *current_env_var != nullptr; ++current_env_var) {
    const std::string_view entry(*current_env_var);
    if (entry.rfind(kPreloadPrefix, 0) == 0) {
      // overwrite the first character of the entry, i.e. truncate the string
      (*current_env_var)[0] = '\0';
    }
  }
}
| |
| int execute(const char* filename, char* const argv[], char* const envp[], |
| const executor_t& executor) { |
| // pass on to process_command(), if unhandled, fall back to the original executor |
| bool should_recurse = true; |
| auto command = process_command(filename, argv, envp, &should_recurse); |
| |
| if (!should_recurse) { |
| disable_ld_preload(envp); |
| } |
| |
| if (command.has_value()) { |
| // pass down the transformed command to the executor |
| return executor(command->program().c_str(), const_cast<char**>(argv_vector(*command).data()), |
| envp); |
| } |
| // else fall back to the original call |
| return executor(filename, argv, envp); |
| } |