| /* |
| * Copyright (C) 2021 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
#include "interceptor.h"

#include <dlfcn.h>
#include <fcntl.h>
#include <unistd.h>

#include <algorithm>
#include <array>
#include <cerrno>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <functional>
#include <initializer_list>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <memory>
#include <optional>
#include <regex>
#include <sstream>
#include <string>
#include <string_view>
#include <system_error>
#include <type_traits>
#include <utility>
#include <vector>

#include <android-base/unique_fd.h>

#include <google/protobuf/util/delimited_message_util.h>

#include "interceptor_utils.h"
| |
| namespace fs = std::filesystem; |
| |
| // UTILITY function declarations |
| |
| // process applicable calls (i.e. programs that we might be able to handle) |
| static std::optional<interceptor::Command> process_command(const char* filename, char* const argv[], |
| char* const envp[]); |
| |
| // log command if logging is enabled |
| static void log(const interceptor::Command&); |
| |
| // execute potentially modified command |
| static int execute_execve(const interceptor::Command&, char* const envp[]); |
| |
| // OVERLOADS for LD_PRELOAD USE |
| |
// The `execve` that would have been called had this library not been
// preloaded: the next definition of the symbol after this object (RTLD_NEXT).
// The overload below falls back to it.
static const auto old_execve = reinterpret_cast<decltype(&execve)>(dlsym(RTLD_NEXT, "execve"));
| |
| extern "C" { |
| int execve(const char* filename, char* const argv[], char* const envp[]) { |
| // pass on to process_command(), if unhandled, fall back to the original |
| // execve |
| auto command = process_command(filename, argv, envp); |
| |
| if (command.has_value()) { |
| // pass down the transformed command to execve |
| return execute_execve(*command, envp); |
| } |
| // else fall back to the original call |
| return old_execve(filename, argv, envp); |
| } |
| } // extern "C" |
| |
| // LIBRARY IMPLEMENTATION |
| |
| namespace interceptor { |
| |
| static Command instantiate_command(const char* program, char* const argv[], char* const envp[]) { |
| Command result; |
| result.set_program(program); |
| result.set_current_directory(fs::current_path()); |
| |
| for (auto current_argument = argv; *current_argument; ++current_argument) { |
| result.add_arguments(*current_argument); |
| } |
| |
| for (auto current_env_var = envp; *current_env_var; ++current_env_var) { |
| const std::string s(*current_env_var); |
| const auto pos = s.find('='); |
| if (pos == std::string::npos) { |
| continue; |
| } |
| |
| (*result.mutable_environment_variables())[s.substr(0, pos)] = s.substr(pos + 1); |
| } |
| |
| return result; |
| } |
| |
| static std::optional<std::string> command_getenv(const Command& command, const char* key) { |
| const auto& env = command.environment_variables(); |
| if (const auto iter = env.find(key); iter != env.cend()) { |
| return {iter->second}; |
| } |
| return {}; |
| } |
| |
| static bool default_make_relative(Command* command) { |
| // determine the ROOT_DIR |
| std::string root_directory; |
| if (const auto root = command_getenv(*command, kEnvRootDirectory)) { |
| root_directory = *root; |
| if (root_directory[root_directory.size() - 1] != '/') { |
| root_directory += '/'; |
| } |
| } else { |
| return false; |
| } |
| |
| // determine the relative path to ROOT_DIR from the current working dir |
| std::string relative_root = fs::relative(root_directory); |
| if (relative_root[relative_root.size() - 1] != '/') { |
| relative_root += '/'; |
| } |
| if (relative_root == "./") { |
| relative_root.clear(); |
| } |
| |
| // TODO: This is generally bad as this means we can't make anything relative. |
| // This happens if the out dir is outside of the root. |
| if (relative_root.find(root_directory) != std::string::npos) { |
| return false; |
| } |
| |
| command->set_current_directory(fs::relative(command->current_directory(), root_directory)); |
| |
| // replacement functor |
| const auto replace_all = [&](auto& str) { |
| auto pos = std::string::npos; |
| while ((pos = str.find(root_directory)) != std::string::npos) { |
| str.replace(pos, root_directory.length(), relative_root); |
| } |
| }; |
| |
| // now go and replace everything |
| replace_all(*command->mutable_program()); |
| std::for_each(command->mutable_arguments()->begin(), command->mutable_arguments()->end(), replace_all); |
| |
| return true; |
| } |
| |
| struct InputsOutputs { |
| Inputs inputs; |
| Outputs outputs; |
| }; |
| |
| class Analyzer { |
| public: |
| static std::unique_ptr<Analyzer> get(const interceptor::Command&); |
| virtual ~Analyzer() = default; |
| |
| void set_inputs_outputs(Command* command) const; |
| virtual bool make_relative(Command*) const { return false; } |
| |
| protected: |
| virtual InputsOutputs determine_inputs_outputs(const Command&) const { return {}; } |
| }; |
| |
| void Analyzer::set_inputs_outputs(Command* command) const { |
| auto [inputs, outputs] = determine_inputs_outputs(*command); |
| |
| // TODO: this sanitizing should be done during make_relative |
| for (auto& input : inputs) { |
| if (input.rfind("./", 0) == 0) { |
| input = input.substr(2); |
| } |
| } |
| for (auto& output : outputs) { |
| if (output.rfind("./", 0) == 0) { |
| output = output.substr(2); |
| } |
| } |
| *command->mutable_inputs() = {inputs.begin(), inputs.end()}; |
| *command->mutable_outputs() = {outputs.begin(), outputs.end()}; |
| |
| for (const auto& input : command->inputs()) { |
| if (!fs::is_regular_file(input)) { |
| std::cerr << "missing input: " << input << "\n" << *command << "\n"; |
| exit(1); |
| } |
| } |
| } |
| |
| class CompileLinkerAnalyzer : public Analyzer { |
| InputsOutputs determine_inputs_outputs(const Command& command) const final { |
| static constexpr std::array kSkipNextArguments{ |
| "-isystem", "-I", "-L", "-m", "-soname", "-z", |
| }; |
| static constexpr std::string_view kOutputOption = "-Wp,-MMD,"; |
| |
| InputsOutputs result; |
| bool next_is_out = false; |
| bool skip_next = false; |
| // skip arguments[0] as this is the program itself |
| for (auto it = command.arguments().cbegin() + 1; it != command.arguments().cend(); ++it) { |
| const auto& argument = *it; |
| if (argument == "-o") { |
| next_is_out = true; |
| continue; |
| } |
| if (next_is_out) { |
| result.outputs.push_back(argument); |
| next_is_out = false; |
| continue; |
| } |
| if (argument.rfind(kOutputOption, 0) == 0) { |
| result.outputs.push_back(argument.substr(kOutputOption.size())); |
| } |
| if (skip_next) { |
| skip_next = false; |
| continue; |
| } |
| if (std::find(kSkipNextArguments.cbegin(), kSkipNextArguments.cend(), argument) != |
| kSkipNextArguments.cend()) { |
| skip_next = true; |
| } |
| // ignore test compilations |
| if (argument == "/dev/null" || argument == "-") { |
| return {}; |
| } |
| if (argument[0] == '-') { // ignore flags |
| continue; |
| } |
| result.inputs.push_back(argument); |
| } |
| |
| return result; |
| } |
| |
| bool make_relative(Command* command) const final { return default_make_relative(command); } |
| }; |
| |
| class ArchiverAnalyzer : public Analyzer { |
| InputsOutputs determine_inputs_outputs(const Command& command) const final { |
| InputsOutputs result; |
| |
| const auto& arguments = command.arguments(); |
| |
| if (arguments.size() < 3) { |
| return result; |
| } |
| // skip arguments[0] as this is the program itself |
| // skip arguments[1] are the archiver flags |
| // arguments[2] is the output |
| result.outputs.push_back(arguments[2]); |
| // arguments[3:] are the inputs |
| result.inputs.insert(result.inputs.cend(), arguments.cbegin() + 3, arguments.cend()); |
| return result; |
| } |
| |
| bool make_relative(Command* command) const final { return default_make_relative(command); } |
| }; |
| |
| static const std::initializer_list< |
| std::pair<std::regex, std::function<std::unique_ptr<Analyzer>()>>> |
| analyzers{ |
| { |
| std::regex("^(.*/)?(clang|clang\\+\\+|gcc|g\\+\\+|ld(\\.lld)?|llvm-strip)$"), |
| []() { return std::make_unique<CompileLinkerAnalyzer>(); }, |
| }, |
| { |
| std::regex("^(.*/)?(llvm-)?ar$"), |
| []() { return std::make_unique<ArchiverAnalyzer>(); }, |
| }, |
| }; |
| |
| std::unique_ptr<Analyzer> Analyzer::get(const Command& command) { |
| for (const auto& [regex, analyzer_factory] : analyzers) { |
| if (std::regex_match(command.arguments()[0], regex)) { |
| return analyzer_factory(); |
| } |
| } |
| return std::make_unique<Analyzer>(); |
| } |
| |
| } // namespace interceptor |
| |
| /// UTILITY FUNCTIONS |
| |
| static std::optional<interceptor::Command> process_command(const char* filename, char* const argv[], |
| char* const envp[]) { |
| // First, try to find out whether we at all can handle this command. If not, |
| // simply return and fall back to the original handler. |
| |
| if (!fs::is_regular_file(filename)) { |
| return {}; |
| } |
| |
| // Ok, we can handle that one, let's transform it. |
| |
| auto command = interceptor::instantiate_command(filename, argv, envp); |
| |
| const auto analyzer = interceptor::Analyzer::get(command); |
| |
| bool transformed = false; |
| |
| // rewrite all command line arguments (including the program itself) to use |
| // paths relative to ROOT_DIR. This is essential for reproducible builds and |
| // furthermore necessary to produce cache hits in RBE. |
| if (command_getenv(command, kEnvMakeRelative)) { |
| transformed |= analyzer->make_relative(&command); |
| } |
| |
| analyzer->set_inputs_outputs(&command); |
| |
| log(command); |
| |
| if (transformed) { |
| return command; |
| } |
| return {}; |
| } |
| |
| static void log(const interceptor::Command& command) { |
| if (const auto log = command_getenv(command, kEnvCommandLog)) { |
| interceptor::Message message; |
| *message.mutable_command() = command; |
| message.mutable_command()->clear_environment_variables(); |
| |
| std::ostringstream os; |
| google::protobuf::util::SerializeDelimitedToOstream(message, &os); |
| |
| const auto& str = os.str(); |
| android::base::unique_fd fd(TEMP_FAILURE_RETRY(open(log->c_str(), O_WRONLY | O_APPEND))); |
| if (!fd.ok()) { |
| std::cerr << "Could not open " << std::quoted(*log) << ": " << strerror(errno); |
| return; |
| } |
| |
| if (TEMP_FAILURE_RETRY(write(fd, str.data(), str.size())) < str.size()) { |
| std::cerr << "Could not write " << std::quoted(*log) << ": " << strerror(errno); |
| return; |
| } |
| } |
| } |
| |
| static std::vector<const char*> argv_vector(const interceptor::Command& command) { |
| std::vector<const char*> result; |
| result.reserve(command.arguments().size() + 1); |
| result[command.arguments().size()] = nullptr; |
| for (const auto& arg : command.arguments()) { |
| result.push_back(arg.c_str()); |
| } |
| return result; |
| } |
| |
| static int execute_execve(const interceptor::Command& command, char* const envp[]) { |
| // TODO: at this point, we could free some memory that is held in Command. |
| // While the arguments vector is reused for arguments, we could free |
| // the EnvMap and the original arguments. |
| |
| // does not actually return |
| return old_execve(command.program().c_str(), const_cast<char**>(argv_vector(command).data()), |
| envp); |
| } |