Merge "interceptor_analysis supports --relative option."
diff --git a/interceptor.cc b/interceptor.cc
index 6ad5790..e9efccc 100644
--- a/interceptor.cc
+++ b/interceptor.cc
@@ -18,6 +18,7 @@
#include <dlfcn.h>
#include <fcntl.h>
+#include <spawn.h>
#include <unistd.h>
#include <algorithm>
@@ -49,13 +50,17 @@
// UTILITY function declarations
// process applicable calls (i.e. programs that we might be able to handle)
-static void process_command(const char* filename, char* const argv[], char* const envp[]);
+static std::optional<interceptor::Command> process_command(const char* filename, char* const argv[],
+ char* const envp[],
+ bool* should_recurse);
// log command if logging is enabled
static void log(const interceptor::Command&);
// execute potentially modified command
-static void execute(const interceptor::Command&, char* const envp[]);
+using executor_t = std::function<int(const char* filename, char* const argv[], char* const envp[])>;
+static int execute(const char* filename, char* const argv[], char* const envp[],
+ const executor_t& executor);
// OVERLOADS for LD_PRELOAD USE
@@ -64,10 +69,24 @@
extern "C" {
int execve(const char* filename, char* const argv[], char* const envp[]) {
- // pass on to process_command(), if unhandled, fall back to the original
- // execve
- process_command(filename, argv, envp);
- return old_execve(filename, argv, envp);
+  // Hand the real execve to execute(), which calls it with either the original
+  // or the rewritten program and arguments.
+  const executor_t real_execve = [&](const char* program, char* const args[],
+                                     char* const env[]) {
+    return old_execve(program, args, env);
+  };
+  return execute(filename, argv, envp, real_execve);
+}
+} // extern "C"
+
+// To intercept posix_spawn calls, first capture the original posix_spawn
+static auto const old_posix_spawn =
+ reinterpret_cast<decltype(posix_spawn)*>(dlsym(RTLD_NEXT, "posix_spawn"));
+
+extern "C" {
+int posix_spawn(pid_t* pid, const char* filename, const posix_spawn_file_actions_t* file_actions,
+                const posix_spawnattr_t* attrp, char* const argv[], char* const envp[]) {
+  // pid, file_actions and attrp are passed through unchanged; program,
+  // arguments and environment are the ones execute() decides on.
+  const executor_t real_posix_spawn = [&](const char* program, char* const args[],
+                                          char* const env[]) {
+    return old_posix_spawn(pid, program, file_actions, attrp, args, env);
+  };
+  return execute(filename, argv, envp, real_posix_spawn);
}
} // extern "C"
@@ -105,46 +124,45 @@
return {};
}
-static void default_make_relative(Command* command) {
+// Rewrites the program and the arguments of `command` so that occurrences of
+// the root directory (read from the command's kEnvRootDirectory variable) are
+// replaced by the root expressed relative to the current working directory,
+// and makes the command's current directory relative to the root.
+// Returns false, leaving the command unmodified, if no root directory is set
+// or if the relative root still contains the root directory; true otherwise.
+static bool default_make_relative(Command* command) {
// determine the ROOT_DIR
- std::string root_directory;
+ fs::path root_directory;
if (const auto root = command_getenv(*command, kEnvRootDirectory)) {
- root_directory = *root;
- if (root_directory[root_directory.size() - 1] != '/') {
- root_directory += '/';
- }
+ // appending an empty component leaves the path with a trailing separator
+ root_directory = fs::path(*root) / "";
} else {
- return;
+ // there is no ROOT_DIR that we can use to make calls relative
+ return false;
}
// determine the relative path to ROOT_DIR from the current working dir
- std::string relative_root = fs::relative(root_directory);
- if (relative_root[relative_root.size() - 1] != '/') {
- relative_root += '/';
- }
+ auto relative_root = fs::relative(root_directory) / "";
if (relative_root == "./") {
relative_root.clear();
}
// TODO: This is generally bad as this means we can't make anything relative.
// This happens if the out dir is outside of the root.
- if (relative_root.find(root_directory) != std::string::npos) {
- return;
+ if (relative_root.native().find(root_directory) != std::string::npos) {
+ return false;
}
command->set_current_directory(fs::relative(command->current_directory(), root_directory));
// replacement functor
const auto replace_all = [&](auto& str) {
+ size_t start = 0;
auto pos = std::string::npos;
- while ((pos = str.find(root_directory)) != std::string::npos) {
- str.replace(pos, root_directory.length(), relative_root);
+ while ((pos = str.find(root_directory, start)) != std::string::npos) {
+ str.replace(pos, root_directory.native().length(), relative_root);
+ // continue searching behind the inserted text so that it is not rescanned
+ start = pos + relative_root.native().length();
}
};
// now go and replace everything
replace_all(*command->mutable_program());
std::for_each(command->mutable_arguments()->begin(), command->mutable_arguments()->end(), replace_all);
+
+ return true;
}
struct InputsOutputs {
@@ -158,7 +176,8 @@
virtual ~Analyzer() = default;
void set_inputs_outputs(Command* command) const;
- virtual void make_relative(Command*) const {}
+ virtual bool make_relative(Command*) const { return false; }
+ virtual bool should_recurse(Command*) const { return true; }
protected:
virtual InputsOutputs determine_inputs_outputs(const Command&) const { return {}; }
@@ -167,26 +186,15 @@
+// Stores the inputs and outputs reported by determine_inputs_outputs() on
+// `command`. Terminates the process with exit code 1 if one of the stored
+// inputs is not a regular file.
void Analyzer::set_inputs_outputs(Command* command) const {
auto [inputs, outputs] = determine_inputs_outputs(*command);
- // TODO: this sanitizing should be done during make_relative
- for (auto& input : inputs) {
- if (input.rfind("./", 0) == 0) {
- input = input.substr(2);
- }
- }
- for (auto& output : outputs) {
- if (output.rfind("./", 0) == 0) {
- output = output.substr(2);
- }
- }
- for (const auto& input : inputs) {
+ // store first: on failure below, *command is printed after this assignment
+ *command->mutable_inputs() = {inputs.begin(), inputs.end()};
+ *command->mutable_outputs() = {outputs.begin(), outputs.end()};
+
+ for (const auto& input : command->inputs()) {
if (!fs::is_regular_file(input)) {
std::cerr << "missing input: " << input << "\n" << *command << "\n";
exit(1);
}
}
-
- *command->mutable_inputs() = {inputs.begin(), inputs.end()};
- *command->mutable_outputs() = {outputs.begin(), outputs.end()};
}
class CompileLinkerAnalyzer : public Analyzer {
@@ -235,7 +243,11 @@
return result;
}
- void make_relative(Command* command) const final { default_make_relative(command); }
+ bool make_relative(Command* command) const final { return default_make_relative(command); }
+
+ // do not recurse the interceptor into the subprocesses of compilers/linkers;
+ // otherwise we will trace (and get confused by) ld.lld/cc1-plus and friends.
+ bool should_recurse(Command*) const final { return false; }
};
class ArchiverAnalyzer : public Analyzer {
@@ -256,7 +268,26 @@
return result;
}
- void make_relative(Command* command) const final { default_make_relative(command); }
+ bool make_relative(Command* command) const final { return default_make_relative(command); }
+ bool should_recurse(Command*) const final { return false; }
+};
+
+class FixdepAnalyzer : public Analyzer {
+  // fixdep reads the object file and the .d file and outputs to the .d file.
+  // Invocations with fewer than three arguments report no inputs and no outputs.
+  InputsOutputs determine_inputs_outputs(const Command& command) const final {
+    const auto& args = command.arguments();
+    InputsOutputs io;
+    if (args.size() >= 3) {
+      io.inputs.push_back(args[1]);
+      io.inputs.push_back(args[2]);
+      io.outputs.push_back(args[1]);
+    }
+    return io;
+  }
+};
static const std::initializer_list<
@@ -270,6 +301,10 @@
std::regex("^(.*/)?(llvm-)?ar$"),
[]() { return std::make_unique<ArchiverAnalyzer>(); },
},
+ {
+ std::regex("^scripts/basic/fixdep$"),
+ []() { return std::make_unique<FixdepAnalyzer>(); },
+ },
};
std::unique_ptr<Analyzer> Analyzer::get(const Command& command) {
@@ -285,12 +320,14 @@
/// UTILITY FUNCTIONS
-static void process_command(const char* filename, char* const argv[], char* const envp[]) {
+static std::optional<interceptor::Command> process_command(const char* filename, char* const argv[],
+ char* const envp[],
+ bool* should_recurse) {
// First, try to find out whether we at all can handle this command. If not,
// simply return and fall back to the original handler.
if (!fs::is_regular_file(filename)) {
- return;
+ return {};
}
// Ok, we can handle that one, let's transform it.
@@ -299,19 +336,24 @@
const auto analyzer = interceptor::Analyzer::get(command);
+ bool transformed = false;
+
// rewrite all command line arguments (including the program itself) to use
// paths relative to ROOT_DIR. This is essential for reproducible builds and
// furthermore necessary to produce cache hits in RBE.
if (command_getenv(command, kEnvMakeRelative)) {
- analyzer->make_relative(&command);
+ transformed |= analyzer->make_relative(&command);
}
analyzer->set_inputs_outputs(&command);
+ *should_recurse = analyzer->should_recurse(&command);
log(command);
- // pass down the transformed command to execve
- execute(command, envp);
+ if (transformed) {
+ return command;
+ }
+ return {};
}
static void log(const interceptor::Command& command) {
@@ -337,17 +379,40 @@
}
}
-static void execute(const interceptor::Command& command, char* const envp[]) {
- std::vector<const char*> c_arguments;
- c_arguments.reserve(command.arguments().size() + 1);
- c_arguments[command.arguments().size()] = nullptr;
+// Builds the nullptr-terminated argument array expected by the exec/spawn
+// family from the arguments of `command`. The returned pointers refer to the
+// strings held by `command`, so they are only usable while `command` is alive
+// and its arguments are not modified.
+static std::vector<const char*> argv_vector(const interceptor::Command& command) {
+  std::vector<const char*> result;
+  result.reserve(command.arguments().size() + 1);
for (const auto& arg : command.arguments()) {
- c_arguments.push_back(arg.data());
+    result.push_back(arg.c_str());
}
- // TODO: at this point, we could free some memory that is held in Command.
- // While the arguments vector is reused for arguments, we could free
- // the EnvMap and the original arguments.
+  // The terminator has to be appended: reserve() only provides capacity, and
+  // operator[] must not be used on elements beyond size().
+  result.push_back(nullptr);
+  return result;
+}
- // does not return
- old_execve(command.program().c_str(), const_cast<char**>(c_arguments.data()), envp);
+// Blanks every entry of the nullptr-terminated environment `envp` that starts
+// with "LD_PRELOAD=" by turning it into an empty string. The entry itself stays
+// in the array; only the first character of its string is overwritten.
+static void disable_ld_preload(char* const envp[]) {
+  constexpr std::string_view kPrefix = "LD_PRELOAD=";
+  for (char* const* entry = envp; *entry != nullptr; ++entry) {
+    const std::string_view variable(*entry);
+    if (variable.compare(0, kPrefix.size(), kPrefix) == 0) {
+      (*entry)[0] = '\0';
+    }
+  }
+}
+
+// Common implementation of the intercepted exec/spawn entry points.
+//
+// Runs process_command() on the call and then invokes `executor` (the original
+// libc function bound by the caller) with the rewritten program and arguments
+// if process_command() returned a command, or with the original ones otherwise.
+// If the analyzer asked not to recurse, LD_PRELOAD is blanked in the
+// environment handed to `executor`.
+//
+// Returns whatever `executor` returns.
+int execute(const char* filename, char* const argv[], char* const envp[],
+            const executor_t& executor) {
+  // pass on to process_command(), if unhandled, fall back to the original executor
+  bool should_recurse = true;
+  auto command = process_command(filename, argv, envp, &should_recurse);
+
+  // LD_PRELOAD is blanked in a private copy of the environment. The strings
+  // behind `envp` belong to the caller: for posix_spawn (and for execve from a
+  // vfork child, which shares memory with its parent) modifying them in place
+  // would also disable the interceptor for everything the caller starts later.
+  std::vector<std::string> env_storage;
+  std::vector<char*> env_pointers;
+  if (!should_recurse && envp != nullptr) {
+    for (auto entry = envp; *entry; ++entry) {
+      env_storage.emplace_back(*entry);
+    }
+    // take the pointers only once env_storage has stopped growing
+    env_pointers.reserve(env_storage.size() + 1);
+    for (auto& entry : env_storage) {
+      env_pointers.push_back(entry.data());
+    }
+    env_pointers.push_back(nullptr);
+    disable_ld_preload(env_pointers.data());
+    envp = env_pointers.data();
+  }
+
+  if (command.has_value()) {
+    // pass down the transformed command to the executor; the argument vector
+    // has to stay alive until the executor returns
+    const auto arguments = argv_vector(*command);
+    return executor(command->program().c_str(), const_cast<char**>(arguments.data()), envp);
+  }
+  // else fall back to the original call
+  return executor(filename, argv, envp);
+}