Merge "interceptor_analysis supports --relative option."
diff --git a/interceptor.cc b/interceptor.cc
index 6ad5790..e9efccc 100644
--- a/interceptor.cc
+++ b/interceptor.cc
@@ -18,6 +18,7 @@
 
 #include <dlfcn.h>
 #include <fcntl.h>
+#include <spawn.h>
 #include <unistd.h>
 
 #include <algorithm>
@@ -49,13 +50,17 @@
 // UTILITY function declarations
 
 // process applicable calls (i.e. programs that we might be able to handle)
-static void process_command(const char* filename, char* const argv[], char* const envp[]);
+static std::optional<interceptor::Command> process_command(const char* filename, char* const argv[],
+                                                           char* const envp[],
+                                                           bool* should_recurse);
 
 // log command if logging is enabled
 static void log(const interceptor::Command&);
 
 // execute potentially modified command
-static void execute(const interceptor::Command&, char* const envp[]);
+using executor_t = std::function<int(const char* filename, char* const argv[], char* const envp[])>;
+static int execute(const char* filename, char* const argv[], char* const envp[],
+                   const executor_t& executor);
 
 // OVERLOADS for LD_PRELOAD USE
 
@@ -64,10 +69,24 @@
 
 extern "C" {
 int execve(const char* filename, char* const argv[], char* const envp[]) {
-  // pass on to process_command(), if unhandled, fall back to the original
-  // execve
-  process_command(filename, argv, envp);
-  return old_execve(filename, argv, envp);
+  // let execute() process the call; it reaches the original execve through the lambda
+  return execute(filename, argv, envp, [&](const char* exe, char* const args[], char* const env[]) {
+    return old_execve(exe, args, env);
+  });
+}
+}  // extern "C"
+
+// Intercept posix_spawn calls; to do so, capture the original posix_spawn first
+static auto const old_posix_spawn =
+    reinterpret_cast<decltype(posix_spawn)*>(dlsym(RTLD_NEXT, "posix_spawn"));
+
+extern "C" {
+int posix_spawn(pid_t* pid, const char* filename, const posix_spawn_file_actions_t* file_actions,
+                const posix_spawnattr_t* attrp, char* const argv[], char* const envp[]) {
+  // let execute() process the call; it spawns via the original posix_spawn through the lambda
+  return execute(filename, argv, envp, [&](const char* exe, char* const args[], char* const env[]) {
+    return old_posix_spawn(pid, exe, file_actions, attrp, args, env);
+  });
 }
 }  // extern "C"
 
@@ -105,46 +124,45 @@
   return {};
 }
 
-static void default_make_relative(Command* command) {
+static bool default_make_relative(Command* command) {
   // determine the ROOT_DIR
-  std::string root_directory;
+  fs::path root_directory;
   if (const auto root = command_getenv(*command, kEnvRootDirectory)) {
-    root_directory = *root;
-    if (root_directory[root_directory.size() - 1] != '/') {
-      root_directory += '/';
-    }
+    root_directory = fs::path(*root) / "";
   } else {
-    return;
+    // no ROOT_DIR found via command_getenv(): nothing to make the calls relative to
+    return false;
   }
 
   // determine the relative path to ROOT_DIR from the current working dir
-  std::string relative_root = fs::relative(root_directory);
-  if (relative_root[relative_root.size() - 1] != '/') {
-    relative_root += '/';
-  }
+  auto relative_root = fs::relative(root_directory) / "";
   if (relative_root == "./") {
     relative_root.clear();
   }
 
   // TODO: This is generally bad as this means we can't make anything relative.
   // This happens if the out dir is outside of the root.
-  if (relative_root.find(root_directory) != std::string::npos) {
-    return;
+  if (relative_root.native().find(root_directory) != std::string::npos) {
+    return false;
   }
 
   command->set_current_directory(fs::relative(command->current_directory(), root_directory));
 
   // replacement functor
   const auto replace_all = [&](auto& str) {
+    size_t start = 0;
     auto pos = std::string::npos;
-    while ((pos = str.find(root_directory)) != std::string::npos) {
-      str.replace(pos, root_directory.length(), relative_root);
+    while ((pos = str.find(root_directory, start)) != std::string::npos) {
+      str.replace(pos, root_directory.native().length(), relative_root);
+      start = pos + relative_root.native().length();
     }
   };
 
   // now go and replace everything
   replace_all(*command->mutable_program());
   std::for_each(command->mutable_arguments()->begin(), command->mutable_arguments()->end(), replace_all);
+  // tell the caller that the command has been rewritten relative to ROOT_DIR
+  return true;
 }
 
 struct InputsOutputs {
@@ -158,7 +176,8 @@
   virtual ~Analyzer() = default;
 
   void set_inputs_outputs(Command* command) const;
-  virtual void make_relative(Command*) const {}
+  virtual bool make_relative(Command*) const { return false; }
+  virtual bool should_recurse(Command*) const { return true; }
 
  protected:
   virtual InputsOutputs determine_inputs_outputs(const Command&) const { return {}; }
@@ -167,26 +186,15 @@
 void Analyzer::set_inputs_outputs(Command* command) const {
   auto [inputs, outputs] = determine_inputs_outputs(*command);
 
-  // TODO: this sanitizing should be done during make_relative
-  for (auto& input : inputs) {
-    if (input.rfind("./", 0) == 0) {
-      input = input.substr(2);
-    }
-  }
-  for (auto& output : outputs) {
-    if (output.rfind("./", 0) == 0) {
-      output = output.substr(2);
-    }
-  }
-  for (const auto& input : inputs) {
+  *command->mutable_inputs() = {inputs.begin(), inputs.end()};
+  *command->mutable_outputs() = {outputs.begin(), outputs.end()};
+  // abort (exit code 1) on the first recorded input that is not a regular file
+  for (const auto& input : command->inputs()) {
     if (!fs::is_regular_file(input)) {
       std::cerr << "missing input: " << input << "\n" << *command << "\n";
       exit(1);
     }
   }
-
-  *command->mutable_inputs() = {inputs.begin(), inputs.end()};
-  *command->mutable_outputs() = {outputs.begin(), outputs.end()};
 }
 
 class CompileLinkerAnalyzer : public Analyzer {
@@ -235,7 +243,11 @@
     return result;
   }
 
-  void make_relative(Command* command) const final { default_make_relative(command); }
+  bool make_relative(Command* command) const final { return default_make_relative(command); }
+
+  // do not recurse the interceptor into the subprocesses of compilers/linkers;
+  // otherwise we will trace (and get confused by) ld.lld/cc1-plus and friends.
+  bool should_recurse(Command*) const final { return false; }
 };
 
 class ArchiverAnalyzer : public Analyzer {
@@ -256,7 +268,26 @@
     return result;
   }
 
-  void make_relative(Command* command) const final { default_make_relative(command); }
+  bool make_relative(Command* command) const final { return default_make_relative(command); }
+  bool should_recurse(Command*) const final { return false; }
+};
+
+class FixdepAnalyzer : public Analyzer {
+  // arguments[1] is both input and output, arguments[2] is an input; calls with
+  // fewer than 3 arguments are reported without any inputs/outputs
+  InputsOutputs determine_inputs_outputs(const Command& command) const final {
+    InputsOutputs result;
+    const auto& arguments = command.arguments();
+    if (arguments.size() < 3) {
+      return result;
+    }
+
+    // fixdep reads the object file and the .d file and outputs to the .d file
+    result.outputs.push_back(arguments[1]);
+    result.inputs.push_back(arguments[1]);
+    result.inputs.push_back(arguments[2]);
+    return result;
+  }
 };
 
 static const std::initializer_list<
@@ -270,6 +301,10 @@
             std::regex("^(.*/)?(llvm-)?ar$"),
             []() { return std::make_unique<ArchiverAnalyzer>(); },
         },
+        {
+            std::regex("^scripts/basic/fixdep$"),
+            []() { return std::make_unique<FixdepAnalyzer>(); },
+        },
     };
 
 std::unique_ptr<Analyzer> Analyzer::get(const Command& command) {
@@ -285,12 +320,14 @@
 
 /// UTILITY FUNCTIONS
 
-static void process_command(const char* filename, char* const argv[], char* const envp[]) {
+static std::optional<interceptor::Command> process_command(const char* filename, char* const argv[],
+                                                           char* const envp[],
+                                                           bool* should_recurse) {
   // First, try to find out whether we at all can handle this command. If not,
   // simply return and fall back to the original handler.
 
   if (!fs::is_regular_file(filename)) {
-    return;
+    return {};
   }
 
   // Ok, we can handle that one, let's transform it.
@@ -299,19 +336,24 @@
 
   const auto analyzer = interceptor::Analyzer::get(command);
 
+  bool transformed = false;
+
   // rewrite all command line arguments (including the program itself) to use
   // paths relative to ROOT_DIR. This is essential for reproducible builds and
   // furthermore necessary to produce cache hits in RBE.
   if (command_getenv(command, kEnvMakeRelative)) {
-    analyzer->make_relative(&command);
+    transformed |= analyzer->make_relative(&command);
   }
 
   analyzer->set_inputs_outputs(&command);
+  *should_recurse = analyzer->should_recurse(&command);
 
   log(command);
 
-  // pass down the transformed command to execve
-  execute(command, envp);
+  if (transformed) {
+    return command;
+  }
+  return {};
 }
 
 static void log(const interceptor::Command& command) {
@@ -337,17 +379,40 @@
   }
 }
 
-static void execute(const interceptor::Command& command, char* const envp[]) {
-  std::vector<const char*> c_arguments;
-  c_arguments.reserve(command.arguments().size() + 1);
-  c_arguments[command.arguments().size()] = nullptr;
+static std::vector<const char*> argv_vector(const interceptor::Command& command) {
+  std::vector<const char*> result;
+  result.reserve(command.arguments().size() + 1);  // all arguments + terminator
   for (const auto& arg : command.arguments()) {
-    c_arguments.push_back(arg.data());
+    result.push_back(arg.c_str());  // borrowed: command must outlive the result
   }
-  // TODO: at this point, we could free some memory that is held in Command.
-  //       While the arguments vector is reused for arguments, we could free
-  //       the EnvMap and the original arguments.
+  result.push_back(nullptr);  // exec*/posix_spawn expect a nullptr-terminated argv
+  return result;
+}
 
-  // does not return
-  old_execve(command.program().c_str(), const_cast<char**>(c_arguments.data()), envp);
+// blank out (in place) every envp entry that starts with "LD_PRELOAD="
+static void disable_ld_preload(char* const envp[]) {
+  for (auto entry = envp; *entry != nullptr; ++entry) {
+    if (std::string_view(*entry).rfind("LD_PRELOAD=", 0) == 0) {
+      (*entry)[0] = '\0';
+    }
+  }
+}
+
+int execute(const char* filename, char* const argv[], char* const envp[],
+            const executor_t& executor) {
+  // let process_command() analyze the call; it yields a command only if it rewrote it
+  bool should_recurse = true;
+  const auto command = process_command(filename, argv, envp, &should_recurse);
+
+  // keep the interceptor out of subprocesses; note this edits the caller's envp in place
+  if (!should_recurse) {
+    disable_ld_preload(envp);
+  }
+
+  if (!command) {
+    // nothing was transformed: fall back to the original call
+    return executor(filename, argv, envp);
+  }
+  auto arguments = argv_vector(*command);  // has to stay alive during the executor call
+  return executor(command->program().c_str(), const_cast<char**>(arguments.data()), envp);
 }