[Environment Variable][1/N] Use thread-safe env variable API in c10 (#119449)

This PR is the beginning of attempts to wrap thread-unsafe getenv and set_env functions inside a RW mutex.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/119449
Approved by: https://github.com/albanD
diff --git a/c10/core/impl/alloc_cpu.cpp b/c10/core/impl/alloc_cpu.cpp
index 9b7ae22..def4c3a 100644
--- a/c10/core/impl/alloc_cpu.cpp
+++ b/c10/core/impl/alloc_cpu.cpp
@@ -3,6 +3,7 @@
 #include <c10/core/alignment.h>
 #include <c10/util/Flags.h>
 #include <c10/util/Logging.h>
+#include <c10/util/env.h>
 #include <c10/util/irange.h>
 #include <c10/util/numa.h>
 
@@ -53,8 +54,8 @@
 #if defined(__linux__) && !defined(__ANDROID__)
 static inline bool is_thp_alloc_enabled() {
   static bool value = [&] {
-    const char* ptr = std::getenv("THP_MEM_ALLOC_ENABLE");
-    return ptr != nullptr ? std::atoi(ptr) : 0;
+    auto env = c10::utils::check_env("THP_MEM_ALLOC_ENABLE");
+    return env.has_value() ? env.value() : 0;
   }();
   return value;
 }
diff --git a/c10/cuda/CUDAAllocatorConfig.cpp b/c10/cuda/CUDAAllocatorConfig.cpp
index 1f81ed4..ca38dfd 100644
--- a/c10/cuda/CUDAAllocatorConfig.cpp
+++ b/c10/cuda/CUDAAllocatorConfig.cpp
@@ -234,7 +234,7 @@
   return i;
 }
 
-void CUDAAllocatorConfig::parseArgs(const char* env) {
+void CUDAAllocatorConfig::parseArgs(const std::optional<std::string>& env) {
   // If empty, set the default values
   m_max_split_size = std::numeric_limits<size_t>::max();
   m_roundup_power2_divisions.assign(kRoundUpPowerOfTwoIntervals, 0);
@@ -242,16 +242,16 @@
   bool used_cudaMallocAsync = false;
   bool used_native_specific_option = false;
 
-  if (env == nullptr) {
+  if (!env.has_value()) {
     return;
   }
   {
     std::lock_guard<std::mutex> lock(m_last_allocator_settings_mutex);
-    m_last_allocator_settings = env;
+    m_last_allocator_settings = env.value();
   }
 
   std::vector<std::string> config;
-  lexArgs(env, config);
+  lexArgs(env.value().c_str(), config);
 
   for (size_t i = 0; i < config.size(); i++) {
     std::string_view config_item_view(config[i]);
diff --git a/c10/cuda/CUDAAllocatorConfig.h b/c10/cuda/CUDAAllocatorConfig.h
index 3106fc1..db5c9e1 100644
--- a/c10/cuda/CUDAAllocatorConfig.h
+++ b/c10/cuda/CUDAAllocatorConfig.h
@@ -2,6 +2,7 @@
 
 #include <c10/cuda/CUDAMacros.h>
 #include <c10/util/Exception.h>
+#include <c10/util/env.h>
 
 #include <atomic>
 #include <cstddef>
@@ -72,14 +73,13 @@
   static CUDAAllocatorConfig& instance() {
     static CUDAAllocatorConfig* s_instance = ([]() {
       auto inst = new CUDAAllocatorConfig();
-      const char* env = getenv("PYTORCH_CUDA_ALLOC_CONF");
-      inst->parseArgs(env);
+      inst->parseArgs(c10::utils::get_env("PYTORCH_CUDA_ALLOC_CONF"));
       return inst;
     })();
     return *s_instance;
   }
 
-  void parseArgs(const char* env);
+  void parseArgs(const std::optional<std::string>& env);
 
  private:
   CUDAAllocatorConfig();
diff --git a/c10/cuda/CUDACachingAllocator.cpp b/c10/cuda/CUDACachingAllocator.cpp
index c472e82..afac527 100644
--- a/c10/cuda/CUDACachingAllocator.cpp
+++ b/c10/cuda/CUDACachingAllocator.cpp
@@ -8,6 +8,7 @@
 #include <c10/util/CallOnce.h>
 #include <c10/util/ScopeExit.h>
 #include <c10/util/UniqueVoidPtr.h>
+#include <c10/util/env.h>
 #include <c10/util/flat_hash_map.h>
 #include <c10/util/hash.h>
 #include <c10/util/irange.h>
@@ -2831,7 +2832,7 @@
 // errors, since the caching allocator foils cuda-memcheck.
 bool forceUncachedAllocator() {
   static bool force_uncached =
-      getenv("PYTORCH_NO_CUDA_MEMORY_CACHING") != nullptr;
+      c10::utils::has_env("PYTORCH_NO_CUDA_MEMORY_CACHING");
   return force_uncached;
 }
 
@@ -3363,9 +3364,9 @@
   // version checks, to CUDAAllocatorConfig's runtime doublecheck. If this
   // works, maybe we should move all of CUDAAllocatorConfig here?
   CUDAAllocator* parseEnvForBackend() {
-    const char* val = getenv("PYTORCH_CUDA_ALLOC_CONF");
-    if (val != nullptr) {
-      const std::string config(val);
+    const auto val = c10::utils::get_env("PYTORCH_CUDA_ALLOC_CONF");
+    if (val.has_value()) {
+      const std::string& config = val.value();
 
       std::regex exp("[\\s,]+");
       std::sregex_token_iterator it(config.begin(), config.end(), exp, -1);
diff --git a/c10/cuda/CUDADeviceAssertionHost.cpp b/c10/cuda/CUDADeviceAssertionHost.cpp
index 1d52af7..ec41e62 100644
--- a/c10/cuda/CUDADeviceAssertionHost.cpp
+++ b/c10/cuda/CUDADeviceAssertionHost.cpp
@@ -3,6 +3,7 @@
 #include <c10/cuda/CUDAFunctions.h>
 #include <c10/util/Backtrace.h>
 #include <c10/util/Exception.h>
+#include <c10/util/env.h>
 #include <c10/util/irange.h>
 #include <cuda_runtime.h>
 
@@ -80,8 +81,8 @@
 }
 
 bool env_flag_set(const char* env_var_name) {
-  const char* const env_string = std::getenv(env_var_name);
-  return (env_string == nullptr) ? false : std::strcmp(env_string, "0");
+  const auto env_flag = c10::utils::check_env(env_var_name);
+  return env_flag.has_value() && env_flag.value();
 }
 
 /// Deleter for UVM/managed memory pointers
diff --git a/c10/cuda/CUDAMiscFunctions.cpp b/c10/cuda/CUDAMiscFunctions.cpp
index 11ea775..9ef7248 100644
--- a/c10/cuda/CUDAMiscFunctions.cpp
+++ b/c10/cuda/CUDAMiscFunctions.cpp
@@ -1,12 +1,14 @@
 #include <c10/cuda/CUDAMiscFunctions.h>
-#include <cstdlib>
+#include <c10/util/env.h>
 
 namespace c10::cuda {
 
+// NOLINTNEXTLINE(bugprone-exception-escape,-warnings-as-errors)
 const char* get_cuda_check_suffix() noexcept {
-  static char* device_blocking_flag = getenv("CUDA_LAUNCH_BLOCKING");
+  static auto device_blocking_flag =
+      c10::utils::check_env("CUDA_LAUNCH_BLOCKING");
   static bool blocking_enabled =
-      (device_blocking_flag && atoi(device_blocking_flag));
+      (device_blocking_flag.has_value() && device_blocking_flag.value());
   if (blocking_enabled) {
     return "";
   } else {
diff --git a/c10/test/util/DeadlockDetection_test.cpp b/c10/test/util/DeadlockDetection_test.cpp
index 35c4953..05ae154 100644
--- a/c10/test/util/DeadlockDetection_test.cpp
+++ b/c10/test/util/DeadlockDetection_test.cpp
@@ -1,9 +1,8 @@
 #include <c10/util/DeadlockDetection.h>
+#include <c10/util/env.h>
 
 #include <gtest/gtest.h>
 
-#include <cstdlib>
-
 using namespace ::testing;
 using namespace c10::impl;
 
@@ -23,7 +22,7 @@
 
 #ifndef _WIN32
 TEST(DeadlockDetection, disable) {
-  setenv("TORCH_DISABLE_DEADLOCK_DETECTION", "1", 1);
+  c10::utils::set_env("TORCH_DISABLE_DEADLOCK_DETECTION", "1");
   DummyPythonGILHooks hooks;
   SetPythonGILHooks(&hooks);
   SetPythonGILHooks(&hooks);
diff --git a/c10/util/DeadlockDetection.cpp b/c10/util/DeadlockDetection.cpp
index 320fa78..4b00d24 100644
--- a/c10/util/DeadlockDetection.cpp
+++ b/c10/util/DeadlockDetection.cpp
@@ -1,6 +1,5 @@
 #include <c10/util/DeadlockDetection.h>
-
-#include <cstdlib>
+#include <c10/util/env.h>
 
 namespace c10::impl {
 
@@ -8,7 +7,7 @@
 PythonGILHooks* python_gil_hooks = nullptr;
 
 bool disable_detection() {
-  return std::getenv("TORCH_DISABLE_DEADLOCK_DETECTION") != nullptr;
+  return c10::utils::has_env("TORCH_DISABLE_DEADLOCK_DETECTION");
 }
 } // namespace
 
diff --git a/c10/util/Logging.cpp b/c10/util/Logging.cpp
index e9c9e9c..17459f6 100644
--- a/c10/util/Logging.cpp
+++ b/c10/util/Logging.cpp
@@ -1,6 +1,7 @@
 #include <c10/util/Backtrace.h>
 #include <c10/util/Flags.h>
 #include <c10/util/Logging.h>
+#include <c10/util/env.h>
 #ifdef FBCODE_CAFFE2
 #include <folly/synchronization/SanitizeThread.h>
 #endif
@@ -10,7 +11,6 @@
 #endif
 
 #include <algorithm>
-#include <cstdlib>
 #include <iostream>
 
 // Common code that we use regardless of whether we use glog or not.
@@ -94,8 +94,8 @@
 
 namespace {
 bool IsAPIUsageDebugMode() {
-  const char* val = getenv("PYTORCH_API_USAGE_STDERR");
-  return val && *val; // any non-empty value
+  auto val = c10::utils::get_env("PYTORCH_API_USAGE_STDERR");
+  return val.has_value() && !val.value().empty(); // any non-empty value
 }
 
 void APIUsageDebug(const string& event) {
@@ -438,10 +438,10 @@
 namespace {
 
 void setLogLevelFlagFromEnv() {
-  const char* level_str = std::getenv("TORCH_CPP_LOG_LEVEL");
+  auto level_env = c10::utils::get_env("TORCH_CPP_LOG_LEVEL");
 
   // Not set, fallback to the default level (i.e. WARNING).
-  std::string level{level_str != nullptr ? level_str : ""};
+  std::string level{level_env.has_value() ? level_env.value() : ""};
   if (level.empty()) {
     return;
   }
diff --git a/c10/util/env.cpp b/c10/util/env.cpp
new file mode 100644
index 0000000..865c6b9
--- /dev/null
+++ b/c10/util/env.cpp
@@ -0,0 +1,108 @@
+#include <c10/util/Exception.h>
+#include <c10/util/env.h>
+#include <fmt/format.h>
+#include <cstdlib>
+#include <shared_mutex>
+
+namespace c10::utils {
+
+static std::shared_mutex env_mutex;
+
+// Set an environment variable.
+void set_env(const char* name, const char* value, bool overwrite) {
+  std::lock_guard lk(env_mutex);
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4996)
+#endif
+#ifdef _MSC_VER
+  if (!overwrite) {
+    // NOLINTNEXTLINE(concurrency-mt-unsafe)
+    if (std::getenv(name) != nullptr) {
+      return;
+    }
+  }
+  auto full_env_variable = fmt::format("{}={}", name, value);
+  // NOLINTNEXTLINE(concurrency-mt-unsafe)
+  auto err = putenv(full_env_variable.c_str());
+  TORCH_INTERNAL_ASSERT(
+      err == 0,
+      "putenv failed for environment \"",
+      name,
+      "\", the error is: ",
+      err);
+#else
+  // NOLINTNEXTLINE(concurrency-mt-unsafe)
+  auto err = setenv(name, value, static_cast<int>(overwrite));
+  TORCH_INTERNAL_ASSERT(
+      err == 0,
+      "setenv failed for environment \"",
+      name,
+      "\", the error is: ",
+      err);
+#endif
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+  return;
+}
+
+// Checks an environment variable is set.
+bool has_env(const char* name) noexcept {
+  std::shared_lock lk(env_mutex);
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4996)
+#endif
+  // NOLINTNEXTLINE(concurrency-mt-unsafe)
+  auto envar = std::getenv(name);
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+  return envar != nullptr;
+}
+
+// Reads an environment variable and returns the content if it is set
+std::optional<std::string> get_env(const char* name) noexcept {
+  std::shared_lock lk(env_mutex);
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4996)
+#endif
+  // NOLINTNEXTLINE(concurrency-mt-unsafe)
+  auto envar = std::getenv(name);
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+  if (envar != nullptr) {
+    return std::string(envar);
+  }
+  return std::nullopt;
+}
+
+// Reads an environment variable and returns
+// - optional<true>,              if set equal to "1"
+// - optional<false>,             if set equal to "0"
+// - nullopt,   otherwise
+//
+// NB:
+// Issues a warning if the value of the environment variable is not 0 or 1.
+std::optional<bool> check_env(const char* name) {
+  auto env_opt = get_env(name);
+  if (env_opt.has_value()) {
+    if (*env_opt == "0") {
+      return false;
+    }
+    if (*env_opt == "1") {
+      return true;
+    }
+    TORCH_WARN(
+        "Ignoring invalid value for boolean flag ",
+        name,
+        ": ",
+        *env_opt,
+        "valid values are 0 or 1.");
+  }
+  return std::nullopt;
+}
+} // namespace c10::utils
diff --git a/c10/util/env.h b/c10/util/env.h
index 3db116c..04b7585 100644
--- a/c10/util/env.h
+++ b/c10/util/env.h
@@ -1,11 +1,20 @@
 #pragma once
 
-#include <c10/util/Exception.h>
-#include <cstdlib>
-#include <cstring>
+#include <c10/macros/Export.h>
 #include <optional>
+#include <string>
 
 namespace c10::utils {
+
+// Set an environment variable.
+C10_API void set_env(
+    const char* name,
+    const char* value,
+    bool overwrite = true);
+
+// Checks an environment variable is set.
+C10_API bool has_env(const char* name) noexcept;
+
 // Reads an environment variable and returns
 // - optional<true>,              if set equal to "1"
 // - optional<false>,             if set equal to "0"
@@ -13,29 +22,10 @@
 //
 // NB:
 // Issues a warning if the value of the environment variable is not 0 or 1.
-inline std::optional<bool> check_env(const char* name) {
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4996)
-#endif
-  auto envar = std::getenv(name);
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-  if (envar) {
-    if (strcmp(envar, "0") == 0) {
-      return false;
-    }
-    if (strcmp(envar, "1") == 0) {
-      return true;
-    }
-    TORCH_WARN(
-        "Ignoring invalid value for boolean flag ",
-        name,
-        ": ",
-        envar,
-        "valid values are 0 or 1.");
-  }
-  return std::nullopt;
-}
+C10_API std::optional<bool> check_env(const char* name);
+
+// Reads the value of an environment variable if it is set.
+// However, check_env should be used if the value is assumed to be a flag.
+C10_API std::optional<std::string> get_env(const char* name) noexcept;
+
 } // namespace c10::utils
diff --git a/c10/util/tempfile.cpp b/c10/util/tempfile.cpp
index 28c3c7f..f106885 100644
--- a/c10/util/tempfile.cpp
+++ b/c10/util/tempfile.cpp
@@ -1,4 +1,5 @@
 #include <c10/util/Exception.h>
+#include <c10/util/env.h>
 #include <c10/util/tempfile.h>
 #include <fmt/format.h>
 
@@ -22,10 +23,11 @@
   // We see if any of these environment variables is set and use their value, or
   // else default the temporary directory to `/tmp`.
 
-  const char* tmp_directory = "/tmp";
+  std::string tmp_directory = "/tmp";
   for (const char* variable : {"TMPDIR", "TMP", "TEMP", "TEMPDIR"}) {
-    if (const char* path = getenv(variable)) {
-      tmp_directory = path;
+    auto path_opt = c10::utils::get_env(variable);
+    if (path_opt.has_value()) {
+      tmp_directory = path_opt.value();
       break;
     }
   }