[LT] Upstream backend interfaces (#67927)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/67927

BackendData - represents 'tensor data' in opaque backend storage
LoweringContext - interface for performing backend-specific IR lowering
BackendImplInterface - interface for lazy tensor backends to implement

Reorganizes backend-related files into the lazy/backend subdirectory

Includes a few small fixes that were made on lazy_tensor_staging and are back-ported to master here. A sketch of how the new pieces compose is included below.
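
To make the intended flow concrete, here is a rough sketch of how the pieces compose once a backend has registered itself (illustrative only; `CompileAndRun` is a made-up helper, and real callers would also plumb arguments through):

```cpp
#include <torch/csrc/lazy/backend/backend_interface.h>

using namespace torch::lazy;

// Illustrative sketch: lower IR into a backend computation, compile, execute.
std::vector<BackendDataPtr> CompileAndRun(const BackendDevice& device) {
  const BackendImplInterface* backend = getBackend();
  // LoweringContext performs the backend-specific IR lowering.
  std::unique_ptr<LoweringContext> ctx =
      backend->CreateLoweringContext("example", device);
  ComputationPtr computation = ctx->Build();
  // Compile, then execute; results come back as opaque BackendData handles.
  std::vector<ComputationPtr> compiled = backend->Compile({computation});
  return backend->ExecuteComputation(*compiled[0], /*arguments=*/{}, device);
}
```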

Test Plan: used by the lazy_tensor_staging branch

Reviewed By: desertfire

Differential Revision: D32142032

fbshipit-source-id: 828c717bcd0d511876e64ad209b50f7bfb10cec5
diff --git a/test/cpp/lazy/test_ir_util.cpp b/test/cpp/lazy/test_ir_util.cpp
index 836540b..5c21625 100644
--- a/test/cpp/lazy/test_ir_util.cpp
+++ b/test/cpp/lazy/test_ir_util.cpp
@@ -12,7 +12,7 @@
 class IrUtilNode : public Node {
  public:
   explicit IrUtilNode()
-      : Node(OpKind(), /* num_outputs */ 1, /* hash_seed */ Hash("")) {}
+      : Node(OpKind(), /* num_outputs */ 1, /* hash_seed */ Hash(0)) {}
   ~IrUtilNode() override = default;
 
   void AddOperand(Value v) {
diff --git a/tools/build_variables.bzl b/tools/build_variables.bzl
index e9f3178..e5339bf 100644
--- a/tools/build_variables.bzl
+++ b/tools/build_variables.bzl
@@ -361,6 +361,7 @@
 
 lazy_tensor_core_sources = [
     "torch/csrc/lazy/backend/backend_device.cpp",
+    "torch/csrc/lazy/backend/lowering_context.cpp",
     "torch/csrc/lazy/core/config.cpp",
     "torch/csrc/lazy/core/hash.cpp",
     "torch/csrc/lazy/core/ir.cpp",
diff --git a/torch/csrc/lazy/backend/backend_data.h b/torch/csrc/lazy/backend/backend_data.h
new file mode 100644
index 0000000..15efd7a
--- /dev/null
+++ b/torch/csrc/lazy/backend/backend_data.h
@@ -0,0 +1,61 @@
+#pragma once
+
+#include <cstring>
+#include <torch/csrc/lazy/core/shape.h>
+#include <torch/csrc/lazy/backend/backend_device.h>
+
+namespace torch {
+namespace lazy {
+
+/**
+ * Represents (Tensor) data stored on a backend device
+ * in its native format.
+ */
+class TORCH_API BackendData {
+ public:
+  /**
+   * Used by the Lazy Graph Executor to tag info on BackendData objects.
+   */
+  struct Info {
+    virtual ~Info() = default;
+  };
+  using Handle = int64_t;
+
+  BackendData(BackendDevice device, Shape shape)
+      : device_(std::move(device)), shape_(std::move(shape)) {}
+
+  virtual ~BackendData() = default;
+
+  const BackendDevice& device() const {
+    return device_;
+  }
+
+  const Shape& shape() const {
+    return shape_;
+  }
+
+  Info* info() const {
+    return info_.get();
+  }
+
+  std::shared_ptr<Info> SetInfo(std::shared_ptr<Info> info) {
+    std::swap(info, info_);
+    return info;
+  }
+
+  virtual Handle GetHandle() = 0;
+
+  virtual void Assign(const BackendData& data) = 0;
+
+  virtual bool HasValue() const = 0;
+
+ private:
+  BackendDevice device_;
+  Shape shape_;
+  std::shared_ptr<Info> info_;
+};
+
+using BackendDataPtr = std::shared_ptr<BackendData>;
+
+} // namespace lazy
+} // namespace torch
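
As a usage note, a concrete backend subclasses `BackendData` to wrap its native storage. A hypothetical minimal implementation (the `ExampleBackendData` name and raw-handle representation are made up for illustration):

```cpp
#include <torch/csrc/lazy/backend/backend_data.h>

class ExampleBackendData : public torch::lazy::BackendData {
 public:
  ExampleBackendData(torch::lazy::BackendDevice device,
                     torch::lazy::Shape shape, Handle handle)
      : BackendData(std::move(device), std::move(shape)), handle_(handle) {}

  Handle GetHandle() override { return handle_; }

  void Assign(const torch::lazy::BackendData& data) override {
    // Point this object at the same underlying storage as `data`;
    // assumes `data` comes from the same backend.
    handle_ = static_cast<const ExampleBackendData&>(data).handle_;
  }

  // In this sketch, a handle of 0 means "placeholder with no storage yet".
  bool HasValue() const override { return handle_ != 0; }

 private:
  Handle handle_ = 0;
};
```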
diff --git a/torch/csrc/lazy/backend/backend_interface.h b/torch/csrc/lazy/backend/backend_interface.h
new file mode 100644
index 0000000..577ffb3
--- /dev/null
+++ b/torch/csrc/lazy/backend/backend_interface.h
@@ -0,0 +1,129 @@
+#pragma once
+
+#include <atomic>
+#include <ATen/Tensor.h>
+#include <torch/csrc/lazy/backend/backend_data.h>
+#include <torch/csrc/lazy/backend/backend_device.h>
+#include <torch/csrc/lazy/backend/lowering_context.h>
+#include <torch/csrc/lazy/core/shape.h>
+
+namespace torch {
+namespace lazy {
+
+/**
+ * Work in progress; don't treat this as a stable interface yet!
+ */
+class TORCH_API BackendImplInterface {
+ public:
+  /**
+   * Initialization/Teardown
+   * */
+  // No-op by default. Allows custom functionality to be exposed through
+  // extension bindings.
+  virtual void InitializeAtenBindings() const {}
+
+  virtual void PrepareToExit() const = 0;
+
+  /**
+   * Configuration
+   * */
+
+  virtual void SetRngSeed(size_t seed) const = 0;
+
+  /**
+   * Data Transfer
+   * */
+
+  virtual BackendDataPtr MakeComputationDataFromTensor(
+      const at::Tensor& tensor, const Shape& shape,
+      const BackendDevice& device) const = 0;
+
+  virtual BackendDataPtr CreateDataPlaceholder(
+      const BackendDevice& device, const Shape& shape) const = 0;
+
+  virtual at::Tensor MakeTensorFromComputationData(
+      const BackendDataPtr data,
+      c10::optional<at::ScalarType> logical_scalar_type) const = 0;
+
+  /**
+   * Lowering, Compilation, Execution
+   * */
+
+  virtual std::unique_ptr<LoweringContext> CreateLoweringContext(
+      const std::string& name, BackendDevice device,
+      c10::ArrayRef<torch::lazy::Node*> post_order,
+      Util::EmissionMap emit_status) const = 0;
+
+  virtual std::unique_ptr<LoweringContext> CreateLoweringContext(
+      const std::string& name, BackendDevice device) const = 0;
+
+  // TODO(whc) need to keep this?
+  virtual std::vector<std::string> GetCompilationDevices(
+      const std::string& device, c10::ArrayRef<std::string> devices) const = 0;
+
+  virtual std::vector<ComputationPtr> Compile(
+      std::vector<ComputationPtr> instances) const = 0;
+
+  virtual std::vector<BackendDataPtr> ExecuteComputation(
+      Computation& computation, c10::ArrayRef<BackendDataPtr> arguments,
+      const BackendDevice& device) const = 0;
+
+  /**
+   * Device Configuration
+   * */
+
+  // Set or get the default device type.
+  // For backends used with virtual c10:: Devices, this configures what real
+  // device type the backend should use, and matters if the backend supports
+  // more than one type of real device.
+  virtual std::shared_ptr<BackendDeviceType>
+  GetDefaultDeviceType() const = 0;
+  virtual void SetDefaultDeviceType(std::string device_type) = 0;
+
+  // Specify which aten device should be used for eager fallback;
+  // this may change depending on the current 'Default' DeviceType.
+  virtual at::DeviceType EagerFallbackDeviceType() const = 0;
+
+
+  // Query all available backend devices
+  virtual std::vector<BackendDevice> GetBackendDevices() const = 0;
+
+  // Map a particular c10:: device to a concrete backend device
+  // Note: c10:: devices may be virtual or concrete. xla:: and lazy:: are
+  // virtual devices, meaning they may map to a gpu, tpu, etc. behind the
+  // scenes. In the future, non-virtual c10:: devices may also use lazy tensors
+  // through a mode, in which case these APIs should still work, but should be
+  // identity mappings.
+  virtual BackendDevice GetBackendDevice(c10::Device device) const = 0;
+
+  // TODO(whc)
+  // Additional APIs expected for supporting distributed training, to be
+  // designed
+
+  /**
+   * Debug/Metrics
+   * */
+
+  //   virtual std::map<std::string, Metric> GetMetrics() const = 0;
+
+  //   virtual MemoryInfo GetMemoryInfo(const std::string& device) = 0;
+
+  virtual std::string GetComputationBackendText(
+      const ComputationPtr computation) const = 0;
+};
+
+extern std::atomic<const BackendImplInterface*> backend_impl_registry;
+
+class TORCH_API BackendRegistrar {
+ public:
+  BackendRegistrar(const BackendImplInterface* backend_impl_interface);
+};
+
+inline const BackendImplInterface* TORCH_API getBackend() {
+  auto p = backend_impl_registry.load();
+  CHECK(p) << "Lazy tensor backend not registered.";
+  return p;
+}
+
+}  // namespace lazy
+}  // namespace torch
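
For reference, registration happens through a static `BackendRegistrar` instance. The stub below is a hypothetical, deliberately minimal backend: complete enough to compile against the interface above and register itself, with every interesting method aborting. A real backend fills these in.

```cpp
#include <torch/csrc/lazy/backend/backend_interface.h>

namespace {

using namespace torch::lazy;

class StubBackend : public BackendImplInterface {
 public:
  void PrepareToExit() const override {}
  void SetRngSeed(size_t seed) const override {}
  BackendDataPtr MakeComputationDataFromTensor(
      const at::Tensor& tensor, const Shape& shape,
      const BackendDevice& device) const override {
    TORCH_CHECK(false, "not implemented");
  }
  BackendDataPtr CreateDataPlaceholder(
      const BackendDevice& device, const Shape& shape) const override {
    TORCH_CHECK(false, "not implemented");
  }
  at::Tensor MakeTensorFromComputationData(
      const BackendDataPtr data,
      c10::optional<at::ScalarType> logical_scalar_type) const override {
    TORCH_CHECK(false, "not implemented");
  }
  std::unique_ptr<LoweringContext> CreateLoweringContext(
      const std::string& name, BackendDevice device,
      c10::ArrayRef<torch::lazy::Node*> post_order,
      Util::EmissionMap emit_status) const override {
    TORCH_CHECK(false, "not implemented");
  }
  std::unique_ptr<LoweringContext> CreateLoweringContext(
      const std::string& name, BackendDevice device) const override {
    TORCH_CHECK(false, "not implemented");
  }
  std::vector<std::string> GetCompilationDevices(
      const std::string& device,
      c10::ArrayRef<std::string> devices) const override {
    return std::vector<std::string>(devices.begin(), devices.end());
  }
  std::vector<ComputationPtr> Compile(
      std::vector<ComputationPtr> instances) const override {
    TORCH_CHECK(false, "not implemented");
  }
  std::vector<BackendDataPtr> ExecuteComputation(
      Computation& computation, c10::ArrayRef<BackendDataPtr> arguments,
      const BackendDevice& device) const override {
    TORCH_CHECK(false, "not implemented");
  }
  std::shared_ptr<BackendDeviceType> GetDefaultDeviceType() const override {
    TORCH_CHECK(false, "not implemented");
  }
  void SetDefaultDeviceType(std::string device_type) override {}
  at::DeviceType EagerFallbackDeviceType() const override {
    return at::kCPU;
  }
  std::vector<BackendDevice> GetBackendDevices() const override {
    return {};
  }
  BackendDevice GetBackendDevice(c10::Device device) const override {
    TORCH_CHECK(false, "not implemented");
  }
  std::string GetComputationBackendText(
      const ComputationPtr computation) const override {
    return "stub";
  }
};

// A static registrar publishes the instance into backend_impl_registry,
// after which getBackend() returns it.
StubBackend stub_backend;
BackendRegistrar registrar(&stub_backend);

}  // namespace
```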
diff --git a/torch/csrc/lazy/backend/lowering_context.cpp b/torch/csrc/lazy/backend/lowering_context.cpp
new file mode 100644
index 0000000..c8c4669
--- /dev/null
+++ b/torch/csrc/lazy/backend/lowering_context.cpp
@@ -0,0 +1,19 @@
+#include <torch/csrc/lazy/backend/lowering_context.h>
+
+namespace torch {
+namespace lazy {
+
+LoweringContext::LoweringContext(const std::string& name, BackendDevice device)
+    : device_(std::move(device)) {}
+
+LoweringContext::LoweringContext(const std::string& name, BackendDevice device,
+                                 c10::ArrayRef<torch::lazy::Node*> post_order,
+                                 Util::EmissionMap emit_status)
+    : device_(std::move(device)), emit_status_(std::move(emit_status)) {}
+
+const std::vector<BackendDataPtr>& LoweringContext::GetParametersData() const {
+  return parameters_;
+}
+
+}  // namespace lazy
+}  // namespace torch
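
The static `LoweringContext::Create` overloads declared in the header are not defined here; presumably they forward to the registered backend's factory methods. A sketch of that assumed dispatch (an assumption, not code from this diff):

```cpp
#include <torch/csrc/lazy/backend/backend_interface.h>
#include <torch/csrc/lazy/backend/lowering_context.h>

namespace torch {
namespace lazy {

// Assumed implementation: delegate to whichever backend is registered.
std::unique_ptr<LoweringContext> LoweringContext::Create(
    const std::string& name, BackendDevice device,
    c10::ArrayRef<Node*> post_order, Util::EmissionMap emit_status) {
  return getBackend()->CreateLoweringContext(
      name, std::move(device), post_order, std::move(emit_status));
}

std::unique_ptr<LoweringContext> LoweringContext::Create(
    const std::string& name, BackendDevice device) {
  return getBackend()->CreateLoweringContext(name, std::move(device));
}

}  // namespace lazy
}  // namespace torch
```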
diff --git a/torch/csrc/lazy/backend/lowering_context.h b/torch/csrc/lazy/backend/lowering_context.h
new file mode 100644
index 0000000..f6c9977
--- /dev/null
+++ b/torch/csrc/lazy/backend/lowering_context.h
@@ -0,0 +1,85 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <torch/csrc/lazy/backend/backend_data.h>
+#include <torch/csrc/lazy/backend/backend_device.h>
+#include <torch/csrc/lazy/core/ir.h>
+#include <torch/csrc/lazy/core/ir_util.h>
+
+namespace torch {
+namespace lazy {
+
+class TORCH_API Computation {
+ public:
+  virtual int parameters_size() const = 0;
+
+  virtual const std::vector<Shape>& parameter_shapes() const = 0;
+
+  virtual const std::vector<std::string>& parameter_names() const = 0;
+
+  virtual const Shape& result_shape() const = 0;
+
+  virtual ~Computation() = default;
+};
+
+using ComputationPtr = std::shared_ptr<Computation>;
+
+// Keeps track of the code generation state.
+class TORCH_API LoweringContext {
+ public:
+  LoweringContext(const std::string& name, BackendDevice device);
+  LoweringContext(const std::string& name, BackendDevice device,
+                  c10::ArrayRef<torch::lazy::Node*> post_order,
+                  Util::EmissionMap emit_status);
+
+  virtual ~LoweringContext() = default;
+
+  static std::unique_ptr<LoweringContext> Create(
+      const std::string& name, BackendDevice device,
+      c10::ArrayRef<torch::lazy::Node*> post_order,
+      Util::EmissionMap emit_status);
+
+  static std::unique_ptr<LoweringContext> Create(const std::string& name,
+                                                 BackendDevice device);
+
+  const BackendDevice& device() const { return device_; }
+
+  // Retrieves the vector holding all the tensors associated with the parameter
+  // instructions which have been created.
+  const std::vector<BackendDataPtr>&
+  GetParametersData() const;
+
+  // Get the shape of the result tuple component, given by index.
+  virtual Shape GetResultShape(size_t index) const = 0;
+
+  // Adds the given output as a component of the result tuple and returns its
+  // assigned position within the tuple.
+  virtual size_t AddResult(const torch::lazy::Output& output) = 0;
+
+  // Associates the given output with the input parameter of the given index and
+  // shape. Only used for operator-by-operator execution, mostly for
+  // debugging purposes.
+  virtual void AddParameter(const torch::lazy::Output& output, size_t index,
+                            const Shape& shape,
+                            const std::string& name) = 0;
+
+  // Builds the computation capturing all the operations created with this
+  // lowering context.
+  virtual ComputationPtr Build() = 0;
+
+  size_t GetEmittedNodeCount() const { return emit_status_.size(); }
+
+ protected:
+  BackendDevice device_;
+  std::vector<BackendDataPtr> parameters_;
+  std::vector<size_t> parameter_sequence_;
+  Util::EmissionMap emit_status_;
+};
+
+}  // namespace lazy
+}  // namespace torch
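
For reference, `Computation` is the backend's handle to a lowered program; a hypothetical minimal subclass just has to surface parameter and result metadata:

```cpp
#include <torch/csrc/lazy/backend/lowering_context.h>

// Hypothetical backend computation; a real one would also hold the
// compiled artifact that ExecuteComputation runs.
class ExampleComputation : public torch::lazy::Computation {
 public:
  int parameters_size() const override {
    return static_cast<int>(parameter_shapes_.size());
  }
  const std::vector<torch::lazy::Shape>& parameter_shapes() const override {
    return parameter_shapes_;
  }
  const std::vector<std::string>& parameter_names() const override {
    return parameter_names_;
  }
  const torch::lazy::Shape& result_shape() const override {
    return result_shape_;
  }

 private:
  std::vector<torch::lazy::Shape> parameter_shapes_;
  std::vector<std::string> parameter_names_;
  torch::lazy::Shape result_shape_;
};
```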
diff --git a/torch/csrc/lazy/core/hash.h b/torch/csrc/lazy/core/hash.h
index 5471bc8..5ca4638 100644
--- a/torch/csrc/lazy/core/hash.h
+++ b/torch/csrc/lazy/core/hash.h
@@ -81,6 +81,10 @@
   return DataHash(value.data(), value.size());
 }
 
+static inline hash_t Hash(const c10::string_view& value) {
+  return DataHash(value.data(), value.size());
+}
+
 // Taken from glibc's implementation of hashing optionals,
 // we want to include a contribution to the hash to distinguish
 // cases where one or another option was null, but we hope it doesn't
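
The new overload lets string contents feed directly into `DataHash`; the companion test change above (from `Hash("")` to `Hash(0)`) switches the test's hash seed to an integral value rather than relying on string-literal overload resolution. A quick usage sketch:

```cpp
#include <torch/csrc/lazy/core/hash.h>

void HashExamples() {
  using torch::lazy::Hash;
  using torch::lazy::hash_t;
  // String-like values hash by their character data.
  hash_t a = Hash(c10::string_view("relu"));
  // Integral seeds (as the updated test now uses) hash as well.
  hash_t b = Hash(0);
  (void)a;
  (void)b;
}
```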
diff --git a/torch/csrc/lazy/core/ir_metadata.h b/torch/csrc/lazy/core/ir_metadata.h
index 1b398ec..437f177 100644
--- a/torch/csrc/lazy/core/ir_metadata.h
+++ b/torch/csrc/lazy/core/ir_metadata.h
@@ -13,7 +13,7 @@
   int line = -1;
 };
 
-void EmitShortFrameInfo(
+TORCH_API void EmitShortFrameInfo(
     std::ostream& stream,
     const std::vector<SourceLocation>& frames);