Change cmake to allow building with MLC kick-off build (#51326)

Summary:
- Allows the build process to build with MLC enabled if the subrepo folder `mlc` is present in the repository root and we can link against ML Compute on macOS Big Sur
- To build with MLC enabled you will need to clone the mlc repo inside the pytorch repository.
- We need both this change and https://github.com/pytorch/pytorch/pull/50634 on pytorch/pytorch to enable the `mlc` device.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/51326

Reviewed By: glaringlee

Differential Revision: D26533138

Pulled By: malfet

fbshipit-source-id: 0baa06b4eb2d62dbfc0f6fc922096cb0db1cc7d1
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b0812cf..50251d0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -89,6 +89,33 @@
 
   # RPATH stuff
   set(CMAKE_MACOSX_RPATH ON)
+  if(NOT IOS)
+    # Determine if we can link against ML Compute
+    set(MLCOMPUTE_FOUND OFF)
+    execute_process(
+      COMMAND bash -c "xcrun --sdk macosx --show-sdk-path"
+      OUTPUT_VARIABLE _macosx_sdk_path
+      OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+    set(_SDK_SEARCH_PATH "${_macosx_sdk_path}/System/Library/Frameworks/")
+    set(_FRAMEWORK_SEARCH_PATH "/System/Library/Frameworks/")
+
+    find_library(_MLCompute_fwrk_path_ NAMES MLCompute PATHS ${_FRAMEWORK_SEARCH_PATH} NO_DEFAULT_PATH)
+    find_library(_MLCompute_sdk_path_ NAMES MLCompute PATHS ${_SDK_SEARCH_PATH} NO_DEFAULT_PATH)
+
+    if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/mlc)
+      set(_MLC_FOLDER_EXISTS YES)
+    else()
+      set(_MLC_FOLDER_EXISTS NO)
+    endif()
+
+    if(_MLCompute_fwrk_path_ AND _MLCompute_sdk_path_ AND _MLC_FOLDER_EXISTS)
+      set(MLCOMPUTE_FOUND ON)
+      message(STATUS "ML Compute framework found")
+    else()
+      message(STATUS "ML Compute framework not found")
+    endif()
+  endif()
 endif()
 
 set(CPU_AARCH64 OFF)
@@ -191,6 +218,9 @@
 option(USE_PYTORCH_METAL "Use Metal for PyTorch iOS build" OFF)
 option(USE_NATIVE_ARCH "Use -march=native" OFF)
 cmake_dependent_option(
+    USE_MLCOMPUTE "Use ML Compute for macOS build" ON
+    "MLCOMPUTE_FOUND" OFF)
+cmake_dependent_option(
     USE_NCCL "Use NCCL" ON
     "USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
 cmake_dependent_option(USE_RCCL "Use RCCL" ON
@@ -758,6 +788,9 @@
 endif()
 
 if(APPLE)
+    if(USE_MLCOMPUTE)
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_MLCOMPUTE -fobjc-arc -framework MLCompute -framework Metal")
+    endif()
     string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-private-field")
     string(APPEND CMAKE_CXX_FLAGS " -Wno-missing-braces")
     string(APPEND CMAKE_CXX_FLAGS " -Wno-c++14-extensions")
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index e80d605..46e0ccb 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -601,6 +601,10 @@
     )
   endif()
 
+  if(USE_MLCOMPUTE)
+    include(../mlc/mlc_build.cmake)
+  endif()
+
   if(USE_ROCM)
     list(APPEND Caffe2_HIP_SRCS ${Caffe2_GPU_HIP_JIT_FUSERS_SRCS})
     if(USE_NCCL)
@@ -1282,6 +1286,9 @@
 elseif(USE_ROCM)
   target_link_libraries(torch PUBLIC torch_hip_library)
 endif()
+if(USE_MLCOMPUTE)
+  target_link_libraries(torch PUBLIC torch_mlc_library)
+endif()
 
 if(PRINT_CMAKE_DEBUG_INFO)
   print_target_properties(torch)
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
index 984859e..a275a92 100644
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@@ -155,6 +155,9 @@
     endif()
 endif()
 
+if(USE_MLCOMPUTE)
+    list(APPEND TORCH_PYTHON_SRCS ${MLC_PYTHON_SRCS})
+endif()
 
 if(USE_VALGRIND AND NOT WIN32)
     list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_VALGRIND)
diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp
index 032aad2..2bbcfd7 100644
--- a/torch/csrc/Module.cpp
+++ b/torch/csrc/Module.cpp
@@ -63,6 +63,10 @@
 #endif
 #endif
 
+#if defined(USE_MLCOMPUTE)
+#include <mlc/torch_mlc/csrc/MLCInit.h>
+#endif
+
 #if defined(USE_VALGRIND)
 #include <callgrind.h>
 #endif
@@ -729,6 +733,15 @@
 }} // namespace torch::cuda
 #endif
 
+#ifdef USE_MLCOMPUTE
+PyMethodDef* ModuleMLC_methods();
+namespace torch { namespace mlc {
+
+void init_bindings(PyObject *module);
+
+}} // namespace torch::mlc
+#endif
+
 bool THDPDoubleStorage_init(PyObject *module);
 bool THDPFloatStorage_init(PyObject *module);
 // TODO: fix
@@ -777,6 +790,9 @@
 #ifdef USE_CUDA
   THPUtils_addPyMethodDefs(methods, THCPModule_methods());
 #endif
+#ifdef USE_MLCOMPUTE
+  THPUtils_addPyMethodDefs(methods, ModuleMLC_methods());
+#endif
 #if defined(USE_DISTRIBUTED) && defined(USE_C10D)
   THPUtils_addPyMethodDefs(methods, torch::distributed::c10d::python_functions());
 #ifndef _WIN32
@@ -823,6 +839,9 @@
 #ifdef USE_CUDA
   torch::cuda::initModule(module);
 #endif
+#ifdef USE_MLCOMPUTE
+  torch::mlc::init_bindings(module);
+#endif
   ASSERT_TRUE(THPDoubleStorage_init(module));
   ASSERT_TRUE(THPFloatStorage_init(module));
   ASSERT_TRUE(THPHalfStorage_init(module));
@@ -937,6 +956,14 @@
 #else
   PyObject *has_cuda = Py_False;
 #endif
+#ifdef USE_MLCOMPUTE
+  PyObject *has_mlc = Py_True;
+#else
+  PyObject *has_mlc = Py_False;
+#endif
+
+  ASSERT_TRUE(set_module_attr("has_mlc", has_mlc));
+
   ASSERT_TRUE(set_module_attr("has_cuda", has_cuda));
 
   ASSERT_TRUE(set_module_attr("has_mkldnn", at::hasMKLDNN() ? Py_True : Py_False));
diff --git a/torch/csrc/utils/variadic.h b/torch/csrc/utils/variadic.h
index b02b7d2..e72cc65 100644
--- a/torch/csrc/utils/variadic.h
+++ b/torch/csrc/utils/variadic.h
@@ -18,6 +18,9 @@
   void operator()(const at::Tensor& x) {
     out += 1;
   }
+  void operator()(const c10::optional<at::Tensor>& x) {
+    out += x.has_value();
+  }
   void operator()(at::ArrayRef<at::Tensor> xs) {
     out += xs.size();
   }