[PyTorch] Enable explicit ATen level sources for lite interpreter (#52769)

Summary:
Enable a partial explicit ATen-level source list for the lite interpreter. More ATen-level sources will be added.

x86:
`SELECTED_OP_LIST=/Users/chenlai/Documents/pytorch/experiemnt/deeplabv3_scripted.yaml BUILD_LITE_INTERPRETER=1 ./scripts/build_pytorch_android.sh x86`

libpytorch_jni_lite.so -- 3.8 MB

armeabi-v7a
`SELECTED_OP_LIST=/Users/chenlai/Documents/pytorch/experiemnt/deeplabv3_scripted.yaml BUILD_LITE_INTERPRETER=1 ./scripts/build_pytorch_android.sh armeabi-v7a`
libpytorch_jni_lite.so -- 2.8 MB

Pull Request resolved: https://github.com/pytorch/pytorch/pull/52769

Test Plan: Imported from OSS

Reviewed By: iseeyuan

Differential Revision: D26717268

Pulled By: cccclai

fbshipit-source-id: 208300f198071bd6751f76ff4bc24c7c9312d337
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
index b78b403..5836c1a 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
@@ -116,7 +116,22 @@
 
 add_subdirectory(quantized)
 add_subdirectory(nnapi)
-set(all_cpu_cpp ${base_cpp} ${ATen_CORE_SRCS} ${native_cpp} ${native_sparse_cpp} ${native_quantized_cpp} ${native_mkl_cpp} ${native_mkldnn_cpp} ${native_utils_cpp} ${native_xnnpack} ${generated_cpp} ${core_generated_cpp} ${ATen_CPU_SRCS} ${ATen_QUANTIZED_SRCS} ${ATen_NNAPI_SRCS} ${cpu_kernel_cpp})
+
+if(BUILD_LITE_INTERPRETER)
+  set(all_cpu_cpp ${generated_cpp} ${core_generated_cpp} ${cpu_kernel_cpp})
+  append_filelist("jit_core_sources" all_cpu_cpp)
+  append_filelist("aten_cpu_source_non_codegen_list" all_cpu_cpp)
+  append_filelist("aten_native_source_non_codegen_list" all_cpu_cpp)
+  list(APPEND all_cpu_cpp ${Aten_TH_AVX_extra_src})
+else()
+  set(
+    all_cpu_cpp ${base_cpp} ${ATen_CORE_SRCS} ${native_cpp} ${native_sparse_cpp}
+    ${native_quantized_cpp} ${native_mkl_cpp} ${native_mkldnn_cpp}
+    ${native_utils_cpp} ${native_xnnpack} ${generated_cpp} ${core_generated_cpp}
+    ${ATen_CPU_SRCS} ${ATen_QUANTIZED_SRCS} ${ATen_NNAPI_SRCS} ${cpu_kernel_cpp}
+  )
+endif()
+
 if(AT_MKL_ENABLED)
   set(all_cpu_cpp ${all_cpu_cpp} ${mkl_cpp})
 endif()
diff --git a/aten/src/TH/CMakeLists.txt b/aten/src/TH/CMakeLists.txt
index 5661a69..be42191 100644
--- a/aten/src/TH/CMakeLists.txt
+++ b/aten/src/TH/CMakeLists.txt
@@ -1,8 +1,8 @@
-set(extra_src)
+set(Aten_TH_AVX_extra_src)
 
 # IF AVX FOUND
 if(C_AVX_FOUND)
-  list(APPEND extra_src ${CMAKE_CURRENT_SOURCE_DIR}/vector/AVX.cpp)
+  list(APPEND Aten_TH_AVX_extra_src ${CMAKE_CURRENT_SOURCE_DIR}/vector/AVX.cpp)
 endif(C_AVX_FOUND)
 
 set(hdr
@@ -22,11 +22,15 @@
   ${CMAKE_CURRENT_SOURCE_DIR}/THBlas.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/THLapack.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/THVector.cpp
-  ${extra_src}
+  ${Aten_TH_AVX_extra_src}
   )
 # Remember that PARENT_SCOPE variables are not in the current scope
 set(ATen_TH_SRCS ${ATen_TH_SRCS} PARENT_SCOPE)
 set(ATen_CPU_SRCS ${ATen_CPU_SRCS} ${ATen_TH_SRCS} PARENT_SCOPE)
+
+# Aten_TH_AVX_extra_src is used in aten/src/ATen/CMakeLists.txt
+# when built with BUILD_LITE_INTERPRETER=1
+set(Aten_TH_AVX_extra_src ${Aten_TH_AVX_extra_src} PARENT_SCOPE)
 ######################################################
 
 
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index c4a225c..386fe0a 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -923,15 +923,16 @@
     target_link_libraries(torch_cuda INTERFACE "-INCLUDE:?warp_size@cuda@at@@YAHXZ")
   endif()
 
-
-  set(TH_CPU_INCLUDE
-    # dense
-    aten/src/TH
-    ${CMAKE_CURRENT_BINARY_DIR}/aten/src/TH
-    ${TORCH_ROOT}/aten/src
-    ${CMAKE_CURRENT_BINARY_DIR}/aten/src
-    ${CMAKE_BINARY_DIR}/aten/src)
-  target_include_directories(torch_cpu PRIVATE ${TH_CPU_INCLUDE})
+  if(NOT BUILD_LITE_INTERPRETER)
+    set(TH_CPU_INCLUDE
+      # dense
+      aten/src/TH
+      ${CMAKE_CURRENT_BINARY_DIR}/aten/src/TH
+      ${TORCH_ROOT}/aten/src
+      ${CMAKE_CURRENT_BINARY_DIR}/aten/src
+      ${CMAKE_BINARY_DIR}/aten/src)
+    target_include_directories(torch_cpu PRIVATE ${TH_CPU_INCLUDE})
+  endif()
 
   set(ATen_CPU_INCLUDE
     ${TORCH_ROOT}/aten/src
diff --git a/tools/build_variables.bzl b/tools/build_variables.bzl
index 9e716ef..ccf87b5 100644
--- a/tools/build_variables.bzl
+++ b/tools/build_variables.bzl
@@ -643,7 +643,7 @@
 
     return _libtorch_python_sources
 
-aten_cpu_source_list = [
+aten_cpu_source_non_codegen_list = [
     "aten/src/ATen/BatchedTensorImpl.cpp",
     "aten/src/ATen/CPUGeneratorImpl.cpp",
     "aten/src/ATen/Context.cpp",
@@ -706,7 +706,6 @@
     "aten/src/ATen/native/BatchLinearAlgebraKernel.cpp",
     "aten/src/ATen/native/DispatchStub.cpp",
     "aten/src/ATen/native/UpSample.cpp",
-    "aten/src/ATen/native/cpu/AdaptiveAvgPoolKernel.cpp",
     "aten/src/ATen/native/mkl/LinearAlgebra.cpp",
     "aten/src/ATen/native/mkl/SpectralOps.cpp",
     "aten/src/ATen/native/mkldnn/BinaryOps.cpp",
@@ -729,9 +728,60 @@
     "aten/src/ATen/vulkan/Context.cpp",
 ]
 
-# Files in ATen/native with a few exceptions
-# TODO: move the exceptions to proper locations
-aten_native_source_list = [
+aten_cpu_source_codegen_list = [
+    "aten/src/ATen/native/cpu/AdaptiveAvgPoolKernel.cpp",
+]
+
+# When building the lite interpreter in OSS, "aten/src/ATen/native/cpu/AdaptiveAvgPoolKernel.cpp" will go through the
+# codegen process. The codegen version of this file, like Activation.cpp.DEFAULT.cpp, will be included
+# in ${cpu_kernel_cpp} in aten/src/ATen/CMakeLists.txt. As a result, in aten/src/ATen/CMakeLists.txt,
+# only aten_cpu_source_non_codegen_list need to be added to ${all_cpu_cpp}.
+aten_cpu_source_list = sorted(aten_cpu_source_non_codegen_list + aten_cpu_source_codegen_list)
+
+# Same as ${aten_cpu_source_codegen_list}, this list will go through aten codegen, and be included in
+# ${cpu_kernel_cpp} in aten/src/ATen/CMakeLists.txt.
+aten_native_source_codegen_list = [
+    "aten/src/ATen/native/cpu/Activation.cpp",
+    "aten/src/ATen/native/cpu/BinaryOpsKernel.cpp",
+    "aten/src/ATen/native/cpu/BlasKernel.cpp",
+    "aten/src/ATen/native/cpu/CatKernel.cpp",
+    "aten/src/ATen/native/cpu/ComplexKernel.cpp",
+    "aten/src/ATen/native/cpu/CopyKernel.cpp",
+    "aten/src/ATen/native/cpu/CrossKernel.cpp",
+    "aten/src/ATen/native/cpu/DepthwiseConvKernel.cpp",
+    "aten/src/ATen/native/cpu/DistanceOpsKernel.cpp",
+    "aten/src/ATen/native/cpu/FillKernel.cpp",
+    "aten/src/ATen/native/cpu/FunctionOfAMatrixUtilsKernel.cpp",
+    "aten/src/ATen/native/cpu/GridSamplerKernel.cpp",
+    "aten/src/ATen/native/cpu/IndexKernel.cpp",
+    "aten/src/ATen/native/cpu/LerpKernel.cpp",
+    "aten/src/ATen/native/cpu/LinearAlgebraKernel.cpp",
+    "aten/src/ATen/native/cpu/MaxPooling.cpp",
+    "aten/src/ATen/native/cpu/MultinomialKernel.cpp",
+    "aten/src/ATen/native/cpu/PointwiseOpsKernel.cpp",
+    "aten/src/ATen/native/cpu/PowKernel.cpp",
+    "aten/src/ATen/native/cpu/RangeFactoriesKernel.cpp",
+    "aten/src/ATen/native/cpu/ReduceAllOpsKernel.cpp",
+    "aten/src/ATen/native/cpu/ReduceOpsKernel.cpp",
+    "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp",
+    "aten/src/ATen/native/cpu/SoftMaxKernel.cpp",
+    "aten/src/ATen/native/cpu/SortingKernel.cpp",
+    "aten/src/ATen/native/cpu/StackKernel.cpp",
+    "aten/src/ATen/native/cpu/SumKernel.cpp",
+    "aten/src/ATen/native/cpu/TensorCompareKernel.cpp",
+    "aten/src/ATen/native/cpu/UnaryOpsKernel.cpp",
+    "aten/src/ATen/native/cpu/Unfold2d.cpp",
+    "aten/src/ATen/native/cpu/UnfoldBackwardKernel.cpp",
+    "aten/src/ATen/native/cpu/UpSampleKernel.cpp",
+    "aten/src/ATen/native/cpu/UpSampleMoreKernel.cpp",
+    "aten/src/ATen/native/cpu/batch_norm_kernel.cpp",
+    "aten/src/ATen/native/cpu/group_norm_kernel.cpp",
+    "aten/src/ATen/native/cpu/layer_norm_kernel.cpp",
+    "aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp",
+]
+
+# This aten native source file list will not go through aten codegen process
+aten_native_source_non_codegen_list = [
     "aten/src/ATen/native/quantized/cpu/fbgemm_utils.cpp",
     "aten/src/ATen/native/quantized/cpu/int_repr_quant.cpp",
     "aten/src/ATen/native/quantized/cpu/make_per_tensor_quantized_tensor.cpp",
@@ -898,43 +948,6 @@
     "aten/src/ATen/native/WeightNorm.cpp",
     "aten/src/ATen/native/group_norm.cpp",
     "aten/src/ATen/native/layer_norm.cpp",
-    "aten/src/ATen/native/cpu/Activation.cpp",
-    "aten/src/ATen/native/cpu/BinaryOpsKernel.cpp",
-    "aten/src/ATen/native/cpu/BlasKernel.cpp",
-    "aten/src/ATen/native/cpu/CatKernel.cpp",
-    "aten/src/ATen/native/cpu/ComplexKernel.cpp",
-    "aten/src/ATen/native/cpu/CopyKernel.cpp",
-    "aten/src/ATen/native/cpu/CrossKernel.cpp",
-    "aten/src/ATen/native/cpu/DepthwiseConvKernel.cpp",
-    "aten/src/ATen/native/cpu/DistanceOpsKernel.cpp",
-    "aten/src/ATen/native/cpu/FillKernel.cpp",
-    "aten/src/ATen/native/cpu/FunctionOfAMatrixUtilsKernel.cpp",
-    "aten/src/ATen/native/cpu/GridSamplerKernel.cpp",
-    "aten/src/ATen/native/cpu/IndexKernel.cpp",
-    "aten/src/ATen/native/cpu/LerpKernel.cpp",
-    "aten/src/ATen/native/cpu/LinearAlgebraKernel.cpp",
-    "aten/src/ATen/native/cpu/MaxPooling.cpp",
-    "aten/src/ATen/native/cpu/MultinomialKernel.cpp",
-    "aten/src/ATen/native/cpu/PointwiseOpsKernel.cpp",
-    "aten/src/ATen/native/cpu/PowKernel.cpp",
-    "aten/src/ATen/native/cpu/RangeFactoriesKernel.cpp",
-    "aten/src/ATen/native/cpu/ReduceAllOpsKernel.cpp",
-    "aten/src/ATen/native/cpu/ReduceOpsKernel.cpp",
-    "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp",
-    "aten/src/ATen/native/cpu/SoftMaxKernel.cpp",
-    "aten/src/ATen/native/cpu/SortingKernel.cpp",
-    "aten/src/ATen/native/cpu/StackKernel.cpp",
-    "aten/src/ATen/native/cpu/SumKernel.cpp",
-    "aten/src/ATen/native/cpu/TensorCompareKernel.cpp",
-    "aten/src/ATen/native/cpu/UnaryOpsKernel.cpp",
-    "aten/src/ATen/native/cpu/Unfold2d.cpp",
-    "aten/src/ATen/native/cpu/UnfoldBackwardKernel.cpp",
-    "aten/src/ATen/native/cpu/UpSampleKernel.cpp",
-    "aten/src/ATen/native/cpu/UpSampleMoreKernel.cpp",
-    "aten/src/ATen/native/cpu/batch_norm_kernel.cpp",
-    "aten/src/ATen/native/cpu/group_norm_kernel.cpp",
-    "aten/src/ATen/native/cpu/layer_norm_kernel.cpp",
-    "aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp",
     "aten/src/ATen/native/sparse/ParamUtils.cpp",
     "aten/src/ATen/native/sparse/SoftMax.cpp",
     "aten/src/ATen/native/sparse/SparseMatMul.cpp",
@@ -966,3 +979,8 @@
     "aten/src/ATen/TensorIterator.cpp",
     "aten/src/ATen/LegacyTHFunctionsCPU.cpp",
 ]
+
+# 1. Files in ATen/native with a few exceptions
+# TODO: move the exceptions to proper locations
+# 2. The whole aten native source list includes the list with and without aten codegen process.
+aten_native_source_list = sorted(aten_native_source_non_codegen_list + aten_native_source_codegen_list)