[PyTorch] Enable explicit ATen level sources for lite interpreter (#52769)
Summary:
Enable a partial explicit ATen-level source list for the lite interpreter. More ATen-level source lists will be added later.
x86:
`SELECTED_OP_LIST=/Users/chenlai/Documents/pytorch/experiemnt/deeplabv3_scripted.yaml BUILD_LITE_INTERPRETER=1 ./scripts/build_pytorch_android.sh x86`
libpytorch_jni_lite.so -- 3.8 MB
armeabi-v7a:
`SELECTED_OP_LIST=/Users/chenlai/Documents/pytorch/experiemnt/deeplabv3_scripted.yaml BUILD_LITE_INTERPRETER=1 ./scripts/build_pytorch_android.sh armeabi-v7a`
libpytorch_jni_lite.so -- 2.8 MB
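For reference, a selected-op YAML like the one passed via `SELECTED_OP_LIST` above can be produced with `torch.jit.export_opnames`. A minimal sketch, assuming torchvision's `deeplabv3_resnet50` as the model (the exact model behind `deeplabv3_scripted.yaml` is not spelled out beyond the filename):

```python
# Minimal sketch of producing a SELECTED_OP_LIST yaml for a scripted model.
# Assumes torchvision's deeplabv3_resnet50 purely for illustration.
import torch
import torchvision
import yaml

model = torchvision.models.segmentation.deeplabv3_resnet50(pretrained=True).eval()
scripted = torch.jit.script(model)
ops = torch.jit.export_opnames(scripted)  # root operator names, e.g. "aten::add.Tensor"
with open("deeplabv3_scripted.yaml", "w") as f:
    yaml.dump(ops, f)
```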
Pull Request resolved: https://github.com/pytorch/pytorch/pull/52769
Test Plan: Imported from OSS
Reviewed By: iseeyuan
Differential Revision: D26717268
Pulled By: cccclai
fbshipit-source-id: 208300f198071bd6751f76ff4bc24c7c9312d337
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
index b78b403..5836c1a 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
@@ -116,7 +116,22 @@
add_subdirectory(quantized)
add_subdirectory(nnapi)
-set(all_cpu_cpp ${base_cpp} ${ATen_CORE_SRCS} ${native_cpp} ${native_sparse_cpp} ${native_quantized_cpp} ${native_mkl_cpp} ${native_mkldnn_cpp} ${native_utils_cpp} ${native_xnnpack} ${generated_cpp} ${core_generated_cpp} ${ATen_CPU_SRCS} ${ATen_QUANTIZED_SRCS} ${ATen_NNAPI_SRCS} ${cpu_kernel_cpp})
+
+if(BUILD_LITE_INTERPRETER)
+ set(all_cpu_cpp ${generated_cpp} ${core_generated_cpp} ${cpu_kernel_cpp})
+ append_filelist("jit_core_sources" all_cpu_cpp)
+ append_filelist("aten_cpu_source_non_codegen_list" all_cpu_cpp)
+ append_filelist("aten_native_source_non_codegen_list" all_cpu_cpp)
+ list(APPEND all_cpu_cpp ${Aten_TH_AVX_extra_src})
+else()
+ set(
+ all_cpu_cpp ${base_cpp} ${ATen_CORE_SRCS} ${native_cpp} ${native_sparse_cpp}
+ ${native_quantized_cpp} ${native_mkl_cpp} ${native_mkldnn_cpp}
+ ${native_utils_cpp} ${native_xnnpack} ${generated_cpp} ${core_generated_cpp}
+ ${ATen_CPU_SRCS} ${ATen_QUANTIZED_SRCS} ${ATen_NNAPI_SRCS} ${cpu_kernel_cpp}
+ )
+endif()
+
if(AT_MKL_ENABLED)
set(all_cpu_cpp ${all_cpu_cpp} ${mkl_cpp})
endif()
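(For context: `append_filelist` is a CMake helper in PyTorch that pulls a named list out of `tools/build_variables.bzl`, which is kept Python-compatible so it can be exec'd. A rough Python equivalent of what the helper does, for illustration only:)

```python
# Illustrative sketch only: approximates what the append_filelist CMake helper
# does. build_variables.bzl is valid Python, so a named list can be recovered
# by exec-ing the file and reading the variable out of the resulting scope.
def read_filelist(name, bzl_path="tools/build_variables.bzl"):
    scope = {}
    exec(open(bzl_path).read(), scope)
    return scope[name]

# e.g. the two lists appended to all_cpu_cpp in the hunk above:
srcs = read_filelist("aten_cpu_source_non_codegen_list")
srcs += read_filelist("aten_native_source_non_codegen_list")
```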
diff --git a/aten/src/TH/CMakeLists.txt b/aten/src/TH/CMakeLists.txt
index 5661a69..be42191 100644
--- a/aten/src/TH/CMakeLists.txt
+++ b/aten/src/TH/CMakeLists.txt
@@ -1,8 +1,8 @@
-set(extra_src)
+set(Aten_TH_AVX_extra_src)
# IF AVX FOUND
if(C_AVX_FOUND)
- list(APPEND extra_src ${CMAKE_CURRENT_SOURCE_DIR}/vector/AVX.cpp)
+ list(APPEND Aten_TH_AVX_extra_src ${CMAKE_CURRENT_SOURCE_DIR}/vector/AVX.cpp)
endif(C_AVX_FOUND)
set(hdr
@@ -22,11 +22,15 @@
${CMAKE_CURRENT_SOURCE_DIR}/THBlas.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THLapack.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THVector.cpp
- ${extra_src}
+ ${Aten_TH_AVX_extra_src}
)
# Remember that PARENT_SCOPE variables are not in the current scope
set(ATen_TH_SRCS ${ATen_TH_SRCS} PARENT_SCOPE)
set(ATen_CPU_SRCS ${ATen_CPU_SRCS} ${ATen_TH_SRCS} PARENT_SCOPE)
+
+# Aten_TH_AVX_extra_src is used in aten/src/ATen/CMakeLists.txt
+# when built with BUILD_LITE_INTERPRETER=1
+set(Aten_TH_AVX_extra_src ${Aten_TH_AVX_extra_src} PARENT_SCOPE)
######################################################
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index c4a225c..386fe0a 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -923,15 +923,16 @@
target_link_libraries(torch_cuda INTERFACE "-INCLUDE:?warp_size@cuda@at@@YAHXZ")
endif()
-
- set(TH_CPU_INCLUDE
- # dense
- aten/src/TH
- ${CMAKE_CURRENT_BINARY_DIR}/aten/src/TH
- ${TORCH_ROOT}/aten/src
- ${CMAKE_CURRENT_BINARY_DIR}/aten/src
- ${CMAKE_BINARY_DIR}/aten/src)
- target_include_directories(torch_cpu PRIVATE ${TH_CPU_INCLUDE})
+ if(NOT BUILD_LITE_INTERPRETER)
+ set(TH_CPU_INCLUDE
+ # dense
+ aten/src/TH
+ ${CMAKE_CURRENT_BINARY_DIR}/aten/src/TH
+ ${TORCH_ROOT}/aten/src
+ ${CMAKE_CURRENT_BINARY_DIR}/aten/src
+ ${CMAKE_BINARY_DIR}/aten/src)
+ target_include_directories(torch_cpu PRIVATE ${TH_CPU_INCLUDE})
+ endif()
set(ATen_CPU_INCLUDE
${TORCH_ROOT}/aten/src
diff --git a/tools/build_variables.bzl b/tools/build_variables.bzl
index 9e716ef..ccf87b5 100644
--- a/tools/build_variables.bzl
+++ b/tools/build_variables.bzl
@@ -643,7 +643,7 @@
return _libtorch_python_sources
-aten_cpu_source_list = [
+aten_cpu_source_non_codegen_list = [
"aten/src/ATen/BatchedTensorImpl.cpp",
"aten/src/ATen/CPUGeneratorImpl.cpp",
"aten/src/ATen/Context.cpp",
@@ -706,7 +706,6 @@
"aten/src/ATen/native/BatchLinearAlgebraKernel.cpp",
"aten/src/ATen/native/DispatchStub.cpp",
"aten/src/ATen/native/UpSample.cpp",
- "aten/src/ATen/native/cpu/AdaptiveAvgPoolKernel.cpp",
"aten/src/ATen/native/mkl/LinearAlgebra.cpp",
"aten/src/ATen/native/mkl/SpectralOps.cpp",
"aten/src/ATen/native/mkldnn/BinaryOps.cpp",
@@ -729,9 +728,60 @@
"aten/src/ATen/vulkan/Context.cpp",
]
-# Files in ATen/native with a few exceptions
-# TODO: move the exceptions to proper locations
-aten_native_source_list = [
+aten_cpu_source_codegen_list = [
+ "aten/src/ATen/native/cpu/AdaptiveAvgPoolKernel.cpp",
+]
+
+# When building the lite interpreter in OSS, "aten/src/ATen/native/cpu/AdaptiveAvgPoolKernel.cpp" goes through
+# the codegen process. The codegen version of this file (named like Activation.cpp.DEFAULT.cpp) is included
+# in ${cpu_kernel_cpp} in aten/src/ATen/CMakeLists.txt. As a result, in aten/src/ATen/CMakeLists.txt,
+# only aten_cpu_source_non_codegen_list needs to be added to ${all_cpu_cpp}.
+aten_cpu_source_list = sorted(aten_cpu_source_non_codegen_list + aten_cpu_source_codegen_list)
+
+# Like ${aten_cpu_source_codegen_list}, this list goes through ATen codegen and is included in
+# ${cpu_kernel_cpp} in aten/src/ATen/CMakeLists.txt.
+aten_native_source_codegen_list = [
+ "aten/src/ATen/native/cpu/Activation.cpp",
+ "aten/src/ATen/native/cpu/BinaryOpsKernel.cpp",
+ "aten/src/ATen/native/cpu/BlasKernel.cpp",
+ "aten/src/ATen/native/cpu/CatKernel.cpp",
+ "aten/src/ATen/native/cpu/ComplexKernel.cpp",
+ "aten/src/ATen/native/cpu/CopyKernel.cpp",
+ "aten/src/ATen/native/cpu/CrossKernel.cpp",
+ "aten/src/ATen/native/cpu/DepthwiseConvKernel.cpp",
+ "aten/src/ATen/native/cpu/DistanceOpsKernel.cpp",
+ "aten/src/ATen/native/cpu/FillKernel.cpp",
+ "aten/src/ATen/native/cpu/FunctionOfAMatrixUtilsKernel.cpp",
+ "aten/src/ATen/native/cpu/GridSamplerKernel.cpp",
+ "aten/src/ATen/native/cpu/IndexKernel.cpp",
+ "aten/src/ATen/native/cpu/LerpKernel.cpp",
+ "aten/src/ATen/native/cpu/LinearAlgebraKernel.cpp",
+ "aten/src/ATen/native/cpu/MaxPooling.cpp",
+ "aten/src/ATen/native/cpu/MultinomialKernel.cpp",
+ "aten/src/ATen/native/cpu/PointwiseOpsKernel.cpp",
+ "aten/src/ATen/native/cpu/PowKernel.cpp",
+ "aten/src/ATen/native/cpu/RangeFactoriesKernel.cpp",
+ "aten/src/ATen/native/cpu/ReduceAllOpsKernel.cpp",
+ "aten/src/ATen/native/cpu/ReduceOpsKernel.cpp",
+ "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp",
+ "aten/src/ATen/native/cpu/SoftMaxKernel.cpp",
+ "aten/src/ATen/native/cpu/SortingKernel.cpp",
+ "aten/src/ATen/native/cpu/StackKernel.cpp",
+ "aten/src/ATen/native/cpu/SumKernel.cpp",
+ "aten/src/ATen/native/cpu/TensorCompareKernel.cpp",
+ "aten/src/ATen/native/cpu/UnaryOpsKernel.cpp",
+ "aten/src/ATen/native/cpu/Unfold2d.cpp",
+ "aten/src/ATen/native/cpu/UnfoldBackwardKernel.cpp",
+ "aten/src/ATen/native/cpu/UpSampleKernel.cpp",
+ "aten/src/ATen/native/cpu/UpSampleMoreKernel.cpp",
+ "aten/src/ATen/native/cpu/batch_norm_kernel.cpp",
+ "aten/src/ATen/native/cpu/group_norm_kernel.cpp",
+ "aten/src/ATen/native/cpu/layer_norm_kernel.cpp",
+ "aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp",
+]
+
+# This ATen native source file list does not go through the ATen codegen process
+aten_native_source_non_codegen_list = [
"aten/src/ATen/native/quantized/cpu/fbgemm_utils.cpp",
"aten/src/ATen/native/quantized/cpu/int_repr_quant.cpp",
"aten/src/ATen/native/quantized/cpu/make_per_tensor_quantized_tensor.cpp",
@@ -898,43 +948,6 @@
"aten/src/ATen/native/WeightNorm.cpp",
"aten/src/ATen/native/group_norm.cpp",
"aten/src/ATen/native/layer_norm.cpp",
- "aten/src/ATen/native/cpu/Activation.cpp",
- "aten/src/ATen/native/cpu/BinaryOpsKernel.cpp",
- "aten/src/ATen/native/cpu/BlasKernel.cpp",
- "aten/src/ATen/native/cpu/CatKernel.cpp",
- "aten/src/ATen/native/cpu/ComplexKernel.cpp",
- "aten/src/ATen/native/cpu/CopyKernel.cpp",
- "aten/src/ATen/native/cpu/CrossKernel.cpp",
- "aten/src/ATen/native/cpu/DepthwiseConvKernel.cpp",
- "aten/src/ATen/native/cpu/DistanceOpsKernel.cpp",
- "aten/src/ATen/native/cpu/FillKernel.cpp",
- "aten/src/ATen/native/cpu/FunctionOfAMatrixUtilsKernel.cpp",
- "aten/src/ATen/native/cpu/GridSamplerKernel.cpp",
- "aten/src/ATen/native/cpu/IndexKernel.cpp",
- "aten/src/ATen/native/cpu/LerpKernel.cpp",
- "aten/src/ATen/native/cpu/LinearAlgebraKernel.cpp",
- "aten/src/ATen/native/cpu/MaxPooling.cpp",
- "aten/src/ATen/native/cpu/MultinomialKernel.cpp",
- "aten/src/ATen/native/cpu/PointwiseOpsKernel.cpp",
- "aten/src/ATen/native/cpu/PowKernel.cpp",
- "aten/src/ATen/native/cpu/RangeFactoriesKernel.cpp",
- "aten/src/ATen/native/cpu/ReduceAllOpsKernel.cpp",
- "aten/src/ATen/native/cpu/ReduceOpsKernel.cpp",
- "aten/src/ATen/native/cpu/ScatterGatherKernel.cpp",
- "aten/src/ATen/native/cpu/SoftMaxKernel.cpp",
- "aten/src/ATen/native/cpu/SortingKernel.cpp",
- "aten/src/ATen/native/cpu/StackKernel.cpp",
- "aten/src/ATen/native/cpu/SumKernel.cpp",
- "aten/src/ATen/native/cpu/TensorCompareKernel.cpp",
- "aten/src/ATen/native/cpu/UnaryOpsKernel.cpp",
- "aten/src/ATen/native/cpu/Unfold2d.cpp",
- "aten/src/ATen/native/cpu/UnfoldBackwardKernel.cpp",
- "aten/src/ATen/native/cpu/UpSampleKernel.cpp",
- "aten/src/ATen/native/cpu/UpSampleMoreKernel.cpp",
- "aten/src/ATen/native/cpu/batch_norm_kernel.cpp",
- "aten/src/ATen/native/cpu/group_norm_kernel.cpp",
- "aten/src/ATen/native/cpu/layer_norm_kernel.cpp",
- "aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp",
"aten/src/ATen/native/sparse/ParamUtils.cpp",
"aten/src/ATen/native/sparse/SoftMax.cpp",
"aten/src/ATen/native/sparse/SparseMatMul.cpp",
@@ -966,3 +979,8 @@
"aten/src/ATen/TensorIterator.cpp",
"aten/src/ATen/LegacyTHFunctionsCPU.cpp",
]
+
+# 1. Files in ATen/native, with a few exceptions
+# TODO: move the exceptions to proper locations
+# 2. The whole ATen native source list includes both the codegen and non-codegen lists.
+aten_native_source_list = sorted(aten_native_source_non_codegen_list + aten_native_source_codegen_list)
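As a quick sanity check (not part of this change), the codegen/non-codegen split should partition the original lists; since `tools/build_variables.bzl` stays Python-compatible, one can verify:

```python
# Sanity-check sketch: no file should land in both halves of either split,
# and the combined lists should reproduce the original sorted lists.
scope = {}
exec(open("tools/build_variables.bzl").read(), scope)

assert not set(scope["aten_cpu_source_codegen_list"]) & set(scope["aten_cpu_source_non_codegen_list"])
assert not set(scope["aten_native_source_codegen_list"]) & set(scope["aten_native_source_non_codegen_list"])
assert scope["aten_cpu_source_list"] == sorted(
    scope["aten_cpu_source_non_codegen_list"] + scope["aten_cpu_source_codegen_list"]
)
```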