| load("@bazel_skylib//lib:paths.bzl", "paths") |
| load( |
| "//tools:build_variables.bzl", |
| "aten_native_source_list", |
| ) |
| load( |
| "//tools:ufunc_defs.bzl", |
| "aten_ufunc_generated_cpu_kernel_sources", |
| "aten_ufunc_generated_cpu_sources", |
| "aten_ufunc_generated_cuda_sources", |
| ) |
| load("//tools/build_defs:fb_xplat_genrule.bzl", "fb_xplat_genrule") |
| load("//tools/build_defs:type_defs.bzl", "is_list", "is_string") |
| |
| USED_PT_BACKENDS = [ |
| "CPU", |
| "QuantizedCPU", |
| "SparseCPU", # brings ~20 kb size regression |
| ] |
| |
| # This needs to be kept in sync with https://github.com/pytorch/pytorch/blob/release/1.9/torchgen/gen.py#L892 |
| PT_BACKEND_HEADERS = [ |
| "CPU", |
| "CUDA", |
| "CompositeExplicitAutograd", |
| "CompositeImplicitAutograd", |
| "Meta", |
| ] |
| |
| PT_BASE_OPS = [ |
| "aten::_coalesced_", |
| "aten::_copy_from", |
| "aten::_empty_affine_quantized", |
| "aten::_empty_per_channel_affine_quantized", |
| "aten::_indices", |
| "aten::_nnz", |
| "aten::_values", |
| "aten::add", |
| "aten::add_", |
| "aten::arange", |
| "aten::as_strided", |
| "aten::as_strided_", |
| "aten::cat", |
| "aten::clone", |
| "aten::coalesce", |
| "aten::contiguous", |
| "aten::copy_", |
| "aten::copy_sparse_to_sparse_", |
| "aten::dense_dim", |
| "aten::dequantize", |
| "aten::div", |
| "aten::div_", |
| "aten::empty", |
| "aten::empty_like", |
| "aten::empty_strided", |
| "aten::empty.memory_format", |
| "aten::eq", |
| "aten::equal", |
| "aten::expand", |
| "aten::fill_", |
| "aten::is_coalesced", |
| "aten::is_complex", |
| "aten::is_floating_point", |
| "aten::is_leaf", |
| "aten::is_nonzero", |
| "aten::item", |
| "aten::max", |
| "aten::min", |
| "aten::mul", |
| "aten::mul_", |
| "aten::narrow", |
| "aten::ne", |
| "aten::permute", |
| "aten::q_per_channel_axis", |
| "aten::q_per_channel_scales", |
| "aten::q_per_channel_zero_points", |
| "aten::q_scale", |
| "aten::q_zero_point", |
| "aten::qscheme", |
| "aten::quantize_per_tensor", |
| "aten::reshape", |
| "aten::_reshape_alias", |
| "aten::resize_", |
| "aten::resize_as_", |
| "aten::scalar_tensor", |
| "aten::select", |
| "aten::set_", |
| "aten::size", |
| "aten::slice", |
| "aten::sparse_dim", |
| "aten::sparse_resize_and_clear_", |
| "aten::squeeze", |
| "aten::squeeze_", |
| "aten::stride", |
| "aten::sub", |
| "aten::sub_", |
| "aten::sum", |
| "aten::t", |
| "aten::to", |
| "aten::_to_copy", |
| "aten::unsqueeze", |
| "aten::view", |
| "aten::zero_", |
| "aten::zeros", |
| "aten::zeros_like", |
| ] |
| |
| def get_aten_compiler_flags(): |
| return ATEN_COMPILER_FLAGS |
| |
| def get_generate_code_bin_outs(): |
| return { |
| "autograd/generated/ADInplaceOrViewTypeEverything.cpp": ["autograd/generated/ADInplaceOrViewTypeEverything.cpp"], |
| "autograd/generated/ADInplaceOrViewType_0.cpp": ["autograd/generated/ADInplaceOrViewType_0.cpp"], |
| "autograd/generated/ADInplaceOrViewType_1.cpp": ["autograd/generated/ADInplaceOrViewType_1.cpp"], |
| "autograd/generated/Functions.cpp": ["autograd/generated/Functions.cpp"], |
| "autograd/generated/Functions.h": ["autograd/generated/Functions.h"], |
| "autograd/generated/TraceTypeEverything.cpp": ["autograd/generated/TraceTypeEverything.cpp"], |
| "autograd/generated/TraceType_0.cpp": ["autograd/generated/TraceType_0.cpp"], |
| "autograd/generated/TraceType_1.cpp": ["autograd/generated/TraceType_1.cpp"], |
| "autograd/generated/TraceType_2.cpp": ["autograd/generated/TraceType_2.cpp"], |
| "autograd/generated/TraceType_3.cpp": ["autograd/generated/TraceType_3.cpp"], |
| "autograd/generated/TraceType_4.cpp": ["autograd/generated/TraceType_4.cpp"], |
| "autograd/generated/VariableType.h": ["autograd/generated/VariableType.h"], |
| "autograd/generated/VariableTypeEverything.cpp": ["autograd/generated/VariableTypeEverything.cpp"], |
| "autograd/generated/VariableType_0.cpp": ["autograd/generated/VariableType_0.cpp"], |
| "autograd/generated/VariableType_1.cpp": ["autograd/generated/VariableType_1.cpp"], |
| "autograd/generated/VariableType_2.cpp": ["autograd/generated/VariableType_2.cpp"], |
| "autograd/generated/VariableType_3.cpp": ["autograd/generated/VariableType_3.cpp"], |
| "autograd/generated/VariableType_4.cpp": ["autograd/generated/VariableType_4.cpp"], |
| "autograd/generated/variable_factories.h": ["autograd/generated/variable_factories.h"], |
| } |
| |
| ATEN_COMPILER_FLAGS = [ |
| "-fexceptions", |
| "-frtti", |
| "-fPIC", |
| "-Os", |
| "-Wno-absolute-value", |
| "-Wno-deprecated-declarations", |
| "-Wno-macro-redefined", |
| "-Wno-tautological-constant-out-of-range-compare", |
| "-Wno-unknown-pragmas", |
| "-Wno-unknown-warning-option", |
| "-Wno-unused-function", |
| "-Wno-unused-variable", |
| "-Wno-pass-failed", |
| "-Wno-shadow", |
| ] |
| |
| PT_COMPILER_FLAGS = [ |
| "-frtti", |
| "-Os", |
| "-Wno-unknown-pragmas", |
| "-Wno-write-strings", |
| "-Wno-unused-variable", |
| "-Wno-unused-function", |
| "-Wno-deprecated-declarations", |
| "-Wno-shadow", |
| "-Wno-global-constructors", |
| "-Wno-missing-prototypes", |
| "-std=gnu++17", # to accommodate Eigen |
| ] |
| |
| def get_template_source_dict(): |
| ret = {} |
| for file_path in TEMPLATE_SOURCE_LIST: |
| path_prefix = paths.dirname(file_path) |
| if path_prefix not in ret: |
| ret[path_prefix] = [] |
| ret[path_prefix].append(file_path) |
| return ret |
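| |
| # For illustration (a sketch): given the entries in TEMPLATE_SOURCE_LIST (defined |
| # below), the returned dict groups file paths by their directory, e.g. |
| # {"torch/csrc/jit/runtime": [ |
| # "torch/csrc/jit/runtime/register_prim_ops.cpp", |
| # "torch/csrc/jit/runtime/register_special_ops.cpp", |
| # ], ...} |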
| |
| def get_gen_oplist_outs(): |
| return { |
| #"SupportedMobileModelsRegistration.cpp": [ |
| # "SupportedMobileModelsRegistration.cpp", |
| #], |
| "selected_mobile_ops.h": [ |
| "selected_mobile_ops.h", |
| ], |
| "selected_operators.yaml": [ |
| "selected_operators.yaml", |
| ], |
| } |
| |
| def get_pt_compiler_flags(): |
| return PT_COMPILER_FLAGS |
| |
| def get_aten_preprocessor_flags(): |
| # read_config is not allowed outside of a function in Starlark |
| ATEN_PREPROCESSOR_FLAGS = [ |
| "-DC10_MOBILE", |
| "-DCPU_CAPABILITY_DEFAULT", |
| "-DCPU_CAPABILITY=DEFAULT", |
| "-DCAFFE2_USE_LITE_PROTO", |
| "-DATEN_CUDNN_ENABLED_FBXPLAT=0", |
| "-DATEN_MKLDNN_ENABLED_FBXPLAT=0", |
| "-DATEN_NNPACK_ENABLED_FBXPLAT=0", |
| "-DATEN_MKL_ENABLED_FBXPLAT=0", |
| "-DATEN_MKL_SEQUENTIAL_FBXPLAT=0", |
| "-DUSE_PYTORCH_METAL", |
| "-DUSE_PYTORCH_QNNPACK", |
| "-DUSE_XNNPACK", |
| "-DNO_EXPORT", |
| "-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION", |
| "-DAT_PARALLEL_OPENMP_FBXPLAT=0", |
| "-DAT_PARALLEL_NATIVE_FBXPLAT=1", |
| "-DAT_PARALLEL_NATIVE_TBB_FBXPLAT=0", |
| "-DUSE_LAPACK_FBXPLAT=0", |
| "-DAT_BLAS_F2C_FBXPLAT=0", |
| "-DAT_BLAS_USE_CBLAS_DOT_FBXPLAT=0", |
| "-DUSE_RUY_QMATMUL", # need third_party:ruy |
| ] |
| |
| # if get_disable_per_op_profiling(): |
| ATEN_PREPROCESSOR_FLAGS.append("-DPYTORCH_DISABLE_PER_OP_PROFILING") |
| return ATEN_PREPROCESSOR_FLAGS |
| |
| TEMPLATE_SOURCE_LIST = [ |
| "torch/csrc/jit/runtime/register_prim_ops.cpp", |
| "torch/csrc/jit/runtime/register_special_ops.cpp", |
| ] + aten_native_source_list |
| |
| # For selective build, we can lump the CPU and CPU kernel sources together |
| # because only one vectorization variant is ever compiled |
| def aten_ufunc_generated_all_cpu_sources(gencode_pattern = "{}"): |
| return ( |
| aten_ufunc_generated_cpu_sources(gencode_pattern) + |
| aten_ufunc_generated_cpu_kernel_sources(gencode_pattern) |
| ) |
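| |
| # For example, copy_template_registration_files below calls |
| # aten_ufunc_generated_all_cpu_sources("$(location :gen_aten[{}])") |
| # so that each generated base name becomes a Buck location macro pointing at |
| # the :gen_aten outputs; with the default pattern the bare names are returned. |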
| |
| def get_template_registration_files_outs(): |
| outs = {} |
| |
| for file_path in TEMPLATE_SOURCE_LIST: |
| outs[file_path] = [file_path] |
| |
| for base_name in aten_ufunc_generated_all_cpu_sources(): |
| file_path = "aten/src/ATen/{}".format(base_name) |
| outs[file_path] = [file_path] |
| |
| return outs |
| |
| def get_pt_preprocessor_flags(): |
| # read_config is not allowed outside of a function in Starlark |
| PT_PREPROCESSOR_FLAGS = [ |
| "-D_THP_CORE", |
| "-DC10_MOBILE", |
| "-DUSE_SCALARS", |
| "-DNO_CUDNN_DESTROY_HANDLE", |
| "-DNO_EXPORT", |
| "-DBUILD_CAFFE2", |
| ] |
| return PT_PREPROCESSOR_FLAGS |
| |
| def is_arvr_mode(): |
| return False |
| |
| def get_build_from_deps_query(): |
| build_from_query = native.read_config("pt", "build_from_deps_query", "1") |
| return bool(int(build_from_query)) |
| |
| def get_enable_lightweight_dispatch(): |
| enable_lightweight_dispatch = native.read_config("pt", "enable_lightweight_dispatch", "0") |
| return bool(int(enable_lightweight_dispatch)) |
| |
| def get_static_dispatch_backend(): |
| static_dispatch_backend = native.read_config("pt", "static_dispatch_backend", None) |
| if static_dispatch_backend == None: |
| return [] |
| return static_dispatch_backend.split(";") |
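| |
| # These helpers are driven by Buck configuration. A sketch of an illustrative |
| # invocation (hypothetical target): |
| # buck build -c pt.static_dispatch_backend="CPU;QuantizedCPU" \ |
| # -c pt.enable_lightweight_dispatch=1 //some:target |
| # would make get_static_dispatch_backend() return ["CPU", "QuantizedCPU"]. |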
| |
| def get_aten_codegen_extra_params(backends): |
| if get_build_from_deps_query(): |
| extra_params = { |
| "force_schema_registration": True, |
| } |
| static_backends = get_static_dispatch_backend() |
| if static_backends: |
| extra_params["static_dispatch_backend"] = static_backends |
| extra_params["enabled_backends"] = static_backends |
| else: |
| extra_params["enabled_backends"] = backends |
| return extra_params |
| else: |
| return {} |
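| |
| # Sketch of the result: assuming build-from-deps-query is on (the default) and |
| # no static dispatch backend is configured, |
| # get_aten_codegen_extra_params(["CPU", "QuantizedCPU"]) |
| # returns |
| # {"force_schema_registration": True, "enabled_backends": ["CPU", "QuantizedCPU"]} |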
| |
| def gen_aten_files( |
| name, |
| extra_flags = {}, |
| visibility = [], |
| compatible_with = []): |
| extra_params = [] |
| force_schema_registration = extra_flags.get("force_schema_registration", False) |
| op_registration_allowlist = extra_flags.get("op_registration_allowlist", None) |
| op_selection_yaml_path = extra_flags.get("op_selection_yaml_path", None) |
| enabled_backends = extra_flags.get("enabled_backends", None) |
| static_dispatch_backend = extra_flags.get("static_dispatch_backend", None) |
| |
| if force_schema_registration: |
| extra_params.append("--force_schema_registration") |
| if op_registration_allowlist != None and is_string(op_registration_allowlist): |
| extra_params.append("--op_registration_whitelist") |
| extra_params.append(op_registration_allowlist) |
| if op_selection_yaml_path != None and is_string(op_selection_yaml_path): |
| extra_params.append("--op_selection_yaml_path") |
| extra_params.append(op_selection_yaml_path) |
| if enabled_backends != None and is_list(enabled_backends): |
| extra_params.append("--backend_whitelist") |
| extra_params.extend(enabled_backends) |
| if get_enable_lightweight_dispatch(): |
| extra_params.append("--skip_dispatcher_op_registration") |
| if static_dispatch_backend: |
| extra_params.append("--static_dispatch_backend") |
| extra_params.extend(static_dispatch_backend) |
| backends = static_dispatch_backend |
| else: |
| backends = enabled_backends |
| fb_xplat_genrule( |
| name = name, |
| default_outs = ["."], |
| outs = get_aten_generated_files(backends), |
| cmd = "$(exe //torchgen:gen) " + " ".join([ |
| "--source-path $(location //:aten_src_path)/aten/src/ATen", |
| "--install_dir $OUT", |
| ] + extra_params), |
| visibility = visibility, |
| compatible_with = compatible_with, |
| ) |
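| |
| # A minimal call sketch (pt_operator_query_codegen below also passes an |
| # op_selection_yaml_path when doing selective build): |
| # gen_aten_files( |
| # name = "gen_aten", # hypothetical rule name |
| # extra_flags = {"enabled_backends": USED_PT_BACKENDS}, |
| # ) |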
| |
| def get_aten_generated_files(enabled_backends): |
| # NB: RegisterMeta counts as an optionally enabled backend, |
| # and is intentionally omitted from here |
| src_files = [ |
| "RegisterBackendSelect.cpp", |
| "RegisterCompositeImplicitAutograd.cpp", |
| "RegisterCompositeExplicitAutograd.cpp", |
| "CompositeViewCopyKernels.cpp", |
| "RegisterSchema.cpp", |
| "Declarations.yaml", |
| "Functions.cpp", |
| "Functions.h", |
| "RedispatchFunctions.h", |
| "NativeFunctions.h", |
| "NativeMetaFunctions.h", |
| "MethodOperators.h", |
| "FunctionalInverses.h", |
| "Operators.h", |
| "Operators_0.cpp", |
| "Operators_1.cpp", |
| "Operators_2.cpp", |
| "Operators_3.cpp", |
| "Operators_4.cpp", |
| "CompositeImplicitAutogradFunctions.h", |
| "CompositeImplicitAutogradFunctions_inl.h", |
| "CompositeExplicitAutogradFunctions.h", |
| "CompositeExplicitAutogradFunctions_inl.h", |
| "core/ATenOpList.cpp", |
| "core/TensorBody.h", |
| "core/TensorMethods.cpp", |
| "core/aten_interned_strings.h", |
| ] + get_aten_derived_type_srcs(enabled_backends) |
| |
| # This is tiresome. A better strategy would be to unconditionally |
| # generate these files, and then only actually COMPILE them depending |
| # on the generated set. C'est la vie... |
| if "CPU" in enabled_backends: |
| src_files.extend(aten_ufunc_generated_cpu_sources()) |
| src_files.extend(aten_ufunc_generated_cpu_kernel_sources()) |
| if "CUDA" in enabled_backends: |
| # Cannot unconditionally include this, because in the Edge selective |
| # build CUDA is not enabled and thus the ufunc codegen for CUDA gets |
| # skipped |
| src_files.extend(aten_ufunc_generated_cuda_sources()) |
| |
| res = {} |
| for file_name in src_files: |
| res[file_name] = [file_name] |
| return res |
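| |
| # The returned dict maps each generated file to itself as a named genrule |
| # output, e.g. {"RegisterSchema.cpp": ["RegisterSchema.cpp"], ...}, so callers |
| # can address individual outputs as :rule_name[RegisterSchema.cpp]. |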
| |
| def get_template_registration_file_rules(rule_name): |
| rules = [] |
| for file_path in TEMPLATE_SOURCE_LIST: |
| rules.append(":{}[{}]".format(rule_name, file_path)) |
| for file_path in aten_ufunc_generated_all_cpu_sources(): |
| rules.append(":{}[aten/src/ATen/{}]".format(rule_name, file_path)) |
| |
| return rules |
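| |
| # For instance (sketch, with a hypothetical rule name "gen_rule"), this yields |
| # entries such as: |
| # ":gen_rule[torch/csrc/jit/runtime/register_prim_ops.cpp]" |
| # ":gen_rule[aten/src/ATen/<generated ufunc source>]" |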
| |
| # Originally, there were two sets of code in caffe2:aten_cpu: native and non-native. |
| # Now only the non-native sources remain in aten_cpu, but some ATen-related tests |
| # may require both native and non-native code. This rule is used to build both |
| # aten_cpu and aten_native_cpu, which share the same compilation setup. |
| def build_aten_cpu(name, srcs, deps = []): |
| cxx_library( |
| name = name, |
| srcs = srcs, |
| header_namespace = "", |
| compiler_flags = get_pt_compiler_flags(), |
| exported_preprocessor_flags = get_aten_preprocessor_flags(), |
| link_whole = True, |
| linker_flags = ["-Wl,--no-as-needed", "-ldl"], |
| visibility = ["PUBLIC"], |
| deps = [ |
| "//third_party:cpuinfo", |
| "//third_party:glog", |
| "//third_party:XNNPACK", |
| #"//third_party/linker_lib:omp", |
| ], |
| exported_deps = [ |
| "//third_party:fmt", |
| "//aten/src/ATen/native/quantized/cpu/qnnpack:pytorch_qnnpack", |
| "//c10:c10", |
| ":aten_header", |
| ":caffe2_headers", |
| ":common_core", |
| ":generated_aten_config_header", |
| ":generated_aten_headers_cpu", |
| ":jit_core_headers", |
| ":pthreadpool", |
| ":th_header", |
| "//third_party:ruy_lib", |
| ], |
| ) |
| |
| ######### selective build ######### |
| |
| def get_pt_ops_deps(name, deps, train = False, enforce_traced_op_list = False, enable_flatbuffer = False, **kwargs): |
| if not get_build_from_deps_query(): |
| return deps |
| pt_operator_registry( |
| name, |
| deps, |
| train = train, |
| enforce_traced_op_list = enforce_traced_op_list, |
| enable_flatbuffer = enable_flatbuffer, |
| **kwargs |
| ) |
| return deps + [":" + name] |
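| |
| # A minimal usage sketch (hypothetical target names): |
| # deps = get_pt_ops_deps( |
| # name = "pt_ops_full", |
| # deps = ["//models:my_model_ops"], # pt_operator_library targets |
| # train = False, |
| # ) |
| # When build-from-deps-query is on, this defines :pt_ops_full via |
| # pt_operator_registry and appends it to the returned deps. |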
| |
| # pt_operator_registry is the method that defines the fb_xplat_cxx_library that contains |
| # code for all selected PyTorch Operators and kernel functions. This also includes |
| # operator registration into the dispatcher. |
| # |
| # template_select: bool: Indicates whether template-based selective build is enabled. |
| # |
| # enforce_traced_op_list: bool: Enforces that only new-style operator |
| # lists, based on the all_mobile_model_configs.yaml file and tracing-based |
| # selective build, are used in this library. |
| # |
| # train: bool: Build this library for training (True) or inference only (False). |
| # If built for training, codegen for VariableType is also included. |
| # |
| # pt_allow_forced_schema_registration: bool: When False, manually disables forced |
| # schema registration. Defaults to True. Only has an effect when train = True: if |
| # the app requires full JIT, force_schema_registration needs to occur. As Federated |
| # Learning migrates to the lite interpreter, we can gradually turn off forced schema |
| # registration, since it wastes space and floods the compatibility API. |
| # |
| def pt_operator_registry( |
| name, |
| deps = [], |
| train = False, |
| labels = [], |
| env = [], |
| template_select = True, |
| enforce_traced_op_list = False, |
| pt_allow_forced_schema_registration = True, |
| enable_flatbuffer = False, |
| **kwargs): |
| compatible_with = kwargs.get("compatible_with", []) |
| code_gen_files = pt_operator_query_codegen(name, deps = deps, train = train, enforce_traced_op_list = enforce_traced_op_list, pt_allow_forced_schema_registration = pt_allow_forced_schema_registration, compatible_with = compatible_with) |
| code_gen_srcs = code_gen_files["srcs"] |
| |
| lib_deps = [ |
| ":aten_cpu", |
| ":torch_mobile_core", |
| "//c10:c10", |
| "//third_party:glog", |
| ] |
| |
| #if train: |
| # lib_deps = lib_deps + ["fbsource//xplat/caffe2:torch_mobile_train"] |
| |
| exported_preprocessor_flags = get_aten_preprocessor_flags() |
| exported_preprocessor_flags += kwargs.pop("exported_preprocessor_flags", []) |
| if template_select: |
| # In addition to the original code-gen selection, this option filters out |
| # additional operators based on compile-time calculation. Examples include prim |
| # ops and any other ops that were not filtered out before. The purpose of this |
| # option is to further reduce production size. However, it offers less |
| # flexibility, especially for tests driven from Python, where the used-operator |
| # list is not explicitly generated. If the tests are for functionality rather |
| # than size, and it is difficult to maintain an explicit operator list, it is |
| # suggested to turn this option off. |
| exported_preprocessor_flags.append("-DTEMPLATE_SELECTIVE_BUILD") |
| kwargs.pop("exported_headers", []) |
| cxx_library( |
| name = name, |
| srcs = code_gen_srcs, |
| linker_flags = [ |
| "-Wl,--no-as-needed", |
| "-ldl", |
| ], |
| link_whole = True, |
| soname = "libtorch-code-gen.$(ext)", |
| compiler_flags = get_aten_compiler_flags(), |
| platform_compiler_flags = get_cpukernel_avx2_flags(), |
| platform_deps = get_cpukernel_avx2_deps(), |
| header_namespace = "ATen", |
| exported_headers = code_gen_files["headers"], |
| exported_preprocessor_flags = exported_preprocessor_flags, |
| headers = kwargs.pop("headers", []), |
| deps = lib_deps + [ |
| "//third_party:XNNPACK", |
| ], |
| **kwargs |
| ) |
| |
| def get_aten_derived_type_src_rules(aten_rule_name, enabled_backends): |
| return [ |
| ":{}[{}]".format(aten_rule_name, "Register" + backend + ".cpp") |
| for backend in enabled_backends |
| ] |
| |
| def get_aten_selective_cpp_rules(aten_rule_name, enabled_backends): |
| return [ |
| ":{}[{}]".format(aten_rule_name, f) |
| for f in ["RegisterCompositeImplicitAutograd.cpp", "RegisterCompositeExplicitAutograd.cpp", "RegisterSchema.cpp", "RegisterBackendSelect.cpp", "CompositeViewCopyKernels.cpp"] |
| ] + get_aten_derived_type_src_rules(aten_rule_name, enabled_backends) |
| |
| def get_aten_derived_type_srcs(enabled_backends): |
| return [ |
| "Register" + derived_type + ".cpp" |
| for derived_type in enabled_backends |
| ] + [ |
| derived_type + "Functions.h" |
| for derived_type in enabled_backends |
| if derived_type in PT_BACKEND_HEADERS or derived_type in get_static_dispatch_backend() |
| ] + [ |
| derived_type + "Functions_inl.h" |
| for derived_type in enabled_backends |
| if derived_type in PT_BACKEND_HEADERS or derived_type in get_static_dispatch_backend() |
| ] |
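| |
| # For example (sketch, assuming no static dispatch backend is configured): |
| # get_aten_derived_type_srcs(["CPU", "QuantizedCPU"]) returns |
| # ["RegisterCPU.cpp", "RegisterQuantizedCPU.cpp", "CPUFunctions.h", |
| # "CPUFunctions_inl.h"]; QuantizedCPU gets no headers because it is not in |
| # PT_BACKEND_HEADERS. |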
| |
| def pt_operator_query_codegen(name, deps = [], train = False, enforce_traced_op_list = False, pt_allow_forced_schema_registration = True, compatible_with = []): |
| oplist_dir_name = name + "_pt_oplist" |
| |
| # @lint-ignore BUCKLINT |
| fb_xplat_genrule( |
| name = oplist_dir_name, |
| cmd = ("$(exe //:gen_oplist) " + |
| "--model_file_list_path $(@query_outputs 'attrfilter(labels, pt_operator_library, deps(set({deps})))') " + |
| ("" if enforce_traced_op_list else "--allow_include_all_overloads ") + |
| "--output_dir $OUT ").format(deps = " ".join(["\"{}\"".format(d) for d in deps])), |
| outs = get_gen_oplist_outs(), |
| default_outs = ["."], |
| compatible_with = compatible_with, |
| ) |
| |
| # Aten files |
| aten_genrule = name + "_aten" |
| extra_flags = { |
| "enabled_backends": USED_PT_BACKENDS, |
| "op_selection_yaml_path": "$(location :{}[selected_operators.yaml])".format(oplist_dir_name), |
| } |
| |
| if train and pt_allow_forced_schema_registration: |
| extra_flags["force_schema_registration"] = True |
| |
| # if get_enable_lightweight_dispatch(): |
| # unboxing_genrule = name + "_unboxing" |
| # gen_aten_unboxing_files( |
| # unboxing_genrule, |
| # extra_flags = extra_flags, |
| # ) |
| |
| static_dispatch_backend = get_static_dispatch_backend() |
| if static_dispatch_backend: |
| extra_flags["static_dispatch_backend"] = static_dispatch_backend |
| |
| gen_aten_files( |
| aten_genrule, |
| extra_flags = extra_flags, |
| compatible_with = compatible_with, |
| ) |
| |
| # unboxing_wrappers files |
| extra_params = [ |
| "--operators_yaml_path", |
| "$(location :" + oplist_dir_name + "[selected_operators.yaml])", |
| ] |
| unboxing_and_autograd_genrule = name + "_unboxing_and_autograd" |
| gen_aten_libtorch_files(unboxing_and_autograd_genrule, extra_params, compatible_with) |
| |
| # Template runtime files (prim ops, etc) |
| template_registration_genrule = name + "_template_registration" |
| copy_template_registration_files(template_registration_genrule) |
| |
| srcs = get_aten_selective_cpp_rules( |
| aten_genrule, |
| static_dispatch_backend if static_dispatch_backend else USED_PT_BACKENDS, |
| ) + get_template_registration_file_rules( |
| template_registration_genrule, |
| ) + ([ |
| ":{}[autograd/generated/VariableType_0.cpp]".format(unboxing_and_autograd_genrule), |
| ":{}[autograd/generated/VariableType_1.cpp]".format(unboxing_and_autograd_genrule), |
| ":{}[autograd/generated/VariableType_2.cpp]".format(unboxing_and_autograd_genrule), |
| ":{}[autograd/generated/VariableType_3.cpp]".format(unboxing_and_autograd_genrule), |
| ":{}[autograd/generated/VariableType_4.cpp]".format(unboxing_and_autograd_genrule), |
| ":{}[autograd/generated/ADInplaceOrViewType_0.cpp]".format(unboxing_and_autograd_genrule), |
| ":{}[autograd/generated/ADInplaceOrViewType_1.cpp]".format(unboxing_and_autograd_genrule), |
| ] if train else []) + ([ |
| #":{}[SupportedMobileModelsRegistration.cpp]".format(oplist_dir_name), |
| ]) |
| |
| headers = { |
| "selected_mobile_ops.h": ":{}[selected_mobile_ops.h]".format(oplist_dir_name), |
| } |
| |
| # if get_enable_lightweight_dispatch(): |
| # srcs.extend([ |
| # ":{}[UnboxingFunctions_0.cpp]".format(unboxing_genrule), |
| # ":{}[UnboxingFunctions_1.cpp]".format(unboxing_genrule), |
| # ":{}[UnboxingFunctions_2.cpp]".format(unboxing_genrule), |
| # ":{}[UnboxingFunctions_3.cpp]".format(unboxing_genrule), |
| # ":{}[UnboxingFunctions_4.cpp]".format(unboxing_genrule), |
| # ":{}[RegisterCodegenUnboxedKernels_0.cpp]".format(unboxing_genrule), |
| # ":{}[RegisterCodegenUnboxedKernels_1.cpp]".format(unboxing_genrule), |
| # ":{}[RegisterCodegenUnboxedKernels_2.cpp]".format(unboxing_genrule), |
| # ":{}[RegisterCodegenUnboxedKernels_3.cpp]".format(unboxing_genrule), |
| # ":{}[RegisterCodegenUnboxedKernels_4.cpp]".format(unboxing_genrule), |
| # ":{}[RegisterCodegenUnboxedKernels_5.cpp]".format(unboxing_genrule), |
| # ":{}[RegisterCodegenUnboxedKernels_6.cpp]".format(unboxing_genrule), |
| # ":{}[RegisterCodegenUnboxedKernels_7.cpp]".format(unboxing_genrule), |
| # ":{}[RegisterCodegenUnboxedKernels_8.cpp]".format(unboxing_genrule), |
| # ":{}[RegisterCodegenUnboxedKernels_9.cpp]".format(unboxing_genrule), |
| # ]) |
| # headers["UnboxingFunctions.h"] = ":{}[UnboxingFunctions.h]".format(unboxing_genrule) |
| return {"headers": headers, "srcs": srcs} |
| |
| def gen_aten_libtorch_files(name, extra_params = [], compatible_with = []): |
| fb_xplat_genrule( |
| name = name, |
| outs = get_generate_code_bin_outs(), |
| default_outs = ["."], |
| cmd = "mkdir -p tools && " + |
| "$(exe //tools/setup_helpers:generate_code_bin) " + " ".join( |
| # Mobile build only needs libtorch - skip python bindings for now, except |
| # for ovrsource, which needs Python bindings. |
| (["--subset libtorch"] if not is_arvr_mode() else []) + [ |
| "--native-functions-path $(location :aten_src_path)/aten/src/ATen/native/native_functions.yaml", |
| "--tags-path $(location :aten_src_path)/aten/src/ATen/native/tags.yaml", # todo D35992309 |
| "--install_dir $OUT", |
| ] + extra_params, |
| ), |
| cmd_exe = "@powershell -Command New-Item -Path tools -ItemType Directory -Force; " + |
| "$(exe //tools/setup_helpers:generate_code_bin) " + " ".join( |
| # Mobile build only needs libtorch - skip python bindings for now, except |
| # for ovrsource, which needs Python bindings. |
| (["--subset libtorch"] if not is_arvr_mode() else []) + [ |
| "--native-functions-path $(location :aten_src_path)/aten/src/ATen/native/native_functions.yaml", |
| "--tags-path $(location :aten_src_path)/aten/src/ATen/native/tags.yaml", |
| "--install_dir $OUT", |
| ] + extra_params, |
| ), |
| compatible_with = compatible_with, |
| ) |
| |
| def copy_template_registration_files(name): |
| cmd = [] |
| cmd_exe = [] |
| |
| template_source_dict = get_template_source_dict() |
| |
| # Ideally, we would run one copy command for a single source directory along |
| # with all its child directories, but it's somewhat hard to know whether a |
| # directory is a child of another just by looking at the metadata (directory |
| # relative path) that we currently have, since one directory could look like |
| # a parent of another and yet come from a different filegroup() rule. |
| # |
| for (path_prefix, file_paths) in template_source_dict.items(): |
| cmd.append("mkdir -p $OUT/{}".format(path_prefix)) |
| cmd_exe.append("md $OUT/{}".format(path_prefix)) |
| |
| # Adding *.cpp is a workaround to prevent cp from throwing an error when it |
| # encounters a directory (since -r was not specified). If files with an |
| # extension other than .cpp need to be copied, then the command below |
| # will not work and will need to be updated. |
| # |
| cmd.append("cp -f {0}/{1}/*.cpp $OUT/{1}/".format("$(location :templated_selective_build_srcs)", path_prefix)) |
| cmd_exe.append("robocopy /E {0}/{1} $OUT/{1}".format("$(location :templated_selective_build_srcs)", path_prefix)) |
| |
| cmd.append("mkdir -p $OUT/aten/src/ATen") |
| cmd_exe.append("md $OUT/aten/src/ATen") |
| |
| # NB: CUDA is skipped here because this is selective build and CUDA is not |
| # supported for selective build |
| for ufunc_file in aten_ufunc_generated_all_cpu_sources("$(location :gen_aten[{}])"): |
| cmd.append("cp -f " + ufunc_file + " $OUT/aten/src/ATen") |
| cmd_exe.append("copy " + ufunc_file + " $OUT/aten/src/ATen") |
| |
| fb_xplat_genrule( |
| name = name, |
| cmd = " && ".join(cmd), |
| cmd_exe = "@powershell -Command " + ("; ".join(cmd_exe)), |
| outs = get_template_registration_files_outs(), |
| default_outs = ["."], |
| ) |
| |
| def pt_operator_library( |
| name, |
| ops = [], |
| exported_deps = [], |
| check_decl = True, |
| train = False, |
| model = None, |
| include_all_operators = False, |
| **kwargs): |
| model_name = name |
| |
| if get_build_from_deps_query(): |
| ops = [op.strip() for op in ops] |
| |
| # If ops are specified, then we are in static selective build mode, so we append |
| # base ops to this list to avoid additional special case logic in subsequent code. |
| if len(ops) > 0: |
| ops.extend(PT_BASE_OPS) |
| |
| visibility = kwargs.pop("visibility", ["PUBLIC"]) |
| |
| fb_xplat_genrule( |
| name = name, |
| out = "model_operators.yaml", |
| cmd = ( |
| "$(exe :gen_operators_yaml) " + |
| "{optionally_root_ops} " + |
| "{optionally_training_root_ops} " + |
| "--rule_name {rule_name} " + |
| "--output_path \"${{OUT}}\" " + |
| "--model_name {model_name} " + |
| "--dep_graph_yaml_path pytorch_op_deps.yaml " + |
| "--models_yaml_path all_mobile_model_configs.yaml " + |
| #"{optionally_model_versions} " + |
| #"{optionally_model_assets} " + |
| #"{optionally_model_traced_backends} " + |
| "{optionally_include_all_operators}" |
| ).format( |
| rule_name = name, |
| model_name = model_name, |
| optionally_root_ops = "--root_ops " + (",".join(ops)) if len(ops) > 0 else "", |
| optionally_training_root_ops = "--training_root_ops " + (",".join(ops)) if len(ops) > 0 and train else "", |
| #optionally_model_versions = "--model_versions " + (",".join(model_versions)) if model_versions != None else "", |
| #optionally_model_assets = "--model_assets " + (",".join(model_assets)) if model_assets != None else "", |
| #optionally_model_traced_backends = "--model_traced_backends " + (",".join(model_traced_backends)) if model_traced_backends != None else "", |
| optionally_include_all_operators = "--include_all_operators " if include_all_operators else "", |
| ), |
| labels = ["pt_operator_library"], # for pt_operator_query_codegen query |
| visibility = visibility, |
| **kwargs |
| ) |
| else: |
| if check_decl: |
| pass |
| # ensure_ops_are_declared(ops) |
| |
| cxx_library( |
| name = name, |
| compiler_flags = get_pt_compiler_flags(), |
| cxx_platform_compiler_flags = get_cpukernel_avx2_flags(), |
| exported_deps = exported_deps, |
| **kwargs |
| ) |
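| |
| # A minimal usage sketch (hypothetical name; the op list is illustrative): |
| # pt_operator_library( |
| # name = "my_model_ops", |
| # ops = [ |
| # "aten::add", |
| # "aten::mul", |
| # ], |
| # ) |
| # The generated model_operators.yaml is consumed by pt_operator_query_codegen |
| # above via the pt_operator_library label query. |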
| |
| def compose_platform_setting_list(settings): |
| """Settings object: |
| os/cpu pair: should be valid key, or at most one part can be wildcard. |
| flags: the values added to the compiler flags |
| """ |
| result = [] |
| for setting in settings: |
| result = result.append([ |
| "^{}-{}$".format(setting["os"], setting["cpu"]), |
| setting["flags"], |
| ]) |
| return result |
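| |
| # Sketch, reusing the AVX2 example from the commented-out code below: |
| # compose_platform_setting_list([{ |
| # "cpu": "x86_64", |
| # "flags": ["-DHAVE_AVX2_CPU_DEFINITION"], |
| # "os": "macosx", |
| # }]) |
| # returns [["^macosx-x86_64$", ["-DHAVE_AVX2_CPU_DEFINITION"]]]. |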
| |
| def get_cpukernel_avx2_flags(): |
| # flags = compose_platform_setting_list([ |
| # { |
| # "cpu": "x86_64", |
| # "flags": ["-DHAVE_AVX2_CPU_DEFINITION"], |
| # "os": "macosx", |
| # }, |
| # ]) if build_cpukernel_avx2() else [] |
| return [] |
| |
| def build_cpukernel_avx2(): |
| return not is_arvr_mode() |
| |
| def get_cpukernel_avx2_deps(): |
| # flags = compose_platform_setting_list([ |
| # { |
| # "cpu": "x86_64", |
| # "flags": ["fbsource//xplat/caffe2:cpukernel_avx2"], |
| # "os": "macosx", |
| # }, |
| # ]) if build_cpukernel_avx2() else [] |
| return [] |