blob: ccd3e3b7d847f557afcc02adeaa9c63bc1b5779d [file] [log] [blame]
# Owner(s): ["module: inductor"]
import sys
import unittest
from typing import NamedTuple
import torch._dynamo
from torch._inductor import config
from torch.testing._internal.common_utils import (
IS_MACOS,
slowTest,
TEST_WITH_ASAN,
TEST_WITH_ROCM,
TestCase as TorchTestCase,
)
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
try:
try:
from . import (
test_cpu_repro,
test_foreach,
test_mkldnn_pattern_matcher,
test_torchinductor,
test_torchinductor_dynamic_shapes,
)
except ImportError:
import test_cpu_repro
import test_foreach
import test_mkldnn_pattern_matcher
import test_torchinductor
import test_torchinductor_dynamic_shapes
except unittest.SkipTest:
if __name__ == "__main__":
sys.exit(0)
raise
RUN_CPU = HAS_CPU and not torch.backends.mps.is_available() and not IS_MACOS
RUN_CUDA = HAS_CUDA and not TEST_WITH_ASAN and not TEST_WITH_ROCM
class CppWrapperTemplate:
pass
class CudaWrapperTemplate:
pass
class TestCppWrapper(TorchTestCase):
device = "cpu"
class DynamicShapesCppWrapperCpuTests(TorchTestCase):
device = "cpu"
class TestCudaWrapper(TorchTestCase):
device = "cuda"
class DynamicShapesCudaWrapperCudaTests(TorchTestCase):
device = "cuda"
# conv2d will fallback for dynamic shapes; the fallback path is not yet supported
test_failures_cpp_wrapper = {
"test_conv2d_unary_cpu_dynamic_shapes": test_torchinductor.TestFailure(
("cpp_wrapper",), is_skip=True
),
}
def make_test_case(name, device, tests, condition=True, slow=False):
test_name = f"{name}_{device}" if device else name
@config.patch(cpp_wrapper=True, search_autotune_cache=False)
def fn(self):
tests.setUpClass()
tests.setUp()
try:
func = getattr(tests, test_name)
assert callable(func), "not a callable"
func = slowTest(func) if slow else func
code = test_torchinductor.run_and_get_cpp_code(func)
self.assertEqual("load_inline" in code, True)
finally:
tests.tearDown()
tests.tearDownClass()
fn.__name__ = test_name
if condition:
setattr(
CppWrapperTemplate if device == "cpu" else CudaWrapperTemplate,
test_name,
fn,
)
if RUN_CPU:
class BaseTest(NamedTuple):
name: str
device: str = "cpu"
tests: TorchTestCase = test_torchinductor.CpuTests()
condition: bool = True
slow: bool = False
for item in [
BaseTest("test_as_strided"), # buffer reuse
BaseTest("test_bitwise"), # int32
BaseTest("test_bmm1"),
BaseTest("test_bmm2"),
BaseTest("test_cat"), # alias
BaseTest(
"test_conv2d_unary",
"cpu",
test_mkldnn_pattern_matcher.TestPaternMatcher(),
condition=torch._C.has_mkldnn,
slow=True,
),
BaseTest("test_dtype_sympy_expr"),
BaseTest("test_embedding_bag"), # test default FallbackKernel
BaseTest("test_index_put_deterministic_fallback"),
BaseTest("test_int_div", "", test_cpu_repro.CPUReproTests()),
BaseTest("test_linear1"),
BaseTest("test_linear2"),
BaseTest(
"test_linear_binary",
"",
test_mkldnn_pattern_matcher.TestPaternMatcher(),
torch._C.has_mkldnn and torch.ops.mkldnn._is_mkldnn_bf16_supported(),
),
BaseTest("test_linear_packed", "", test_cpu_repro.CPUReproTests()),
BaseTest("test_mm_views"),
BaseTest("test_profiler_mark_wrapper_call"),
BaseTest("test_reduction1"), # Reduction
BaseTest("test_relu"), # multiple inputs
BaseTest("test_repeat_interleave", "", test_cpu_repro.CPUReproTests()),
BaseTest("test_scalar_input"),
BaseTest("test_silu"), # single input, single output
BaseTest("test_sort"),
BaseTest("test_sum_dtype"), # float64
BaseTest("test_sum_int"), # bool, int64, int8, uint8
BaseTest("test_transpose"), # multiple outputs, buffer clear
]:
make_test_case(item.name, item.device, item.tests, item.condition, item.slow)
test_torchinductor.copy_tests(CppWrapperTemplate, TestCppWrapper, "cpp_wrapper")
DynamicShapesCppWrapperTemplate = (
test_torchinductor_dynamic_shapes.make_dynamic_cls(CppWrapperTemplate)
)
test_torchinductor.copy_tests(
DynamicShapesCppWrapperTemplate,
DynamicShapesCppWrapperCpuTests,
"cpp_wrapper",
test_failures_cpp_wrapper,
)
if RUN_CUDA:
class BaseTest(NamedTuple):
name: str
device: str = "cuda"
tests: TorchTestCase = test_torchinductor.CudaTests()
# Maintain two separate test lists for cuda and cpp for now
for item in [
BaseTest("test_as_strided"), # buffer reuse
BaseTest("test_bitwise"), # int32
BaseTest("test_bmm1"),
BaseTest("test_bmm2"),
BaseTest("test_cat"), # alias
BaseTest("test_convolution1"),
BaseTest("test_conv_backward"),
BaseTest("test_embedding_bag"), # test default FallbackKernel
BaseTest("test_index_put_deterministic_fallback"),
BaseTest("test_linear1"),
# BaseTest("test_linear2"),
BaseTest("test_mm_views"),
BaseTest("test_multi_device"),
BaseTest("test_profiler_mark_wrapper_call"),
BaseTest("test_reduction1"), # Reduction
BaseTest("test_relu"), # multiple inputs
BaseTest("test_scalar_input"),
BaseTest("test_sort"),
BaseTest("test_silu"), # single input, single output
BaseTest("test_sum_dtype"), # float64
BaseTest("test_sum_int"), # bool, int64, int8, uint8
BaseTest("test_transpose"), # multiple outputs, buffer clear
BaseTest(
"test_foreach_cpp_wrapper",
device=None,
tests=test_foreach.ForeachTests(),
), # test foreach
]:
make_test_case(item.name, item.device, item.tests)
test_torchinductor.copy_tests(CudaWrapperTemplate, TestCudaWrapper, "cuda_wrapper")
DynamicShapesCudaWrapperTemplate = (
test_torchinductor_dynamic_shapes.make_dynamic_cls(CudaWrapperTemplate)
)
test_torchinductor.copy_tests(
DynamicShapesCudaWrapperTemplate,
DynamicShapesCudaWrapperCudaTests,
"cuda_wrapper",
)
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
if RUN_CPU or RUN_CUDA:
run_tests(needs="filelock")