| # Owner(s): ["module: inductor"] |
| import functools |
| import re |
| import sys |
| import unittest |
| from importlib.machinery import SourceFileLoader |
| from pathlib import Path |
| from unittest import mock |
| |
| import torch |
| import torch.nn as nn |
| from torch import _inductor as inductor |
| from torch._dynamo import compiled_autograd |
| from torch._dynamo.test_case import run_tests, TestCase |
| from torch._dynamo.utils import counters |
| from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA |
| |
# Note: these tests are not run on Windows because inductor_utils.HAS_CPU is False there.
| |
| |
| def compiler_fn(gm): |
| """Same as torch.compile() but counts number of compiles""" |
| |
| def inner_compiler(gm_, example_inputs_): |
| counters["compiled_autograd"]["compiles"] += 1 |
| return inductor.compile(gm_, example_inputs_) |
| |
| return torch.compile(gm, backend=inner_compiler, fullgraph=True, dynamic=True) |
| |
| |
# TODO(jansel): hooks as lambdas create recompiles in dynamo; we should fix that
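# Tensor hook (Tensor.register_hook): doubles the incoming gradient.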
| def hook1(grad): |
| return grad * 2 |
| |
| |
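# Autograd-node pre-hook (Node.register_prehook): rewrites the grad_outputs tuple.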
| def hook2(grads): |
| return (grads[0] + 1,) |
| |
| |
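# Autograd-node post-hook (Node.register_hook): takes (grad_inputs, grad_outputs)
# and returns replacement grad_inputs.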
| def hook3(gI, gO): |
| return (torch.sin(gI[0]) + gO[0],) |
| |
| |
| class TestCompiledAutograd(TestCase): |
| def check_output_and_recompiles(self, fn, count=1): |
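        """Run fn() twice, eagerly and then under compiled_autograd, compare
        the yielded outputs, and assert that exactly `count` graphs were
        captured and compiled."""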
| with torch.autograd.set_multithreading_enabled(False): |
| torch._dynamo.reset() |
| counters["compiled_autograd"].clear() |
| torch.manual_seed(123) |
| expected = list(fn()) |
| torch.manual_seed(123) |
| with compiled_autograd.enable(compiler_fn): |
| actual = list(fn()) |
| self.assertEqual(expected, actual) |
| self.assertEqual(counters["compiled_autograd"]["captures"], count) |
| self.assertEqual(counters["compiled_autograd"]["compiles"], count) |
| |
| def test_basic(self): |
| def fn(): |
| model = torch.nn.Sequential( |
| torch.nn.Linear(4, 4), |
| torch.nn.ReLU(), |
| torch.nn.Linear(4, 4), |
| torch.nn.ReLU(), |
| ) |
| x = torch.randn([2, 4]) |
| result = model(x).sum() |
| result.backward() |
| yield model[0].weight.grad |
| yield model[0].bias.grad |
| yield model[2].weight.grad |
| yield model[2].bias.grad |
| |
| self.check_output_and_recompiles(fn) |
| |
| def test_cache_hit(self): |
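        # Three identical iterations: the graph should be captured and
        # compiled once, with the later backward() calls hitting the cache.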
| def fn(): |
| for _ in range(3): |
| model = torch.nn.Sequential( |
| torch.nn.Linear(4, 4), |
| torch.nn.ReLU(), |
| torch.nn.Linear(4, 4), |
| torch.nn.ReLU(), |
| ) |
| x = torch.randn([2, 4]) |
| result = model(x).sum() |
| result.backward() |
| yield model[0].weight.grad |
| yield model[0].bias.grad |
| yield model[2].weight.grad |
| yield model[2].bias.grad |
| |
| self.check_output_and_recompiles(fn) |
| |
| def test_tensor_grad_hook1(self): |
| def fn(): |
| for _ in range(3): |
| model = torch.nn.Sequential( |
| torch.nn.Linear(4, 4), |
| torch.nn.ReLU(), |
| ) |
| x = torch.randn([2, 4]) |
| |
| model[0].weight.register_hook(hook1) |
| |
| result = model(x).sum() |
| result.backward() |
| yield model[0].weight.grad |
| yield model[0].bias.grad |
| |
| self.check_output_and_recompiles(fn) |
| |
| def test_tensor_grad_hook2(self): |
| def fn(): |
| for _ in range(3): |
| model = torch.nn.Sequential( |
| torch.nn.Linear(4, 4), |
| torch.nn.ReLU(), |
| ) |
| x = torch.randn([1, 4]) |
| |
| result = model(x).sum() |
| result.grad_fn.register_prehook(hook2) |
| result.backward() |
| yield model[0].weight.grad |
| yield model[0].bias.grad |
| |
| self.check_output_and_recompiles(fn) |
| |
| def test_tensor_grad_hook3(self): |
| def fn(): |
| for _ in range(3): |
| model = torch.nn.Sequential( |
| torch.nn.Linear(4, 4), |
| torch.nn.ReLU(), |
| ) |
| x = torch.randn([1, 4]) |
| |
| result = model(x).sum() |
| result.grad_fn.register_hook(hook3) |
| result.backward() |
| yield model[0].weight.grad |
| yield model[0].bias.grad |
| |
| self.check_output_and_recompiles(fn) |
| |
| def test_torch_compile(self): |
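        # Forward runs under torch.compile while backward runs under compiled
        # autograd; this checks that the two compose.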
| def fn(): |
| model = torch.nn.Sequential( |
| torch.nn.Linear(4, 4), |
| torch.nn.Sigmoid(), |
| ) |
| opt_model = torch.compile(model, fullgraph=True) |
| |
| for _ in range(3): |
| x = torch.randn([1, 4]) |
| |
| result = opt_model(x).sum() |
| result.backward() |
| yield model[0].weight.grad |
| yield model[0].bias.grad |
| model.zero_grad() |
| |
| self.check_output_and_recompiles(fn) |
| |
| def test_implicit_add(self): |
| def fn(): |
| y = torch.randn(1, 4, requires_grad=True) |
| |
| def model(x): |
| # y is used multiple times, gradients get added |
| return torch.sigmoid(x * y + torch.sin(y) + torch.cos(y)) |
| |
| for _ in range(3): |
| x = torch.randn([1, 4]) |
| |
| result = model(x).sum() |
| result.backward() |
| yield result |
| yield y.grad |
| y.grad = None |
| |
| self.check_output_and_recompiles(fn) |
| |
| def test_output_nodes(self): |
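        # torch.autograd.grad() returns gradients as graph outputs instead of
        # accumulating into .grad, exercising compiled autograd output nodes.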
| def fn(): |
| y = torch.randn(1, 4, requires_grad=True) |
| z = torch.randn(1, 4, requires_grad=True) |
| |
| def model(x): |
| return torch.sigmoid(x * z + torch.sin(y) + torch.cos(y)) |
| |
| for _ in range(3): |
| x = torch.randn([1, 4]) |
| |
| result = model(x).sum() |
| gy, gz = torch.autograd.grad(result, [y, z]) |
| assert y.grad is None |
| assert z.grad is None |
| yield gy |
| yield gz |
| |
| self.check_output_and_recompiles(fn) |
| |
| def test_dynamic_shapes(self): |
| def fn(): |
| model = torch.nn.Sequential( |
| torch.nn.Linear(4, 4), |
| torch.nn.ReLU(), |
| torch.nn.Linear(4, 4), |
| torch.nn.ReLU(), |
| ) |
| opt_model = torch.compile(model, dynamic=True) |
| |
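            # Vary the batch size so compiled autograd must handle dynamic
            # shapes instead of recompiling for every size.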
| for b in range(10, 100, 10): |
| x = torch.randn([b, 4]) |
| result = opt_model(x).sum() |
| result.backward() |
| yield model[0].weight.grad |
| yield model[0].bias.grad |
| yield model[2].weight.grad |
| yield model[2].bias.grad |
| model.zero_grad() |
| |
| # TODO(jansel): we should be able to get this count to 1 |
| self.check_output_and_recompiles(fn, count=2) |
| |
| def test_accumulate_without_zero(self): |
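        # .grad is never zeroed, so backward() must accumulate into the
        # existing gradients; clone() snapshots each iteration's values.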
| def fn(): |
| model = torch.nn.Sequential( |
| torch.nn.Linear(4, 4), |
| torch.nn.ReLU(), |
| torch.nn.Linear(4, 4), |
| torch.nn.ReLU(), |
| ) |
| opt_model = torch.compile(model, dynamic=True) |
| |
| for _ in range(10): |
| x = torch.randn([10, 4]) |
| result = opt_model(x).sum() |
| result.backward() |
| yield model[0].weight.grad.clone() |
| yield model[0].bias.grad.clone() |
| yield model[2].weight.grad.clone() |
| yield model[2].bias.grad.clone() |
| |
| self.check_output_and_recompiles(fn, count=2) |
| |
| def test_inplace_grad_update(self): |
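        # Pre-seed .grad with existing tensors and check that backward()
        # accumulates into them in place rather than replacing them.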
| def fn(): |
| model = torch.nn.Sequential( |
| torch.nn.Linear(4, 4), |
| torch.nn.ReLU(), |
| ) |
| opt_model = torch.compile(model, dynamic=True) |
| |
| for _ in range(10): |
| w_grad = torch.rand_like(model[0].weight) |
| b_grad = torch.rand_like(model[0].bias) |
| model[0].weight.grad = w_grad |
| model[0].bias.grad = b_grad |
| |
| x = torch.randn([10, 4]) |
| result = opt_model(x).sum() |
| result.backward() |
| assert model[0].weight.grad is w_grad |
| assert model[0].bias.grad is b_grad |
| yield w_grad.clone() |
| yield b_grad.clone() |
| |
| self.check_output_and_recompiles(fn, count=1) |
| |
| @unittest.skipIf(not HAS_CUDA, "requires cuda") |
| def test_issue106555(self): |
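        # Regression test for pytorch/pytorch issue #106555: reentrant
        # activation checkpointing around torch.compile'd submodules.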
| DEVICE = torch.device("cuda:0") |
| NUM_FEATURES = 256 |
| |
| def bias_sigmoid_mul(x1, x2, bias): |
| x2 = torch.sigmoid(x2 + bias) |
| y = x1 * x2 |
| return y |
| |
| bias_sigmoid_mul_jit = torch.compile(bias_sigmoid_mul) |
| |
| class ModuleWithJit(nn.Module): |
| def __init__(self): |
| super().__init__() |
| self.linear_1 = nn.Linear(NUM_FEATURES, NUM_FEATURES, bias=True) |
| self.linear_2 = nn.Linear(NUM_FEATURES, NUM_FEATURES, bias=False) |
| self.linear_2_bias = nn.Parameter(torch.zeros(NUM_FEATURES)) |
| |
| def forward(self, input_tensor): |
| x1 = self.linear_1(input_tensor) |
| x2 = self.linear_2(input_tensor) |
| output = bias_sigmoid_mul_jit(x1, x2, self.linear_2_bias) |
| return output |
| |
| class Model(nn.Module): |
| def __init__(self): |
| super().__init__() |
| self.module_with_jit_1 = ModuleWithJit() |
| self.module_with_jit_2 = ModuleWithJit() |
| |
| def forward(self, x, gradient_checkpointing: bool): |
| if gradient_checkpointing: |
| y = torch.utils.checkpoint.checkpoint( |
| self._forward, x, use_reentrant=True |
| ) |
| else: |
| y = self._forward(x) |
| return y |
| |
| def _forward(self, x): |
| x = x + self.module_with_jit_1(x) |
| x = x + self.module_with_jit_2(x.transpose(-2, -3)).transpose(-2, -3) |
| return x |
| |
| torch.cuda.set_device(device=DEVICE) |
| torch.manual_seed(1234567890) |
| model = Model() |
| model.train() |
| model.to(device=DEVICE) |
| model_parameters = list(model.parameters()) |
| |
| torch.manual_seed(1234567890) |
| input_tensor = torch.randn(1, 128, 256, NUM_FEATURES).to(device=DEVICE) |
| input_tensor.requires_grad = True |
| target_tensor = torch.randn(1, 128, 256, NUM_FEATURES).to( |
| dtype=input_tensor.dtype, device=DEVICE |
| ) |
| |
| for iteration in range(10): |
| for param in model_parameters: |
| param.grad = None |
| output_tensor = model( |
| x=input_tensor.clone(), |
| gradient_checkpointing=True, |
| ) |
| loss = torch.mean(torch.abs(target_tensor - output_tensor)) |
| loss.backward() |
| |
| |
| def load_test_module(name): |
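    """Load a module from the parent test/ directory by file path, with that
    directory temporarily added to sys.path for the module's own imports."""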
| testdir = Path(__file__).absolute().parent.parent |
| with mock.patch("sys.path", [*sys.path, str(testdir)]): |
| return SourceFileLoader( |
| name, str(testdir / f"{name.replace('.', '/')}.py") |
| ).load_module() |
| |
| |
| test_autograd = load_test_module("test_autograd") |
| |
| |
| class EagerAutogradTests(TestCase): |
| @classmethod |
| def add_test(cls, name, fn): |
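        """Re-register a TestAutograd test to run under compiled_autograd,
        converting failures that match known not-implemented patterns into
        skips."""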
| @functools.wraps(fn) |
| def wrapped(self: EagerAutogradTests): |
| torch._dynamo.reset() |
| try: |
| with compiled_autograd.enable(compiler_fn): |
| return fn(self) |
| except Exception as e: |
| if not_implemented_re.search(str(e)): |
| raise unittest.SkipTest("not implemented") |
| raise |
| |
| if skip_re.match(name) or name in skips or not callable(fn): |
| return |
| elif name.startswith("test"): |
| setattr(cls, name, wrapped) |
| else: |
| setattr(cls, name, fn) |
| |
| |
| not_implemented_re = re.compile( |
| r"|".join( |
| map( |
| re.escape, |
| [ |
| # compiled autograd nyi errors: |
| "compiled_autograd does not support", |
| "not supported by compiled autograd", |
| "not yet implemented for compiled autograd", |
| "not implemented for compiled autograd", |
| "has no attribute '_compiled_autograd_key'", |
| # make_fx() tracing errors: |
| "Cannot access storage of BatchedTensorImpl", |
| "Cannot access storage of SparseTensorImpl", |
| "data dependent operator:", |
| "dynamic shape operator:", |
| # inductor errors: |
| "inductor does not support", |
| ], |
| ) |
| ) |
| ) |
| |
| # These groups of tests aren't supported yet |
| skip_re = re.compile(r"^test_(sparse|profiler|gradcheck|checkpoint|named_tensor)") |
| |
| # Bugs needing investigation: |
| skips = { |
| "test_current_graph_task_execution_order", # torch._dynamo.exc.TorchRuntimeError: Failed running call_function < |
| "test_input_buffer_accum", # RuntimeError: Cannot access data pointer of Tensor that doesn't have storage |
| "test_graph_save_on_cpu_cuda", # AssertionError: 0 not greater than 0 |
| "test_graph_save_on_cpu", # torch._dynamo.exc.BackendCompilerFailed: backend='inner_compiler' raised: |
| "test_reentrant_with_leaf_variable_hook", # torch._dynamo.exc.Unsupported: inline in skipfiles: RemovableHandle. |
| "test_reentrant_with_non_leaf_variable_hook", # torch._dynamo.exc.Unsupported: inline in skipfiles: RemovableHan |
| "test_saved_variable_saved_original_inplace_detach", # AssertionError: RuntimeError not raised |
| "test_saving_variable_to_disk", # Cannot call numel() on tensor with symbolic sizes/strides |
| "test_setitem_mask", # torch.fx.experimental.symbolic_shapes.GuardOnDataDependentSymNode: It appears that you're |
| "test_tensor_hooks_inplace_over_view", # torch._dynamo.exc.Unsupported: call_function UserDefinedClassVariable() [] {} |
| "test_tensor_hooks_inplace", # torch._dynamo.exc.Unsupported: call_function UserDefinedClassVariable() [] {} |
| "test_wrapped_number_saved_variable_hooks", # RuntimeError: this hook should not be called |
| } |
| |
| if not HAS_CUDA: |
| # Found Tesla M60 which is too old to be supported by the triton GPU compiler |
| skips.add("test_type_conversions") |
| |
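# Rerun every test in test_autograd.TestAutograd under compiled autograd.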
| for name, fn in test_autograd.TestAutograd.__dict__.items(): |
| EagerAutogradTests.add_test(name, fn) |
| |
| |
| if __name__ == "__main__": |
| if HAS_CPU: |
| run_tests(needs="filelock") |