import torch
import functools
import warnings
import numpy as np
from torch._six import container_abcs, string_classes


class autocast(object):
    r"""
    Instances of :class:`autocast` serve as context managers or decorators that
    allow regions of your script to run in mixed precision.

    In these regions, CUDA ops run in an op-specific dtype chosen by autocast
    to improve performance while maintaining accuracy.
    See the :ref:`Autocast Op Reference<autocast-op-reference>` for details.

    When entering an autocast-enabled region, Tensors may be any type.
    You should not call ``.half()`` on your model(s) or inputs when using autocasting.

    :class:`autocast` should wrap only the forward pass(es) of your network, including the loss
    computation(s). Backward passes under autocast are not recommended.
    Backward ops run in the same type that autocast used for corresponding forward ops.

    Example::

        # Creates model and optimizer in default precision
        model = Net().cuda()
        optimizer = optim.SGD(model.parameters(), ...)

        for input, target in data:
            optimizer.zero_grad()

            # Enables autocasting for the forward pass (model + loss)
            with autocast():
                output = model(input)
                loss = loss_fn(output, target)

            # Exits the context manager before backward()
            loss.backward()
            optimizer.step()

    See the :ref:`Automatic Mixed Precision examples<amp-examples>` for usage (along with gradient scaling)
    in more complex scenarios (e.g., gradient penalty, multiple models/losses, custom autograd functions).
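
    For instance, a typical pattern that combines :class:`autocast` with gradient scaling via
    :class:`torch.cuda.amp.GradScaler` looks like the following (a minimal sketch; ``model``,
    ``optimizer``, ``data``, and ``loss_fn`` are the same placeholders as in the example above)::

        scaler = torch.cuda.amp.GradScaler()

        for input, target in data:
            optimizer.zero_grad()

            # Runs the forward pass and loss computation under autocast, as before
            with autocast():
                output = model(input)
                loss = loss_fn(output, target)

            # Scales the loss, and calls backward() outside the autocast region
            scaler.scale(loss).backward()

            # scaler.step() unscales gradients and skips optimizer.step() if they contain infs/NaNs
            scaler.step(optimizer)
            scaler.update()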

    :class:`autocast` can also be used as a decorator, e.g., on the ``forward`` method of your model::

        class AutocastModel(nn.Module):
            ...
            @autocast()
            def forward(self, input):
                ...

    Floating-point Tensors produced in an autocast-enabled region may be ``float16``.
    After returning to an autocast-disabled region, using them with floating-point
    Tensors of different dtypes may cause type mismatch errors. If so, cast the Tensor(s)
    produced in the autocast region back to ``float32`` (or another dtype if desired).
    If a Tensor from the autocast region is already ``float32``, the cast is a no-op
    and incurs no additional overhead. Example::

        # Creates some tensors in default dtype (here assumed to be float32)
        a_float32 = torch.rand((8, 8), device="cuda")
        b_float32 = torch.rand((8, 8), device="cuda")
        c_float32 = torch.rand((8, 8), device="cuda")
        d_float32 = torch.rand((8, 8), device="cuda")

        with autocast():
            # torch.mm is on autocast's list of ops that should run in float16.
            # Inputs are float32, but the op runs in float16 and produces float16 output.
            # No manual casts are required.
            e_float16 = torch.mm(a_float32, b_float32)
            # Also handles mixed input types
            f_float16 = torch.mm(d_float32, e_float16)

        # After exiting autocast, calls f_float16.float() to use with d_float32
        g_float32 = torch.mm(d_float32, f_float16.float())

    Type mismatch errors *in* an autocast-enabled region are a bug; if this is what you observe,
    please file an issue.

    ``autocast(enabled=False)`` subregions can be nested in autocast-enabled regions.
    Locally disabling autocast can be useful, for example, if you want to force a subregion
    to run in a particular ``dtype``. Disabling autocast gives you explicit control over
    the execution type. In the subregion, inputs from the surrounding region
    should be cast to ``dtype`` before use::

        # Creates some tensors in default dtype (here assumed to be float32)
        a_float32 = torch.rand((8, 8), device="cuda")
        b_float32 = torch.rand((8, 8), device="cuda")
        c_float32 = torch.rand((8, 8), device="cuda")
        d_float32 = torch.rand((8, 8), device="cuda")

        with autocast():
            e_float16 = torch.mm(a_float32, b_float32)

            with autocast(enabled=False):
                # Calls e_float16.float() to ensure float32 execution
                # (necessary because e_float16 was created in an autocasted region)
                f_float32 = torch.mm(c_float32, e_float16.float())

            # No manual casts are required when re-entering the autocast-enabled region.
            # torch.mm again runs in float16 and produces float16 output, regardless of input types.
            g_float16 = torch.mm(d_float32, f_float32)

    The autocast state is thread-local. If you want it enabled in a new thread, the context manager or decorator
    must be invoked in that thread. This affects :class:`torch.nn.DataParallel` and
    :class:`torch.nn.parallel.DistributedDataParallel` when used with more than one GPU per process
    (see :ref:`Working with Multiple GPUs<amp-multigpu>`).
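
    For example, one way to keep autocast active in the side threads spawned by
    :class:`torch.nn.DataParallel` is to apply :class:`autocast` inside ``forward`` itself,
    as in the decorator example above (a minimal sketch; ``MyModel``, ``loss_fn``, and ``input``
    are illustrative placeholders)::

        class MyModel(nn.Module):
            ...
            @autocast()
            def forward(self, input):
                ...

        model = MyModel().cuda()
        dp_model = nn.DataParallel(model)

        # Enables autocast in the main thread
        with autocast():
            # dp_model's side threads run forward under autocast because forward
            # itself is decorated; the surrounding region covers the loss computation.
            output = dp_model(input)
            loss = loss_fn(output)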

    Arguments:
        enabled(bool, optional, default=True): Whether autocasting should be enabled in the region.
    """
    def __init__(self, enabled=True):
        if enabled and not torch.cuda.is_available():
            warnings.warn("torch.cuda.amp.autocast only affects CUDA ops, but CUDA is not available. Disabling.")
            self._enabled = False
        else:
            self._enabled = enabled

    def __enter__(self):
        self.prev = torch.is_autocast_enabled()
        torch.set_autocast_enabled(self._enabled)
        torch.autocast_increment_nesting()

    def __exit__(self, *args):
        # Drop the cache when we exit to a nesting level that's outside any instance of autocast.
        if torch.autocast_decrement_nesting() == 0:
            torch.clear_autocast_cache()
        torch.set_autocast_enabled(self.prev)
        return False

    def __call__(self, func):
        @functools.wraps(func)
        def decorate_autocast(*args, **kwargs):
            with self:
                return func(*args, **kwargs)
        return decorate_autocast


# Casts Tensors and containers of Tensors. Special-cases passthroughs for strings and np.ndarrays, which
# may be falsely detected as "Iterables."
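# For example (an illustrative sketch; the names and values are arbitrary):
#     batch = {"x": some_cuda_float32_tensor, "meta": ("label", 3)}
#     _cast(batch, torch.float16)
# returns the same nested structure with eligible CUDA floating-point Tensors cast to float16,
# while strings, ints, float64 Tensors, and CPU Tensors pass through unchanged.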
def _cast(value, dtype):
    if isinstance(value, torch.Tensor):
        is_eligible = (value.is_floating_point() and value.is_cuda and (value.dtype is not torch.float64))
        return value.to(dtype) if is_eligible else value
    elif isinstance(value, string_classes):
        return value
    elif isinstance(value, np.ndarray):
        return value
    elif isinstance(value, container_abcs.Mapping):
        return {_cast(k, dtype): _cast(v, dtype) for k, v in value.items()}
    elif isinstance(value, container_abcs.Iterable):
        return type(value)(_cast(v, dtype) for v in value)
    else:
        return value


# custom_fwd is a decorator that may or may not be used with arguments, following
# https://github.com/dabeaz/python-cookbook/tree/master/src/9/defining_a_decorator_that_takes_an_optional_argument.
# this works:
#     @custom_fwd
#     def forward(...):
# this also works:
#     @custom_fwd(cast_inputs=torch.float)
#     def forward(...):
# TODO: when python 2 support is dropped, change the signature to
# def custom_fwd(fwd=None, *, cast_inputs=None) with internal changes following the link above.
def custom_fwd(fwd=None, **kwargs):
    """
    Helper decorator for ``forward`` methods of custom autograd functions (subclasses of
    :class:`torch.autograd.Function`). See the :ref:`example page<amp-custom-examples>` for more detail.

    Arguments:
        cast_inputs (:class:`torch.dtype` or None, optional, default=None): If not ``None``,
            when ``forward`` runs in an autocast-enabled region, casts incoming
            floating-point CUDA Tensors to the target dtype (non-floating-point Tensors are not affected),
            then executes ``forward`` with autocast disabled.
            If ``None``, ``forward``'s internal ops execute with the current autocast state.

    .. note::
        If the decorated ``forward`` is called outside an autocast-enabled region,
        :func:`custom_fwd<custom_fwd>` is a no-op and ``cast_inputs`` has no effect.
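
    A minimal usage sketch (``MyMM`` and its mm-based ops are illustrative; see the
    :ref:`example page<amp-custom-examples>` for fuller examples)::

        class MyMM(torch.autograd.Function):
            @staticmethod
            @custom_fwd(cast_inputs=torch.float32)
            def forward(ctx, a, b):
                ctx.save_for_backward(a, b)
                return a.mm(b)

            @staticmethod
            @custom_bwd
            def backward(ctx, grad):
                a, b = ctx.saved_tensors
                return grad.mm(b.t()), a.t().mm(grad)

    Called inside an autocast-enabled region, ``MyMM.apply`` then receives ``float32`` inputs
    and runs its internal ops with autocast disabled; outside such a region it is unaffected.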
| """ |
| if fwd is None: |
| if len(kwargs) == 0: |
| cast_inputs = None |
| else: |
| assert len(kwargs) == 1 |
| cast_inputs = kwargs["cast_inputs"] |
| return functools.partial(custom_fwd, cast_inputs=cast_inputs) |
| |
| if len(kwargs) == 0: |
| cast_inputs = None |
| else: |
| assert len(kwargs) == 1 |
| cast_inputs = kwargs["cast_inputs"] |
| |
| @functools.wraps(fwd) |
| def decorate_fwd(*args, **kwargs): |
| if cast_inputs is None: |
| args[0]._fwd_used_autocast = torch.is_autocast_enabled() |
| return fwd(*args, **kwargs) |
| else: |
| autocast_context = torch.is_autocast_enabled() |
| args[0]._fwd_used_autocast = False |
| if autocast_context: |
| with autocast(enabled=False): |
| return fwd(*_cast(args, cast_inputs), **_cast(kwargs, cast_inputs)) |
| else: |
| return fwd(*args, **kwargs) |
| return decorate_fwd |


# Autograd ensures incoming gradients are the same type as forward outputs. Allowing a separate
# cast_inputs argument on custom_bwd is unnecessary and could cause errors if it doesn't match
# cast_inputs supplied to custom_fwd.
def custom_bwd(bwd):
    """
    Helper decorator for ``backward`` methods of custom autograd functions (subclasses of
    :class:`torch.autograd.Function`).
    Ensures that ``backward`` executes with the same autocast state as ``forward``.
    See the :ref:`example page<amp-custom-examples>` for more detail.
    """
    @functools.wraps(bwd)
    def decorate_bwd(*args, **kwargs):
        with autocast(args[0]._fwd_used_autocast):
            return bwd(*args, **kwargs)
    return decorate_bwd