| # mypy: ignore-errors |
| |
| r""" |
This package adds support for JIT compilation for CUDA streams and events.
It is similar to the APIs available in eager mode.
| :ref:`cuda-semantics` has more details about working with CUDA. |
| """ |
| |
| import torch |
| from typing import Optional, Any |
| from torch import device as _device |
| |
def get_current_device_index() -> int:
    r"""Return the device index of the current default CUDA device.

    Falls back to ``-1`` when no CUDA devices are available.

    Arguments: ``None``
    """
    # No devices at all -> sentinel value -1.
    if torch.cuda.device_count() <= 0:
        return -1
    return torch.cuda._current_device()
| |
def get_device_index(device: Optional[_device] = None, optional: bool = False, allow_cpu: bool = False) -> int:
    r"""Gets the device index from :attr:`device`, which can be a torch.device
    object, a Python integer, or ``None``.

    If :attr:`device` is a torch.device object, returns the device index if it
    is a CUDA device. Note that for a CUDA device without a specified index,
    this will return the current default CUDA device if :attr:`optional` is ``True``.
    If :attr:`allow_cpu` is ``True``, CPU devices will be accepted and ``-1`` will be
    returned in this case.

    If :attr:`device` is a Python integer, it is returned as is.

    If :attr:`device` is ``None``, this will return the current default CUDA
    device if :attr:`optional` is ``True``.
    """
    if device is None:
        if optional:
            # ``None`` + optional -> current default CUDA device (or -1).
            return get_current_device_index()
        else:
            raise ValueError('Expected a torch.device with a specified index '
                             f'or an integer, but got: {device}')
    device_index = -1
    # Normalize device strings such as "cuda:0" into torch.device objects
    # so the torch.device branch below handles them.
    if isinstance(device, str):
        device = torch.device(device)

    if isinstance(device, torch.device):
        if not allow_cpu and device.type == 'cpu':
            raise ValueError(f'Expected a non cpu device, but got: {device}')
        # CPU maps to the -1 sentinel; otherwise resolve the CUDA index.
        device_index = -1 if device.type == 'cpu' else torch.cuda.device_index(device)

    # Plain integers are passed through unchanged.
    if isinstance(device, int):
        device_index = device

    return device_index
| |
class device(object):
    r"""Context-manager that changes the selected device.
    This is similar to device (torch.device or int), but has been
    introduced for JIT compatibility.
    Arguments:
        device (torch.device or int): device index to select. It's a no-op if
            this argument is a negative integer or ``None``.
    """
    def __init__(self, device: Optional[_device]):
        # Both indices start at -1, meaning "no switch was performed".
        self.idx = -1
        self.prev_idx = -1
        self.device = device

    def __enter__(self):
        self.idx = get_device_index(self.device, optional=True)

        # Nothing to select: no target device / no CUDA available.
        if self.idx == -1:
            return
        self.prev_idx = torch.cuda._current_device()

        # Switch only when the target differs from the current device.
        if self.idx != self.prev_idx:
            torch.cuda._set_device(self.idx)

    def __exit__(self, type: Any, value: Any, traceback: Any):
        # Undo the switch from __enter__; no-op when we never switched.
        if self.idx != self.prev_idx:
            torch.cuda._set_device(self.prev_idx)
| |
class StreamContext(object):
    r"""Context-manager that selects a given stream.
    All CUDA kernels queued within its context will be enqueued on a selected
    stream.
    Arguments:
        StreamContext (Stream): selected stream. This manager is a no-op if it's
            ``None``.
    .. note:: Streams are per-device. If the selected stream is not on the
        current device, this function will also change the current device to
        match the stream.
    """
    # Declared so TorchScript can type-refine the wrapped (optional) stream.
    cur_stream : Optional['torch.classes.cuda.Stream']

    def __init__(self, stream: Optional['torch.classes.cuda.Stream']):
        # -1 marks "no CUDA device available"; the real index is captured
        # later in __enter__.
        self.idx = -1
        self.stream = stream
        # Initialize the below streams to default stream on the current device
        # NOTE(review): if no CUDA device is available, device_index is -1
        # here and gets passed to default_stream — confirm the binding treats
        # -1 as "current device" instead of raising.
        self.device_index = get_current_device_index()
        self.src_prev_stream = torch.cuda.default_stream(self.device_index)
        self.dst_prev_stream = torch.cuda.default_stream(self.device_index)

    def __enter__(self):
        # Capture the current default device index at entry time; __exit__
        # uses it to restore the device.
        self.idx = get_device_index(device=None, optional=True)
        # If there is no CUDA device available, return
        if self.idx == -1:
            return

        # Local cur_stream variable for type refinement
        cur_stream = self.stream
        # Return if stream is None
        if cur_stream is None:
            return
        # Remember the stream current on the source (entry) device so
        # __exit__ can restore it.
        self.src_prev_stream = torch.cuda.current_stream(self.idx)
        # If the stream is not on the current device, then change the device
        # and set the current stream on the device
        if self.src_prev_stream.device_index() != cur_stream.device_index():
            with device(cur_stream.device()):
                # Also remember the stream current on the destination device.
                self.dst_prev_stream = torch.cuda.current_stream(cur_stream.device_index())
            torch.cuda._set_device(cur_stream.device_index())
        torch.cuda.set_stream(cur_stream)

    def __exit__(self, type: Any, value: Any, traceback: Any):
        # Local cur_stream variable for type refinement
        cur_stream = self.stream
        # If stream is None or no CUDA device available, return
        if cur_stream is None or self.idx == -1:
            return
        # If the stream was not on the current device, restore the previous stream on
        # the destination device and also reset the current device to the previous device.
        # Set the current stream on the device to the src_prev_stream
        if self.src_prev_stream.device_index() != cur_stream.device_index():
            torch.cuda.set_stream(self.dst_prev_stream)
            torch.cuda._set_device(self.idx)
        torch.cuda.set_stream(self.src_prev_stream)
| |
def stream(stream: Optional['torch.classes.cuda.Stream']) -> StreamContext:
    r"""Convenience wrapper around the context-manager that selects a stream.
    All CUDA kernels queued within its context will be enqueued on a selected
    stream.
    Arguments:
        stream (Stream): selected stream. This manager is a no-op if it's
            ``None``.
    """
    ctx = StreamContext(stream)
    return ctx
| |
def Stream(device: int = -1, priority: int = 0) -> 'torch.classes.cuda.Stream':
    r"""Wrapper around a CUDA stream.
    A CUDA stream is a linear sequence of execution that belongs to a specific
    device, independent from other streams. See :ref:`cuda-semantics` for
    details.
    Arguments:
        device(int, optional): a device on which to allocate
            the stream. If :attr:`device` is negative (default ``-1``),
            this will use the current device.
        priority(int, optional): priority of the stream. Can be either
            -1 (high priority) or 0 (low priority). By default, streams have
            priority 0.
    .. note:: Although CUDA versions >= 11 support more than two levels of
        priorities, in PyTorch, we only support two levels of priorities.
    """
    # Delegates to the custom-class binding registered under
    # torch.classes.cuda by the CUDA backend.
    return torch.classes.cuda.Stream(device, priority)
| |
def Event(enable_timing: bool = False, blocking: bool = False, interprocess: bool = False) -> 'torch.classes.cuda.Event':
    r"""Wrapper around a CUDA event.
    CUDA events are synchronization markers that can be used to monitor the
    device's progress, to accurately measure timing, and to synchronize CUDA
    streams.
    Arguments:
        enable_timing (bool, optional): indicates if the event should measure time
            (default: ``False``)
        blocking (bool, optional): if ``True``, :meth:`wait` will be blocking (default: ``False``)
        interprocess (bool): if ``True``, the event can be shared between processes
            (default: ``False``)
    .. _CUDA Event Documentation:
       https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html
    """
    # Construct the event via the custom-class binding registered under
    # torch.classes.cuda.
    event = torch.classes.cuda.Event(enable_timing, blocking, interprocess)
    return event