torchgen/_autoheuristic/benchmark_utils.py - platform/external/pytorch - Git at Google

 import random
 from typing import Any, Tuple

 import torch


 def transpose_tensors(p_transpose_both: float = 0.05) -> Tuple[bool, bool]:
     """Randomly draw a pair of transpose flags.

     Args:
         p_transpose_both: Weight given to the outcome ``(True, True)`` in
             the first draw; the complementary weight ``1 - p_transpose_both``
             goes to "not both".

     Returns:
         ``(True, True)`` when the first draw selects "both". Otherwise one of
         ``(True, False)``, ``(False, True)`` or ``(False, False)``, drawn
         with equal weight. The first flag is presumably for the left operand
         and the second for the right one (matching ``transpose_left`` /
         ``transpose_right`` elsewhere in this file).
     """
     # First draw: decide whether both flags are set at once.
     (both,) = random.choices(
         [True, False], weights=[p_transpose_both, 1 - p_transpose_both]
     )
     if not both:
         # Second draw: unweighted pick among the three remaining combinations.
         remaining = [(True, False), (False, True), (False, False)]
         (picked,) = random.choices(remaining)
         return picked
     return (True, True)


 def fits_in_memory(dtype: Any, m: int, k: int, n: int) -> Any:
     """Check a rough memory budget for an ``m x k`` by ``k x n`` problem.

     The estimate is ``dtype.itemsize * (m * k + k * n + m * n)`` bytes, i.e.
     the element counts of an ``(m, k)``, a ``(k, n)`` and an ``(m, n)``
     matrix (presumably the two operands and the product).

     Args:
         dtype: Object exposing an ``itemsize`` attribute (bytes per element).
         m: Row count of the first matrix.
         k: Shared inner dimension.
         n: Column count of the second matrix.

     Returns:
         ``True`` when the estimate is strictly below one quarter of the total
         memory reported for CUDA device 0, ``False`` otherwise.
     """
     device_total = torch.cuda.get_device_properties(0).total_memory
     # Only a quarter of the device memory is budgeted because using more
     # sometimes ran out of memory -- assumed (not confirmed) to be because
     # inductor creates copies of tensors for benchmarking.
     budget = device_total / 4
     element_count = m * k + k * n + m * n
     return dtype.itemsize * element_count < budget


 def get_mm_tensors(
     m: int,
     k: int,
     n: int,
     transpose_left: bool,
     transpose_right: bool,
     dtype_left: Any,
     dtype_right: Any,
 ) -> Tuple[Any, Any]:
     """Build a pair of random tensors of shapes ``(m, k)`` and ``(k, n)``.

     Args:
         m: Row count of the left tensor.
         k: Shared inner dimension.
         n: Column count of the right tensor.
         transpose_left: If true, the left tensor is created as ``(k, m)`` and
             returned through ``.t()``, so its shape is still ``(m, k)`` but
             it is a transposed view of the underlying data.
         transpose_right: Same treatment for the right tensor, created as
             ``(n, k)`` and returned as a ``(k, n)`` view.
         dtype_left: dtype passed to ``torch.randn`` for the left tensor.
         dtype_right: dtype passed to ``torch.randn`` for the right tensor.

     Returns:
         ``(a, b)`` with ``a`` of shape ``(m, k)`` and ``b`` of shape
         ``(k, n)``. The left tensor is always sampled before the right one.
     """
     lhs_dims = (k, m) if transpose_left else (m, k)
     lhs = torch.randn(*lhs_dims, dtype=dtype_left)
     if transpose_left:
         lhs = lhs.t()
     rhs_dims = (n, k) if transpose_right else (k, n)
     rhs = torch.randn(*rhs_dims, dtype=dtype_right)
     if transpose_right:
         rhs = rhs.t()
     return (lhs, rhs)
	import random
	from typing import Any, Tuple

	import torch


	def transpose_tensors(p_transpose_both: float = 0.05) -> Tuple[bool, bool]:
	    """Randomly choose a pair of transpose flags.

	    Args:
	        p_transpose_both: Weight of the outcome ``(True, True)`` in the
	            first draw; "not both" gets weight ``1 - p_transpose_both``.

	    Returns:
	        ``(True, True)`` if the first draw selects "both"; otherwise one of
	        ``(True, False)``, ``(False, True)`` or ``(False, False)``, picked
	        with equal weight.
	    """
	    transpose_both = random.choices(
	        [True, False], [p_transpose_both, 1 - p_transpose_both]
	    )[0]
	    if transpose_both:
	        return (True, True)
	    # Not both: pick uniformly among the three remaining combinations.
	    transpose_left = (True, False)
	    transpose_right = (False, True)
	    no_transpose = (False, False)
	    return random.choices([transpose_left, transpose_right, no_transpose])[0]


	def fits_in_memory(dtype: Any, m: int, k: int, n: int) -> Any:
	    """Return whether a rough size estimate stays under a GPU memory budget.

	    The estimate is ``dtype.itemsize * (m * k + k * n + m * n)`` bytes: the
	    element counts of an ``(m, k)``, a ``(k, n)`` and an ``(m, n)`` matrix
	    (presumably two operands and their product).

	    Args:
	        dtype: Object exposing ``itemsize`` (bytes per element).
	        m: Row count of the first matrix.
	        k: Shared inner dimension.
	        n: Column count of the second matrix.

	    Returns:
	        ``True`` if the estimate is strictly less than a quarter of the total
	        memory reported for CUDA device 0, else ``False``.
	    """
	    threshold_memory = torch.cuda.get_device_properties(0).total_memory / 4
	    # Dividing by 4 because we otherwise sometimes run out of memory, I assume
	    # because inductor creates copies of tensors for benchmarking?
	    return dtype.itemsize * (m * k + k * n + m * n) < threshold_memory


	def get_mm_tensors(
	    m: int,
	    k: int,
	    n: int,
	    transpose_left: bool,
	    transpose_right: bool,
	    dtype_left: Any,
	    dtype_right: Any,
	) -> Tuple[Any, Any]:
	    """Create random tensors ``a`` of shape ``(m, k)`` and ``b`` of shape ``(k, n)``.

	    Args:
	        m: Row count of ``a``.
	        k: Shared inner dimension.
	        n: Column count of ``b``.
	        transpose_left: If true, ``a`` is sampled as ``(k, m)`` and returned
	            via ``.t()``, giving an ``(m, k)`` transposed view.
	        transpose_right: If true, ``b`` is sampled as ``(n, k)`` and returned
	            via ``.t()``, giving a ``(k, n)`` transposed view.
	        dtype_left: dtype passed to ``torch.randn`` for ``a``.
	        dtype_right: dtype passed to ``torch.randn`` for ``b``.

	    Returns:
	        The tuple ``(a, b)``; ``a`` is sampled before ``b``.
	    """
	    if transpose_left:
	        a = torch.randn(k, m, dtype=dtype_left).t()
	    else:
	        a = torch.randn(m, k, dtype=dtype_left)
	    if transpose_right:
	        b = torch.randn(n, k, dtype=dtype_right).t()
	    else:
	        b = torch.randn(k, n, dtype=dtype_right)
	    return (a, b)