benchmarks/tensorexpr/elementwise.py - platform/external/pytorch - Git at Google

 from . import benchmark
 import itertools
 import numpy as np
 import torch
 import scipy.special

 # A template class for elementwise operations.
 # A derived class will override the class instance to customize its behavior.
 class ElementBench(benchmark.Benchmark):
     # List of customization class variables.
     op_str = None
     binary_op_pt_func = None
     binary_op_np_func = None
     unary_op_pt_func = None
     unary_op_np_func = None
     split_input = True

     def __init__(self, mode, device, dtype, N):
         super().__init__(mode, device, dtype)
         self.N = N
         self.d1 = self.rand([N], device=device, dtype=dtype, requires_grad=self.requires_grad)
         self.d2 = self.rand([N], device=device, dtype=dtype, requires_grad=self.requires_grad)
         self.d3 = self.rand([N], device=device, dtype=dtype, requires_grad=self.requires_grad)
         self.d4 = self.rand([N], device=device, dtype=dtype, requires_grad=self.requires_grad)
         self.inputs = [self.d1, self.d2, self.d3, self.d4]
         self.deterministic = "rand" not in self.op_str

     def _eval(self, d1, d2, d3, d4, binary_op, unary_op):
         if not binary_op:
             def binary_op(x, y):
                 return x + y
         if not unary_op:
             def unary_op(x):
                 return x

         if self.split_input:
             d1 = unary_op(d1)
             d2 = unary_op(d2)
             d3 = unary_op(d3)
             d4 = unary_op(d4)
         else:
             d2 = unary_op(d1 + 0.001)
             d3 = unary_op(d1 + 0.002)
             d4 = unary_op(d1 + 0.003)
             d1 = unary_op(d1)
         a = binary_op(d1, d2)
         b = binary_op(d3, d4)
         c = a + b
         return c

     def forward(self, d1, d2, d3, d4):
         binary_op = self.__class__.binary_op_pt_func
         unary_op = self.__class__.unary_op_pt_func
         return self._eval(d1, d2, d3, d4, binary_op, unary_op)

     def reference(self):
         binary_op = self.__class__.binary_op_np_func
         unary_op = self.__class__.unary_op_np_func
         [d1, d2, d3, d4] = [self.numpy(d) for d in [self.d1, self.d2, self.d3, self.d4]]
         return self._eval(d1, d2, d3, d4, binary_op, unary_op)

     def config(self):
         return [self.N]

     @classmethod
     def module(cls):
         return "element_" + cls.op_str

     def memory_workload(self):
         input_count = len(self.inputs)
         if self.mode == "fwd":
             if self.split_input:
                 sol_count = input_count + 1
                 algorithmic_count = input_count + 1
             else:
                 sol_count = 1 + 1
                 algorithmic_count = 1 + 1
             if "rand" in self.op_str:
                 sol_count = 1
                 algorithmic_count = 1
         else:
             if self.split_input:
                 sol_count = (input_count + 1) + (1 + input_count)
                 algorithmic_count = (input_count + 1) + ((2 + 1) * input_count)
             else:
                 sol_count = 1 + 1
                 algorithmic_count = 1 + 1
             if "rand" in self.op_str:
                 sol_count = 1
                 algorithmic_count = 1

         buffer_size = self.N
         return {
             "sol": buffer_size * sol_count,
             "algorithmic": buffer_size * algorithmic_count,
         }

     @staticmethod
     def default_configs():
         return [[1 << 25]]


 def register_element_ops():
     binary_op_list = [
         ["mul", lambda a, b: a * b],
         ["add", lambda a, b: a + b],
         ["sub", lambda a, b: a - b],
         ["div", lambda a, b: a / (b + 1e-4)],
         [
             "pow",
             lambda a, b: torch.pow(a, b),
             lambda a, b: np.power(a, b),
         ],  # no fuson triggered
         ["max", lambda a, b: torch.max(a, b), lambda a, b: np.maximum(a, b)],
         ["min", lambda a, b: torch.min(a, b), lambda a, b: np.minimum(a, b)],
     ]

     unary_op_list = [
         ["erf", lambda x: torch.erf(x), lambda x: scipy.special.erf(x)],
         ["exp", lambda x: torch.exp(x), lambda x: np.exp(x)],
         ["sin", lambda x: torch.sin(x), lambda x: np.sin(x)],
         ["cos", lambda x: torch.cos(x), lambda x: np.cos(x)],
         ["rand_like", lambda x: torch.rand_like(x), lambda x: np.random.rand(*x.shape)],
     ]

     for split_input, binary_op in itertools.product([True, False], binary_op_list):
         # Make a copy of ElementBench
         if len(binary_op) == 2:
             [op_str, op_pt_func] = binary_op
             op_np_func = op_pt_func
         elif len(binary_op) == 3:
             [op_str, op_pt_func, op_np_func] = binary_op
         split_str = "split" if split_input else "shared"
         op_str = split_str + "_" + op_str
         bm_cls = type("ElementBench_" + op_str, (ElementBench,), {})
         bm_cls.op_str = op_str
         bm_cls.binary_op_pt_func = op_pt_func
         bm_cls.binary_op_np_func = op_np_func
         bm_cls.split_input = split_input
         benchmark.register_benchmark_class(bm_cls)

     for split_input, unary_op in itertools.product([True, False], unary_op_list):
         # Make a copy of ElementBench
         if len(unary_op) == 2:
             [op_str, op_pt_func] = unary_op
             op_np_func = op_pt_func
         elif len(unary_op) == 3:
             [op_str, op_pt_func, op_np_func] = unary_op
         split_str = "split" if split_input else "shared"
         op_str = split_str + "_" + op_str
         bm_cls = type("ElementBench_" + op_str, (ElementBench,), {})
         bm_cls.op_str = op_str
         bm_cls.unary_op_pt_func = op_pt_func
         bm_cls.unary_op_np_func = op_np_func
         bm_cls.split_input = split_input
         benchmark.register_benchmark_class(bm_cls)


 # benchmark.register_benchmark_class(ElementMulBench)
 register_element_ops()


 class SimpleElementBench(benchmark.Benchmark):
     def __init__(self, mode, device, dtype, N):
         super().__init__(mode, device, dtype)
         self.N = N
         self.data = self.rand([N], device=device, dtype=dtype, requires_grad=self.requires_grad)
         self.inputs = [self.data]

     def forward(self, data):
         a = data + 0.001
         b = a + 0.002
         return b

     def reference(self):
         binary_op = self.__class__.binary_op_np_func
         unary_op = self.__class__.unary_op_np_func
         [d1, d2, d3, d4] = [self.numpy(d) for d in [self.d1, self.d2, self.d3, self.d4]]
         return self._eval(d1, d2, d3, d4, binary_op, unary_op)

     def config(self):
         return [self.N]

     @staticmethod
     def input_iterable():
         return True

     @classmethod
     def module(cls):
         return "simple_element"

     def memory_workload(self):
         input_count = len(self.inputs)
         if self.mode == "fwd":
             sol_count = 2
             algorithmic_count = 2
         else:
             sol_count = 2
             algorithmic_count = 2

         buffer_size = self.N
         return {
             "sol": buffer_size * sol_count,
             "algorithmic": buffer_size * algorithmic_count,
         }

     @staticmethod
     def default_configs():
         return [[1 << 25]]


 benchmark.register_benchmark_class(SimpleElementBench)


 class DynamicSimpleElementBench(benchmark.DynamicShape, SimpleElementBench):
     def __init__(self, mode, device, dtype, N):
         benchmark.DynamicShape.__init__(self)
         SimpleElementBench.__init__(self, mode, device, dtype, N)

     @classmethod
     def module(cls):
         return "simple_dynamic_element"

     def instantiate_input(self):
         N, = self.rand_shape([self.N])
         data = self.rand([N], device=self.device, dtype=self.dtype, requires_grad=self.requires_grad)
         self.inputs = [data]


 benchmark.register_benchmark_class(DynamicSimpleElementBench)
	from . import benchmark
	import itertools
	import numpy as np
	import torch
	import scipy.special

	# A template class for elementwise operations.
	# A derived class will override the class instance to customize its behavior.
	class ElementBench(benchmark.Benchmark):
	# List of customization class variables.
	op_str = None
	binary_op_pt_func = None
	binary_op_np_func = None
	unary_op_pt_func = None
	unary_op_np_func = None
	split_input = True

	def __init__(self, mode, device, dtype, N):
	super().__init__(mode, device, dtype)
	self.N = N
	self.d1 = self.rand([N], device=device, dtype=dtype, requires_grad=self.requires_grad)
	self.d2 = self.rand([N], device=device, dtype=dtype, requires_grad=self.requires_grad)
	self.d3 = self.rand([N], device=device, dtype=dtype, requires_grad=self.requires_grad)
	self.d4 = self.rand([N], device=device, dtype=dtype, requires_grad=self.requires_grad)
	self.inputs = [self.d1, self.d2, self.d3, self.d4]
	self.deterministic = "rand" not in self.op_str

	def _eval(self, d1, d2, d3, d4, binary_op, unary_op):
	if not binary_op:
	def binary_op(x, y):
	return x + y
	if not unary_op:
	def unary_op(x):
	return x

	if self.split_input:
	d1 = unary_op(d1)
	d2 = unary_op(d2)
	d3 = unary_op(d3)
	d4 = unary_op(d4)
	else:
	d2 = unary_op(d1 + 0.001)
	d3 = unary_op(d1 + 0.002)
	d4 = unary_op(d1 + 0.003)
	d1 = unary_op(d1)
	a = binary_op(d1, d2)
	b = binary_op(d3, d4)
	c = a + b
	return c

	def forward(self, d1, d2, d3, d4):
	binary_op = self.__class__.binary_op_pt_func
	unary_op = self.__class__.unary_op_pt_func
	return self._eval(d1, d2, d3, d4, binary_op, unary_op)

	def reference(self):
	binary_op = self.__class__.binary_op_np_func
	unary_op = self.__class__.unary_op_np_func
	[d1, d2, d3, d4] = [self.numpy(d) for d in [self.d1, self.d2, self.d3, self.d4]]
	return self._eval(d1, d2, d3, d4, binary_op, unary_op)

	def config(self):
	return [self.N]

	@classmethod
	def module(cls):
	return "element_" + cls.op_str

	def memory_workload(self):
	input_count = len(self.inputs)
	if self.mode == "fwd":
	if self.split_input:
	sol_count = input_count + 1
	algorithmic_count = input_count + 1
	else:
	sol_count = 1 + 1
	algorithmic_count = 1 + 1
	if "rand" in self.op_str:
	sol_count = 1
	algorithmic_count = 1
	else:
	if self.split_input:
	sol_count = (input_count + 1) + (1 + input_count)
	algorithmic_count = (input_count + 1) + ((2 + 1) * input_count)
	else:
	sol_count = 1 + 1
	algorithmic_count = 1 + 1
	if "rand" in self.op_str:
	sol_count = 1
	algorithmic_count = 1

	buffer_size = self.N
	return {
	"sol": buffer_size * sol_count,
	"algorithmic": buffer_size * algorithmic_count,
	}

	@staticmethod
	def default_configs():
	return [[1 << 25]]


	def register_element_ops():
	binary_op_list = [
	["mul", lambda a, b: a * b],
	["add", lambda a, b: a + b],
	["sub", lambda a, b: a - b],
	["div", lambda a, b: a / (b + 1e-4)],
	[
	"pow",
	lambda a, b: torch.pow(a, b),
	lambda a, b: np.power(a, b),
	], # no fuson triggered
	["max", lambda a, b: torch.max(a, b), lambda a, b: np.maximum(a, b)],
	["min", lambda a, b: torch.min(a, b), lambda a, b: np.minimum(a, b)],
	]

	unary_op_list = [
	["erf", lambda x: torch.erf(x), lambda x: scipy.special.erf(x)],
	["exp", lambda x: torch.exp(x), lambda x: np.exp(x)],
	["sin", lambda x: torch.sin(x), lambda x: np.sin(x)],
	["cos", lambda x: torch.cos(x), lambda x: np.cos(x)],
	["rand_like", lambda x: torch.rand_like(x), lambda x: np.random.rand(*x.shape)],
	]

	for split_input, binary_op in itertools.product([True, False], binary_op_list):
	# Make a copy of ElementBench
	if len(binary_op) == 2:
	[op_str, op_pt_func] = binary_op
	op_np_func = op_pt_func
	elif len(binary_op) == 3:
	[op_str, op_pt_func, op_np_func] = binary_op
	split_str = "split" if split_input else "shared"
	op_str = split_str + "_" + op_str
	bm_cls = type("ElementBench_" + op_str, (ElementBench,), {})
	bm_cls.op_str = op_str
	bm_cls.binary_op_pt_func = op_pt_func
	bm_cls.binary_op_np_func = op_np_func
	bm_cls.split_input = split_input
	benchmark.register_benchmark_class(bm_cls)

	for split_input, unary_op in itertools.product([True, False], unary_op_list):
	# Make a copy of ElementBench
	if len(unary_op) == 2:
	[op_str, op_pt_func] = unary_op
	op_np_func = op_pt_func
	elif len(unary_op) == 3:
	[op_str, op_pt_func, op_np_func] = unary_op
	split_str = "split" if split_input else "shared"
	op_str = split_str + "_" + op_str
	bm_cls = type("ElementBench_" + op_str, (ElementBench,), {})
	bm_cls.op_str = op_str
	bm_cls.unary_op_pt_func = op_pt_func
	bm_cls.unary_op_np_func = op_np_func
	bm_cls.split_input = split_input
	benchmark.register_benchmark_class(bm_cls)


	# benchmark.register_benchmark_class(ElementMulBench)
	register_element_ops()


	class SimpleElementBench(benchmark.Benchmark):
	def __init__(self, mode, device, dtype, N):
	super().__init__(mode, device, dtype)
	self.N = N
	self.data = self.rand([N], device=device, dtype=dtype, requires_grad=self.requires_grad)
	self.inputs = [self.data]

	def forward(self, data):
	a = data + 0.001
	b = a + 0.002
	return b

	def reference(self):
	binary_op = self.__class__.binary_op_np_func
	unary_op = self.__class__.unary_op_np_func
	[d1, d2, d3, d4] = [self.numpy(d) for d in [self.d1, self.d2, self.d3, self.d4]]
	return self._eval(d1, d2, d3, d4, binary_op, unary_op)

	def config(self):
	return [self.N]

	@staticmethod
	def input_iterable():
	return True

	@classmethod
	def module(cls):
	return "simple_element"

	def memory_workload(self):
	input_count = len(self.inputs)
	if self.mode == "fwd":
	sol_count = 2
	algorithmic_count = 2
	else:
	sol_count = 2
	algorithmic_count = 2

	buffer_size = self.N
	return {
	"sol": buffer_size * sol_count,
	"algorithmic": buffer_size * algorithmic_count,
	}

	@staticmethod
	def default_configs():
	return [[1 << 25]]


	benchmark.register_benchmark_class(SimpleElementBench)


	class DynamicSimpleElementBench(benchmark.DynamicShape, SimpleElementBench):
	def __init__(self, mode, device, dtype, N):
	benchmark.DynamicShape.__init__(self)
	SimpleElementBench.__init__(self, mode, device, dtype, N)

	@classmethod
	def module(cls):
	return "simple_dynamic_element"

	def instantiate_input(self):
	N, = self.rand_shape([self.N])
	data = self.rand([N], device=self.device, dtype=self.dtype, requires_grad=self.requires_grad)
	self.inputs = [data]


	benchmark.register_benchmark_class(DynamicSimpleElementBench)