from __future__ import absolute_import, division, print_function, unicode_literals

import torch
import torch._C


class ThroughputBenchmark(object):
    '''
    This class is a wrapper around a C++ component throughput_benchmark::ThroughputBenchmark
    responsible for executing a PyTorch module (nn.Module or ScriptModule)
    under an inference-server-like load. It can emulate multiple threads calling into
    a single provided module. In the future we plan to enhance this component
    to support inter- and intra-op parallelism as well as multiple models
    running in a single process.

    Please note that even though nn.Module is supported, it might incur an overhead
    from the need to hold the GIL every time we execute Python code or pass around
    inputs as Python objects. As soon as you have a ScriptModule version of your
    model for inference deployment, it is better to switch to using it in this
    benchmark.

    Example::

        >>> from torch.utils import ThroughputBenchmark
        >>> bench = ThroughputBenchmark(my_module)
        >>> # Pre-populate the benchmark's data set with the inputs
        >>> for input in inputs:
        ...     # Both args and kwargs work, same as for any PyTorch Module / ScriptModule
        ...     bench.add_input(input[0], x2=input[1])
        >>> # Inputs supplied above are picked at random during the benchmark execution
        >>> stats = bench.benchmark(
        ...     num_calling_threads=4,
        ...     num_warmup_iters=100,
        ...     num_iters=1000,
        ... )
        >>> print("Avg latency (ms): {}".format(stats.latency_avg_ms))
        >>> print("Number of iterations: {}".format(stats.num_iters))
    '''

    def __init__(self, module):
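        '''
        Args:
            module (torch.nn.Module or torch.jit.ScriptModule): module to benchmark.
                A ScriptModule is unwrapped to its underlying C++ module (module._c)
                before being handed to the benchmark, so it can run without going
                through the Python interpreter.
        '''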
        if isinstance(module, torch.jit.ScriptModule):
            self._benchmark = torch._C.ThroughputBenchmark(module._c)
        else:
            self._benchmark = torch._C.ThroughputBenchmark(module)

    def run_once(self, *args, **kwargs):
        '''
        Run the module once with the given args and kwargs and return its prediction.
        This is useful for testing that the benchmark actually runs the module you
        want it to run. The inputs are passed through to the module in the same way
        as in the add_input() method.
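
        Example (a sketch, assuming ``my_module`` takes two tensor arguments, as in
        the class-level example above)::

            >>> bench = ThroughputBenchmark(my_module)
            >>> out = bench.run_once(torch.rand(1, 16), x2=torch.rand(1, 16))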
        '''
        return self._benchmark.run_once(*args, **kwargs)

    def add_input(self, *args, **kwargs):
        '''
        Store a single input for the module in the benchmark's memory and keep it
        there. During the benchmark execution every thread is going to pick up a
        random input from all the inputs ever supplied to the benchmark via
        this function.
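
        Example (a sketch, assuming the same two-argument module as in the
        class-level example above)::

            >>> bench.add_input(torch.rand(1, 16), x2=torch.rand(1, 16))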
        '''
        self._benchmark.add_input(*args, **kwargs)

    def benchmark(self, num_calling_threads=1, num_warmup_iters=10, num_iters=100):
        '''
        Args:
            num_calling_threads (int): Number of threads that call the module
                concurrently, emulating an inference-server-like load.

            num_warmup_iters (int): Warmup iterations are used to make sure we run the
                module a few times before actually measuring anything. This way we avoid
                cold caches and other similar problems. This is the number of warmup
                iterations performed by each of the calling threads separately.

            num_iters (int): Number of iterations the benchmark should run for. This
                number is separate from the warmup iterations and is shared across all
                calling threads. Once num_iters iterations have been completed across all
                threads, execution stops, although the total number of iterations might
                end up slightly larger; the actual count is reported as stats.num_iters,
                where stats is the object returned by this function.

        This function returns a BenchmarkExecutionStats object, which is defined via pybind11.
        It currently has two fields:
            - num_iters - number of iterations the benchmark has actually made
            - latency_avg_ms - average time it took to infer on one input example, in milliseconds
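
        Example (a sketch, reusing the ``bench`` object populated via add_input() in
        the class-level example above)::

            >>> stats = bench.benchmark(
            ...     num_calling_threads=4,
            ...     num_warmup_iters=100,
            ...     num_iters=1000,
            ... )
            >>> print(stats.latency_avg_ms)
            >>> print(stats.num_iters)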
        '''
        config = torch._C.BenchmarkConfig()
        config.num_calling_threads = num_calling_threads
        config.num_warmup_iters = num_warmup_iters
        config.num_iters = num_iters
        return self._benchmark.benchmark(config)