from __future__ import absolute_import, division, print_function, unicode_literals
import time
from collections import namedtuple

from torch.utils import ThroughputBenchmark

# Each forward call of the benchmarked module runs the measured op this many
# times; both benchmark paths below divide by it to report per-op latency.
NUM_LOOP_ITERS = 1000

BenchmarkConfig = namedtuple('BenchmarkConfig', 'num_warmup_iters num_iters')
ModuleConfig = namedtuple('ModuleConfig', 'pt_fn c2_op num_params graph_mode')
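# A hedged sketch of how these configs might be constructed. The values and
# field interpretations below are illustrative assumptions, not defaults from
# this file: judging by the names alone, `c2_op` looks like the name of an
# equivalent Caffe2 operator and `graph_mode` like a TorchScript toggle.
#
#     config = BenchmarkConfig(num_warmup_iters=2, num_iters=100)
#     module_config = ModuleConfig(pt_fn=torch.add, c2_op="Add",
#                                  num_params=2, graph_mode=False)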
def ms_to_us(time_ms):
    return time_ms * 1e3

def secs_to_us(time_s):
    return time_s * 1e6

def secs_to_ms(time_s):
    return time_s * 1e3

def benchmark_using_throughput_benchmark(config, module):
    """Average per-op latency (ms) measured via torch.utils.ThroughputBenchmark.

    `module` must expose the underlying module as `module.module` and its
    example inputs as `module.tensor_inputs`.
    """
    print("Benchmarking via ThroughputBenchmark")
    bench = ThroughputBenchmark(module.module)
    bench.add_input(*module.tensor_inputs)
    stats = bench.benchmark(1, config.num_warmup_iters, config.num_iters)
    return stats.latency_avg_ms / NUM_LOOP_ITERS

def benchmark_module(config, module, use_throughput_benchmark=False):
    """Return average per-op latency in milliseconds."""
    if use_throughput_benchmark:
        return benchmark_using_throughput_benchmark(config, module)
    # Warm up, then wall-clock time num_iters forward calls.
    module.forward(config.num_warmup_iters)
    print("Running module for {} iterations".format(config.num_iters))
    start = time.time()
    module.forward(config.num_iters)
    end = time.time()
    time_elapsed_s = end - start
    # Each forward(iters) call runs the op iters * NUM_LOOP_ITERS times.
    return secs_to_ms(time_elapsed_s) / config.num_iters / NUM_LOOP_ITERS
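
# The sketch below is a hypothetical usage example, not part of the original
# file: `LoopedAdd` and `ModuleWrapper` are illustrative stand-ins for the
# wrapper this utility is normally driven with. The only interface assumed is
# the one the functions above actually use: `.module`, `.tensor_inputs`, and
# `.forward(iters)`.
if __name__ == "__main__":
    import torch

    class LoopedAdd(torch.nn.Module):
        """Runs torch.add NUM_LOOP_ITERS times per call, matching the
        division by NUM_LOOP_ITERS in both benchmark paths above."""
        def forward(self, a, b):
            result = a
            for _ in range(NUM_LOOP_ITERS):
                result = torch.add(result, b)
            return result

    class ModuleWrapper(object):
        """Hypothetical wrapper exposing the attributes benchmark_module uses."""
        def __init__(self, module, tensor_inputs):
            self.module = module                # handed to ThroughputBenchmark
            self.tensor_inputs = tensor_inputs  # example inputs for the module

        def forward(self, iters):
            for _ in range(iters):
                self.module(*self.tensor_inputs)

    wrapper = ModuleWrapper(LoopedAdd(), (torch.randn(16), torch.randn(16)))
    config = BenchmarkConfig(num_warmup_iters=2, num_iters=100)
    print("wall-clock: {:.6f} ms/op".format(benchmark_module(config, wrapper)))
    print("throughput: {:.6f} ms/op".format(
        benchmark_module(config, wrapper, use_throughput_benchmark=True)))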