from __future__ import print_function

import pytest
import torch

from .runner import get_nn_runners
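
# Benchmarks for the fastrnns networks, driven by pytest-benchmark (the plugin
# that provides the `benchmark` fixture and the `pytest.mark.benchmark` marker
# used below). A typical invocation, assuming pytest-benchmark is installed and
# a CUDA device is available, would be something like:
#
#     pytest <path-to-this-file>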
default_rnns = ['cudnn', 'aten', 'jit', 'jit_premul', 'jit_premul_bias', 'jit_simple',
'jit_multilayer', 'py']
default_cnns = ['resnet18', 'resnet18_jit', 'resnet50', 'resnet50_jit']
all_nets = default_rnns + default_cnns


def pytest_generate_tests(metafunc):
    # Parametrize every test in TestBenchNetwork over the cross product of
    # network names and executor/fuser configurations.
if metafunc.cls.__name__ == "TestBenchNetwork":
metafunc.parametrize('net_name', all_nets, scope="class")
metafunc.parametrize("executor_and_fuser", ["legacy-old", "profiling-te"], scope="class")
def set_fuser(fuser_name):
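    # These torch._C._jit_* switches are internal, undocumented APIs.
    # 'te' routes fusion through the tensor-expression fuser, 'old' keeps the
    # legacy GPU fuser, and 'none' disables fusion altogether.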
if fuser_name == 'te':
torch._C._jit_override_can_fuse_on_cpu(False)
torch._C._jit_override_can_fuse_on_gpu(False)
torch._C._jit_set_texpr_fuser_enabled(True)
elif fuser_name == 'old':
torch._C._jit_override_can_fuse_on_gpu(True)
torch._C._jit_set_texpr_fuser_enabled(False)
elif fuser_name == 'none':
torch._C._jit_override_can_fuse_on_gpu(False)
torch._C._jit_override_can_fuse_on_cpu(False)
torch._C._jit_set_texpr_fuser_enabled(False)


def set_executor(executor_name):
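    # 'profiling' enables the profiling graph executor with a bounded bailout
    # depth; 'simple' keeps the profiling executor but turns profiling mode
    # off; 'legacy' selects the original (non-profiling) executor.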
if executor_name == 'profiling':
torch._C._jit_set_profiling_executor(True)
torch._C._jit_set_profiling_mode(True)
torch._C._jit_set_bailout_depth(20)
elif executor_name == 'simple':
torch._C._jit_set_profiling_executor(True)
torch._C._jit_set_profiling_mode(False)
elif executor_name == 'legacy':
torch._C._jit_set_profiling_executor(False)
torch._C._jit_set_profiling_mode(False)


@pytest.fixture(scope='class')
def modeldef(request, net_name, executor_and_fuser):
executor, fuser = executor_and_fuser.split("-")
set_executor(executor)
set_fuser(fuser)

    # Given the 'net_name' parametrized in pytest_generate_tests, build the
    # corresponding model runner.
name, rnn_creator, context = get_nn_runners(net_name)[0]
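    # get_nn_runners returns one (name, creator, context) entry per requested
    # network; only one net was requested, hence the [0].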
    creator_args = {
'seqLength': 100, 'numLayers': 1,
'inputSize': 512, 'hiddenSize': 512,
'miniBatch': 64, 'device': 'cuda', 'seed': None
}
return rnn_creator(**creator_args)
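

# CUDA kernels are launched asynchronously, so synchronize after invoking the
# function under test; otherwise the benchmark would time only the kernel
# launches rather than the actual GPU work.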
def cuda_sync(func, *args, **kwargs):
out = func(*args, **kwargs)
torch.cuda.synchronize()
return out


@pytest.mark.benchmark(
warmup=True,
warmup_iterations=3,
disable_gc=True,
max_time=0.1,
group="fastrnns",
)
class TestBenchNetwork:
    # See the 'modeldef' fixture above, which builds the network to benchmark.
def test_forward(self, modeldef, benchmark):
        benchmark(cuda_sync, modeldef.forward, *modeldef.inputs)

    def test_backward(self, modeldef, benchmark):
backward_input = modeldef.forward(*modeldef.inputs)
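        # backward_setup, when defined, maps the forward outputs to the
        # arguments that modeldef.backward expects (presumably the tensors to
        # differentiate along with their incoming gradients).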
if modeldef.backward_setup is not None:
backward_input = modeldef.backward_setup(backward_input)
if modeldef.backward is not None:
benchmark(cuda_sync, modeldef.backward, *backward_input, retain_graph=True)
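            # Verify that gradients were produced, then zero them so the next
            # benchmark iteration starts from a clean slate.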
for param in modeldef.params:
assert param.grad is not None
param.grad.data.zero_()