| import sys |
| import tempfile |
| import unittest |
| from copy import deepcopy |
| from itertools import product |
| from functools import reduce |
| from operator import mul |
| |
| |
| import torch |
| import torch.cuda |
| import torch.nn as nn |
| import torch.nn.functional as F |
| from torch.nn.functional import _Reduction |
| from common_utils import TestCase, to_gpu, freeze_rng_state, is_iterable, \ |
| TEST_WITH_ROCM |
| from common_cuda import TEST_CUDA |
| from torch.autograd.gradcheck import get_numerical_jacobian, iter_tensors |
| from torch.autograd import Variable |
| import torch.backends.cudnn |
| |
| |
| # The tarfile module tries to obtain a file object's name in Python 3.3, so use a named temporary file there. |
| if sys.version_info[:2] == (3, 3): |
| TemporaryFile = tempfile.NamedTemporaryFile |
| else: |
| TemporaryFile = tempfile.TemporaryFile |
| PRECISION = 1e-5 |
| |
| |
| def get_reduction(m): |
| result = getattr(m, 'reduction', None) |
| if result is None: |
| result = _Reduction.legacy_get_string(getattr(m, 'sizeAverage', None), True, emit_warning=False) |
| assert result is not None |
| return result |
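| # Illustrative example (assumes the standard loss-module attributes): |
| # get_reduction(nn.L1Loss(reduction='none')) -> 'none', while a legacy module that |
| # only exposes `sizeAverage=False` falls back to legacy_get_string and maps to 'sum'. |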
| |
| |
| def get_weight(m): |
| result = getattr(m, 'weight', None) |
| if result is not None: |
| return result |
| return getattr(m, 'weights', None) |
| |
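| # Each entry below is consumed by the NN test harness (test_nn.py). Roughly: `module_name` plus |
| # `constructor_args` (or `fullname` plus `constructor`) say how to build the module, `input_size` or |
| # `input_fn` how to build its input, `reference_fn` gives an optional ground-truth computation, and |
| # `desc` disambiguates multiple configurations of the same module. |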
| module_tests = [ |
| dict( |
| module_name='Linear', |
| constructor_args=(10, 8), |
| input_size=(4, 10), |
| reference_fn=lambda i, p, _: torch.mm(i, p[0].t()) + p[1].view(1, -1).expand(4, 8), |
| ), |
| dict( |
| module_name='Linear', |
| constructor_args=(10, 8, False), |
| input_size=(4, 10), |
| desc='no_bias', |
| reference_fn=lambda i, p, _: torch.mm(i, p[0].t()) |
| ), |
| dict( |
| module_name='Threshold', |
| constructor_args=(2., 1.), |
| input_size=(2, 3, 4, 5), |
| check_inplace=True, |
| desc='threshold_value' |
| ), |
| dict( |
| module_name='Threshold', |
| constructor_args=(2., 10.), |
| input_size=(2, 3, 4, 5), |
| desc='large_value' |
| ), |
| dict( |
| module_name='ReLU', |
| input_size=(2, 3, 4, 5), |
| check_inplace=True, |
| ), |
| dict( |
| module_name='ReLU6', |
| input_size=(2, 3, 4, 5), |
| check_inplace=True, |
| ), |
| dict( |
| module_name='RReLU', |
| input_size=(1, 2, 2), |
| test_cuda=False, |
| ), |
| dict( |
| module_name='RReLU', |
| constructor_args=(0.1, 0.9), |
| input_size=(4, 4, 5), |
| desc='with_up_down', |
| test_cuda=False, |
| ), |
| dict( |
| module_name='Hardtanh', |
| input_size=(3, 2, 5), |
| reference_fn=lambda i, *_: i.clamp(-1, 1), |
| ), |
| dict( |
| module_name='Sigmoid', |
| input_size=(2, 3, 4, 5) |
| ), |
| dict( |
| module_name='Tanh', |
| input_size=(2, 3, 4, 5) |
| ), |
| dict( |
| module_name='Softmax', |
| constructor_args=(1,), |
| input_size=(10, 20), |
| reference_fn=lambda i, *_: torch.exp(i).div(torch.exp(i).sum(1, True).expand(10, 20)), |
| ), |
| dict( |
| module_name='Softmax2d', |
| input_size=(1, 3, 10, 20), |
| reference_fn=lambda i, *_: torch.exp(i).div(torch.exp(i).sum(1, False)), |
| ), |
| dict( |
| module_name='LogSoftmax', |
| constructor_args=(1,), |
| input_size=(10, 20), |
| reference_fn=lambda i, *_: torch.exp(i).div_(torch.exp(i).sum(1, True).expand(10, 20)).log_(), |
| ), |
| dict( |
| module_name='LogSoftmax', |
| constructor_args=(1,), |
| input_size=(1, 3, 10, 20), |
| reference_fn=lambda i, *_: torch.exp(i).div_(torch.exp(i).sum(1, False)).log_(), |
| desc='multiparam', |
| ), |
| dict( |
| module_name='ELU', |
| constructor_args=(2.,), |
| input_size=(3, 2, 5), |
| reference_fn=lambda x, *_: torch.where(x >= 0, x, 2 * (x.exp() - 1)), |
| ), |
| # TODO: reference function |
| dict( |
| module_name='Hardshrink', |
| constructor_args=(2.,), |
| input_size=(4, 3, 2, 4), |
| ), |
| dict( |
| module_name='LeakyReLU', |
| input_size=(3, 2, 5), |
| check_inplace=True |
| ), |
| dict( |
| module_name='LeakyReLU', |
| constructor_args=(0.5,), |
| input_size=(3, 2, 5), |
| check_inplace=True, |
| desc='with_negval' |
| ), |
| dict( |
| module_name='LogSigmoid', |
| input_size=(2, 3, 4), |
| reference_fn=lambda i, *_: i.sigmoid().log(), |
| ), |
| dict( |
| module_name='Softplus', |
| input_size=(10, 20), |
| reference_fn=lambda i, *_: torch.log(1 + torch.exp(i)), |
| ), |
| dict( |
| module_name='Softplus', |
| constructor_args=(2,), |
| input_size=(10, 20), |
| reference_fn=lambda i, *_: 1. / 2. * torch.log(1 + torch.exp(2 * i)), |
| desc='beta', |
| ), |
| dict( |
| module_name='Softplus', |
| constructor_args=(2, -100), |
| input_size=(10, 20), |
| reference_fn=(lambda i, *_: ((i * 2) > -100).type_as(i) * i + |
| ((i * 2) <= -100).type_as(i) * 1. / 2. * torch.log(1 + torch.exp(2 * i))), |
| desc='beta_threshold', |
| ), |
| dict( |
| module_name='Softshrink', |
| input_size=(3, 2, 5), |
| ), |
| dict( |
| module_name='Softshrink', |
| constructor_args=(1,), |
| input_size=(3, 2, 5), |
| desc='lambda', |
| ), |
| dict( |
| module_name='CrossMapLRN2d', |
| constructor_args=(5, 5e-3, 1e-3, 2), |
| input_size=(2, 3, 6, 6), |
| check_gradgrad=False, |
| ), |
| dict( |
| module_name='PReLU', |
| input_size=(2, 3, 4), |
| reference_fn=lambda i, p, _: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0], |
| desc='1d', |
| ), |
| dict( |
| module_name='PReLU', |
| constructor_args=(3,), |
| input_size=(2, 3, 4), |
| desc='1d_multiparam', |
| reference_fn=lambda i, p, _: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0], |
| ), |
| dict( |
| module_name='PReLU', |
| input_size=(2, 3, 4, 5), |
| desc='2d', |
| reference_fn=lambda i, p, _: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0], |
| ), |
| dict( |
| module_name='PReLU', |
| constructor_args=(3,), |
| input_size=(2, 3, 4, 5), |
| desc='2d_multiparam', |
| reference_fn=lambda i, p, _: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0], |
| ), |
| dict( |
| module_name='PReLU', |
| input_size=(2, 3, 4, 5, 6), |
| reference_fn=lambda i, p, _: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0], |
| desc='3d', |
| ), |
| dict( |
| module_name='PReLU', |
| constructor_args=(3,), |
| input_size=(2, 3, 4, 5, 6), |
| desc='3d_multiparam', |
| reference_fn=lambda i, p, _: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0], |
| ), |
| dict( |
| module_name='Softsign', |
| input_size=(3, 2, 5), |
| reference_fn=lambda i, *_: i.div(1 + torch.abs(i)), |
| ), |
| dict( |
| module_name='Softmin', |
| constructor_args=(1,), |
| input_size=(10, 20), |
| ), |
| dict( |
| module_name='Softmin', |
| constructor_args=(1,), |
| input_size=(2, 3, 5, 10), |
| desc='multidim', |
| ), |
| dict( |
| module_name='Tanhshrink', |
| input_size=(2, 3, 4, 5), |
| ), |
| ] |
| |
| |
| # Generates a random tensor whose values are all distinct. This ensures that |
| # duplicate values won't cause test failures for modules like MaxPooling. |
| # The size should be small; otherwise randperm fails / long overflows. |
| def _rand_tensor_non_equal(*size): |
| total = reduce(mul, size, 1) |
| return torch.randperm(total).view(*size).double() |
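| # Illustrative example: _rand_tensor_non_equal(2, 3) might return tensor([[4., 0., 5.], [2., 1., 3.]]) -- |
| # a random permutation of 0..5 reshaped to the requested size, so no two entries are equal. |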
| |
| |
| def wrap_functional(fn, **kwargs): |
| class FunctionalModule(nn.Module): |
| def forward(self, *args): |
| return fn(*args, **kwargs) |
| return FunctionalModule |
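| # wrap_functional turns a functional call into a throwaway nn.Module subclass so the module test |
| # harness can drive it like any other module. Illustrative sketch of how the loss tests below use it: |
| # m = wrap_functional(lambda i: F.l1_loss(i, t.type_as(i), reduction='none'))() |
| # out = m(input) # same as F.l1_loss(input, t.type_as(input), reduction='none') |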
| |
| |
| def poissonnllloss_no_reduce_test(): |
| t = torch.randn(10, 10) |
| return dict( |
| fullname='PoissonNLLLoss_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.poisson_nll_loss(i, t.type_as(i), reduction='none')), |
| input_fn=lambda: torch.rand(10, 10), |
| pickle=False) |
| |
| |
| def bceloss_no_reduce_test(): |
| t = Variable(torch.randn(15, 10).gt(0).double()) |
| return dict( |
| fullname='BCELoss_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.binary_cross_entropy(i, t.type_as(i), reduction='none')), |
| input_fn=lambda: torch.rand(15, 10).clamp_(2.8e-2, 1 - 2.8e-2), |
| reference_fn=lambda i, *_: -(t * i.log() + (1 - t) * (1 - i).log()), |
| check_gradgrad=False, |
| pickle=False) |
| |
| |
| def bceloss_no_reduce_scalar_test(): |
| t = torch.randn(()).gt(0).double() |
| return dict( |
| fullname='BCELoss_no_reduce_scalar', |
| constructor=wrap_functional( |
| lambda i: F.binary_cross_entropy(i, t.type_as(i), reduction='none')), |
| input_fn=lambda: torch.rand(()).clamp_(2.8e-2, 1 - 2.8e-2), |
| reference_fn=lambda i, *_: -(t * i.log() + (1 - t) * (1 - i).log()), |
| check_gradgrad=False, |
| pickle=False) |
| |
| |
| def bceloss_weights_no_reduce_test(): |
| t = Variable(torch.randn(15, 10).gt(0).double()) |
| weights = torch.rand(10) |
| return dict( |
| fullname='BCELoss_weights_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.binary_cross_entropy(i, t.type_as(i), |
| weight=weights.type_as(i), reduction='none')), |
| input_fn=lambda: torch.rand(15, 10).clamp_(2.8e-2, 1 - 2.8e-2), |
| reference_fn=lambda i, p, m: -(t * i.log() + (1 - t) * (1 - i).log()) * weights, |
| check_gradgrad=False, |
| pickle=False |
| ) |
| |
| |
| def bceloss_weights_no_reduce_scalar_test(): |
| t = torch.randn(()).double() |
| weights = torch.rand(()) |
| return dict( |
| fullname='BCELoss_weights_no_reduce_scalar', |
| constructor=wrap_functional( |
| lambda i: F.binary_cross_entropy(i, t.type_as(i), |
| weight=weights.type_as(i), reduction='none')), |
| input_fn=lambda: torch.rand(()).clamp_(2.8e-2, 1 - 2.8e-2), |
| reference_fn=lambda i, *_: -(t * i.log() + (1 - t) * (1 - i).log()) * weights, |
| check_gradgrad=False, |
| pickle=False |
| ) |
| |
| |
| def bce_with_logistic_legacy_enum_test(): |
| t = Variable(torch.randn(15, 10).gt(0).double()) |
| sigmoid = nn.Sigmoid() |
| return dict( |
| fullname='BCEWithLogitsLoss_legacy_enum', |
| constructor=wrap_functional( |
| lambda i: F.binary_cross_entropy_with_logits(i, t.type_as(i), reduce=False)), |
| input_fn=lambda: torch.rand(15, 10).clamp_(2.8e-2, 1 - 2.8e-2), |
| reference_fn=lambda i, *_: -(t * sigmoid(i).log() + (1 - t) * (1 - sigmoid(i)).log()), |
| check_gradgrad=False, |
| pickle=False, |
| ) |
| |
| |
| def bce_with_logistic_no_reduce_test(): |
| t = Variable(torch.randn(15, 10).gt(0).double()) |
| sigmoid = nn.Sigmoid() |
| return dict( |
| fullname='BCEWithLogitsLoss_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.binary_cross_entropy_with_logits(i, t.type_as(i), reduction='none')), |
| input_fn=lambda: torch.rand(15, 10).clamp_(2.8e-2, 1 - 2.8e-2), |
| reference_fn=lambda i, *_: -(t * sigmoid(i).log() + (1 - t) * (1 - sigmoid(i)).log()), |
| check_gradgrad=False, |
| pickle=False, |
| ) |
| |
| |
| def bce_with_logistic_no_reduce_scalar_test(): |
| t = torch.randn(()).gt(0).double() |
| sigmoid = nn.Sigmoid() |
| return dict( |
| fullname='BCEWithLogitsLoss_no_reduce_scalar', |
| constructor=wrap_functional( |
| lambda i: F.binary_cross_entropy_with_logits(i, t.type_as(i), reduction='none')), |
| input_fn=lambda: torch.rand(()).clamp_(2.8e-2, 1 - 2.8e-2), |
| reference_fn=lambda i, *_: -(t * sigmoid(i).log() + (1 - t) * (1 - sigmoid(i)).log()), |
| check_gradgrad=False, |
| pickle=False |
| ) |
| |
| |
| def kldivloss_with_target_no_reduce_test(): |
| i = torch.rand(10, 10).log() |
| return dict( |
| fullname='KLDivLoss_with_target_no_reduce', |
| constructor=wrap_functional( |
| lambda t: F.kl_div(i.type_as(t), t, reduction='none')), |
| input_fn=lambda: torch.rand(10, 10), |
| reference_fn=lambda t, *_: |
| loss_reference_fns['KLDivLoss'](i.type_as(t), t, reduction='none'), |
| pickle=False) |
| |
| |
| def kldivloss_no_reduce_test(): |
| t = torch.randn(10, 10) |
| return dict( |
| fullname='KLDivLoss_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.kl_div(i, t.type_as(i), reduction='none')), |
| input_fn=lambda: torch.rand(10, 10).log(), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['KLDivLoss'](i, t.type_as(i), reduction='none'), |
| pickle=False, |
| ) |
| |
| |
| def kldivloss_no_reduce_scalar_test(): |
| t = torch.randn(()) |
| return dict( |
| fullname='KLDivLoss_no_reduce_scalar', |
| constructor=wrap_functional( |
| lambda i: F.kl_div(i, t.type_as(i), reduction='none')), |
| input_fn=lambda: torch.rand(()).log(), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['KLDivLoss'](i, t.type_as(i), reduction='none'), |
| pickle=False) |
| |
| |
| def l1loss_no_reduce_test(): |
| t = torch.randn(2, 3, 4) |
| return dict( |
| fullname='L1Loss_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.l1_loss(i, t.type_as(i), reduction='none')), |
| input_fn=lambda: torch.randn(2, 3, 4), |
| reference_fn=lambda i, *_: (i - t.type_as(i)).abs(), |
| pickle=False) |
| |
| |
| def l1loss_no_reduce_scalar_test(): |
| t = torch.randn(()) |
| return dict( |
| fullname='L1Loss_no_reduce_scalar', |
| constructor=wrap_functional( |
| lambda i: F.l1_loss(i, t.type_as(i), reduction='none')), |
| input_fn=lambda: torch.randn(()), |
| reference_fn=lambda i, *_: (i - t.type_as(i)).abs(), |
| pickle=False) |
| |
| |
| def mseloss_no_reduce_test(): |
| input_size = (2, 3, 4, 5) |
| target = torch.randn(*input_size) |
| return dict( |
| fullname='MSELoss_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.mse_loss(i, target.type_as(i), reduction='none')), |
| input_size=input_size, |
| reference_fn=lambda i, *_: (i - target).pow(2), |
| pickle=False) |
| |
| |
| def mseloss_no_reduce_scalar_test(): |
| input_size = () |
| target = torch.randn(input_size) |
| return dict( |
| fullname='MSELoss_no_reduce_scalar', |
| constructor=wrap_functional( |
| lambda i: F.mse_loss(i, target.type_as(i), reduction='none')), |
| input_size=input_size, |
| reference_fn=lambda i, *_: (i - target).pow(2), |
| pickle=False) |
| |
| |
| def nllloss_no_reduce_test(): |
| t = Variable(torch.Tensor(15).uniform_().mul(10).floor().long()) |
| kwargs = {'reduction': 'none'} |
| return dict( |
| fullname='NLLLoss_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)), |
| input_fn=lambda: torch.rand(15, 10).log(), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['NLLLoss'](i, t.type_as(i).long(), **kwargs), |
| pickle=False) |
| |
| |
| def nllloss_no_reduce_ignore_index_test(): |
| t = Variable(torch.Tensor(15).uniform_().mul(10).floor().long()) |
| kwargs = {'ignore_index': 2, 'reduction': 'none'} |
| return dict( |
| fullname='NLLLoss_no_reduce_ignore_index', |
| constructor=wrap_functional( |
| lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)), |
| input_fn=lambda: torch.rand(15, 10).log(), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['NLLLoss'](i, t.type_as(i).long(), **kwargs), |
| pickle=False) |
| |
| |
| def nllloss_no_reduce_weights_test(): |
| t = Variable(torch.Tensor(15).uniform_().mul(10).floor().long()) |
| weight = torch.rand(10) |
| |
| def kwargs(i): |
| return {'weight': weight.type_as(i), 'reduction': 'none'} |
| |
| return dict( |
| fullname='NLLLoss_no_reduce_weights', |
| constructor=wrap_functional( |
| lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i))), |
| input_fn=lambda: torch.rand(15, 10).add(1e-2).log(), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['NLLLoss'](i, t.type_as(i).long(), **kwargs(i)), |
| pickle=False) |
| |
| |
| def nllloss_no_reduce_weights_ignore_index_test(): |
| t = Variable(torch.Tensor(15).uniform_().mul(10).floor().long()) |
| weight = torch.rand(10) |
| |
| def kwargs(i): |
| return {'weight': weight.type_as(i), 'reduction': 'none', |
| 'ignore_index': 2} |
| |
| return dict( |
| fullname='NLLLoss_no_reduce_weights_ignore_index', |
| constructor=wrap_functional( |
| lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i))), |
| input_fn=lambda: torch.rand(15, 10).add(1e-2).log(), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['NLLLoss'](i, t.type_as(i).long(), **kwargs(i)), |
| pickle=False) |
| |
| |
| def nllloss_no_reduce_weights_ignore_index_neg_test(): |
| t = Variable(torch.Tensor(15).uniform_().mul(10).floor().long()) |
| weight = torch.rand(10) |
| |
| def kwargs(i): |
| return {'weight': weight.type_as(i), 'reduction': 'none', |
| 'ignore_index': -1} |
| |
| return dict( |
| fullname='NLLLoss_no_reduce_weights_ignore_index_neg', |
| constructor=wrap_functional( |
| lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i))), |
| input=torch.rand(15, 10).add(1e-2).log(), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['NLLLoss'](i, t.type_as(i).long(), **kwargs(i)), |
| pickle=False) |
| |
| |
| def nllloss2d_no_reduce_test(): |
| t = Variable(torch.rand(2, 5, 5).mul(3).floor().long()) |
| kwargs = {'reduction': 'none'} |
| return dict( |
| fullname='NLLLoss2d_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)), |
| input_fn=lambda: torch.rand(2, 3, 5, 5).log(), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs), |
| pickle=False) |
| |
| |
| def nllloss2d_no_reduce_ignore_index_test(): |
| t = Variable(torch.rand(2, 5, 5).mul(3).floor().long()) |
| kwargs = {'ignore_index': 1, 'reduction': 'none'} |
| return dict( |
| fullname='NLLLoss2d_no_reduce_ignore_index', |
| constructor=wrap_functional( |
| lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)), |
| input_fn=lambda: torch.rand(2, 3, 5, 5).log(), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs), |
| pickle=False) |
| |
| |
| def nllloss2d_no_reduce_weights_test(): |
| t = Variable(torch.rand(2, 5, 5).mul(3).floor().long()) |
| weight = torch.rand(3) |
| |
| def kwargs(i): |
| return {'weight': weight.type_as(i), 'reduction': 'none'} |
| |
| return dict( |
| fullname='NLLLoss2d_no_reduce_weights', |
| constructor=wrap_functional( |
| lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i))), |
| input_fn=lambda: torch.rand(2, 3, 5, 5).log(), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs(i)), |
| pickle=False) |
| |
| |
| def nlllossNd_no_reduce_test(): |
| t = Variable(torch.rand(2, 5, 5, 2, 2).mul(3).floor().long()) |
| kwargs = {'reduction': 'none'} |
| return dict( |
| fullname='NLLLossNd_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)), |
| input_fn=lambda: torch.rand(2, 3, 5, 5, 2, 2).log(), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs), |
| pickle=False) |
| |
| |
| def nlllossNd_no_reduce_ignore_index_test(): |
| t = Variable(torch.rand(2, 5, 5, 2, 2).mul(3).floor().long()) |
| kwargs = {'ignore_index': 1, 'reduction': 'none'} |
| return dict( |
| fullname='NLLLossNd_no_reduce_ignore_index', |
| constructor=wrap_functional( |
| lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)), |
| input_fn=lambda: torch.rand(2, 3, 5, 5, 2, 2).log(), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs), |
| pickle=False) |
| |
| |
| def nlllossNd_no_reduce_weights_test(): |
| t = Variable(torch.rand(2, 5, 5, 2, 2).mul(3).floor().long()) |
| weight = torch.rand(3) |
| |
| def kwargs(i): |
| return {'weight': weight.type_as(i), 'reduction': 'none'} |
| |
| return dict( |
| fullname='NLLLossNd_no_reduce_weights', |
| constructor=wrap_functional( |
| lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i))), |
| input_fn=lambda: torch.rand(2, 3, 5, 5, 2, 2).log(), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs(i)), |
| pickle=False) |
| |
| |
| def smoothl1loss_no_reduce_test(): |
| t = torch.randn(2, 3, 4) |
| return dict( |
| fullname='SmoothL1Loss_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.smooth_l1_loss(i, t.type_as(i), reduction='none')), |
| input_fn=lambda: torch.randn(2, 3, 4), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['SmoothL1Loss'](i, t.type_as(i), reduction='none'), |
| pickle=False) |
| |
| |
| def smoothl1loss_no_reduce_scalar_test(): |
| t = torch.randn(()) |
| return dict( |
| fullname='SmoothL1Loss_no_reduce_scalar', |
| constructor=wrap_functional( |
| lambda i: F.smooth_l1_loss(i, t.type_as(i), reduction='none')), |
| input_fn=lambda: torch.randn(()), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['SmoothL1Loss'](i, t.type_as(i), reduction='none'), |
| pickle=False) |
| |
| |
| def multilabelmarginloss_1d_no_reduce_test(): |
| t = Variable(torch.rand(10).mul(10).floor().long()) |
| return dict( |
| fullname='MultiLabelMarginLoss_1d_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.multilabel_margin_loss(i, t.type_as(i).long(), reduction='none')), |
| input_fn=lambda: torch.randn(10), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['MultiLabelMarginLoss'](i, t.data.type_as(i).long(), reduction='none'), |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| pickle=False) |
| |
| |
| def multilabelmarginloss_index_neg_test(): |
| t = Variable(torch.clamp(torch.rand(5, 10).add(-.5).mul(20).floor().long(), min=-1)) |
| return dict( |
| fullname='MultiLabelMarginLoss_index_neg', |
| constructor=wrap_functional( |
| lambda i: F.multilabel_margin_loss(i, t.type_as(i).long(), reduction='none')), |
| input_fn=lambda: torch.randn(5, 10), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['MultiLabelMarginLoss'](i, t.data.type_as(i).long(), reduction='none'), |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| pickle=False) |
| |
| |
| def multilabelmarginloss_no_reduce_test(): |
| t = Variable(torch.rand(5, 10).mul(10).floor().long()) |
| return dict( |
| fullname='MultiLabelMarginLoss_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.multilabel_margin_loss(i, t.type_as(i).long(), reduction='none')), |
| input_fn=lambda: torch.randn(5, 10), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['MultiLabelMarginLoss'](i, t.data.type_as(i).long(), reduction='none'), |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| pickle=False) |
| |
| |
| def hingeembeddingloss_no_reduce_test(): |
| t = Variable(torch.randn(10).gt(0).double().mul_(2).sub(1)) |
| return dict( |
| fullname='HingeEmbeddingLoss_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.hinge_embedding_loss(i, t.type_as(i), reduction='none')), |
| input_fn=lambda: torch.randn(10), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['HingeEmbeddingLoss'](i, t.type_as(i), reduction='none'), |
| check_sum_reduction=True, |
| pickle=False) |
| |
| |
| def hingeembeddingloss_margin_no_reduce_test(): |
| t = Variable(torch.randn(10).gt(0).double().mul_(2).sub(1)) |
| return dict( |
| fullname='HingeEmbeddingLoss_margin_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.hinge_embedding_loss(i, t.type_as(i), margin=0.5, reduction='none')), |
| input_fn=lambda: torch.randn(10), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['HingeEmbeddingLoss'](i, t.type_as(i), margin=0.5, reduction='none'), |
| check_sum_reduction=True, |
| pickle=False) |
| |
| |
| def softmarginloss_no_reduce_test(): |
| t = torch.randn(5, 5) |
| return dict( |
| fullname='SoftMarginLoss_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.soft_margin_loss(i, t.type_as(i), reduction='none')), |
| input_fn=lambda: torch.randn(5, 5), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['SoftMarginLoss'](i, t.type_as(i), reduction='none'), |
| pickle=False) |
| |
| |
| def multilabelsoftmarginloss_no_reduce_test(): |
| t = torch.rand(5, 10).mul(2).floor() |
| return dict( |
| fullname='MultiLabelSoftMarginLoss_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.multilabel_soft_margin_loss(i, t.type_as(i), reduction='none')), |
| input_fn=lambda: torch.randn(5, 10), |
| reference_fn=lambda i, *_: |
| (-(t * i.sigmoid().log() + (1 - t) * (-i).sigmoid().log())).sum(dim=1) / i.size(1), |
| check_gradgrad=False, |
| pickle=False) |
| |
| |
| def multilabelsoftmarginloss_weights_no_reduce_test(): |
| t = torch.rand(5, 10).mul(2).floor() |
| weights = torch.rand(10) |
| return dict( |
| fullname='MultiLabelSoftMarginLoss_weights_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.multilabel_soft_margin_loss(i, t.type_as(i), |
| weight=weights.type_as(i), reduction='none')), |
| input_fn=lambda: torch.randn(5, 10), |
| reference_fn=lambda i, *_: |
| (-(t * i.sigmoid().log() + (1 - t) * (-i).sigmoid().log()) * weights).sum(dim=1) / i.size(1), |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| pickle=False) |
| |
| |
| def multimarginloss_no_reduce_test(): |
| t = torch.rand(5).mul(8).floor().long() |
| return dict( |
| fullname='MultiMarginLoss_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.multi_margin_loss(i, t.type_as(i).long(), reduction='none')), |
| input_fn=lambda: torch.randn(5, 10), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['MultiMarginLoss'](i, t.data.type_as(i).long(), reduction='none'), |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| pickle=False) |
| |
| |
| def multimarginloss_1d_no_reduce_test(): |
| t = torch.rand(1).mul(8).floor().long() |
| return dict( |
| fullname='MultiMarginLoss_1d_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.multi_margin_loss(i, t.type_as(i).long(), reduction='none')), |
| input_fn=lambda: torch.randn(10), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['MultiMarginLoss'](i, t.data.type_as(i).long(), reduction='none'), |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| pickle=False) |
| |
| |
| def multimarginloss_p_no_reduce_test(): |
| t = torch.rand(5).mul(8).floor().long() |
| return dict( |
| fullname='MultiMarginLoss_p_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.multi_margin_loss(i, t.type_as(i).long(), p=2, reduction='none')), |
| input_fn=lambda: torch.randn(5, 10).clamp_(1e-2, 1 - 1e-2), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['MultiMarginLoss'](i, t.data.type_as(i).long(), p=2, reduction='none'), |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| pickle=False) |
| |
| |
| def multimarginloss_margin_no_reduce_test(): |
| t = torch.rand(5).mul(8).floor().long() |
| return dict( |
| fullname='MultiMarginLoss_margin_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.multi_margin_loss(i, t.type_as(i).long(), margin=0.5, reduction='none')), |
| input_fn=lambda: torch.randn(5, 10), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['MultiMarginLoss'](i, t.data.type_as(i).long(), |
| margin=0.5, reduction='none'), |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| pickle=False) |
| |
| |
| def multimarginloss_weights_no_reduce_test(): |
| t = torch.rand(5).mul(8).floor().long() |
| weights = torch.rand(10) |
| return dict( |
| fullname='MultiMarginLoss_weights_no_reduce', |
| constructor=wrap_functional( |
| lambda i: F.multi_margin_loss(i, t.type_as(i).long(), weight=weights.type_as(i), |
| reduction='none')), |
| input_fn=lambda: torch.randn(5, 10), |
| reference_fn=lambda i, *_: |
| loss_reference_fns['MultiMarginLoss'](i, t.data.type_as(i).long(), |
| weight=weights, reduction='none'), |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| pickle=False) |
| |
| |
| def fractional_max_pool2d_test(test_case): |
| random_samples = torch.DoubleTensor(1, 3, 2).uniform_() |
| if test_case == 'ratio': |
| return dict( |
| constructor=lambda: nn.FractionalMaxPool2d( |
| 2, output_ratio=0.5, _random_samples=random_samples), |
| input_size=(1, 3, 5, 7), |
| fullname='FractionalMaxPool2d_ratio') |
| elif test_case == 'size': |
| return dict( |
| constructor=lambda: nn.FractionalMaxPool2d((2, 3), output_size=( |
| 4, 3), _random_samples=random_samples), |
| input_size=(1, 3, 7, 6), |
| fullname='FractionalMaxPool2d_size') |
| |
| |
| def fractional_max_pool3d_test(test_case): |
| random_samples = torch.DoubleTensor(2, 4, 3).uniform_() |
| if test_case == 'ratio': |
| return dict( |
| constructor=lambda: nn.FractionalMaxPool3d( |
| 2, output_ratio=0.5, _random_samples=random_samples), |
| input_size=(2, 4, 5, 5, 5), |
| fullname='FractionalMaxPool3d_ratio') |
| elif test_case == 'size': |
| return dict( |
| constructor=lambda: nn.FractionalMaxPool3d((2, 2, 2), output_size=( |
| 4, 4, 4), _random_samples=random_samples), |
| input_size=(2, 4, 7, 7, 7), |
| fullname='FractionalMaxPool3d_size') |
| elif test_case == 'asymsize': |
| return dict( |
| constructor=lambda: nn.FractionalMaxPool3d((4, 2, 3), output_size=( |
| 10, 3, 2), _random_samples=random_samples), |
| input_size=(2, 4, 16, 7, 5), |
| fullname='FractionalMaxPool3d_asymsize') |
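| # Note: `_random_samples` pins the otherwise random pooling-region offsets of FractionalMaxPool2d/3d, |
| # so the constructors above produce deterministic outputs across runs of the same test. |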
| |
| |
| new_module_tests = [ |
| poissonnllloss_no_reduce_test(), |
| bceloss_no_reduce_test(), |
| bceloss_weights_no_reduce_test(), |
| bce_with_logistic_legacy_enum_test(), |
| bce_with_logistic_no_reduce_test(), |
| bceloss_no_reduce_scalar_test(), |
| bceloss_weights_no_reduce_scalar_test(), |
| bce_with_logistic_no_reduce_scalar_test(), |
| kldivloss_with_target_no_reduce_test(), |
| kldivloss_no_reduce_test(), |
| kldivloss_no_reduce_scalar_test(), |
| l1loss_no_reduce_test(), |
| l1loss_no_reduce_scalar_test(), |
| mseloss_no_reduce_test(), |
| mseloss_no_reduce_scalar_test(), |
| nllloss_no_reduce_test(), |
| nllloss_no_reduce_ignore_index_test(), |
| nllloss_no_reduce_weights_test(), |
| nllloss_no_reduce_weights_ignore_index_test(), |
| nllloss_no_reduce_weights_ignore_index_neg_test(), |
| nllloss2d_no_reduce_test(), |
| nllloss2d_no_reduce_weights_test(), |
| nllloss2d_no_reduce_ignore_index_test(), |
| nlllossNd_no_reduce_test(), |
| nlllossNd_no_reduce_weights_test(), |
| nlllossNd_no_reduce_ignore_index_test(), |
| smoothl1loss_no_reduce_test(), |
| smoothl1loss_no_reduce_scalar_test(), |
| multilabelmarginloss_1d_no_reduce_test(), |
| multilabelmarginloss_index_neg_test(), |
| multilabelmarginloss_no_reduce_test(), |
| hingeembeddingloss_no_reduce_test(), |
| hingeembeddingloss_margin_no_reduce_test(), |
| softmarginloss_no_reduce_test(), |
| multilabelsoftmarginloss_no_reduce_test(), |
| multilabelsoftmarginloss_weights_no_reduce_test(), |
| multimarginloss_no_reduce_test(), |
| multimarginloss_1d_no_reduce_test(), |
| multimarginloss_p_no_reduce_test(), |
| multimarginloss_margin_no_reduce_test(), |
| multimarginloss_weights_no_reduce_test(), |
| fractional_max_pool2d_test('ratio'), |
| fractional_max_pool2d_test('size'), |
| fractional_max_pool3d_test('ratio'), |
| fractional_max_pool3d_test('size'), |
| fractional_max_pool3d_test('asymsize'), |
| dict( |
| module_name='BatchNorm1d', |
| constructor_args=(10,), |
| input_size=(4, 10), |
| cudnn=True, |
| check_eval=True, |
| desc='affine', |
| test_cuda=(not TEST_WITH_ROCM), |
| ), |
| dict( |
| module_name='BatchNorm1d', |
| constructor_args=(5,), |
| input_size=(4, 5, 3), |
| cudnn=True, |
| check_eval=True, |
| desc='3d_input', |
| ), |
| dict( |
| module_name='BatchNorm1d', |
| constructor_args=(10, 1e-3, None), |
| input_size=(4, 10), |
| cudnn=True, |
| check_eval=True, |
| desc='affine_simple_average', |
| test_cuda=(not TEST_WITH_ROCM), |
| ), |
| dict( |
| module_name='BatchNorm1d', |
| constructor_args=(10, 1e-3, 0.3, False), |
| input_size=(4, 10), |
| cudnn=True, |
| check_eval=True, |
| desc='not_affine', |
| ), |
| dict( |
| module_name='BatchNorm1d', |
| constructor_args=(10, 1e-3, 0.3, True, False), |
| input_size=(4, 10), |
| cudnn=True, |
| check_eval=True, |
| desc='not_tracking_stats', |
| test_cuda=(not TEST_WITH_ROCM), |
| ), |
| dict( |
| module_name='BatchNorm1d', |
| constructor_args=(5, 1e-3, 0.3, False), |
| input_size=(4, 5, 3), |
| cudnn=True, |
| check_eval=True, |
| desc='3d_input_not_affine', |
| ), |
| dict( |
| module_name='BatchNorm2d', |
| constructor_args=(3,), |
| input_size=(2, 3, 6, 6), |
| cudnn=True, |
| check_eval=True, |
| ), |
| dict( |
| module_name='BatchNorm2d', |
| constructor_args=(3, 1e-3, None), |
| input_size=(2, 3, 6, 6), |
| cudnn=True, |
| check_eval=True, |
| desc='2d_simple_average', |
| ), |
| dict( |
| module_name='BatchNorm2d', |
| constructor_args=(3, 1e-3, 0.8), |
| input_size=(2, 3, 6, 6), |
| cudnn=True, |
| check_eval=True, |
| desc='momentum', |
| ), |
| dict( |
| module_name='BatchNorm2d', |
| constructor_args=(3, 1e-3, 0.8, False), |
| input_size=(2, 3, 6, 6), |
| cudnn=True, |
| check_eval=True, |
| desc='not_affine', |
| ), |
| dict( |
| module_name='BatchNorm2d', |
| constructor_args=(3, 1e-3, 0.8, True, False), |
| input_size=(2, 3, 6, 6), |
| cudnn=True, |
| check_eval=True, |
| desc='not_tracking_stats', |
| ), |
| dict( |
| module_name='BatchNorm3d', |
| constructor_args=(3,), |
| input_size=(2, 3, 4, 4, 4), |
| cudnn=True, |
| check_eval=True, |
| ), |
| dict( |
| module_name='BatchNorm3d', |
| constructor_args=(3, 1e-3, None), |
| input_size=(2, 3, 4, 4, 4), |
| cudnn=True, |
| check_eval=True, |
| desc='3d_simple_average', |
| ), |
| dict( |
| module_name='BatchNorm3d', |
| constructor_args=(3, 1e-3, 0.7), |
| input_size=(2, 3, 4, 4, 4), |
| cudnn=True, |
| check_eval=True, |
| desc='momentum', |
| ), |
| dict( |
| module_name='BatchNorm3d', |
| constructor_args=(3, 1e-3, 0.7, False), |
| input_size=(2, 3, 4, 4, 4), |
| cudnn=True, |
| check_eval=True, |
| desc='not_affine', |
| ), |
| dict( |
| module_name='BatchNorm3d', |
| constructor_args=(3, 1e-3, 0.7, True, False), |
| input_size=(2, 3, 4, 4, 4), |
| cudnn=True, |
| check_eval=True, |
| desc='not_tracking_stats', |
| ), |
| dict( |
| module_name='InstanceNorm1d', |
| constructor_args=(3, 1e-3, 0.3), |
| input_size=(4, 3, 15), |
| cudnn=True, |
| check_eval=True, |
| ), |
| dict( |
| module_name='InstanceNorm1d', |
| constructor_args=(3, 1e-3, 0.3, False, True), |
| input_size=(4, 3, 15), |
| cudnn=True, |
| check_eval=True, |
| desc='tracking_stats', |
| ), |
| dict( |
| module_name='InstanceNorm2d', |
| constructor_args=(3, 1e-3, 0.3), |
| input_size=(2, 3, 6, 6), |
| cudnn=True, |
| check_eval=True, |
| ), |
| dict( |
| module_name='InstanceNorm2d', |
| constructor_args=(3, 1e-3, 0.3, False, True), |
| input_size=(2, 3, 6, 6), |
| cudnn=True, |
| check_eval=True, |
| desc='tracking_stats', |
| ), |
| dict( |
| module_name='InstanceNorm3d', |
| constructor_args=(3, 1e-3, 0.3), |
| input_size=(2, 3, 4, 4, 4), |
| cudnn=True, |
| check_eval=True, |
| ), |
| dict( |
| module_name='InstanceNorm3d', |
| constructor_args=(3, 1e-3, 0.3, False, True), |
| input_size=(2, 3, 4, 4, 4), |
| cudnn=True, |
| check_eval=True, |
| desc='tracking_stats', |
| ), |
| dict( |
| module_name='LayerNorm', |
| constructor_args=([5], 1e-3), |
| input_size=(4, 5, 5), |
| cudnn=True, |
| check_eval=True, |
| desc='1d_elementwise_affine', |
| ), |
| dict( |
| module_name='LayerNorm', |
| constructor_args=([5], 1e-3, False), |
| input_size=(4, 5, 5), |
| cudnn=True, |
| check_eval=True, |
| desc='1d_no_elementwise_affine', |
| ), |
| dict( |
| module_name='LayerNorm', |
| constructor_args=([2, 2, 5], 1e-3), |
| input_size=(4, 2, 2, 5), |
| cudnn=True, |
| check_eval=True, |
| desc='3d_elementwise_affine', |
| ), |
| dict( |
| module_name='LayerNorm', |
| constructor_args=([2, 2, 5], 1e-3, False), |
| input_size=(4, 2, 2, 5), |
| cudnn=True, |
| check_eval=True, |
| desc='3d_no_elementwise_affine', |
| ), |
| dict( |
| module_name='GroupNorm', |
| constructor_args=(3, 6, 1e-3), |
| input_size=(4, 6, 5), |
| cudnn=True, |
| check_eval=True, |
| desc='1d_affine', |
| ), |
| dict( |
| module_name='GroupNorm', |
| constructor_args=(5, 5, 1e-3, False), |
| input_size=(4, 5, 5), |
| cudnn=True, |
| check_eval=True, |
| desc='1d_no_affine_IN', # this setting is equivalent to InstanceNorm |
| ), |
| dict( |
| module_name='GroupNorm', |
| constructor_args=(1, 5, 1e-3, False), |
| input_size=(4, 5, 5), |
| cudnn=True, |
| check_eval=True, |
| desc='1d_no_affine_LN', # this setting is equivalent to LayerNorm |
| ), |
| dict( |
| module_name='GroupNorm', |
| constructor_args=(3, 6, 1e-3), |
| input_size=(4, 6, 2, 3), |
| cudnn=True, |
| check_eval=True, |
| desc='2d_affine', |
| ), |
| dict( |
| module_name='GroupNorm', |
| constructor_args=(3, 3, 1e-3, False), |
| input_size=(4, 3, 2, 3), |
| cudnn=True, |
| check_eval=True, |
| desc='2d_no_affine_IN', # this setting is equivalent to InstanceNorm |
| ), |
| dict( |
| module_name='GroupNorm', |
| constructor_args=(1, 3, 1e-3, False), |
| input_size=(4, 3, 2, 3), |
| cudnn=True, |
| check_eval=True, |
| desc='2d_no_affine_LN', # this setting is equivalent to LayerNorm |
| ), |
| dict( |
| module_name='Conv1d', |
| constructor_args=(4, 5, 3), |
| input_size=(2, 4, 10), |
| cudnn=True, |
| ), |
| dict( |
| module_name='Conv1d', |
| constructor_args=(4, 5, 3, 2), |
| input_size=(2, 4, 10), |
| cudnn=True, |
| desc='stride', |
| ), |
| dict( |
| module_name='Conv1d', |
| constructor_args=(4, 5, 3, 1, 1), |
| input_size=(2, 4, 10), |
| cudnn=True, |
| desc='pad1', |
| ), |
| dict( |
| module_name='Conv1d', |
| constructor_args=(4, 5, 5, 1, 2), |
| input_size=(2, 4, 10), |
| cudnn=True, |
| desc='pad2', |
| ), |
| dict( |
| module_name='Conv1d', |
| constructor_args=(4, 4, 3, 1, 1), |
| input_size=(1, 4, 1), |
| cudnn=True, |
| desc='pad1size1', |
| ), |
| dict( |
| module_name='Conv1d', |
| constructor_args=(4, 4, 5, 1, 2), |
| input_size=(1, 4, 1), |
| cudnn=True, |
| desc='pad2size1', |
| ), |
| dict( |
| fullname='Conv1d_dilated', |
| constructor=lambda: nn.Conv1d(4, 5, kernel_size=3, dilation=2), |
| input_size=(2, 4, 10), |
| ), |
| dict( |
| fullname='Conv1d_groups', |
| constructor=lambda: nn.Conv1d(4, 6, kernel_size=3, groups=2), |
| input_size=(2, 4, 6), |
| cudnn=True, |
| ), |
| dict( |
| fullname='ConvTranspose1d', |
| constructor=lambda: nn.ConvTranspose1d(3, 4, kernel_size=3, stride=(3,), padding=1, output_padding=(1,)), |
| cudnn=True, |
| input_size=(1, 3, 7), |
| ), |
| dict( |
| module_name='ConvTranspose1d', |
| constructor_args=(3, 4, 3, 2, 1, 1, 1, False), |
| input_size=(1, 3, 6), |
| cudnn=True, |
| desc='no_bias', |
| ), |
| dict( |
| module_name='ConvTranspose1d', |
| constructor_args=(3, 4, 3, 2, 1, 1, 1, True, 2), |
| input_size=(1, 3, 6), |
| cudnn=True, |
| desc='dilated', |
| ), |
| dict( |
| fullname='ConvTranspose1d_groups', |
| constructor=lambda: nn.ConvTranspose1d(4, 6, 3, stride=(3,), padding=1, output_padding=(1,), groups=2), |
| cudnn=True, |
| input_size=(2, 4, 7), |
| ), |
| dict( |
| module_name='MaxPool1d', |
| constructor_args=(4,), |
| input_size=(2, 10, 4), |
| ), |
| dict( |
| module_name='MaxPool1d', |
| constructor_args=(4, 4), |
| input_size=(2, 10, 4), |
| desc='stride', |
| ), |
| dict( |
| module_name='Conv2d', |
| constructor_args=(3, 4, (3, 2)), |
| input_size=(2, 3, 7, 5), |
| cudnn=True, |
| check_with_long_tensor=True, |
| ), |
| dict( |
| module_name='Conv2d', |
| constructor_args=(3, 4, (3, 3), (2, 2)), |
| input_size=(2, 3, 6, 6), |
| cudnn=True, |
| desc='strided', |
| check_with_long_tensor=True, |
| ), |
| dict( |
| module_name='Conv2d', |
| constructor_args=(3, 4, (3, 3), (2, 2), (1, 1)), |
| input_size=(2, 3, 6, 6), |
| cudnn=True, |
| desc='padding', |
| check_with_long_tensor=True, |
| ), |
| dict( |
| module_name='Conv2d', |
| constructor_args=(3, 2, (3, 3), (2, 2), (1, 1), (2, 2)), |
| input_size=(2, 3, 8, 8), |
| cudnn=True, |
| desc='dilated', |
| ), |
| dict( |
| module_name='Conv2d', |
| constructor_args=(3, 4, (3, 2), 1, 0, 1, 1, False), |
| input_size=(2, 3, 6, 5), |
| cudnn=True, |
| desc='no_bias', |
| check_with_long_tensor=True, |
| ), |
| dict( |
| fullname='Conv2d_groups', |
| constructor=lambda: nn.Conv2d(4, 6, (3, 2), groups=2), |
| input_size=(2, 4, 6, 5), |
| cudnn=True, |
| check_with_long_tensor=True, |
| ), |
| dict( |
| fullname='Conv2d_groups_thnn', |
| constructor=lambda: nn.Conv2d(4, 6, (3, 2), groups=2), |
| input_size=(2, 4, 6, 5), |
| check_with_long_tensor=True, |
| ), |
| dict( |
| module_name='ConvTranspose2d', |
| constructor_args=(3, 4, 3, (3, 2), 1, (1, 1)), |
| cudnn=True, |
| input_size=(1, 3, 7, 6), |
| ), |
| dict( |
| module_name='ConvTranspose2d', |
| constructor_args=(3, 4, 3, (2, 3), 1, (1, 1), 1, False, (2, 2)), |
| input_size=(1, 3, 6, 7), |
| cudnn=True, |
| desc='dilated', |
| ), |
| dict( |
| module_name='ConvTranspose2d', |
| constructor_args=(3, 4, 3, (2, 3), 1, (1, 1), 1, False), |
| input_size=(1, 3, 6, 7), |
| cudnn=True, |
| desc='no_bias', |
| ), |
| dict( |
| fullname='ConvTranspose2d_groups', |
| constructor=lambda: nn.ConvTranspose2d(2, 4, (2, 3), groups=2), |
| input_size=(1, 2, 4, 5), |
| cudnn=True, |
| ), |
| dict( |
| fullname='Conv2d_depthwise', |
| constructor=lambda: nn.Conv2d(4, 4, (3, 3), groups=4), |
| input_size=(2, 4, 6, 6), |
| ), |
| dict( |
| fullname='Conv2d_depthwise_with_multiplier', |
| constructor=lambda: nn.Conv2d(4, 8, (3, 3), groups=4), |
| input_size=(2, 4, 6, 6), |
| ), |
| dict( |
| fullname='Conv2d_depthwise_strided', |
| constructor=lambda: nn.Conv2d(4, 4, (3, 3), stride=(2, 2), groups=4), |
| input_size=(2, 4, 6, 6), |
| ), |
| dict( |
| fullname='Conv2d_depthwise_padded', |
| constructor=lambda: nn.Conv2d(4, 4, (3, 3), padding=(1, 1), groups=4), |
| input_size=(2, 4, 6, 6), |
| ), |
| dict( |
| fullname='Conv2d_depthwise_dilated', |
| constructor=lambda: nn.Conv2d(4, 4, (2, 2), dilation=(2, 2), groups=4), |
| input_size=(2, 4, 5, 5), |
| ), |
| dict( |
| module_name='MaxPool2d', |
| constructor_args=((3, 3), (2, 2), (1, 1)), |
| input_size=(1, 3, 7, 7), |
| ), |
| dict( |
| module_name='AvgPool1d', |
| constructor_args=(2,), |
| input_size=(2, 3, 6), |
| ), |
| dict( |
| module_name='AvgPool1d', |
| constructor_args=((2,), (2,)), |
| input_size=(2, 3, 6), |
| desc='stride', |
| ), |
| dict( |
| module_name='AvgPool1d', |
| constructor_args=(2, 2, 1), |
| input_size=(2, 3, 6), |
| desc='stride_pad', |
| ), |
| dict( |
| module_name='AvgPool2d', |
| constructor_args=((2, 2),), |
| input_size=(2, 3, 6, 6), |
| ), |
| dict( |
| module_name='AvgPool2d', |
| constructor_args=((2, 2), (2, 2)), |
| input_size=(2, 3, 6, 6), |
| desc='stride', |
| ), |
| dict( |
| module_name='AvgPool2d', |
| constructor_args=((2, 2), (2, 2), (1, 1)), |
| input_size=(2, 3, 6, 6), |
| desc='stride_pad', |
| ), |
| dict( |
| module_name='LPPool2d', |
| constructor_args=(2, 2, 2), |
| input_size=(1, 3, 7, 7), |
| ), |
| dict( |
| module_name='LPPool2d', |
| constructor_args=(1.5, 2), |
| input_fn=lambda: torch.rand(1, 3, 7, 7), |
| desc='norm', |
| ), |
| dict( |
| module_name='LPPool1d', |
| constructor_args=(1.5, 2), |
| input_fn=lambda: torch.rand(1, 3, 7), |
| desc='norm', |
| ), |
| dict( |
| module_name='LPPool1d', |
| constructor_args=(2, 2, 3), |
| input_size=(1, 3, 7), |
| ), |
| dict( |
| module_name='LocalResponseNorm', |
| constructor_args=(3, ), |
| input_size=(1, 5, 7), |
| desc='1d', |
| ), |
| dict( |
| module_name='LocalResponseNorm', |
| constructor_args=(2, ), |
| input_size=(1, 5, 7, 7), |
| desc='2d_uneven_pad', |
| ), |
| dict( |
| module_name='LocalResponseNorm', |
| constructor_args=(1, 1., 0.5, 2.), |
| input_size=(1, 5, 7, 7, 7), |
| desc='3d_custom_params', |
| ), |
| dict( |
| module_name='ReflectionPad1d', |
| constructor_args=((1, 2),), |
| input_size=(2, 3, 8), |
| ), |
| dict( |
| module_name='ReflectionPad2d', |
| constructor_args=((1, 2, 3, 4),), |
| input_size=(2, 3, 8, 8), |
| ), |
| dict( |
| module_name='ReplicationPad1d', |
| constructor_args=((1, 2),), |
| input_size=(2, 3, 4), |
| ), |
| dict( |
| module_name='ReplicationPad2d', |
| constructor_args=((1, 2, 3, 4),), |
| input_size=(2, 3, 4, 4), |
| ), |
| dict( |
| module_name='ZeroPad2d', |
| constructor_args=((1, 2, 3, 4),), |
| input_size=(2, 3, 4, 4) |
| ), |
| dict( |
| module_name='ZeroPad2d', |
| constructor_args=((-1, -1, -1, -2),), |
| input_size=(2, 3, 4, 4), |
| desc='negative_dims' |
| ), |
| dict( |
| module_name='ConstantPad1d', |
| constructor_args=((1, 2), 2.), |
| input_size=(2, 3, 4) |
| ), |
| dict( |
| module_name='ConstantPad2d', |
| constructor_args=((1, 2, 3, 4), 2.), |
| input_size=(2, 3, 4, 4) |
| ), |
| dict( |
| module_name='ConstantPad3d', |
| constructor_args=((1, 2, 3, 4, 1, 0), 2.), |
| input_size=(2, 3, 4, 4, 5) |
| ), |
| dict( |
| module_name='Conv3d', |
| constructor_args=(3, 4, (2, 3, 4)), |
| input_size=(2, 3, 3, 4, 5), |
| cudnn=True, |
| check_with_long_tensor=True, |
| ), |
| dict( |
| module_name='Conv3d', |
| constructor_args=(3, 4, (2, 3, 4), 1, 0, 1, 1, False), |
| input_size=(2, 3, 3, 4, 5), |
| cudnn=True, |
| desc='no_bias', |
| check_with_long_tensor=True, |
| ), |
| dict( |
| module_name='Conv3d', |
| constructor_args=(3, 4, 2, 2), |
| input_size=(2, 3, 5, 5, 5), |
| cudnn=True, |
| desc='stride', |
| check_with_long_tensor=True, |
| ), |
| dict( |
| module_name='Conv3d', |
| constructor_args=(3, 4, 2, 2, 1), |
| input_size=(2, 3, 5, 5, 5), |
| cudnn=True, |
| desc='stride_padding', |
| check_with_long_tensor=True, |
| ), |
| dict( |
| fullname='Conv3d_groups', |
| constructor=lambda: nn.Conv3d(4, 6, kernel_size=3, groups=2), |
| input_size=(2, 4, 4, 5, 4), |
| cudnn=True, |
| check_with_long_tensor=True, |
| ), |
| dict( |
| fullname='Conv3d_dilated', |
| constructor=lambda: nn.Conv3d(3, 4, kernel_size=2, dilation=2), |
| input_size=(2, 3, 5, 5, 5), |
| ), |
| dict( |
| fullname='Conv3d_dilated_strided', |
| constructor=lambda: nn.Conv3d(3, 4, kernel_size=2, dilation=2, stride=2), |
| input_size=(2, 3, 5, 5, 5), |
| ), |
| dict( |
| module_name='ConvTranspose3d', |
| constructor_args=(2, 3, (2, 3, 2)), |
| cudnn=True, |
| input_size=(1, 2, 4, 5, 4), |
| ), |
| dict( |
| module_name='ConvTranspose3d', |
| constructor_args=(2, 3, (2, 3, 2), 1, 0, 0, 1, True, (2, 2, 2)), |
| cudnn=True, |
| input_size=(1, 2, 4, 5, 4), |
| desc='dilated', |
| ), |
| dict( |
| module_name='MaxPool3d', |
| constructor_args=((2, 2, 2),), |
| input_size=(2, 3, 5, 5, 5), |
| ), |
| dict( |
| module_name='MaxPool3d', |
| constructor_args=(2, (2, 2, 2)), |
| input_size=(2, 3, 5, 5, 5), |
| desc='stride', |
| ), |
| dict( |
| module_name='MaxPool3d', |
| constructor_args=(2, 2, (1, 1, 1)), |
| input_size=(2, 3, 5, 5, 5), |
| desc='stride_padding', |
| ), |
| dict( |
| module_name='AvgPool3d', |
| constructor_args=((2, 2, 2),), |
| input_size=(2, 3, 4, 4, 4), |
| ), |
| dict( |
| module_name='AvgPool3d', |
| constructor_args=(2, (2, 2, 2)), |
| input_size=(2, 3, 5, 5, 5), |
| desc='stride', |
| ), |
| dict( |
| module_name='AvgPool3d', |
| constructor_args=(2, 2, (1, 1, 1)), |
| input_size=(2, 3, 5, 5, 5), |
| desc='stride_pad', |
| ), |
| dict( |
| module_name='AvgPool3d', |
| constructor_args=(4, 2, (1, 2, 1)), |
| input_size=(2, 3, 5, 5, 5), |
| desc='stride_pad_gpu_fixedkw_output', |
| ), |
| dict( |
| module_name='AvgPool3d', |
| constructor_args=((2, 4, 8), 1, (1, 1, 2)), |
| input_size=(2, 3, 2, 4, 8), |
| desc='stride_pad_gpu_general_output', |
| ), |
| dict( |
| module_name='AvgPool3d', |
| constructor_args=(3, 1, 0), |
| input_size=(2, 3, 4, 4, 4), |
| desc='stride1_pad0_gpu_input', |
| ), |
| dict( |
| module_name='AvgPool3d', |
| constructor_args=(2, 2, (1, 1, 1)), |
| input_size=(2, 3, 4, 4, 4), |
| desc='stride_pad_gpu_input_nooverlap', |
| ), |
| dict( |
| module_name='ReplicationPad3d', |
| constructor_args=((1, 2, 3, 4, 5, 6),), |
| input_size=(2, 3, 5, 5, 5), |
| ), |
| dict( |
| module_name='Embedding', |
| constructor_args=(4, 3), |
| input_fn=lambda: torch.empty(2, 3, dtype=torch.long).random_(4), |
| jacobian_input=False, |
| check_gradgrad=False, |
| ), |
| dict( |
| module_name='EmbeddingBag', |
| constructor_args=(4, 3), |
| input_fn=lambda: torch.empty(2, 3, dtype=torch.long).random_(4), |
| jacobian_input=False, |
| check_gradgrad=False, |
| desc='mean', |
| ), |
| dict( |
| module_name='EmbeddingBag', |
| constructor_args=(4, 3, None, 2., False, 'sum'), |
| input_fn=lambda: torch.empty(2, 3, dtype=torch.long).random_(4), |
| jacobian_input=False, |
| check_gradgrad=False, |
| desc='sum', |
| ), |
| dict( |
| module_name='EmbeddingBag', |
| constructor_args=(4, 3, None, 2., False, 'max'), |
| input_fn=lambda: torch.empty(2, 3, dtype=torch.long).random_(4), |
| jacobian_input=False, |
| check_gradgrad=False, |
| desc='max', |
| ), |
| dict( |
| fullname='EmbeddingBag_sparse', |
| constructor=lambda: nn.EmbeddingBag(4, 3, sparse=True), |
| input_fn=lambda: torch.randperm(2).repeat(1, 2), |
| jacobian_input=False, |
| check_gradgrad=False, |
| ), |
| dict( |
| constructor=lambda: nn.Embedding(4, 3, sparse=True), |
| input_fn=lambda: torch.randperm(2).repeat(1, 2), |
| jacobian_input=False, |
| fullname='Embedding_sparse', |
| check_gradgrad=False, |
| ), |
| dict( |
| module_name='PixelShuffle', |
| constructor_args=(3,), |
| input_size=(1, 9, 4, 4), |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='nearest'), |
| input_size=(1, 2, 4), |
| fullname='interpolate_nearest_1d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=(12, ), scale_factor=None, mode='nearest'), |
| input_size=(1, 2, 3), |
| fullname='interpolate_nearest_tuple_1d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=4., mode='nearest'), |
| input_size=(1, 2, 4), |
| fullname='interpolate_nearest_scale_1d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='linear', align_corners=False), |
| input_size=(1, 2, 4), |
| fullname='interpolate_linear_1d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=(4, ), scale_factor=None, mode='linear', align_corners=False), |
| input_size=(1, 2, 3), |
| fullname='interpolate_linear_tuple_1d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=4., mode='linear', align_corners=False), |
| input_size=(1, 2, 4), |
| fullname='interpolate_linear_scale_1d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='linear', align_corners=True), |
| input_size=(1, 2, 4), |
| fullname='interpolate_linear_1d_align_corners', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=4., mode='linear', align_corners=True), |
| input_size=(1, 2, 4), |
| fullname='interpolate_linear_scale_1d_align_corners', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='nearest'), |
| input_size=(1, 2, 4, 4), |
| fullname='interpolate_nearest_2d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=(12, 16), scale_factor=None, mode='nearest'), |
| input_size=(1, 2, 3, 4), |
| fullname='interpolate_nearest_tuple_2d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=4., mode='nearest'), |
| input_size=(1, 2, 4, 4), |
| fullname='interpolate_nearest_scale_2d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='bilinear', align_corners=False), |
| input_size=(1, 2, 4, 4), |
| fullname='interpolate_bilinear_2d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=(4, 6), scale_factor=None, |
| mode='bilinear', align_corners=False), |
| input_size=(1, 2, 2, 3), |
| fullname='interpolate_bilinear_tuple_2d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=4., |
| mode='bilinear', align_corners=False), |
| input_size=(1, 2, 4, 4), |
| fullname='interpolate_bilinear_scale_2d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=(2., 2.), |
| mode='bilinear', align_corners=False), |
| input_size=(1, 2, 4, 4), |
| fullname='interpolate_bilinear_scale_tuple_shared_2d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=(2., 1.), |
| mode='bilinear', align_corners=False), |
| input_size=(1, 2, 4, 4), |
| fullname='interpolate_bilinear_scale_tuple_skewed_2d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=(4, 6), scale_factor=None, mode='bilinear', align_corners=True), |
| input_size=(1, 2, 4, 4), |
| fullname='interpolate_bilinear_tuple_2d_align_corners', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=(2., 1.), |
| mode='bilinear', align_corners=True), |
| input_size=(1, 2, 4, 4), |
| fullname='interpolate_bilinear_scale_tuple_skewed_2d_align_corners', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='bicubic', align_corners=False), |
| input_size=(1, 2, 4, 4), |
| fullname='interpolate_bicubic_2d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=(4, 6), scale_factor=None, |
| mode='bicubic', align_corners=False), |
| input_size=(1, 2, 2, 3), |
| fullname='interpolate_bicubic_tuple_2d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=4., mode='bicubic', align_corners=False), |
| input_size=(1, 2, 4, 4), |
| fullname='interpolate_bicubic_scale_2d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=(2., 2.), |
| mode='bicubic', align_corners=False), |
| input_size=(1, 2, 4, 4), |
| fullname='interpolate_bicubic_scale_tuple_shared_2d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=(2., 1.), |
| mode='bicubic', align_corners=False), |
| input_size=(1, 2, 4, 4), |
| fullname='interpolate_bicubic_scale_tuple_skewed_2d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=(4, 6), scale_factor=None, mode='bicubic', align_corners=True), |
| input_size=(1, 2, 4, 4), |
| fullname='interpolate_bicubic_tuple_2d_align_corners', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=(2., 1.), |
| mode='bicubic', align_corners=True), |
| input_size=(1, 2, 4, 4), |
| fullname='interpolate_bicubic_scale_tuple_skewed_2d_align_corners', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='nearest'), |
| input_size=(1, 2, 4, 4, 4), |
| fullname='interpolate_nearest_3d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=(12, 16, 16), scale_factor=None, mode='nearest'), |
| input_size=(1, 2, 3, 4, 4), |
| fullname='interpolate_nearest_tuple_3d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=4., mode='nearest'), |
| input_size=(1, 2, 4, 4, 4), |
| fullname='interpolate_nearest_scale_3d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='trilinear', align_corners=False), |
| input_size=(1, 2, 4, 4, 4), |
| fullname='interpolate_trilinear_3d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=(4, 6, 6), |
| scale_factor=None, mode='trilinear', align_corners=False), |
| input_size=(1, 2, 2, 3, 3), |
| fullname='interpolate_trilinear_tuple_3d', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=3., mode='trilinear', align_corners=False), |
| input_size=(1, 2, 3, 4, 4), |
| fullname='interpolate_trilinear_scale_3d', |
| # See https://github.com/pytorch/pytorch/issues/5006 |
| precision=3e-4, |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=(4, 6, 6), scale_factor=None, |
| mode='trilinear', align_corners=True), |
| input_size=(1, 2, 2, 3, 3), |
| fullname='interpolate_trilinear_tuple_3d_align_corners', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.interpolate, size=None, scale_factor=3., mode='trilinear', align_corners=True), |
| input_size=(1, 2, 3, 4, 4), |
| fullname='interpolate_trilinear_scale_3d_align_corners', |
| # See https://github.com/pytorch/pytorch/issues/5006 |
| precision=3e-4, |
| pickle=False, |
| ), |
| dict( |
| module_name='AdaptiveMaxPool1d', |
| constructor_args=(3,), |
| input_fn=lambda: _rand_tensor_non_equal(1, 3, 5), |
| ), |
| dict( |
| module_name='AdaptiveMaxPool2d', |
| constructor_args=(3,), |
| input_fn=lambda: _rand_tensor_non_equal(1, 3, 5, 6), |
| desc='single', |
| ), |
| dict( |
| module_name='AdaptiveMaxPool2d', |
| constructor_args=((3, 4),), |
| input_fn=lambda: _rand_tensor_non_equal(1, 3, 5, 6), |
| desc='tuple', |
| ), |
| dict( |
| module_name='AdaptiveMaxPool2d', |
| constructor_args=((3, None),), |
| input_fn=lambda: _rand_tensor_non_equal(1, 3, 5, 6), |
| desc='tuple_none', |
| ), |
| dict( |
| module_name='AdaptiveMaxPool3d', |
| constructor_args=(3,), |
| input_fn=lambda: _rand_tensor_non_equal(2, 3, 5, 6, 7), |
| desc='single', |
| ), |
| dict( |
| module_name='AdaptiveMaxPool3d', |
| constructor_args=((3, 4, 5),), |
| input_fn=lambda: _rand_tensor_non_equal(2, 3, 5, 6, 7), |
| desc='tuple', |
| ), |
| dict( |
| module_name='AdaptiveMaxPool3d', |
| constructor_args=((3, None, 5),), |
| input_fn=lambda: _rand_tensor_non_equal(2, 3, 5, 6, 7), |
| desc='tuple_none', |
| ), |
| dict( |
| module_name='AdaptiveMaxPool3d', |
| constructor_args=(3,), |
| input_fn=lambda: _rand_tensor_non_equal(2, 3, 12, 9, 3), |
| desc='single_nonatomic', |
| ), |
| dict( |
| module_name='AdaptiveMaxPool3d', |
| constructor_args=((3, 4, 5),), |
| input_fn=lambda: _rand_tensor_non_equal(2, 3, 6, 4, 10), |
| desc='tuple_nonatomic', |
| ), |
| dict( |
| module_name='AdaptiveAvgPool1d', |
| constructor_args=(3,), |
| input_fn=lambda: torch.rand(1, 3, 5), |
| ), |
| dict( |
| module_name='AdaptiveAvgPool1d', |
| constructor_args=(1,), |
| input_fn=lambda: torch.rand(1, 3, 5), |
| desc='one_output', |
| ), |
| dict( |
| module_name='AdaptiveAvgPool2d', |
| constructor_args=(3,), |
| input_fn=lambda: torch.rand(1, 3, 5, 6), |
| desc='single', |
| ), |
| dict( |
| module_name='AdaptiveAvgPool2d', |
| constructor_args=(1,), |
| input_fn=lambda: torch.rand(1, 3, 5, 6), |
| desc='single_1x1output', |
| ), |
| dict( |
| module_name='AdaptiveAvgPool2d', |
| constructor_args=((3, 4),), |
| input_fn=lambda: torch.rand(1, 3, 5, 6), |
| desc='tuple', |
| ), |
| dict( |
| module_name='AdaptiveAvgPool2d', |
| constructor_args=((3, None),), |
| input_fn=lambda: torch.rand(1, 3, 5, 6), |
| desc='tuple_none', |
| ), |
| dict( |
| module_name='AdaptiveAvgPool3d', |
| constructor_args=(3,), |
| input_fn=lambda: torch.rand(2, 3, 5, 2, 7), |
| desc='single', |
| ), |
| dict( |
| module_name='AdaptiveAvgPool3d', |
| constructor_args=((3, 4, 5),), |
| input_fn=lambda: torch.rand(2, 3, 5, 3, 7), |
| desc='tuple', |
| ), |
| dict( |
| module_name='AdaptiveAvgPool3d', |
| constructor_args=((None, 4, 5),), |
| input_fn=lambda: torch.rand(2, 3, 5, 3, 7), |
| desc='tuple_none', |
| ), |
| dict( |
| module_name='SELU', |
| input_size=(3, 2, 5), |
| check_inplace=True |
| ), |
| dict( |
| module_name='SELU', |
| input_size=(), |
| check_inplace=True, |
| desc='scalar' |
| ), |
| dict( |
| module_name='CELU', |
| input_size=(3, 2, 5), |
| constructor_args=(2.,), |
| check_inplace=True, |
| reference_fn=lambda x, *_: torch.where(x >= 0, x, 2. * ((.5 * x).exp() - 1)), |
| ), |
| dict( |
| module_name='CELU', |
| input_size=(), |
| constructor_args=(2.,), |
| check_inplace=True, |
| reference_fn=lambda x, *_: torch.where(x >= 0, x, 2. * ((.5 * x).exp() - 1)), |
| desc='scalar' |
| ), |
| dict( |
| module_name='GLU', |
| input_size=(5, 6), |
| ), |
| dict( |
| module_name='GLU', |
| constructor_args=(1,), |
| input_size=(5, 6, 7), |
| desc='dim', |
| ), |
| dict( |
| constructor=wrap_functional(F.softmax, dim=-1), |
| input_size=(2, 128), # trigger the last-dim algo in CUDA |
| fullname='softmax_lastdim', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.softmax, dim=1, dtype=torch.float64), |
| input_size=(2, 128), |
| fullname='softmax_lastdim_dtype', |
| pickle=False, |
| test_cuda=False |
| ), |
| dict( |
| constructor=wrap_functional(F.softmax, dim=1), |
| input_size=(2, 128, 2, 2), # trigger special case of spatial CUDA algo |
| fullname='softmax_spatial_special', |
| pickle=False, |
| test_cuda=(not TEST_WITH_ROCM) |
| ), |
| dict( |
| constructor=wrap_functional(F.softmax, dim=1), |
| input_size=(2, 2, 4, 4), # regular spatial algorithm |
| fullname='softmax_spatial', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.softmax, dim=1, dtype=torch.float64), |
| input_size=(2, 2, 4, 4), # regular spatial algorithm |
| fullname='softmax_spatial_dtype', |
| pickle=False, |
| test_cuda=False |
| ), |
| dict( |
| constructor=wrap_functional(F.softmax, dim=0), |
| input_size=(2, 3, 4, 5), |
| fullname='softmax_functional_dim0', |
| test_cuda=False, |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.softmax, dim=3), |
| input_size=(2, 3, 4, 5), |
| fullname='softmax_functional_dim3', |
| test_cuda=False, |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.softmax, dim=-1), |
| input_size=(), |
| fullname='softmax_functional_scalar', |
| test_cuda=False, |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.log_softmax, dim=-1), |
| input_size=(2, 128), # trigger the last-dim algo in CUDA |
| fullname='log_softmax_lastdim', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.log_softmax, dim=1), |
| input_size=(2, 128, 2, 2), # trigger special case of spatial CUDA algo |
| fullname='log_softmax_spatial_special', |
| pickle=False, |
| test_cuda=(not TEST_WITH_ROCM) |
| ), |
| dict( |
| constructor=wrap_functional(F.log_softmax, dim=1), |
| input_size=(2, 2, 4, 4), # regular spatial algorithm |
| fullname='log_softmax_spatial', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.log_softmax, dim=0), |
| input_size=(2, 3, 4, 5), |
| fullname='log_softmax_dim0', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.log_softmax, dim=3), |
| input_size=(2, 3, 4, 5), |
| fullname='log_softmax_dim3', |
| pickle=False, |
| ), |
| dict( |
| constructor=wrap_functional(F.log_softmax, dim=0), |
| input_size=(), |
| fullname='log_softmax_scalar', |
| pickle=False, |
| ), |
| dict( |
| fullname='Unfold', |
| constructor=lambda: nn.Unfold((2, 2), (1, 1), (0, 0), (1, 1)), |
| input_size=(2, 4, 3, 3), |
| check_gradgrad=False, |
| test_cuda=True, |
| ), |
| dict( |
| fullname='Fold', |
| constructor=lambda: nn.Fold((3, 3), (2, 2), (1, 1), (0, 0), (1, 1)), |
| input_size=(2, 16, 4), |
| check_gradgrad=False, |
| test_cuda=True, |
| ), |
| dict( |
| fullname='Unfold_int_input', |
| constructor=lambda: nn.Unfold(2, 1, 0, 1), |
| input_size=(2, 4, 3, 3), |
| check_gradgrad=False, |
| test_cuda=True, |
| ), |
| dict( |
| fullname='Fold_int_input', |
| constructor=lambda: nn.Fold(3, 2, 1, 0, 1), |
| input_size=(2, 16, 4), |
| check_gradgrad=False, |
| test_cuda=True, |
| ), |
| dict( |
| module_name='Threshold', |
| constructor_args=(2., 1.), |
| input_size=(), |
| check_inplace=True, |
| desc='threshold_value_scalar' |
| ), |
| |
| dict( |
| module_name='ReLU', |
| input_size=(), |
| check_inplace=True, |
| desc='scalar' |
| ), |
| dict( |
| module_name='ReLU6', |
| input_size=(), |
| check_inplace=True, |
| desc='scalar' |
| ), |
| dict( |
| module_name='RReLU', |
| constructor_args=(0.1, 0.9), |
| input_size=(), |
| desc='with_up_down_scalar', |
| test_cuda=False, |
| ), |
| dict( |
| module_name='Hardtanh', |
| input_size=(), |
| reference_fn=lambda i, *_: i.clamp(-1, 1), |
| desc='scalar' |
| ), |
| dict( |
| module_name='Sigmoid', |
| input_size=(), |
| desc='scalar', |
| ), |
| dict( |
| module_name='Tanh', |
| input_size=(), |
| desc='scalar', |
| ), |
| dict( |
| module_name='Softmax', |
| constructor_args=(0,), |
| input_size=(), |
| reference_fn=lambda i, *_: torch.exp(i).div(torch.exp(i).sum(0, True)), |
| desc='scalar', |
| ), |
| dict( |
| module_name='LogSoftmax', |
| constructor_args=(0,), |
| input_size=(), |
| reference_fn=lambda i, *_: torch.exp(i).div_(torch.exp(i).sum(0, False)).log_(), |
| desc='multiparam_scalar', |
| ), |
| dict( |
| module_name='ELU', |
| constructor_args=(2.,), |
| input_size=(), |
| desc='scalar', |
| ), |
| dict( |
| module_name='Hardshrink', |
| constructor_args=(2.,), |
| input_size=(), |
| desc='scalar', |
| ), |
| dict( |
| module_name='LeakyReLU', |
| constructor_args=(0.5,), |
| input_size=(), |
| check_inplace=True, |
| desc='with_negval_scalar' |
| ), |
| dict( |
| module_name='LogSigmoid', |
| input_size=(), |
| reference_fn=lambda i, *_: i.sigmoid().log(), |
| desc='scalar' |
| ), |
| dict( |
| module_name='Softplus', |
| constructor_args=(2, -100), |
| input_size=(), |
| reference_fn=(lambda i, *_: ((i * 2) > -100).type_as(i) * i + |
| ((i * 2) <= -100).type_as(i) * 1. / 2. * torch.log(1 + torch.exp(2 * i))), |
| desc='beta_threshold_scalar', |
| ), |
| dict( |
| module_name='Softshrink', |
| constructor_args=(1,), |
| input_size=(), |
| desc='lambda_scalar', |
| ), |
| dict( |
| module_name='PReLU', |
| input_size=(), |
| reference_fn=lambda i, p, _: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0], |
| desc='scalar', |
| ), |
| dict( |
| module_name='Softsign', |
| input_size=(), |
| reference_fn=lambda i, *_: i.div(1 + torch.abs(i)), |
| desc='scalar', |
| ), |
| dict( |
| module_name='Softmin', |
| constructor_args=(0,), |
| input_size=(), |
| desc='scalar', |
| ), |
| dict( |
| module_name='Tanhshrink', |
| input_size=(), |
| desc='scalar', |
| ), |
| dict( |
| fullname='Padding12_1dcircular', |
| constructor=wrap_functional(F.pad, pad=(1, 2), mode='circular'), |
| input_fn=lambda: torch.arange(6, out=torch.DoubleTensor()).reshape([1, 2, 3]), |
| reference_fn=lambda i, *_: padding1d_circular(i, (1, 2)), |
| skip_double=TEST_WITH_ROCM, |
| pickle=False, |
| ), |
| dict( |
| fullname='Padding31_1dcircular', |
| constructor=wrap_functional(F.pad, pad=(3, 1), mode='circular'), |
| input_fn=lambda: torch.arange(6, out=torch.DoubleTensor()).reshape([1, 2, 3]), |
| reference_fn=lambda i, *_: padding1d_circular(i, (3, 1)), |
| skip_double=TEST_WITH_ROCM, |
| pickle=False, |
| ), |
| dict( |
| fullname='Padding33_1dcircular', |
| constructor=wrap_functional(F.pad, pad=(3, 3), mode='circular'), |
| input_fn=lambda: torch.arange(6, out=torch.DoubleTensor()).reshape([1, 2, 3]), |
| reference_fn=lambda i, *_: padding1d_circular(i, (3, 3)), |
| skip_double=TEST_WITH_ROCM, |
| pickle=False, |
| ), |
| dict( |
| fullname='Padding1221_2dcircular', |
| constructor=wrap_functional(F.pad, pad=(1, 2, 2, 1), mode='circular'), |
| input_fn=lambda: torch.arange(6, out=torch.DoubleTensor()).reshape([1, 1, 2, 3]), |
| reference_fn=lambda i, *_: padding2d_circular(i, (1, 2, 2, 1)), |
| skip_double=TEST_WITH_ROCM, |
| pickle=False, |
| ), |
| dict( |
| fullname='Padding2322_2dcircular', |
| constructor=wrap_functional(F.pad, pad=(2, 3, 2, 2), mode='circular'), |
| input_fn=lambda: torch.arange(6, out=torch.DoubleTensor()).reshape([1, 1, 2, 3]), |
| reference_fn=lambda i, *_: padding2d_circular(i, (2, 3, 2, 2)), |
| skip_double=TEST_WITH_ROCM, |
| pickle=False, |
| ), |
| dict( |
| fullname='Padding3331_2dcircular', |
| constructor=wrap_functional(F.pad, pad=(3, 3, 3, 1), mode='circular'), |
| input_fn=lambda: torch.arange(9, out=torch.DoubleTensor()).reshape([1, 1, 3, 3]), |
| reference_fn=lambda i, *_: padding2d_circular(i, (3, 3, 3, 1)), |
| skip_double=TEST_WITH_ROCM, |
| pickle=False, |
| ), |
| dict( |
| fullname='Padding122112_3dcircular', |
| constructor=wrap_functional(F.pad, pad=(1, 2, 2, 1, 1, 2), mode='circular'), |
| input_fn=lambda: torch.arange(12, out=torch.DoubleTensor()).reshape([1, 1, 2, 2, 3]), |
| reference_fn=lambda i, *_: padding3d_circular(i, (1, 2, 2, 1, 1, 2)), |
| skip_double=TEST_WITH_ROCM, |
| pickle=False, |
| ), |
| dict( |
| fullname='Padding322112_3dcircular', |
| constructor=wrap_functional(F.pad, pad=(3, 2, 2, 1, 1, 2), mode='circular'), |
| input_fn=lambda: torch.arange(12, out=torch.DoubleTensor()).reshape([1, 1, 2, 2, 3]), |
| reference_fn=lambda i, *_: padding3d_circular(i, (3, 2, 2, 1, 1, 2)), |
| skip_double=TEST_WITH_ROCM, |
| pickle=False, |
| ), |
| dict( |
| fullname='Padding332122_3dcircular', |
| constructor=wrap_functional(F.pad, pad=(3, 3, 2, 1, 2, 2), mode='circular'), |
| input_fn=lambda: torch.arange(12, out=torch.DoubleTensor()).reshape([1, 1, 2, 2, 3]), |
| reference_fn=lambda i, *_: padding3d_circular(i, (3, 3, 2, 1, 2, 2)), |
| skip_double=TEST_WITH_ROCM, |
| pickle=False, |
| ), |
| |
| dict( |
| module_name='Conv1d', |
| constructor_args=(3, 4, 2, 2, (1,), 1, 1, True, 'circular'), |
| input_size=(2, 3, 5,), |
| cudnn=True, |
| desc='stride1_pad1circular', |
| ), |
| dict( |
| module_name='Conv1d', |
| constructor_args=(3, 4, 2, 2, (2,), 1, 1, True, 'circular'), |
| input_size=(2, 3, 5,), |
| cudnn=True, |
| desc='stride1_pad2circular', |
| ), |
| dict( |
| module_name='Conv2d', |
| constructor_args=(3, 4, (3, 3), (2, 2), (1, 2), 1, 1, True, 'circular'), |
| input_size=(2, 3, 3, 3), |
| cudnn=True, |
| desc='pad2circular', |
| check_with_long_tensor=True, |
| ), |
| dict( |
| module_name='Conv3d', |
| constructor_args=(3, 4, 2, 2, (1, 2, 3), 1, 1, True, 'circular'), |
| input_size=(2, 3, 3, 3, 3), |
| cudnn=True, |
| desc='stride_pad1circular', |
| check_with_long_tensor=True, |
| ), |
| ] |
| |
| |
| def kldivloss_reference(input, target, reduction='mean'): |
| safe_target = target * (target > 0).type_as(target) |
| safe_target_log = (safe_target + (target <= 0).type_as(target)).log() |
| result = safe_target * (safe_target_log - input) |
| if reduction == 'mean': |
| return result.mean() |
| elif reduction == 'sum': |
| return result.sum() |
| elif reduction == 'batchmean' and result.dim() != 0: |
| return result.sum() / result.size(0) |
| return result |
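| |
| |
| # Minimal sanity-check sketch (not wired into the test harness): for the |
| # default 'mean' reduction the reference above should agree with F.kl_div. |
| # The _demo_* name and the shapes below are arbitrary illustrative choices. |
| def _demo_kldivloss_reference(): |
|     inp = torch.rand(3, 5, dtype=torch.double).log() |
|     tgt = torch.rand(3, 5, dtype=torch.double) |
|     assert torch.allclose(kldivloss_reference(inp, tgt), |
|                           F.kl_div(inp, tgt, reduction='mean')) |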
| |
| |
| def nlllossNd_reference(input, target, weight=None, ignore_index=-100, |
| reduction='mean'): |
| assert input.dim() >= 3 |
| N = input.size(0) |
| C = input.size(1) |
| out_size = (N,) + input.size()[2:] |
| output = torch.zeros(out_size).type_as(input) |
| |
| if weight is None: |
| weight = torch.ones(C).type_as(input) |
| total_weight = 0 |
| for tup in product(*[range(size) for size in out_size]): |
| t_nx = target[tup] |
| norm = 0. if ignore_index == t_nx else weight[t_nx].item() |
| input_index = list(tup) |
| input_index.insert(1, t_nx) |
| output[tup] = -input[tuple(input_index)] * norm |
| total_weight += norm |
| |
| if reduction == 'mean': |
| return output.sum() / total_weight |
| elif reduction == 'sum': |
| return output.sum() |
| return output |
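| |
| |
| # Minimal sanity-check sketch (not wired into the test harness): on a 4-d |
| # (N, C, H, W) input the reference above should agree with F.nll_loss. |
| # Shapes and class counts below are arbitrary illustrative choices. |
| def _demo_nlllossNd_reference(): |
|     inp = F.log_softmax(torch.randn(2, 3, 4, 4, dtype=torch.double), dim=1) |
|     tgt = torch.randint(0, 3, (2, 4, 4), dtype=torch.long) |
|     assert torch.allclose(nlllossNd_reference(inp, tgt), F.nll_loss(inp, tgt)) |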
| |
| |
| def nllloss_reference(input, target, weight=None, ignore_index=-100, |
| reduction='mean'): |
| |
| def nll_loss_helper(input, target, weight, ignore_index): |
| if target == ignore_index: |
| return (0, 0) |
| norm = 1 if weight is None else weight[target] |
| result = -input[target] * norm |
| return (result, norm) |
| |
| losses_and_weights = [nll_loss_helper(i, t, weight, ignore_index) |
| for i, t in zip(input, target)] |
| losses, weights = zip(*losses_and_weights) |
| losses_tensor = input.new_tensor(losses) |
| if reduction == 'mean': |
| return sum(losses_tensor) / sum(weights) |
| elif reduction == 'sum': |
| return sum(losses_tensor) |
| else: |
| return losses_tensor |
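| |
| |
| # Minimal sanity-check sketch (not wired into the test harness): on a 2-d |
| # (N, C) input the reference above should agree with F.nll_loss for the |
| # default 'mean' reduction. Shapes below are arbitrary illustrative choices. |
| def _demo_nllloss_reference(): |
|     inp = F.log_softmax(torch.randn(5, 10, dtype=torch.double), dim=1) |
|     tgt = torch.randint(0, 10, (5,), dtype=torch.long) |
|     assert torch.allclose(nllloss_reference(inp, tgt), F.nll_loss(inp, tgt)) |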
| |
| |
| def smoothl1loss_reference(input, target, reduction='mean'): |
| abs_diff = (input - target).abs() |
| ge_one_mask = (abs_diff >= 1).type_as(abs_diff) |
| lt_one_mask = (abs_diff < 1).type_as(abs_diff) |
| output = ge_one_mask * (abs_diff - 0.5) + lt_one_mask * 0.5 * (abs_diff ** 2) |
| if reduction == 'mean': |
| return output.mean() |
| elif reduction == 'sum': |
| return output.sum() |
| return output |
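| |
| |
| # Minimal sanity-check sketch (not wired into the test harness): the |
| # reference above should agree with F.smooth_l1_loss for the default |
| # 'mean' reduction. Shapes below are arbitrary illustrative choices. |
| def _demo_smoothl1loss_reference(): |
|     inp = torch.randn(4, 7, dtype=torch.double) |
|     tgt = torch.randn(4, 7, dtype=torch.double) |
|     assert torch.allclose(smoothl1loss_reference(inp, tgt), |
|                           F.smooth_l1_loss(inp, tgt)) |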
| |
| |
| def _multilabelmarginloss_reference(input, target): |
| targets = [] |
| for target_index in target: |
| if target_index < 0: |
| break |
| targets.append(target_index) |
| |
| total = 0 |
| for target_index in targets: |
| for i in range(0, len(input)): |
| if i not in targets: |
| total += max(0, 1 - input[target_index] + input[i]) |
| |
| return total |
| |
| |
| def multilabelmarginloss_reference(input, target, reduction='mean'): |
| if input.dim() == 1: |
| n = 1 |
| dim = input.size(0) |
| output = input.new(n).zero_() |
| output[0] = _multilabelmarginloss_reference(input, target) |
| else: |
| n = input.size(0) |
| dim = input.size(1) |
| output = input.new(n).zero_() |
| for i in range(0, n): |
| output[i] = _multilabelmarginloss_reference(input[i], target[i]) |
| |
| if reduction == 'mean': |
| return output.mean() / dim |
| elif reduction == 'sum': |
| return output.sum() / dim |
| return output / dim |
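| |
| |
| # Minimal sanity-check sketch (not wired into the test harness): the |
| # reference above should agree with F.multilabel_margin_loss; a -1 entry |
| # terminates the per-row list of target classes. Values below are |
| # arbitrary illustrative choices. |
| def _demo_multilabelmarginloss_reference(): |
|     inp = torch.randn(2, 4, dtype=torch.double) |
|     tgt = torch.tensor([[3, 0, -1, -1], [0, 1, 2, 3]]) |
|     assert torch.allclose(multilabelmarginloss_reference(inp, tgt), |
|                           F.multilabel_margin_loss(inp, tgt)) |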
| |
| |
| def hingeembeddingloss_reference(input, target, margin=1.0, reduction='mean'): |
| margin_clamp = (margin - input).clamp(min=0).type_as(input) |
| output = torch.where(target == 1, input, margin_clamp) |
| |
| if reduction == 'mean': |
| return output.mean() |
| elif reduction == 'sum': |
| return output.sum() |
| return output |
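| |
| |
| # Minimal sanity-check sketch (not wired into the test harness): the |
| # reference above should agree with F.hinge_embedding_loss; targets take |
| # values +1/-1. Values below are arbitrary illustrative choices. |
| def _demo_hingeembeddingloss_reference(): |
|     inp = torch.randn(6, dtype=torch.double) |
|     tgt = torch.tensor([1., -1., 1., 1., -1., -1.], dtype=torch.double) |
|     assert torch.allclose(hingeembeddingloss_reference(inp, tgt), |
|                           F.hinge_embedding_loss(inp, tgt)) |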
| |
| |
| def softmarginloss_reference(input, target, reduction='mean'): |
| output = (1 + (-input * target).exp()).log() |
| |
| if reduction == 'mean': |
| return output.mean() |
| elif reduction == 'sum': |
| return output.sum() |
| return output |
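| |
| |
| # Minimal sanity-check sketch (not wired into the test harness): the |
| # reference above should agree with F.soft_margin_loss; targets take |
| # values +1/-1. Shapes below are arbitrary illustrative choices. |
| def _demo_softmarginloss_reference(): |
|     inp = torch.randn(5, 5, dtype=torch.double) |
|     tgt = torch.randn(5, 5, dtype=torch.double).sign() |
|     assert torch.allclose(softmarginloss_reference(inp, tgt), |
|                           F.soft_margin_loss(inp, tgt)) |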
| |
| |
| def _multimarginloss_reference(input, target_idx, p, margin, weight): |
| if weight is None: |
| weight = input.new(len(input)).fill_(1) |
| |
| output = 0 |
| for i in range(0, len(input)): |
| if i != target_idx: |
| output += max(0, weight[target_idx] * (margin - input[target_idx] + input[i]) ** p) |
| return output |
| |
| |
| def multimarginloss_reference(input, target, p=1, margin=1, weight=None, reduction='mean'): |
| if input.dim() == 1: |
| n = 1 |
| dim = input.size(0) |
| return _multimarginloss_reference(input, target[0], p, margin, weight) / dim |
| else: |
| n = input.size(0) |
| dim = input.size(1) |
| output = input.new(n) |
| for x in range(0, n): |
| output[x] = _multimarginloss_reference(input[x], target[x], p, margin, weight) |
| |
| if reduction == 'mean': |
| return output.mean() / dim |
| elif reduction == 'sum': |
| return output.sum() / dim |
| return output / dim |
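| |
| |
| # Minimal sanity-check sketch (not wired into the test harness): with the |
| # default p=1, margin=1 and no class weights, the reference above should |
| # agree with F.multi_margin_loss. Shapes below are arbitrary choices. |
| def _demo_multimarginloss_reference(): |
|     inp = torch.randn(5, 10, dtype=torch.double) |
|     tgt = torch.randint(0, 10, (5,), dtype=torch.long) |
|     assert torch.allclose(multimarginloss_reference(inp, tgt), |
|                           F.multi_margin_loss(inp, tgt)) |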
| |
| |
| def cosineembeddingloss_reference(input1, input2, target, margin=0, reduction='mean'): |
| def _cos(a, b): |
| cos = a.new(a.size(0)) |
| for i in range(0, a.size(0)): |
| cos[i] = (a[i] * b[i]).sum() / ((((a[i] * a[i]).sum() + 1e-12) * ((b[i] * b[i]).sum() + 1e-12)) ** 0.5) |
| return cos |
| |
| output = torch.where(target == 1, 1 - _cos(input1, input2), (_cos(input1, input2) - margin).clamp(min=0)) |
| |
| if reduction == 'mean': |
| return output.mean() |
| elif reduction == 'sum': |
| return output.sum() |
| return output |
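| |
| |
| # Minimal sanity-check sketch (not wired into the test harness): the |
| # reference above should agree with F.cosine_embedding_loss up to the |
| # slightly different eps used for numerical stability. Shapes below are |
| # arbitrary illustrative choices. |
| def _demo_cosineembeddingloss_reference(): |
|     x1 = torch.randn(8, 10, dtype=torch.double) |
|     x2 = torch.randn(8, 10, dtype=torch.double) |
|     tgt = torch.randn(8, dtype=torch.double).sign() |
|     assert torch.allclose(cosineembeddingloss_reference(x1, x2, tgt), |
|                           F.cosine_embedding_loss(x1, x2, tgt)) |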
| |
| |
| def tripletmarginloss_reference(anchor, positive, negative, margin=1.0, p=2, eps=1e-6, swap=False, |
| reduction='mean'): |
| d_p = torch.pairwise_distance(anchor, positive, p, eps) |
| d_n = torch.pairwise_distance(anchor, negative, p, eps) |
| if swap: |
| d_s = torch.pairwise_distance(positive, negative, p, eps) |
| d_n = torch.min(d_n, d_s) |
| |
| output = torch.clamp(margin + d_p - d_n, min=0.0) |
| if reduction == 'mean': |
| return output.mean() |
| elif reduction == 'sum': |
| return output.sum() |
| return output |
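| |
| |
| # Minimal sanity-check sketch (not wired into the test harness): the |
| # reference above should agree with F.triplet_margin_loss for the default |
| # margin, p and eps. Shapes below are arbitrary illustrative choices. |
| def _demo_tripletmarginloss_reference(): |
|     anc = torch.randn(5, 10, dtype=torch.double) |
|     pos = torch.randn(5, 10, dtype=torch.double) |
|     neg = torch.randn(5, 10, dtype=torch.double) |
|     assert torch.allclose(tripletmarginloss_reference(anc, pos, neg), |
|                           F.triplet_margin_loss(anc, pos, neg)) |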
| |
| |
| def marginrankingloss_reference(input1, input2, target, margin=0, reduction='mean'): |
| output = (-target * (input1 - input2) + margin).clamp(min=0) |
| if reduction == 'mean': |
| return output.mean() |
| elif reduction == 'sum': |
| return output.sum() |
| return output |
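| |
| |
| # Minimal sanity-check sketch (not wired into the test harness): the |
| # reference above should agree with F.margin_ranking_loss for the default |
| # margin of 0. Shapes below are arbitrary illustrative choices. |
| def _demo_marginrankingloss_reference(): |
|     x1 = torch.randn(20, dtype=torch.double) |
|     x2 = torch.randn(20, dtype=torch.double) |
|     tgt = torch.randn(20, dtype=torch.double).sign() |
|     assert torch.allclose(marginrankingloss_reference(x1, x2, tgt), |
|                           F.margin_ranking_loss(x1, x2, tgt)) |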
| |
| |
| # This directly follows Graves et al.'s paper; in contrast to the production implementation, it does not use log-space. |
| def ctcloss_reference(log_probs, targets, input_lengths, target_lengths, blank=0, reduction='mean'): |
| input_lengths = torch.as_tensor(input_lengths, dtype=torch.long) |
| target_lengths = torch.as_tensor(target_lengths, dtype=torch.long) |
| dt = log_probs.dtype |
| log_probs = log_probs.double() # we need the extra precision since we are not in log space |
| targets = targets.long() |
| cum_target_lengths = target_lengths.cumsum(0) |
| losses = [] |
| for i in range(log_probs.size(1)): |
| input_length = input_lengths[i].item() |
| target_length = target_lengths[i].item() |
| cum_target_length = cum_target_lengths[i].item() |
| targets_prime = targets.new_full((2 * target_length + 1,), blank) |
| if targets.dim() == 2: |
| targets_prime[1::2] = targets[i, :target_length] |
| else: |
| targets_prime[1::2] = targets[cum_target_length - target_length:cum_target_length] |
| probs = log_probs[:input_length, i].exp() |
| alpha = log_probs.new_zeros((target_length * 2 + 1,)) |
| alpha[0] = probs[0, blank] |
| alpha[1] = probs[0, targets_prime[1]] |
| mask_third = (targets_prime[:-2] != targets_prime[2:]) |
| for t in range(1, input_length): |
| alpha_next = alpha.clone() |
| alpha_next[1:] += alpha[:-1] |
| alpha_next[2:] += torch.where(mask_third, alpha[:-2], alpha.new_zeros(1)) |
| alpha = probs[t, targets_prime] * alpha_next |
| losses.append(-alpha[-2:].sum().log()[None]) |
| output = torch.cat(losses, 0) |
| if reduction == 'mean': |
| return (output / target_lengths.to(dtype=output.dtype, device=output.device)).mean() |
| elif reduction == 'sum': |
| return output.sum() |
| output = output.to(dt) |
| return output |
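| |
| |
| # Minimal sanity-check sketch (not wired into the test harness): on a tiny |
| # problem the non-log-space reference above should agree with F.ctc_loss. |
| # T, N, C, S and the random data below are arbitrary illustrative choices; |
| # double precision keeps the comparison tight. |
| def _demo_ctcloss_reference(): |
|     T, N, C, S = 10, 2, 5, 4 |
|     log_probs = F.log_softmax(torch.randn(T, N, C, dtype=torch.double), dim=2) |
|     targets = torch.randint(1, C, (N, S), dtype=torch.long) |
|     input_lengths = torch.full((N,), T, dtype=torch.long) |
|     target_lengths = torch.full((N,), S, dtype=torch.long) |
|     assert torch.allclose( |
|         ctcloss_reference(log_probs, targets, input_lengths, target_lengths), |
|         F.ctc_loss(log_probs, targets, input_lengths, target_lengths)) |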
| |
| |
| def padding1d_circular(input, pad): |
| r""" input: |
| [[[0., 1., 2.], |
| [3., 4., 5.]]] |
| pad: (1, 2) |
| output: |
| [[[2., 0., 1., 2., 0., 1.], |
| [5., 3., 4., 5., 3., 4.]]] |
| """ |
| return torch.cat([input[:, :, -pad[0]:], input, |
| input[:, :, 0:pad[1]]], dim=2) |
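| |
| |
| # Minimal sanity-check sketch (not wired into the test harness): the |
| # reference above should reproduce F.pad with mode='circular', which is |
| # what the Padding*_1dcircular entries above exercise. |
| def _demo_padding1d_circular(): |
|     x = torch.arange(6, out=torch.DoubleTensor()).reshape([1, 2, 3]) |
|     assert torch.equal(padding1d_circular(x, (1, 2)), |
|                        F.pad(x, (1, 2), mode='circular')) |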
| |
| |
| def padding2d_circular(input, pad): |
| r"""input: |
| [[[[0., 1., 2.], |
| [3., 4., 5.]]]] |
| pad: (1, 2, 2, 1) |
| output: |
| [[[[2., 0., 1., 2., 0., 1.], |
| [5., 3., 4., 5., 3., 4.], |
| [2., 0., 1., 2., 0., 1.], |
| [5., 3., 4., 5., 3., 4.], |
| [2., 0., 1., 2., 0., 1.]]]] |
| """ |
| input = torch.cat([input[:, :, -pad[2]:], input, input[:, :, 0:pad[3]]], dim=2) |
| return torch.cat([input[:, :, :, -pad[0]:], input, input[:, :, :, 0:pad[1]]], dim=3) |
| |
| |
| def padding3d_circular(input, pad): |
| r"""input: |
| [[[[[ 0., 1., 2.], |
| [ 3., 4., 5.]], |
| [[ 6., 7., 8.], |
| [ 9., 10., 11.]]]]] |
| pad: (1, 2, 2, 1, 1, 2) |
| output: [[[[[ 8., 6., 7., 8., 6., 7.], |
| [11., 9., 10., 11., 9., 10.], |
| [ 8., 6., 7., 8., 6., 7.], |
| [11., 9., 10., 11., 9., 10.], |
| [ 8., 6., 7., 8., 6., 7.]], |
| |
| [[ 2., 0., 1., 2., 0., 1.], |
| [ 5., 3., 4., 5., 3., 4.], |
| [ 2., 0., 1., 2., 0., 1.], |
| [ 5., 3., 4., 5., 3., 4.], |
| [ 2., 0., 1., 2., 0., 1.]], |
| |
| [[ 8., 6., 7., 8., 6., 7.], |
| [11., 9., 10., 11., 9., 10.], |
| [ 8., 6., 7., 8., 6., 7.], |
| [11., 9., 10., 11., 9., 10.], |
| [ 8., 6., 7., 8., 6., 7.]], |
| |
| [[ 2., 0., 1., 2., 0., 1.], |
| [ 5., 3., 4., 5., 3., 4.], |
| [ 2., 0., 1., 2., 0., 1.], |
| [ 5., 3., 4., 5., 3., 4.], |
| [ 2., 0., 1., 2., 0., 1.]], |
| |
| [[ 8., 6., 7., 8., 6., 7.], |
| [11., 9., 10., 11., 9., 10.], |
| [ 8., 6., 7., 8., 6., 7.], |
| [11., 9., 10., 11., 9., 10.], |
| [ 8., 6., 7., 8., 6., 7.]]]]] |
| """ |
| input = torch.cat([input[:, :, -pad[4]:], input, input[:, :, 0:pad[5]]], dim=2) |
| input = torch.cat([input[:, :, :, -pad[2]:], input, input[:, :, :, 0:pad[3]]], dim=3) |
| return torch.cat([input[:, :, :, :, -pad[0]:], input, input[:, :, :, :, 0:pad[1]]], dim=4) |
| |
| |
| loss_reference_fns = { |
| 'KLDivLoss': kldivloss_reference, |
| 'NLLLoss': nllloss_reference, |
| 'NLLLossNd': nlllossNd_reference, |
| 'SmoothL1Loss': smoothl1loss_reference, |
| 'MultiLabelMarginLoss': multilabelmarginloss_reference, |
| 'HingeEmbeddingLoss': hingeembeddingloss_reference, |
| 'SoftMarginLoss': softmarginloss_reference, |
| 'MultiMarginLoss': multimarginloss_reference, |
| 'CosineEmbeddingLoss': cosineembeddingloss_reference, |
| 'TripletMarginLoss': tripletmarginloss_reference, |
| 'MarginRankingLoss': marginrankingloss_reference, |
| 'CTCLoss': ctcloss_reference, |
| } |
| |
| |
| criterion_tests = [ |
| dict( |
| module_name='L1Loss', |
| input_size=(2, 3, 4), |
| target_size=(2, 3, 4), |
| reference_fn=lambda i, t, _: 1. / i.numel() * |
| sum((a - b).abs().sum() for a, b in zip(i, t)), |
| ), |
| dict( |
| module_name='NLLLoss', |
| input_fn=lambda: torch.rand(15, 10).log(), |
| target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(), |
| reference_fn=lambda i, t, m: |
| nllloss_reference(i, t, reduction=get_reduction(m)), |
| check_sum_reduction=True |
| ), |
| dict( |
| module_name='NLLLoss', |
| constructor_args=(None, None, 2), |
| input_fn=lambda: torch.rand(15, 10).log(), |
| target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(), |
| reference_fn=lambda i, t, _: nllloss_reference(i, t, ignore_index=2), |
| desc='ignore_index' |
| ), |
| dict( |
| module_name='NLLLoss', |
| constructor_args_fn=lambda: (torch.rand(10),), |
| input_fn=lambda: torch.rand(15, 10).add(1e-2).log(), |
| target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(), |
| reference_fn=lambda i, t, m: |
| nllloss_reference(i, t, weight=get_weight(m)), |
| desc='weights', |
| ), |
| dict( |
| module_name='NLLLoss', |
| constructor_args_fn=lambda: (torch.rand(10), None, 2), |
| input_fn=lambda: torch.rand(15, 10).add(1e-2).log(), |
| target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(), |
| reference_fn=lambda i, t, m: |
| nllloss_reference(i, t, weight=get_weight(m), ignore_index=2), |
| desc='weights_ignore_index' |
| ), |
| dict( |
| module_name='NLLLoss', |
| constructor_args_fn=lambda: (torch.rand(10), None, -1), |
| input_fn=lambda: torch.rand(15, 10).add(1e-2).log(), |
| target_fn=lambda: torch.Tensor(15).uniform_().mul(10 + 1).floor().long() - 1, |
| reference_fn=lambda i, t, m: |
| nllloss_reference(i, t, weight=get_weight(m), ignore_index=-1), |
| desc='weights_ignore_index_neg' |
| ), |
| dict( |
| module_name='KLDivLoss', |
| input_fn=lambda: torch.rand(10, 10).log(), |
| target_fn=lambda: torch.rand(10, 10), |
| reference_fn=lambda i, t, m: |
| kldivloss_reference(i, t, get_reduction(m)), |
| check_sum_reduction=True, |
| ), |
| dict( |
| module_name='MSELoss', |
| input_size=(2, 3, 4, 5), |
| target_size=(2, 3, 4, 5), |
| reference_fn=lambda i, t, m: ((i - t).abs().pow(2).sum() / (i.numel() |
| if get_reduction(m) == 'mean' else 1)), |
| check_sum_reduction=True, |
| ), |
| dict( |
| module_name='BCELoss', |
| input_fn=lambda: torch.rand(15, 10).clamp_(1e-2, 1 - 1e-2), |
| target_fn=lambda: torch.randn(15, 10).gt(0).double(), |
| reference_fn=lambda i, t, m: -(t * i.log() + (1 - t) * (1 - i).log()).sum() / |
| (i.numel() if get_reduction(m) == 'mean' else 1), |
| check_gradgrad=False, |
| ), |
| dict( |
| module_name='BCELoss', |
| constructor_args_fn=lambda: (torch.rand(10),), |
| input_fn=lambda: torch.rand(15, 10).clamp_(1e-2, 1 - 1e-2), |
| target_fn=lambda: torch.randn(15, 10).gt(0).double(), |
| reference_fn=lambda i, t, m: -((t * i.log() + (1 - t) * (1 - i).log()) * get_weight(m)).sum() / |
| (i.numel() if get_reduction(m) == 'mean' else 1), |
| desc='weights', |
| check_gradgrad=False, |
| ), |
| dict( |
| module_name='CrossEntropyLoss', |
| input_size=(15, 10), |
| target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(), |
| ), |
| dict( |
| module_name='CrossEntropyLoss', |
| constructor_args_fn=lambda: (torch.rand(10),), |
| input_size=(15, 10), |
| target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(), |
| desc='weights', |
| ), |
| dict( |
| module_name='HingeEmbeddingLoss', |
| input_size=(10,), |
| target_fn=lambda: torch.randn(10).gt(0).double().mul_(2).sub(1), |
| reference_fn=lambda i, t, m: |
| hingeembeddingloss_reference(i, t, reduction=get_reduction(m)), |
| check_sum_reduction=True, |
| ), |
| dict( |
| module_name='HingeEmbeddingLoss', |
| constructor_args=(0.5,), |
| input_size=(10,), |
| target_fn=lambda: torch.randn(10).gt(0).double().mul_(2).sub(1), |
| reference_fn=lambda i, t, m: |
| hingeembeddingloss_reference(i, t, margin=0.5, reduction=get_reduction(m)), |
| desc='margin', |
| check_sum_reduction=True, |
| ), |
| dict( |
| module_name='MultiLabelMarginLoss', |
| input_size=(10,), |
| target_fn=lambda: torch.rand(10).mul(10).floor().long(), |
| reference_fn=lambda i, t, m: |
| multilabelmarginloss_reference(i, t, reduction=get_reduction(m)), |
| desc="1d", |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| ), |
| dict( |
| module_name='MultiLabelMarginLoss', |
| input_size=(5, 10), |
| target_fn=lambda: torch.rand(5, 10).mul(10).floor().long(), |
| reference_fn=lambda i, t, m: |
| multilabelmarginloss_reference(i, t, reduction=get_reduction(m)), |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| ), |
| dict( |
| module_name='MultiLabelSoftMarginLoss', |
| input_size=(5, 10), |
| target_fn=lambda: torch.rand(5, 10).mul(2).floor(), |
| reference_fn=lambda i, t, m: -(t * i.sigmoid().log() + (1 - t) * (-i).sigmoid().log()).sum() / i.numel(), |
| check_gradgrad=False, |
| ), |
| dict( |
| module_name='MultiMarginLoss', |
| input_size=(5, 10), |
| target_fn=lambda: torch.rand(5).mul(8).floor().long(), |
| reference_fn=lambda i, t, m: |
| multimarginloss_reference(i, t, reduction=get_reduction(m)), |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| ), |
| dict( |
| module_name='MultiMarginLoss', |
| input_size=(10,), |
| target_fn=lambda: torch.rand(1).mul(8).floor().long(), |
| reference_fn=lambda i, t, m: |
| multimarginloss_reference(i, t, reduction=get_reduction(m)), |
| desc='1d', |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| ), |
| dict( |
| module_name='MultiMarginLoss', |
| constructor_args=(2,), |
| input_fn=lambda: torch.rand(5, 10).clamp_(1e-2, 1 - 1e-2), |
| target_fn=lambda: torch.rand(5).mul(8).floor().long(), |
| reference_fn=lambda i, t, m: |
| multimarginloss_reference(i, t, p=2, reduction=get_reduction(m)), |
| desc='p', |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| ), |
| dict( |
| module_name='MultiMarginLoss', |
| constructor_args=(1, 0.5), |
| legacy_constructor_args=(1, None, 0.5), |
| input_size=(5, 10), |
| target_fn=lambda: torch.rand(5).mul(8).floor().long(), |
| reference_fn=lambda i, t, m: |
| multimarginloss_reference(i, t, margin=0.5, reduction=get_reduction(m)), |
| desc='margin', |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| ), |
| dict( |
| module_name='MultiMarginLoss', |
| constructor_args=(1, 1., torch.rand(10)), |
| legacy_constructor_args=(1, torch.rand(10)), |
| input_size=(5, 10), |
| target_fn=lambda: torch.rand(5).mul(8).floor().long(), |
| reference_fn=lambda i, t, m: |
| multimarginloss_reference(i, t, weight=get_weight(m), reduction=get_reduction(m)), |
| desc='weights', |
| check_sum_reduction=True, |
| check_gradgrad=False, |
| ), |
| dict( |
| module_name='SmoothL1Loss', |
| input_size=(5, 10), |
| target_size=(5, 10), |
| check_sum_reduction=True, |
| reference_fn=lambda i, t, m: |
| smoothl1loss_reference(i, t, reduction=get_reduction(m)), |
| ), |
| dict( |
| module_name='SoftMarginLoss', |
| input_size=(5, 5), |
| target_fn=lambda: torch.randn(5, 5).sign(), |
| reference_fn=lambda i, t, m: |
| softmarginloss_reference(i, t, reduction=get_reduction(m)), |
| check_sum_reduction=True, |
| ), |
| dict( |
| module_name='CosineEmbeddingLoss', |
| input_fn=lambda: (torch.rand(15, 10), torch.rand(15, 10)), |
| target_fn=lambda: torch.randn(15).sign(), |
| reference_fn=lambda i, t, m: |
| cosineembeddingloss_reference(i[0], i[1], t, reduction=get_reduction(m)), |
| check_sum_reduction=True, |
| ), |
| dict( |
| module_name='CosineEmbeddingLoss', |
| constructor_args=(0.7,), |
| input_fn=lambda: (torch.rand(15, 10), torch.rand(15, 10)), |
| target_fn=lambda: torch.randn(15).sign(), |
| reference_fn=lambda i, t, m: |
| cosineembeddingloss_reference(i[0], i[1], t, margin=0.7, reduction=get_reduction(m)), |
| desc='margin', |
| check_sum_reduction=True, |
| ), |
| dict( |
| module_name='MarginRankingLoss', |
| input_fn=lambda: (torch.randn(50).mul(10), torch.randn(50).mul(10)), |
| target_fn=lambda: torch.randn(50).sign(), |
| reference_fn=lambda i, t, m: |
| marginrankingloss_reference(i[0], i[1], t, reduction=get_reduction(m)), |
| check_sum_reduction=True, |
| ), |
| dict( |
| module_name='MarginRankingLoss', |
| constructor_args=(0.5,), |
| input_fn=lambda: (torch.randn(50).mul(10), torch.randn(50).mul(10)), |
| target_fn=lambda: torch.randn(50).sign(), |
| reference_fn=lambda i, t, m: |
| marginrankingloss_reference(i[0], i[1], t, margin=0.5, reduction=get_reduction(m)), |
| desc='margin', |
| check_sum_reduction=True, |
| ), |
| ] |
| |
| |
| class NNTestCase(TestCase): |
| |
| def _jacobian(self, input, num_out): |
| if isinstance(input, tuple): |
| return tuple(self._jacobian(elem, num_out) for elem in input) |
| elif isinstance(input, list): |
| return [self._jacobian(elem, num_out) for elem in input] |
| else: |
| return torch.zeros(input.nelement(), num_out) |
| |
| def _flatten_tensors(self, x): |
| if isinstance(x, torch.Tensor): |
| if x.is_sparse: |
| return x.to_dense().view(-1) |
| else: |
| return x.view(-1) |
| else: |
| return tuple(self._flatten_tensors(a) for a in x) |
| |
| def _zero_grad_input(self, input): |
| if isinstance(input, torch.Tensor): |
| if input.requires_grad and input.grad is not None: |
| input.grad.zero_() |
| input.grad.detach_() |
| else: |
| for i in input: |
| self._zero_grad_input(i) |
| |
| def _analytical_jacobian(self, module, input, jacobian_input=True, jacobian_parameters=True): |
| output = self._forward(module, input) |
| output_size = output.nelement() |
| |
| if jacobian_input: |
| jacobian_inp = self._jacobian(input, output_size) |
| flat_jacobian_input = list(iter_tensors(jacobian_inp)) |
| |
| if jacobian_parameters: |
| num_param = sum(p.numel() for p in self._get_parameters(module)[0]) |
| jacobian_param = torch.zeros(num_param, output_size) |
| |
| for i in range(output_size): |
| param, d_param = self._get_parameters(module) |
| # replace None gradients (for params that don't require grad) with zeros |
| d_param = [torch.zeros_like(p) if d is None else d for (p, d) in zip(param, d_param)] |
| |
| d_out = torch.zeros_like(output) |
| flat_d_out = d_out.view(-1) |
| flat_d_out[i] = 1 |
| |
| if jacobian_parameters: |
| self._zero_grad_parameters(module) |
| # Tensors will accumulate gradient from multiple steps |
| if jacobian_input: |
| self._zero_grad_input(input) |
| d_input = self._backward(module, input, output, d_out) |
| |
| if jacobian_input: |
| for jacobian_x, d_x in zip(flat_jacobian_input, iter_tensors(d_input)): |
| jacobian_x[:, i] = d_x.contiguous().view(-1) |
| if jacobian_parameters: |
| jacobian_param[:, i] = torch.cat(self._flatten_tensors(d_param), 0) |
| |
| res = tuple() |
| if jacobian_input: |
| res += jacobian_inp, |
| if jacobian_parameters: |
| res += jacobian_param, |
| |
| return res |
| |
| def _numerical_jacobian(self, module, input, jacobian_input=True, jacobian_parameters=True): |
| def fw(input): |
| return self._forward(module, input).detach() |
| |
| res = tuple() |
| if jacobian_input: |
| res += get_numerical_jacobian(fw, input, eps=1e-6), |
| if jacobian_parameters: |
| param, _ = self._get_parameters(module) |
| res += torch.cat([get_numerical_jacobian(fw, input, p, eps=1e-6) for p in param], 0), |
| return res |
| |
| def check_jacobian(self, module, input, jacobian_input=True): |
| jacobian_parameters = bool(self._get_parameters(module)[0]) |
| analytical = self._analytical_jacobian(module, input, jacobian_input, jacobian_parameters) |
| numerical = self._numerical_jacobian(module, input, jacobian_input, jacobian_parameters) |
| analytical_t = list(iter_tensors(analytical)) |
| numerical_t = list(iter_tensors(numerical)) |
| |
| # TODO: compare structure |
| self.assertLessEqual( |
| max(a.add(-1, n).abs().max() for a, n in zip(analytical_t, numerical_t)), |
| PRECISION |
| ) |
| |
| def check_criterion_jacobian(self, criterion, input, target): |
| eps = 1e-6 |
| self._forward_criterion(criterion, input, target) |
| analytical_d_x = self._backward_criterion(criterion, input, target) |
| numerical_d_x = deepcopy(analytical_d_x) |
| |
| input_t = iter_tensors(input) |
| numerical_t = iter_tensors(numerical_d_x) |
| for x, d_x in zip(input_t, numerical_t): |
| x = x.view(-1).data |
| d_x = d_x.view(-1).data |
| for i in range(x.nelement()): |
| original = x[i].item() |
| x[i] = original + eps |
| fx1 = self._forward_criterion(criterion, input, target) |
| x[i] = original - eps |
| fx2 = self._forward_criterion(criterion, input, target) |
| deriv = (fx1 - fx2) / (2. * eps) |
| d_x[i] = float(deriv) |
| x[i] = original |
| |
| # TODO: check structure |
| analytical_t = list(iter_tensors(analytical_d_x)) |
| numerical_t = list(iter_tensors(numerical_d_x)) |
| |
| self.assertLessEqual( |
| max(a.add(-1, n).abs().max() for a, n in zip(analytical_t, numerical_t)), |
| PRECISION |
| ) |
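| |
| |
| # Minimal sketch of the symmetric finite-difference scheme used by |
| # check_criterion_jacobian above: perturb each coordinate by +/- eps and |
| # compare the difference quotient with the analytical gradient. The function |
| # f(x) = (x ** 2).sum() and the tolerance are arbitrary illustrative choices. |
| def _demo_central_difference(): |
|     eps = 1e-6 |
|     x = torch.randn(5, dtype=torch.double, requires_grad=True) |
|     (x ** 2).sum().backward() |
|     numerical = torch.zeros_like(x) |
|     with torch.no_grad(): |
|         for i in range(x.numel()): |
|             orig = x[i].item() |
|             x[i] = orig + eps |
|             f1 = (x ** 2).sum() |
|             x[i] = orig - eps |
|             f2 = (x ** 2).sum() |
|             x[i] = orig |
|             numerical[i] = (f1 - f2) / (2 * eps) |
|     assert torch.allclose(x.grad, numerical, atol=1e-5) |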
| |
| |
| class TestBase(object): |
| |
| _required_arg_names = {'constructor_args', 'input', 'extra_args'} |
| |
| def __init__(self, constructor, desc='', reference_fn=None, fullname=None, **kwargs): |
| self.desc = desc |
| self.fullname = fullname |
| self.constructor = constructor |
| self.reference_fn = reference_fn |
| for name in self._required_arg_names: |
| if name not in kwargs and name + '_fn' not in kwargs and name + '_size' not in kwargs: |
| if name in {'constructor_args', 'extra_args'}: |
| kwargs[name] = tuple() |
| else: |
| raise ValueError("{}: Specify {} by a value, a function to generate it, or its size!" |
| .format(self.get_name(), name)) |
| self._extra_kwargs = kwargs |
| self._arg_cache = {} |
| |
| def get_name(self): |
| if self.fullname is not None: |
| return 'test_' + self.fullname |
| |
| test_name = 'test_' + self.constructor.__name__ |
| if self.desc: |
| test_name += '_' + self.desc |
| return test_name |
| |
| def _unpack(self, value): |
| if isinstance(value, torch.Tensor): |
| return value |
| elif is_iterable(value): |
| return type(value)(self._unpack(v) for v in value) |
| else: |
| return value |
| |
| @property |
| def constructor_args(self): |
| return self._get_arg('constructor_args', True) |
| |
| @property |
| def extra_args(self): |
| return self._get_arg('extra_args', True) |
| |
| def _get_arg(self, name, unpack): |
| assert name in self._required_arg_names |
| |
| if name not in self._arg_cache: |
| fn_name = name + '_fn' |
| size_name = name + '_size' |
| |
| if name in self._extra_kwargs: |
| self._arg_cache[name] = self._extra_kwargs[name] |
| elif fn_name in self._extra_kwargs: |
| self._arg_cache[name] = self._extra_kwargs[fn_name]() |
| else: |
| assert size_name in self._extra_kwargs |
| |
| def map_tensor_sizes(sizes): |
| if isinstance(sizes, list): |
| return [map_tensor_sizes(s) for s in sizes] |
| elif isinstance(sizes, torch.Tensor): |
| return sizes.double() |
| else: |
| return torch.randn(sizes) |
| |
| self._arg_cache[name] = map_tensor_sizes(self._extra_kwargs[size_name]) |
| |
| return self._unpack(self._arg_cache[name]) if unpack else self._arg_cache[name] |
| |
| def _get_input(self, unpack=True): |
| return self._get_arg('input', unpack) |
| |
| def __call__(self, test_case): |
| raise NotImplementedError |
| |
| |
| class ModuleTest(TestBase): |
| |
| def __init__(self, *args, **kwargs): |
| super(ModuleTest, self).__init__(*args, **kwargs) |
| self.jacobian_input = kwargs.get('jacobian_input', True) |
| self.should_test_cuda = kwargs.get('test_cuda', True) |
| self.should_test_pickle = kwargs.get('pickle', True) |
| self.check_gradgrad = kwargs.get('check_gradgrad', True) |
| self.FIXME_no_cuda_gradgrad_comparison = \ |
| kwargs.get('FIXME_no_cuda_gradgrad_comparison', False) |
| self.precision = kwargs.get('precision', 2e-4) |
| self.check_forward_only = kwargs.get('check_forward_only', False) |
| |
| def __call__(self, test_case): |
| module = self.constructor(*self.constructor_args) |
| input = self._get_input() |
| |
| if self.reference_fn is not None: |
| out = test_case._forward(module, input) |
| ref_input = deepcopy(input) |
| ref_module = deepcopy(module) |
| expected_out = self.reference_fn(ref_input, test_case._get_parameters(module)[0], ref_module) |
| test_case.assertEqual(out, expected_out) |
| if self.check_forward_only: |
| return |
| self.test_noncontig(test_case, module, input) |
| |
| if self.should_test_pickle: |
| # TODO: do this with in-memory files once torch.save supports it |
| with TemporaryFile() as f: |
| test_case._forward(module, input) |
| torch.save(module, f) |
| f.seek(0) |
| module_copy = torch.load(f) |
| test_case.assertEqual(test_case._forward(module, input), test_case._forward(module_copy, input)) |
| |
| self._do_test(test_case, module, input) |
| |
| def noncontiguize(self, obj): |
| if isinstance(obj, list): |
| return [self.noncontiguize(o) for o in obj] |
| tensor = obj |
| ndim = tensor.dim() |
| # Always making only the last dimension noncontiguous makes it easy to |
| # hide bugs, because .view(-1) will still work. So try to find a dim with |
| # size > 1 and make that one non-contiguous, i.e., stack + select on the |
| # dimension directly after it. |
| dim = ndim |
| for d in range(ndim): |
| if tensor.size(d) > 1: |
| dim = d + 1 |
| break |
| noncontig = torch.stack([torch.empty_like(tensor), tensor], dim).select(dim, 1).detach() |
| assert noncontig.numel() == 1 or not noncontig.is_contiguous() |
| noncontig.requires_grad = tensor.requires_grad |
| return noncontig |
| |
| def test_noncontig(self, test_case, module, input): |
| # skip scalar inputs: they can't be made non-contiguous |
| if isinstance(input, torch.Tensor) and input.dim() == 0: |
| return |
| if any(i.dim() == 0 for i in input if isinstance(i, torch.Tensor)): |
| return |
| |
| test_case._zero_grad_parameters(module) |
| test_case._zero_grad_input(input) |
| with freeze_rng_state(): |
| output = test_case._forward(module, input) |
| grad_output = output.new(output.shape).normal_() |
| output = output.clone() |
| d_input = deepcopy(test_case._backward(module, input, output, grad_output)) |
| d_param = deepcopy(test_case._get_parameters(module)[1]) |
| |
| nc_input = self.noncontiguize(input) |
| nc_grad_output = self.noncontiguize(grad_output) |
| for contig_i, contig_g in product((True, False), repeat=2): |
| i = input if contig_i else nc_input |
| go = grad_output if contig_g else nc_grad_output |
| test_case._zero_grad_parameters(module) |
| test_case._zero_grad_input(i) |
| with freeze_rng_state(): |
| out = test_case._forward(module, i) |
| grad = test_case._backward(module, i, out, go) |
| |
| test_case.assertEqual(out, output) |
| test_case.assertEqual(grad, d_input, 1e-4) |
| test_case.assertEqual(test_case._get_parameters(module)[1], d_param) |
| |
| def test_cuda(self, test_case): |
| if not TEST_CUDA or not self.should_test_cuda: |
| raise unittest.SkipTest('Excluded from CUDA tests') |
| try: |
| cpu_input = self._get_input() |
| type_map = {'torch.DoubleTensor': torch.cuda.FloatTensor} |
| gpu_input = to_gpu(cpu_input, type_map=type_map) |
| |
| cpu_module = self.constructor(*self.constructor_args) |
| gpu_module = self.constructor(*self.constructor_args).float().cuda() |
| cpu_param = test_case._get_parameters(cpu_module) |
| gpu_param = test_case._get_parameters(gpu_module) |
| for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0]): |
| gpu_p.data.copy_(cpu_p) |
| |
| test_case._zero_grad_input(cpu_input) |
| test_case._zero_grad_input(gpu_input) |
| test_case._zero_grad_parameters(cpu_module) |
| test_case._zero_grad_parameters(gpu_module) |
| cpu_output = test_case._forward(cpu_module, cpu_input) |
| gpu_output = test_case._forward(gpu_module, gpu_input) |
| test_case.assertEqual(cpu_output, gpu_output, self.precision) |
| |
| # Run backwards on CPU and GPU and compare results |
| for _ in range(5): |
| cpu_gradOutput = cpu_output.clone().normal_() |
| gpu_gradOutput = cpu_gradOutput.type('torch.cuda.FloatTensor') |
| cpu_gradInput = test_case._backward(cpu_module, cpu_input, cpu_output, cpu_gradOutput) |
| gpu_gradInput = test_case._backward(gpu_module, gpu_input, gpu_output, gpu_gradOutput) |
| test_case.assertEqual(cpu_gradInput, gpu_gradInput, self.precision) |
| for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1]): |
| test_case.assertEqual(cpu_d_p, gpu_d_p, self.precision) |
| |
| # Run double-backwards on CPU and GPU and compare results |
| if self.check_gradgrad and not self.FIXME_no_cuda_gradgrad_comparison: |
| cpu_output = cpu_module(cpu_input) |
| gpu_output = gpu_module(gpu_input) |
| |
| cpu_gradOutput = torch.randn_like(cpu_output, requires_grad=True) |
| gpu_gradOutput = cpu_gradOutput.type_as(gpu_output).detach() |
| gpu_gradOutput.requires_grad = True |
| |
| cpu_gradInputs = torch.autograd.grad( |
| cpu_output, |
| (cpu_input,) + tuple(cpu_module.parameters()), |
| cpu_gradOutput, |
| create_graph=True) |
| gpu_gradInputs = torch.autograd.grad( |
| gpu_output, |
| (gpu_input,) + tuple(gpu_module.parameters()), |
| gpu_gradOutput, |
| create_graph=True) |
| |
| for cpu_d_i, gpu_d_i in zip(cpu_gradInputs, gpu_gradInputs): |
| test_case.assertEqual(cpu_d_i, gpu_d_i, self.precision) |
| |
| # We mix the output into the second backward computation so that |
| # torch.autograd.grad doesn't complain that some inputs |
| # are unreachable (which can happen if you differentiate |
| # only with respect to the gradient). |
| cpu_gg = torch.autograd.grad( |
| cpu_output.sum() + sum(map(lambda x: x.sum(), cpu_gradInputs)), |
| (cpu_input, cpu_gradOutput) + tuple(cpu_module.parameters()), |
| retain_graph=True) |
| gpu_gg = torch.autograd.grad( |
| gpu_output.sum() + sum(map(lambda x: x.sum(), gpu_gradInputs)), |
| (gpu_input, gpu_gradOutput) + tuple(gpu_module.parameters()), |
| retain_graph=True) |
| |
| test_case.assertEqual(cpu_gradInput, gpu_gradInput, self.precision) |
| for cpu_d_p, gpu_d_p in zip(cpu_gg, gpu_gg): |
| test_case.assertEqual(cpu_d_p, gpu_d_p, self.precision) |
| |
| self.test_noncontig(test_case, gpu_module, gpu_input) |
| except NotImplementedError: |
| pass |
| # TODO: remove this after CUDA scatter_ is implemented |
| except AttributeError as e: |
| if len(e.args) == 1 and "'FloatTensor' object has no attribute 'scatter_'" in e.args[0]: |
| pass |
| else: |
| raise |
| |
| |
| class CriterionTest(TestBase): |
| |
| _required_arg_names = TestBase._required_arg_names.union({'target'}) |
| |
| def __init__(self, *args, **kwargs): |
| super(CriterionTest, self).__init__(*args, **kwargs) |
| self.should_test_cuda = kwargs.get('test_cuda', True) |
| self.check_forward_only = kwargs.get('check_forward_only', False) |
| |
| def _get_target(self): |
| return self._get_arg('target', True) |
| |
| def __call__(self, test_case): |
| module = self.constructor(*self.constructor_args) |
| input = self._get_input() |
| |
| # Check that these methods don't raise errors |
| module.__repr__() |
| str(module) |
| |
| target = self._get_target() |
| |
| if self.reference_fn is not None: |
| out = test_case._forward_criterion(module, input, target, extra_args=self.extra_args) |
| ref_args = (deepcopy(input), deepcopy(target)) + self.extra_args + (module,) |
| expected_out = self.reference_fn(*ref_args) |
| test_case.assertEqual(out, expected_out) |
| |
| if self.check_forward_only: |
| return |
| |
| test_case.check_criterion_jacobian(module, input, target) |
| self._do_extra_tests(test_case, module, input, target) |
| |
| def test_cuda(self, test_case): |
| if not TEST_CUDA or not self.should_test_cuda: |
| raise unittest.SkipTest('Excluded from CUDA tests') |
| try: |
| cpu_input = self._get_input() |
| type_map = { |
| 'torch.DoubleTensor': torch.cuda.FloatTensor, |
| } |
| gpu_input = to_gpu(cpu_input, type_map=type_map) |
| |
| cpu_target = self._get_target() |
| gpu_target = to_gpu(cpu_target, type_map=type_map) |
| |
| cpu_module = self.constructor(*self.constructor_args) |
| gpu_module = self.constructor(*self.constructor_args).float().cuda() |
| |
| cpu_output = test_case._forward_criterion(cpu_module, cpu_input, cpu_target) |
| gpu_output = test_case._forward_criterion(gpu_module, gpu_input, gpu_target) |
| test_case.assertEqual(cpu_output, gpu_output, 4e-4) |
| |
| gradOutput = torch.randn(()) |
| cpu_gradInput = test_case._backward_criterion(cpu_module, cpu_input, cpu_target, gradOutput) |
| gpu_gradInput = test_case._backward_criterion(gpu_module, gpu_input, gpu_target, gradOutput) |
| test_case.assertEqual(cpu_gradInput, gpu_gradInput, 4e-4) |
| except NotImplementedError: |
| pass |
| |
| def _do_extra_tests(self, test_case, module, input, target): |
| pass |