Remove test_linalg test skips from MAGMA integration (#58232)
Summary:
Fixes https://github.com/pytorch/pytorch/issues/55552 and the majority of cases in https://github.com/pytorch/pytorch/issues/51303.
Skips for tests in torch/testing/_internal/common_methods_invocations.py (exercised through test_ops) cannot be fully removed yet, since the machines appear to run out of GPU memory while running them; this needs further analysis.
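For reference, the decorator pattern touched by this diff looks like the minimal sketch below (the class, test name, and test body are hypothetical placeholders; the decorators and imports match the ones used in test/test_linalg.py). Before this change, tests like these also carried a blanket @skipCUDAIfRocm / @skipIfRocm, so they never ran on ROCm even when MAGMA was available; after the change, only the MAGMA/LAPACK availability guards remain.

    import torch
    from torch.testing._internal.common_utils import TestCase, run_tests
    from torch.testing._internal.common_device_type import (
        instantiate_device_type_tests, dtypes, skipCUDAIfNoMagma, skipCPUIfNoLapack)


    class TestLinalgSketch(TestCase):
        # The ROCm skip decorator is gone; the test is now gated only on
        # MAGMA (CUDA/ROCm) and LAPACK (CPU) availability.
        @skipCUDAIfNoMagma
        @skipCPUIfNoLapack
        @dtypes(torch.double, torch.cdouble)
        def test_det_sketch(self, device, dtype):
            # det of the identity is 1 for every dtype/device combination.
            eye = torch.eye(3, device=device, dtype=dtype)
            self.assertEqual(torch.linalg.det(eye),
                             torch.ones((), device=device, dtype=dtype))


    instantiate_device_type_tests(TestLinalgSketch, globals())

    if __name__ == '__main__':
        run_tests()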
Pull Request resolved: https://github.com/pytorch/pytorch/pull/58232
Reviewed By: ngimel
Differential Revision: D29394021
Pulled By: malfet
fbshipit-source-id: f108a70af33beec908ac1c0b58467f8744e6fe87
diff --git a/test/test_linalg.py b/test/test_linalg.py
index e85abfe..a759f7c 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -15,7 +15,7 @@
from torch.testing._internal.common_utils import \
(TestCase, run_tests, TEST_SCIPY, IS_MACOS, IS_WINDOWS, slowTest,
TEST_WITH_ASAN, make_tensor, TEST_WITH_ROCM, IS_FBCODE, IS_REMOTE_GPU,
- iter_indices, gradcheck, gradgradcheck, skipIfRocm)
+ iter_indices, gradcheck, gradgradcheck)
from torch.testing._internal.common_device_type import \
(instantiate_device_type_tests, dtypes,
onlyCPU, skipCUDAIf, skipCUDAIfNoMagma, skipCPUIfNoLapack, precisionOverride,
@@ -868,9 +868,6 @@
@skipCUDAIfNoMagma
@skipCPUIfNoLapack
@dtypes(torch.double, torch.cdouble)
- # NOTE: This test, and many others in this file that use magma, are currently skipped for ROCm.
- # See: https://github.com/pytorch/pytorch/issues/51303
- @skipCUDAIfRocm
def test_det(self, device, dtype):
tensors = (
torch.randn((2, 2), device=device, dtype=dtype),
@@ -1267,7 +1264,6 @@
# This test confirms that torch.linalg.norm's dtype argument works
# as expected, according to the function's documentation
@skipCUDAIfNoMagma
- @skipCUDAIfRocm
def test_norm_dtype(self, device):
def run_test_case(input_size, ord, keepdim, from_dtype, to_dtype):
# Determine the best dtype to use for comparisons between tensors
@@ -1592,7 +1588,6 @@
@skipCUDAIfNoMagma
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
@precisionOverride({torch.float32: 1e-3})
- @skipCUDAIfRocm
def test_cond(self, device, dtype):
def run_test_case(input, p):
result = torch.linalg.cond(input, p)
@@ -1643,7 +1638,6 @@
actual = torch.linalg.cond(input, p)
self.assertEqual(actual, expected)
- @skipIfRocm # https://github.com/pytorch/pytorch/issues/55552
@skipMeta # https://github.com/pytorch/pytorch/issues/53739
@skipCPUIfNoLapack
@skipCUDAIfNoMagma
@@ -1929,7 +1923,6 @@
# Test degenerate shape results match numpy for linalg.norm matrix norms
@skipCUDAIfNoMagma
- @skipCUDAIfRocm
@skipCPUIfNoLapack
@dtypes(torch.float, torch.double, torch.cfloat, torch.cdouble)
def test_norm_matrix_degenerate_shapes(self, device, dtype):
@@ -2005,7 +1998,6 @@
expected = torch.pow(x.pow(3).abs().sum(1), 1.0 / 3.0)
self.assertEqual(result, expected)
- @skipIfRocm # https://github.com/pytorch/pytorch/issues/55552
@skipCPUIfNoLapack
@skipCUDAIfNoMagma
@dtypes(*floating_and_complex_types())
@@ -2144,7 +2136,6 @@
@skipCPUIfNoLapack
@skipCUDAIfNoMagma
- @skipCUDAIfRocm
# NumPy computes only in float64 and complex128 precisions
# for float32 or complex64 results might be very different from float64 or complex128
@dtypes(torch.float64, torch.complex128)
@@ -2194,7 +2185,6 @@
@onlyCUDA
@skipCUDAIfNoMagma
- @skipCUDAIfRocm
@dtypes(*floating_and_complex_types())
def test_eig_compare_backends(self, device, dtype):
def run_test(shape, *, symmetric=False):
@@ -2227,7 +2217,6 @@
@slowTest
@onlyCUDA
@skipCUDAIfNoMagma
- @skipCUDAIfRocm
@dtypes(torch.float32)
def test_eig_check_magma(self, device, dtype):
# For CUDA inputs only matrices of size larger than 2048x2048 actually call MAGMA library
@@ -2238,7 +2227,6 @@
self.assertEqual(a.to(v.dtype) @ v, w * v, atol=1e-3, rtol=1e-3)
@skipCUDAIfNoMagma
- @skipCUDAIfRocm
@skipCPUIfNoLapack
@dtypes(*floating_and_complex_types())
def test_eig_errors_and_warnings(self, device, dtype):
@@ -2302,7 +2290,6 @@
@skipCPUIfNoLapack
@skipCUDAIfNoMagma
- @skipCUDAIfRocm
# NumPy computes only in float64 and complex128 precisions
# for float32 or complex64 results might be very different from float64 or complex128
@dtypes(torch.float64, torch.complex128)
@@ -2349,7 +2336,6 @@
@onlyCUDA
@skipCUDAIfNoMagma
- @skipCUDAIfRocm
@dtypes(*floating_and_complex_types())
def test_eigvals_compare_backends(self, device, dtype):
def run_test(shape, *, symmetric=False):
@@ -2396,7 +2382,6 @@
run_test(shape, symmetric=True)
@skipCUDAIfNoMagma
- @skipCUDAIfRocm
@skipCPUIfNoLapack
@dtypes(*floating_and_complex_types())
def test_eigvals_errors_and_warnings(self, device, dtype):
@@ -2750,7 +2735,6 @@
@skipCUDAIfNoMagmaAndNoCusolver
@skipCPUIfNoLapack
@dtypes(torch.double)
- @skipCUDAIfRocm
def test_svd_lowrank(self, device, dtype):
from torch.testing._internal.common_utils import random_lowrank_matrix, random_sparse_matrix
@@ -3149,7 +3133,6 @@
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
@precisionOverride({torch.float32: 2e-3, torch.complex64: 2e-3,
torch.float64: 1e-8, torch.complex128: 1e-8})
- @skipCUDAIfRocm
def test_inverse(self, device, dtype):
from torch.testing._internal.common_utils import random_fullrank_matrix_distinct_singular_value
@@ -3273,7 +3256,6 @@
test_inverse_many_batches_helper(torch_inverse, 3, 512)
test_inverse_many_batches_helper(torch_inverse, 64, 64)
- @skipIfRocm # https://github.com/pytorch/pytorch/issues/55552
@skipCUDAIfNoMagmaAndNoCusolver
@skipCPUIfNoLapack
@onlyOnCPUAndCUDA # TODO: XLA doesn't raise exception
@@ -3408,7 +3390,6 @@
with self.assertRaisesRegex(RuntimeError, "rcond tensor of complex type is not supported"):
torch.linalg.pinv(a, rcond=rcond)
- @skipIfRocm # https://github.com/pytorch/pytorch/issues/55552
@skipCUDAIfNoMagmaAndNoCusolver
@skipCPUIfNoLapack
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
@@ -3480,7 +3461,6 @@
@skipCPUIfNoLapack
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
@precisionOverride({torch.float32: 1e-3, torch.complex64: 1e-3})
- @skipCUDAIfRocm
def test_solve(self, device, dtype):
def run_test(n, batch, rhs):
A_dims = (n, *batch)
@@ -3528,7 +3508,6 @@
@skipCPUIfNoLapack
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
@precisionOverride({torch.float32: 1e-3, torch.complex64: 1e-3})
- @skipCUDAIfRocm
def test_solve_batched_non_contiguous(self, device, dtype):
from torch.testing._internal.common_utils import random_fullrank_matrix_distinct_singular_value
A = random_fullrank_matrix_distinct_singular_value(2, 2, dtype=dtype).to(device).permute(1, 0, 2)
@@ -3539,7 +3518,6 @@
expected = np.linalg.solve(A.cpu().numpy(), b.cpu().numpy())
self.assertEqual(actual, expected)
- @skipIfRocm # https://github.com/pytorch/pytorch/issues/55552
@skipCUDAIfNoMagma
@skipCPUIfNoLapack
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
@@ -3618,7 +3596,6 @@
@skipCUDAIfNoMagma
@skipCPUIfNoLapack
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
- @skipCUDAIfRocm
def test_old_solve_batched(self, device, dtype):
def solve_batch_helper(A_dims, b_dims):
b, A = self.solve_test_helper(A_dims, b_dims, device, dtype)
@@ -3637,7 +3614,6 @@
@skipCUDAIfNoMagma
@skipCPUIfNoLapack
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
- @skipCUDAIfRocm
def test_old_solve_batched_non_contiguous(self, device, dtype):
from numpy.linalg import solve
from torch.testing._internal.common_utils import random_fullrank_matrix_distinct_singular_value
@@ -3661,7 +3637,6 @@
@skipCUDAIfNoMagma
@skipCPUIfNoLapack
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
- @skipCUDAIfRocm
def test_old_solve_batched_broadcasting(self, device, dtype):
from numpy.linalg import solve
@@ -3824,7 +3799,6 @@
@skipCPUIfNoLapack
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
@precisionOverride({torch.float: 1e-3, torch.cfloat: 1e-3})
- @skipCUDAIfRocm
def test_tensorinv(self, device, dtype):
def run_test(a_shape, ind):
@@ -3855,7 +3829,6 @@
@skipCPUIfNoLapack
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
@precisionOverride({torch.float: 1e-3, torch.cfloat: 1e-3})
- @skipCUDAIfRocm
def test_tensorinv_non_contiguous(self, device, dtype):
def run_test(a_shape, ind):
@@ -5201,7 +5174,6 @@
with self.assertRaisesRegex(RuntimeError, error_regex):
torch.ormqr(a, tau, c)
- @skipCUDAIfRocm
def test_blas_empty(self, device):
def fn(torchfn, *args, test_out=False, **kwargs):
def call_torch_fn(*args, **kwargs):
@@ -5284,7 +5256,6 @@
A_LU, pivots = fn(torch.lu, (2, 0, 0))
self.assertEqual([(2, 0, 0), (2, 0)], [A_LU.shape, pivots.shape])
- @skipCUDAIfRocm
@dtypesIfCUDA(torch.cfloat, torch.cdouble,
*torch.testing.get_all_fp_dtypes(include_half=not CUDA9, include_bfloat16=(CUDA11OrLater and SM53OrLater)))
@dtypes(*(set(torch.testing.get_all_dtypes()) - {torch.half, torch.bool}))
@@ -5375,7 +5346,6 @@
@skipCPUIfNoLapack
@skipCUDAIfNoCusolver
- @skipCUDAIfRocm
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
def test_householder_product(self, device, dtype):
def generate_reflectors_and_tau(A):
@@ -5436,7 +5406,6 @@
@skipCPUIfNoLapack
@skipCUDAIfNoCusolver
- @skipCUDAIfRocm
def test_householder_product_errors_and_warnings(self, device):
test_cases = [
# input1 size, input2 size, error regex
@@ -5486,7 +5455,6 @@
@skipCUDAIfNoMagma
@skipCPUIfNoLapack
@dtypes(torch.double, torch.cfloat, torch.cdouble)
- @skipCUDAIfRocm
def test_lu(self, device, dtype):
from torch.testing._internal.common_utils import random_matrix
@@ -5596,7 +5564,6 @@
@skipCPUIfNoLapack
@skipCUDAIfNoMagma
@dtypes(torch.double)
- @skipCUDAIfRocm
def test_lu_unpack_check_input(self, device, dtype):
x = torch.rand(5, 5, 5, device=device, dtype=dtype)
lu_data, lu_pivots = torch.lu(x, pivot=True)
@@ -6530,7 +6497,6 @@
check(2, 3, 3)
check(2, 3, 4, 4, noncontiguous=True)
- @skipCUDAIfRocm
@skipCPUIfNoLapack
@skipCUDAIfNoMagmaAndNoCusolver
@dtypes(torch.double, torch.cdouble)
@@ -6851,7 +6817,6 @@
run_test(3, 3, 5, 5)
@skipCUDAIfNoMagma
- @skipCUDAIfRocm
@skipCPUIfNoLapack
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
@precisionOverride({torch.float32: 1e-3, torch.complex64: 1e-3,
@@ -7127,7 +7092,6 @@
@skipCUDAIfNoMagma
@skipCPUIfNoLapack
@dtypes(torch.double)
- @skipCUDAIfRocm
def test_det_logdet_slogdet_batched(self, device, dtype):
from torch.testing._internal.common_utils import (random_symmetric_matrix, random_symmetric_psd_matrix,
random_symmetric_pd_matrix, random_square_matrix_of_rank)
@@ -7177,7 +7141,6 @@
run_test(matsize, batchdims, mat_chars=['sing', 'non_sing'])
@skipCUDAIfNoMagma
- @skipCUDAIfRocm
@skipCPUIfNoLapack
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
def test_cholesky_inverse(self, device, dtype):
@@ -7412,7 +7375,6 @@
@skipCUDAIfNoMagma
@skipCPUIfNoLapack
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
- @skipCUDAIfRocm
def test_lu_solve_batched_non_contiguous(self, device, dtype):
from numpy.linalg import solve
from torch.testing._internal.common_utils import random_fullrank_matrix_distinct_singular_value
@@ -7457,7 +7419,6 @@
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
@precisionOverride({torch.float32: 1e-3, torch.complex64: 1e-3,
torch.float64: 1e-8, torch.complex128: 1e-8})
- @skipCUDAIfRocm
def test_lu_solve_batched(self, device, dtype):
def sub_test(pivot):
def lu_solve_batch_test_helper(A_dims, b_dims, pivot):
@@ -7501,7 +7462,6 @@
@skipCUDAIfNoMagma
@skipCPUIfNoLapack
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
- @skipCUDAIfRocm
def test_lu_solve_batched_broadcasting(self, device, dtype):
from numpy.linalg import solve
from torch.testing._internal.common_utils import random_fullrank_matrix_distinct_singular_value
@@ -7894,7 +7854,6 @@
self.assertEqual((torch.mm(a, tb) - b).norm(), expectedNorm, atol=1e-8, rtol=0)
@skipCUDAIfNoMagma
- @skipCUDAIfRocm
@skipCPUIfNoLapack
def test_lapack_empty(self, device):
# FIXME: these are just a selection of LAPACK functions -- we need a general strategy here.