[CI] Run test_multi_gpu in test_inductor_distributed (#100135)
Summary: The expected guard failure reason string in the test needs updating after https://github.com/pytorch/pytorch/pull/98107/
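
For context, the affected test compiles a small function with Dynamo's `guard_fail_fn` hook and asserts on the recorded failure reason. Below is a minimal sketch of that pattern, assuming an inductor-capable build with at least two CUDA devices; the function and tensor names are illustrative, not the exact test body:

```python
import torch
import torch._dynamo

failed_guard = None

def fail(guard):
    # Dynamo calls this hook with a GuardFail record whenever a guard
    # check fails; the test inspects its .reason string.
    global failed_guard
    failed_guard = guard

@torch._dynamo.optimize("inductor", guard_fail_fn=fail)
def gemm(x, y):
    return x @ y

x0 = torch.randn(4, 4, device="cuda:0")
gemm(x0, x0)   # first call compiles and guards on device index 0
x1 = x0.to("cuda:1")
gemm(x1, x1)   # device index changed -> guard fails, triggering a recompile
assert failed_guard is not None
# After #98107, guard sources for locals are rendered through the L mapping,
# so the tensor formerly reported as 'x' now appears as L['x']:
assert "L['x']" in failed_guard.reason
```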
Pull Request resolved: https://github.com/pytorch/pytorch/pull/100135
Approved by: https://github.com/anijain2305
diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index 1ee49f2..d6462ef 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -255,6 +255,10 @@
}
test_inductor_distributed() {
+ # Smuggle a few multi-gpu tests here so that we don't have to request another large node
+ echo "Testing multi_gpu tests in test_torchinductor"
+ pytest test/inductor/test_torchinductor.py -k test_multi_gpu
+
# This runs on both single-GPU and multi-GPU instances. It should be smart about
# skipping tests that aren't supported if the required number of GPUs isn't available.
python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives --verbose
@@ -630,10 +634,6 @@
}
test_distributed() {
- # Smuggle a few multi-gpu tests here so that we don't have to request another large node
- echo "Testing multi_gpu tests in test_torchinductor"
- pytest test/inductor/test_torchinductor.py -k test_multi_gpu
-
echo "Testing distributed python tests"
time python test/run_test.py --distributed-tests --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
assert_git_not_dirty
diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py
index bfb93d9..e997d5c 100644
--- a/test/inductor/test_torchinductor.py
+++ b/test/inductor/test_torchinductor.py
@@ -2069,7 +2069,7 @@
gemm_opt(x1, y1)
self.assertTrue(failed_guard is not None)
self.assertTrue(
- "tensor 'x' Tensor device index mismatch. Expected device index to be"
+ "tensor 'L['x']' Tensor device index mismatch. Expected device index to be"
in failed_guard.reason
)
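
For reference, the assertion update mirrors how https://github.com/pytorch/pytorch/pull/98107/ changed guard-source rendering: local variables are now referenced through the `L` mapping, so the tensor previously printed as `x` shows up as `L['x']` in guard failure reasons.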