[distributed] NCCL result code update (#129704)
[distributed] NCCL result code update (#128777)
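The NCCL result codes in torch/csrc/cuda/nccl.h are outdated: RemoteError is
missing and NumResults is defined as 6. This PR adds RemoteError = 6, keeps
InProgress = 7, moves NumResults to 8, and updates both conversion switches in
torch/csrc/cuda/nccl.cpp to handle RemoteError. This PR fixes #128756.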
Fixes #128756
Pull Request resolved: https://github.com/pytorch/pytorch/pull/128777
Approved by: https://github.com/Skylion007
(cherry picked from commit c027c8935b25cdb99fce5595fa1a980df8cdb4ab)
Co-authored-by: Myungjin Lee <myungjle@cisco.com>
diff --git a/torch/csrc/cuda/nccl.cpp b/torch/csrc/cuda/nccl.cpp
index b4c91b6..2a8fa0b 100644
--- a/torch/csrc/cuda/nccl.cpp
+++ b/torch/csrc/cuda/nccl.cpp
@@ -47,12 +47,14 @@
return ncclResult_t::ncclInvalidArgument;
case torch::cuda::nccl::ncclResult::InvalidUsage:
return ncclResult_t::ncclInvalidUsage;
- case torch::cuda::nccl::ncclResult::NumResults:
- return ncclResult_t::ncclNumResults;
+ case torch::cuda::nccl::ncclResult::RemoteError:
+ return ncclResult_t::ncclRemoteError;
#ifdef NCCL_HAS_COMM_NONBLOCKING
case torch::cuda::nccl::ncclResult::InProgress:
return ncclResult_t::ncclInProgress;
#endif
+ case torch::cuda::nccl::ncclResult::NumResults:
+ return ncclResult_t::ncclNumResults;
default:
throw std::runtime_error("Unconvertible NCCL type");
}
@@ -72,12 +74,14 @@
return torch::cuda::nccl::ncclResult::InvalidArgument;
case ncclInvalidUsage:
return torch::cuda::nccl::ncclResult::InvalidUsage;
- case ncclNumResults:
- return torch::cuda::nccl::ncclResult::NumResults;
+ case ncclRemoteError:
+ return torch::cuda::nccl::ncclResult::RemoteError;
#ifdef NCCL_HAS_COMM_NONBLOCKING
case ncclInProgress:
return torch::cuda::nccl::ncclResult::InProgress;
#endif
+ case ncclNumResults:
+ return torch::cuda::nccl::ncclResult::NumResults;
default:
throw std::runtime_error("Unconvertible NCCL type");
}
diff --git a/torch/csrc/cuda/nccl.h b/torch/csrc/cuda/nccl.h
index 37d1be1..bf3ce3b 100644
--- a/torch/csrc/cuda/nccl.h
+++ b/torch/csrc/cuda/nccl.h
@@ -44,8 +44,9 @@
InternalError = 3,
InvalidArgument = 4,
InvalidUsage = 5,
- NumResults = 6,
- InProgress = 7
+ RemoteError = 6,
+ InProgress = 7,
+ NumResults = 8
};
/* Reduction operation selector */