Fix ibverbs completion queue capacity
Summary:
The header already contained an analysis of required completion queue
depth but the queue pair was still initialized with a maximum queue
depth of kMaxBuffers. This change fixes that and updates the analysis
to talk separately about receive and send completion queues.
Reviewed By: andrewwdye
Differential Revision: D4785786
fbshipit-source-id: 4dc302d523a3b7162dc261d14cfcc755681febf8
diff --git a/gloo/transport/ibverbs/pair.cc b/gloo/transport/ibverbs/pair.cc
index e0563da..9bac8a5 100644
--- a/gloo/transport/ibverbs/pair.cc
+++ b/gloo/transport/ibverbs/pair.cc
@@ -59,8 +59,8 @@
memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
attr.send_cq = cq_;
attr.recv_cq = cq_;
- attr.cap.max_send_wr = Pair::kMaxBuffers;
- attr.cap.max_recv_wr = Pair::kMaxBuffers;
+ attr.cap.max_send_wr = Pair::kSendCompletionQueueCapacity;
+ attr.cap.max_recv_wr = Pair::kRecvCompletionQueueCapacity;
attr.cap.max_send_sge = 1;
attr.cap.max_recv_sge = 1;
attr.qp_type = IBV_QPT_RC;
@@ -214,7 +214,7 @@
// The work request is serialized and sent to the driver so it
// doesn't need to be valid after the ibv_post_recv call.
- struct ibv_recv_wr* bad_wr;
+ struct ibv_recv_wr* bad_wr = nullptr;
int rv = ibv_post_recv(qp_, &wr, &bad_wr);
if (rv != 0) {
signalIoFailure(GLOO_ERROR_MSG("ibv_post_recv: ", rv));
@@ -243,7 +243,7 @@
// The work request is serialized and sent to the driver so it
// doesn't need to be valid after the ibv_post_send call.
- struct ibv_send_wr* bad_wr;
+ struct ibv_send_wr* bad_wr = nullptr;
int rv = ibv_post_send(qp_, &wr, &bad_wr);
if (rv != 0) {
signalIoFailure(GLOO_ERROR_MSG("ibv_post_send: ", rv));
@@ -279,7 +279,7 @@
struct ibv_recv_wr wr;
memset(&wr, 0, sizeof(wr));
- struct ibv_recv_wr* bad_wr;
+ struct ibv_recv_wr* bad_wr = nullptr;
rv = ibv_post_recv(qp_, &wr, &bad_wr);
if (rv != 0) {
signalIoFailure(GLOO_ERROR_MSG("ibv_post_recv: ", rv));
diff --git a/gloo/transport/ibverbs/pair.h b/gloo/transport/ibverbs/pair.h
index 2d1ce3a..4f84fde 100644
--- a/gloo/transport/ibverbs/pair.h
+++ b/gloo/transport/ibverbs/pair.h
@@ -37,16 +37,30 @@
// Use 3x the maximum number of buffers as the capacity
// for entries in this pair's completion queue.
//
- // There are a maximum of:
+ // For receive completions, there are a maximum of:
// - MAX_BUFFERS posted receive work requests to receive memory
- // regions from the other side of the pair.
- // - MAX_BUFFERS posted send work requests to send memory
- // regions to the other side of the pair.
+ // regions from the other side of the pair (for send buffers).
// - MAX_BUFFERS posted receive work requests for RDMA writes
- // from the other side of the pair. These requests are posted
- // at the same time a buffer's local memory region is sent to
- // the other side of the pair.
- static constexpr auto kCompletionQueueCapacity = 3 * kMaxBuffers;
+ // from the other side of the pair.
+ //
+ // For send completions, there are a maximum of:
+ // - MAX_BUFFERS posted send work requests to send memory
+ // regions to the other side of the pair (for receive buffers).
+ // - MAX_BUFFERS posted send work requests for RDMA writes
+ // to the other side of the pair.
+ //
+ // While this sums up to 4x kMaxBuffers work requests, send work
+ // requests can only be posted after receiving the corresponding
+ // memory region from the other side of the pair. This leads to a
+ // maximum of of 3x kMaxBuffers posted work requests at any given
+ // time. However, since the majority can be made up by either
+ // receive work requests or send work requests, we keep the capacity
+ // at 4x kMaxBuffers and allocate half to each type.
+ //
+ static constexpr auto kRecvCompletionQueueCapacity = 2 * kMaxBuffers;
+ static constexpr auto kSendCompletionQueueCapacity = 2 * kMaxBuffers;
+ static constexpr auto kCompletionQueueCapacity =
+ kRecvCompletionQueueCapacity + kSendCompletionQueueCapacity;
// The ibv_req_notify(3) function takes an argument called
// 'solicited_only' which makes it only trigger a notification for