Implement crash recovery of circular queues
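
Crash recovery needs to release any lock that the crashed side was
still holding, so this change threads a Recover() method through the
stack: the single-sided guest and host locks, the guest-and-host lock
(which now records the owning side in a HostIsOwner bit), the SpinLock
(which now stores Sides::OurSide instead of 1), the circular queues,
and the socket_forward layout. Each Recover() returns true if it had
to clean something up, and must only be called while no other threads
on the calling side are using the region.

A rough sketch of the intended call site; the RecoverSocketForwardRegion()
helper below is illustrative and not part of this change:

  // Runs once after a guest reboot or host daemon restart, before any
  // other thread on this side touches the region.
  bool RecoverSocketForwardRegion(SocketForwardLayout* layout) {
    // Recover() walks every queue pair and drops any queue lock or
    // queue_state_lock_ that this side still held.
    return layout->Recover();
  }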

Bug: 73254584
Test: Local build
Change-Id: Ia89ef479cb696c182794ca181ec5e43efcefd8e3
diff --git a/common/vsoc/lib/lock_common.cpp b/common/vsoc/lib/lock_common.cpp
index 2744ba3..7825011 100644
--- a/common/vsoc/lib/lock_common.cpp
+++ b/common/vsoc/lib/lock_common.cpp
@@ -34,10 +34,20 @@
 static_assert((GuestWaitingFlag & HostWaitingFlag) == 0,
               "Waiting flags should not share bits");
 
-// const uint32_t ReservedForRobustLocksFlag = 0x20000000U;
+// Set if the current owner is the host
+const uint32_t HostIsOwner = 0x20000000U;
 
 // PID_MAX_LIMIT appears to be 0x00400000U, so we're probably ok here
-const uint32_t OwnerMask = 0x1FFFFFFFU;
+const uint32_t OwnerMask = 0x3FFFFFFFU;
+
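+// Builds the owner word for the lock: the HostIsOwner bit is set when the
+// host takes the lock and cleared when the guest does.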
+uint32_t MakeOwnerTid(uint32_t raw_tid) {
+  if (Sides::OurSide == Sides::Host) {
+    return (raw_tid | HostIsOwner) & OwnerMask;
+  } else {
+    return raw_tid & (OwnerMask & ~HostIsOwner);
+  }
+}
+
 };  // namespace
 
 namespace vsoc {
@@ -47,7 +57,7 @@
  */
 bool vsoc::layout::WaitingLockBase::TryLock(uint32_t tid,
                                             uint32_t* expected_out) {
-  uint32_t masked_tid = tid & OwnerMask;
+  uint32_t masked_tid = MakeOwnerTid(tid);
   uint32_t expected = LockFree;
   while (1) {
     // First try to lock assuming that the mutex is free
@@ -94,7 +104,8 @@
 
   // We didn't hold the lock. This process is insane and must die before it
   // does damage.
-  if ((tid ^ expected_state) & OwnerMask) {
+  uint32_t marked_tid = MakeOwnerTid(tid);
+  if ((marked_tid ^ expected_state) & OwnerMask) {
     LOG(FATAL) << tid << " unlocking " << this << " owned by "
                << expected_state;
   }
@@ -106,7 +117,7 @@
       break;
     }
   }
-  if ((expected_state ^ tid) & OwnerMask) {
+  if ((expected_state ^ marked_tid) & OwnerMask) {
     LOG(FATAL) << "Lock owner of " << this << " changed from " << tid << " to "
                << expected_state << " during unlock";
   }
@@ -115,6 +126,12 @@
   return rval;
 }
 
+bool vsoc::layout::WaitingLockBase::RecoverSingleSided() {
+  // No need to signal: the caller guarantees that no other threads on this
+  // side are using the lock.
+  return lock_uint32_.exchange(LockFree) != LockFree;
+}
+
 void layout::GuestAndHostLock::Lock(RegionView* region) {
   uint32_t expected;
   uint32_t tid = gettid();
@@ -130,4 +147,27 @@
   region->SendSignal(UnlockCommon(gettid()), &lock_uint32_);
 }
 
+bool layout::GuestAndHostLock::Recover(RegionView* region) {
+  uint32_t expected_state = lock_uint32_;
+  uint32_t expected_owner_bit =
+      (Sides::OurSide == Sides::Host) ? HostIsOwner : 0;
+  // Reading the state exactly once and then ruling out the cases where
+  // Recover() should be a no-op avoids a check-then-act race.
+  if (expected_state == LockFree) {
+    return false;
+  }
+  // Owned by the other side, do nothing.
+  if ((expected_state & HostIsOwner) != expected_owner_bit) {
+    return false;
+  }
+  // At this point we know two things:
+  //   * The lock was held by our side
+  //   * There are no other threads running on our side (precondition
+  //     for calling Recover())
+  // Therefore, we know that the current expected value should still
+  // be in the lock structure. Use the normal unlock logic, providing
+  // the tid that we observed in the lock.
+  region->SendSignal(UnlockCommon(expected_state), &lock_uint32_);
+  return true;
+}
+
 }  // namespace vsoc
diff --git a/common/vsoc/shm/circqueue.h b/common/vsoc/shm/circqueue.h
index ad5e5e7..2f26fa3 100644
--- a/common/vsoc/shm/circqueue.h
+++ b/common/vsoc/shm/circqueue.h
@@ -84,6 +84,10 @@
   intptr_t WriteReserveLocked(RegionSignalingInterface* r, size_t bytes,
                               Range* t, bool non_blocking);
 
+  bool RecoverBase() {
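+    // Force the queue lock free if this side still held it when it went down.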
+    return lock_.Recover();
+  }
+
   // Note: Both of these fields may hold values larger than the buffer size,
   // they should be interpreted modulo the buffer size. This fact along with the
   // buffer size being a power of two greatly simplyfies the index calculations.
@@ -123,6 +127,10 @@
   intptr_t Write(RegionSignalingInterface* r, const char* buffer_in,
                  std::size_t bytes, bool non_blocking = false);
 
+  bool Recover() {
+    return this->RecoverBase();
+  }
+
  protected:
   using Range = typename CircularQueueBase<SizeLog2>::Range;
 };
@@ -169,6 +177,10 @@
           size_t iov_count,
           bool non_blocking = false);
 
+  bool Recover() {
+    return this->RecoverBase();
+  }
+
  protected:
   static_assert(CircularQueueBase<SizeLog2>::BufferSize >= MaxPacketSize,
                 "Buffer is too small to hold the maximum sized packet");
diff --git a/common/vsoc/shm/lock.h b/common/vsoc/shm/lock.h
index b08b6d2..f3b8da8 100644
--- a/common/vsoc/shm/lock.h
+++ b/common/vsoc/shm/lock.h
@@ -56,12 +56,29 @@
    * readers and writers.
    */
   void Lock() {
-    while (lock_.exchange(1)) {
+    while (1) {
+      uint32_t expected = 0;
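+      // Store which side took the lock so that Recover() can tell whether
+      // this side still holds it after a crash.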
+      if (lock_.compare_exchange_strong(expected, Sides::OurSide)) {
+        return;
+      }
       _mm_pause();
     }
   }
 
   /**
+   * Drop the lock iff it is currently held by this side. Used by
+   * recovery code that cleans up regions in the event of a reboot
+   * (guest side) or a service restart (host side).
+   *
+   * The caller must ensure that no other threads on its side
+   * (guest or host) are using the window.
+   */
+  bool Recover() {
+    uint32_t expected = Sides::OurSide;
+    return lock_.compare_exchange_strong(expected, 0);
+  }
+
+  /**
    * Release the spinlock.
    */
   void Unlock() {
@@ -93,6 +110,9 @@
   // Returns sides that should be signalled or 0
   Sides UnlockCommon(uint32_t tid);
 
+  // Common code to recover single-sided locks.
+  bool RecoverSingleSided();
+
   // Non-zero values in this word indicate that the lock is in use.
   // This is 32 bits for compatibility with futex()
   std::atomic<uint32_t> lock_uint32_;
@@ -134,6 +154,14 @@
 #ifndef CUTTLEFISH_HOST
   void Lock();
   void Unlock();
+  /**
+   * Drop the lock iff it is currently held. Used by
+   * recovery code that cleans up regions in the event of a reboot.
+   *
+   * The caller must ensure that no other threads on its side
+   * (guest or host) are using the window.
+   */
+  bool Recover();
 #endif
 };
 ASSERT_SHM_COMPATIBLE(GuestLock, multi_region);
@@ -151,6 +179,15 @@
 #ifdef CUTTLEFISH_HOST
   void Lock();
   void Unlock();
+  /**
+   * Drop the lock iff it is currently held. Used by
+   * recovery code that cleans up regions in the event of a daemon
+   * restart.
+   *
+   * The caller must ensure that no other threads on its side
+   * (guest or host) are using the window.
+   */
+  bool Recover();
 #endif
 };
 ASSERT_SHM_COMPATIBLE(HostLock, multi_region);
@@ -183,6 +220,15 @@
  public:
   void Lock(RegionView*);
   void Unlock(RegionView*);
+  /**
+   * Drop the lock iff it is currently held by this side. Used by
+   * recovery code that cleans up regions in the event of a reboot
+   * (guest side) or a service restart (host side).
+   *
+   * The caller must ensure that no other threads on its side
+   * (guest or host) are using the window.
+   */
+  bool Recover(RegionView*);
 };
 ASSERT_SHM_COMPATIBLE(GuestAndHostLock, multi_region);
 
diff --git a/common/vsoc/shm/socket_forward_layout.h b/common/vsoc/shm/socket_forward_layout.h
index 8175e29..970662d 100644
--- a/common/vsoc/shm/socket_forward_layout.h
+++ b/common/vsoc/shm/socket_forward_layout.h
@@ -44,9 +44,32 @@
   std::uint32_t port_;
 
   SpinLock queue_state_lock_;
+
+  bool Recover() {
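+    // Call Recover() on every member unconditionally; capturing each result
+    // in rval first avoids short-circuiting the later calls.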
+    bool recovered = false;
+    bool rval = host_to_guest.Recover();
+    recovered = recovered || rval;
+    rval = guest_to_host.Recover();
+    recovered = recovered || rval;
+    rval = queue_state_lock_.Recover();
+    recovered = recovered || rval;
+    // TODO: Put queue_state_ and port_ recovery here, probably after grabbing
+    // the queue_state_lock_.
+    return recovered;
+  }
 };
 
 struct SocketForwardLayout : public RegionLayout {
+  bool Recover() {
+    bool recovered = false;
+    for (auto& queue_pair : queues_) {
+      bool rval = queue_pair.Recover();
+      recovered = recovered || rval;
+    }
+    // TODO: Consider handling the sequence number here.
+    return recovered;
+  }
+
   QueuePair queues_[version_info::socket_forward::kNumQueues];
   std::atomic_uint32_t seq_num;
   static const char* region_name;
diff --git a/guest/vsoc/lib/guest_lock.cpp b/guest/vsoc/lib/guest_lock.cpp
index 67ce665..98940cb 100644
--- a/guest/vsoc/lib/guest_lock.cpp
+++ b/guest/vsoc/lib/guest_lock.cpp
@@ -41,5 +41,9 @@
   }
 }
 
+bool GuestLock::Recover() {
+  return RecoverSingleSided();
+}
+
 }  // namespace layout
 }  // namespace vsoc
diff --git a/host/vsoc/lib/host_lock.cpp b/host/vsoc/lib/host_lock.cpp
index 44c20b5..2d610f1 100644
--- a/host/vsoc/lib/host_lock.cpp
+++ b/host/vsoc/lib/host_lock.cpp
@@ -43,5 +43,9 @@
   }
 }
 
+bool HostLock::Recover() {
+  return RecoverSingleSided();
+}
+
 }  // namespace layout
 }  // namespace vsoc