Merge remote-tracking branch 'goog/upstream-master' into drewry-nos-merge-upstream

* goog/upstream-master:
  Transport: timeout after 60 seconds

Change-Id: Id908b1351b6c6edbd7f378b7416ac0286443e984
Bug: 110758319
diff --git a/libnos/debug.cpp b/libnos/debug.cpp
index 2bd49bd..0398d54 100644
--- a/libnos/debug.cpp
+++ b/libnos/debug.cpp
@@ -34,6 +34,7 @@
     ErrorString_helper(APP_ERROR_RPC)
     ErrorString_helper(APP_ERROR_CHECKSUM)
     ErrorString_helper(APP_ERROR_BUSY)
+    ErrorString_helper(APP_ERROR_TIMEOUT)
     default:
       if (code >= APP_LINE_NUMBER_BASE && code < MAX_APP_STATUS) {
         return "APP_LINE_NUMBER " + std::to_string(code - APP_LINE_NUMBER_BASE);
diff --git a/libnos_transport/test/test.cpp b/libnos_transport/test/test.cpp
index c6b4a9c..ac7a884 100644
--- a/libnos_transport/test/test.cpp
+++ b/libnos_transport/test/test.cpp
@@ -635,3 +635,26 @@
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
+
+#ifdef TEST_TIMEOUT
+TEST_F(TransportTest, Timeout) {
+  const uint8_t app_id = 49;
+  const uint16_t param = 64;
+
+  InSequence please;
+  EXPECT_GET_STATUS_IDLE(app_id);
+  EXPECT_SEND_DATA(app_id, nullptr, 0);
+  EXPECT_GO_COMMAND(app_id, param, nullptr, 0, 0);
+
+  // Keep saying we're working on it
+  const uint32_t command = CMD_ID((app_id)) | CMD_IS_READ | CMD_TRANSPORT;
+  EXPECT_CALL(mock_dev(), Read(command, _, STATUS_MAX_LENGTH))
+      .WillRepeatedly(DoAll(ReadStatusV1_Working(), Return(0)));
+
+  // We'll still try and clean up
+  EXPECT_CLEAR_STATUS(app_id);
+
+  uint32_t res = nos_call_application(dev(), app_id, param, nullptr, 0, nullptr, nullptr);
+  EXPECT_THAT(res, Eq(APP_ERROR_TIMEOUT));
+}
+#endif
diff --git a/libnos_transport/transport.c b/libnos_transport/transport.c
index 71d4b56..7dad955 100644
--- a/libnos_transport/transport.c
+++ b/libnos_transport/transport.c
@@ -23,6 +23,7 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>
 #include <unistd.h>
 
 #include <application.h>
@@ -75,9 +76,8 @@
 /* In case of CRC error, try to retransmit */
 #define CRC_RETRY_COUNT 3
 
-/* The number of times to poll before giving up */
-/* TODO: review this limit, maybe replace with a timeout */
-#define POLL_LIMIT 1000000
+/* How long to poll before giving up */
+#define POLL_LIMIT_SECONDS 60
 
 struct transport_context {
   const struct nos_device *dev;
@@ -334,12 +334,31 @@
   return APP_SUCCESS;
 }
 
+static bool timespec_before(const struct timespec *lhs, const struct timespec *rhs) {
+  if (lhs->tv_sec == rhs->tv_sec) {
+    return lhs->tv_nsec < rhs->tv_nsec;
+  } else {
+    return lhs->tv_sec < rhs->tv_sec;
+  }
+}
+
 /*
  * Keep polling until the app says it is done.
  */
-uint32_t poll_until_done(const struct transport_context *ctx,
-                         struct transport_status *status) {
+static uint32_t poll_until_done(const struct transport_context *ctx,
+                                struct transport_status *status) {
   uint32_t poll_count = 0;
+
+  /* Start the timer */
+  struct timespec now;
+  struct timespec abort_at;
+  if (clock_gettime(CLOCK_MONOTONIC, &now) != 0) {
+    NLOGE("clock_gettime() failing: %s", strerror(errno));
+    return APP_ERROR_IO;
+  }
+  abort_at.tv_sec = now.tv_sec + POLL_LIMIT_SECONDS;
+  abort_at.tv_nsec = now.tv_nsec;
+
   NLOGD("Polling app %d", ctx->app_id);
   do {
     /* Poll the status */
@@ -370,17 +389,22 @@
       NLOGE("App %d just stopped working", ctx->app_id);
       return APP_ERROR_INTERNAL;
     }
-  } while (poll_count != POLL_LIMIT);
+    if (clock_gettime(CLOCK_MONOTONIC, &now) != 0) {
+      NLOGE("clock_gettime() failing: %s", strerror(errno));
+      return APP_ERROR_IO;
+    }
+  } while (timespec_before(&now, &abort_at));
 
-  NLOGE("App %d not done after polling %d times", ctx->app_id, poll_count);
-  return APP_ERROR_INTERNAL;
+  NLOGE("App %d not done after polling %d times in %d seconds",
+        ctx->app_id, poll_count, POLL_LIMIT_SECONDS);
+  return APP_ERROR_TIMEOUT;
 }
 
 /*
  * Reconstruct the reply data from datagram stream.
  */
-uint32_t receive_reply(const struct transport_context *ctx,
-                       const struct transport_status *status) {
+static uint32_t receive_reply(const struct transport_context *ctx,
+                              const struct transport_status *status) {
   int retries = CRC_RETRY_COUNT;
   while (retries--) {
     NLOGD("Read app %d reply data (%d bytes)", ctx->app_id, status->reply_len);
diff --git a/nugget/include/application.h b/nugget/include/application.h
index 8b7eaf2..6dd402a 100644
--- a/nugget/include/application.h
+++ b/nugget/include/application.h
@@ -309,6 +309,7 @@
   APP_ERROR_RPC,             /* problem during RPC communication */
   APP_ERROR_CHECKSUM,        /* checksum failed, only used within protocol */
   APP_ERROR_BUSY,            /* the app is already working on a commnad */
+  APP_ERROR_TIMEOUT,         /* the app took too long to respond */
   /* more? */
 
   APP_SPECIFIC_ERROR = 0x20, /* "should be enough for anybody" */