Workaround for H4 HCI stream corruption during LE scans

Sometimes, during result-heavy LE scans, the UART byte stream can become
corrupted, leading to assertions caused by mis-interpreting the bytes
following the corruption.

This workaround looks for tell-tale signs of a BLE event and attempts to
skip the correct number of bytes in the stream to re-synchronize onto a
packet boundary.

Bug: 23934838
Change-Id: Ifadaecf8077cb1defc7134c59c97302fca660f81
diff --git a/hci/src/hci_hal_h4.c b/hci/src/hci_hal_h4.c
index f19755f..7a48c69 100644
--- a/hci/src/hci_hal_h4.c
+++ b/hci/src/hci_hal_h4.c
@@ -31,6 +31,7 @@
 #include "vendor.h"
 
 #define HCI_HAL_SERIAL_BUFFER_SIZE 1026
+#define HCI_BLE_EVENT 0x3e  // byte value that triggers the LE-scan stream corruption workaround
 
 // Increased HCI thread priority to keep up with the audio sub-system
 // when streaming time sensitive data (A2DP).
@@ -47,6 +48,8 @@
 static eager_reader_t *uart_stream;
 static serial_data_type_t current_data_type;
 static bool stream_has_interpretation;
+static bool stream_corruption_detected;  // true while the LE-scan workaround is consuming bytes
+static uint8_t stream_corruption_bytes_to_ignore;  // bytes still to skip; 0 until the count byte has been read
 
 static void event_uart_has_bytes(eager_reader_t *reader, void *context);
 
@@ -86,6 +89,8 @@
   }
 
   stream_has_interpretation = false;
+  stream_corruption_detected = false;
+  stream_corruption_bytes_to_ignore = 0;
   eager_reader_register(uart_stream, thread_get_reactor(thread), event_uart_has_bytes, NULL);
 
   // Raise thread priorities to keep up with audio
@@ -177,6 +182,40 @@
 
 // Internal functions
 
+// WORKAROUND (b/23934838): during result-heavy LE scans the UART byte stream
+// can get corrupted, and mis-interpreting the bytes that follow the
+// corruption leads to assertions.
+// A byte equal to HCI_BLE_EVENT seen where a message type is expected starts
+// the skip; the next byte is taken as the count of bytes to ignore, and that
+// many following bytes are then consumed to re-synchronize onto a packet
+// boundary (state is kept in the stream_corruption_* statics).
+// Function returns true if |byte_read| has been processed by the workaround.
+static bool stream_corrupted_during_le_scan_workaround(const uint8_t byte_read)
+{
+  if (!stream_corruption_detected && byte_read == HCI_BLE_EVENT) {  // not already skipping, and the trigger byte arrived
+    LOG_ERROR("%s HCI stream corrupted (message type 0x3E)!", __func__);
+    stream_corruption_detected = true;
+    return true;  // the trigger byte itself is consumed
+  }
+
+  if (stream_corruption_detected) {
+    if (stream_corruption_bytes_to_ignore == 0) {  // count not loaded yet: this byte is the count
+      stream_corruption_bytes_to_ignore = byte_read;
+      LOG_ERROR("%s About to skip %d bytes...", __func__, stream_corruption_bytes_to_ignore);
+    } else {
+      --stream_corruption_bytes_to_ignore;  // one more byte skipped
+    }
+
+    if (stream_corruption_bytes_to_ignore == 0) {  // nothing left to skip (also hit when the count byte is 0)
+      LOG_ERROR("%s Back to our regularly scheduled program...", __func__);
+      stream_corruption_detected = false;
+    }
+    return true;  // byte consumed by the workaround
+  }
+
+  return false;  // not handled here; the caller goes on to validate the byte itself
+}
+
 // See what data is waiting, and notify the upper layer
 static void event_uart_has_bytes(eager_reader_t *reader, UNUSED_ATTR void *context) {
   if (stream_has_interpretation) {
@@ -187,6 +226,10 @@
       LOG_ERROR("%s could not read HCI message type", __func__);
       return;
     }
+
+    if (stream_corrupted_during_le_scan_workaround(type_byte))
+      return;
+
     if (type_byte < DATA_TYPE_ACL || type_byte > DATA_TYPE_EVENT) {
       LOG_ERROR("%s Unknown HCI message type. Dropping this byte 0x%x, min %x, max %x", __func__, type_byte, DATA_TYPE_ACL, DATA_TYPE_EVENT);
       return;
diff --git a/hci/test/hci_hal_h4_test.cpp b/hci/test/hci_hal_h4_test.cpp
index 2c4030d..d64e8c4 100644
--- a/hci/test/hci_hal_h4_test.cpp
+++ b/hci/test/hci_hal_h4_test.cpp
@@ -43,6 +43,9 @@
   type_byte_only
 );
 
+// Use as packet type to test stream_corrupted_during_le_scan_workaround()
+static const uint8_t HCI_BLE_EVENT = 0x3e;
+
 static char sample_data1[100] = "A point is that which has no part.";
 static char sample_data2[100] = "A line is breadthless length.";
 static char sample_data3[100] = "The ends of a line are points.";
@@ -50,6 +53,9 @@
 static char sco_data[100] =     "A surface is that which has length and breadth only.";
 static char event_data[100] =   "The edges of a surface are lines.";
 
+// Test data for stream_corrupted_during_le_scan_workaround(). NUL-terminated because write_packet() is given no length (presumably strlen()-sized -- TODO confirm).
+static char corrupted_data[] = { 0x5 /* length of remaining data */, 'H', 'e', 'l', 'l', 'o', '\0' /* terminator; assumed not to be written to the stream */ };
+
 static const hci_hal_t *hal;
 static int dummy_serial_fd;
 static int reentry_i = 0;
@@ -221,6 +227,7 @@
   reset_for(read_synchronous);
 
   write_packet(sockfd[1], DATA_TYPE_ACL, acl_data);
+  write_packet(sockfd[1], HCI_BLE_EVENT, corrupted_data);
   write_packet(sockfd[1], DATA_TYPE_SCO, sco_data);
   write_packet(sockfd[1], DATA_TYPE_EVENT, event_data);