If command_timed_out() is triggered don't get blocked by a deadlock
If command_timed_out() is triggered and we cannot acquire the
commands_pending_response_mutex to print the debug info, then proceed
with the rest of the operations until the process aborts.
Similarly, if startup_timer_expired() is triggered and we are blocked
by a deadlock, just abort() because there is no way to recover.
Bug: 110441099
Test: Code compilation
Change-Id: I367332bbbd9c1d887bd2bc829b9f617114fc96fc
Merged-In: I367332bbbd9c1d887bd2bc829b9f617114fc96fc
(cherry picked from commit 2838b32ce424a6f8c8025175aaf9b6fd8cab313f)
diff --git a/hci/src/hci_layer.cc b/hci/src/hci_layer.cc
index 5111fc5..f1e0e7a 100644
--- a/hci/src/hci_layer.cc
+++ b/hci/src/hci_layer.cc
@@ -78,7 +78,9 @@
// Abort if there is no response to an HCI command.
static const uint32_t COMMAND_PENDING_TIMEOUT_MS = 2000;
+static const uint32_t COMMAND_PENDING_MUTEX_ACQUIRE_TIMEOUT_MS = 500;
static const uint32_t COMMAND_TIMEOUT_RESTART_MS = 5000;
+static const int HCI_UNKNOWN_COMMAND_TIMED_OUT = 0x00ffffff;
// Our interface
static bool interface_created;
@@ -105,7 +107,7 @@
// Inbound-related
static alarm_t* command_response_timer;
static list_t* commands_pending_response;
-static std::recursive_mutex commands_pending_response_mutex;
+static std::recursive_timed_mutex commands_pending_response_mutex;
static alarm_t* hci_timeout_abort_timer;
// The hand-off point for data going to a higher layer, set by the higher layer
@@ -255,7 +257,8 @@
// Free the timers
{
- std::lock_guard<std::recursive_mutex> lock(commands_pending_response_mutex);
+ std::lock_guard<std::recursive_timed_mutex> lock(
+ commands_pending_response_mutex);
alarm_free(command_response_timer);
command_response_timer = NULL;
alarm_free(startup_timer);
@@ -277,7 +280,8 @@
hci_close();
{
- std::lock_guard<std::recursive_mutex> lock(commands_pending_response_mutex);
+ std::lock_guard<std::recursive_timed_mutex> lock(
+ commands_pending_response_mutex);
list_free(commands_pending_response);
commands_pending_response = NULL;
}
@@ -371,7 +375,8 @@
static void event_finish_startup(UNUSED_ATTR void* context) {
LOG_INFO(LOG_TAG, "%s", __func__);
- std::lock_guard<std::recursive_mutex> lock(commands_pending_response_mutex);
+ std::lock_guard<std::recursive_timed_mutex> lock(
+ commands_pending_response_mutex);
alarm_cancel(startup_timer);
future_ready(startup_future, FUTURE_SUCCESS);
startup_future = NULL;
@@ -380,9 +385,18 @@
static void startup_timer_expired(UNUSED_ATTR void* context) {
LOG_ERROR(LOG_TAG, "%s", __func__);
- std::lock_guard<std::recursive_mutex> lock(commands_pending_response_mutex);
+ std::unique_lock<std::recursive_timed_mutex> lock(
+ commands_pending_response_mutex, std::defer_lock);
+ if (!lock.try_lock_for(std::chrono::milliseconds(
+ COMMAND_PENDING_MUTEX_ACQUIRE_TIMEOUT_MS))) {
+ LOG_ERROR(LOG_TAG, "%s: Cannot obtain the mutex", __func__);
+ // We cannot recover if the startup timer expired and we are deadlock,
+ // hence abort.
+ abort();
+ }
future_ready(startup_future, FUTURE_FAIL);
startup_future = NULL;
+ lock.unlock();
}
// Command/packet transmitting functions
@@ -408,7 +422,8 @@
static void event_command_ready(waiting_command_t* wait_entry) {
{
/// Move it to the list of commands awaiting response
- std::lock_guard<std::recursive_mutex> lock(commands_pending_response_mutex);
+ std::lock_guard<std::recursive_timed_mutex> lock(
+ commands_pending_response_mutex);
wait_entry->timestamp = std::chrono::steady_clock::now();
list_append(commands_pending_response, wait_entry);
}
@@ -475,10 +490,7 @@
abort();
}
-// Print debugging information and quit. Don't dereference original_wait_entry.
-static void command_timed_out(void* original_wait_entry) {
- std::unique_lock<std::recursive_mutex> lock(commands_pending_response_mutex);
-
+static void command_timed_out_log_info(void* original_wait_entry) {
LOG_ERROR(LOG_TAG, "%s: %d commands pending response", __func__,
get_num_waiting_commands());
@@ -508,7 +520,21 @@
LOG_EVENT_INT(BT_HCI_TIMEOUT_TAG_NUM, wait_entry->opcode);
}
- lock.unlock();
+}
+
+// Print debugging information and quit. Don't dereference original_wait_entry.
+static void command_timed_out(void* original_wait_entry) {
+ LOG_ERROR(LOG_TAG, "%s", __func__);
+ std::unique_lock<std::recursive_timed_mutex> lock(
+ commands_pending_response_mutex, std::defer_lock);
+ if (!lock.try_lock_for(std::chrono::milliseconds(
+ COMMAND_PENDING_MUTEX_ACQUIRE_TIMEOUT_MS))) {
+ LOG_ERROR(LOG_TAG, "%s: Cannot obtain the mutex", __func__);
+ LOG_EVENT_INT(BT_HCI_TIMEOUT_TAG_NUM, HCI_UNKNOWN_COMMAND_TIMED_OUT);
+ } else {
+ command_timed_out_log_info(original_wait_entry);
+ lock.unlock();
+ }
// Don't request a firmware dump for multiple hci timeouts
if (hci_timeout_abort_timer != NULL || hci_firmware_log_fd != INVALID_FD) {
@@ -675,7 +701,8 @@
// Misc internal functions
static waiting_command_t* get_waiting_command(command_opcode_t opcode) {
- std::lock_guard<std::recursive_mutex> lock(commands_pending_response_mutex);
+ std::lock_guard<std::recursive_timed_mutex> lock(
+ commands_pending_response_mutex);
for (const list_node_t* node = list_begin(commands_pending_response);
node != list_end(commands_pending_response); node = list_next(node)) {
@@ -693,12 +720,14 @@
}
static int get_num_waiting_commands() {
- std::lock_guard<std::recursive_mutex> lock(commands_pending_response_mutex);
+ std::lock_guard<std::recursive_timed_mutex> lock(
+ commands_pending_response_mutex);
return list_length(commands_pending_response);
}
static void update_command_response_timer(void) {
- std::lock_guard<std::recursive_mutex> lock(commands_pending_response_mutex);
+ std::lock_guard<std::recursive_timed_mutex> lock(
+ commands_pending_response_mutex);
if (command_response_timer == NULL) return;
if (list_is_empty(commands_pending_response)) {