trusty:pac: remove pac_keys test am: b2a63b7eca am: e204421147

Original change: https://android-review.googlesource.com/c/trusty/lk/trusty/+/2692488

Change-Id: I36442cb0260cc64a9545532d2775ae61a82999c0
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/PREUPLOAD.cfg b/PREUPLOAD.cfg
index 587b0de..f356f46 100644
--- a/PREUPLOAD.cfg
+++ b/PREUPLOAD.cfg
@@ -2,6 +2,8 @@
 clang_format = true
 commit_msg_bug_field = true
 commit_msg_changeid_field = true
+rustfmt = true
 
 [Builtin Hooks Options]
 clang_format = --commit ${PREUPLOAD_COMMIT} --style file --extensions c,h,cc,cpp
+rustfmt = --config-path=rustfmt.toml
diff --git a/app/btitest/btitest.c b/app/btitest/btitest.c
index fde5e1f..f2e1c73 100644
--- a/app/btitest/btitest.c
+++ b/app/btitest/btitest.c
@@ -69,10 +69,11 @@
 
 TEST(btitest, supported) {
     if (!arch_bti_supported()) {
-        trusty_unittest_printf("[  SKIPPED ] BTI is not supported\n");
-        return;
+        trusty_unittest_printf("[   INFO   ] BTI is not supported\n");
+        GTEST_SKIP();
     }
     EXPECT_EQ(true, arch_bti_supported());
+test_abort:;
 }
 
 /* Smoke-test the callee functions; they should return 0 when called with BL */
@@ -126,8 +127,8 @@
 
 TEST(btitest, pacisp) {
     if (!arch_pac_address_supported()) {
-        trusty_unittest_printf("[  SKIPPED ] PAC is not supported\n");
-        return;
+        trusty_unittest_printf("[   INFO   ] PAC is not supported\n");
+        GTEST_SKIP();
     }
 
     /* PACIASP is a valid target for all branch types */
@@ -141,6 +142,7 @@
     EXPECT_EQ(0, btitest_br_x16(BTITEST_CALLEE_PACIBSP));
     EXPECT_EQ(0, btitest_br_x17(BTITEST_CALLEE_PACIBSP));
     EXPECT_EQ(0, btitest_blr(BTITEST_CALLEE_PACIBSP));
+test_abort:;
 }
 
 PORT_TEST(btitest, "com.android.kernel.btitest");
diff --git a/app/dpctest/rules.mk b/app/dpctest/rules.mk
index 71b24f7..b6b486e 100644
--- a/app/dpctest/rules.mk
+++ b/app/dpctest/rules.mk
@@ -6,7 +6,7 @@
 	WITH_DPC_TEST=1 \
 
 MODULE_DEPS += \
-	external/lk/lib/dpc \
+	$(LKROOT)/lib/dpc \
 	trusty/kernel/lib/unittest \
 
 MODULE_SRCS += \
diff --git a/app/list-ports/main.c b/app/list-ports/main.c
new file mode 100644
index 0000000..2183464
--- /dev/null
+++ b/app/list-ports/main.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2023, Google Inc. All rights reserved
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <lib/trusty/ipc.h>
+#include <lib/unittest/unittest.h>
+#include <lk/init.h>
+#include <stdio.h>
+#include <string.h>
+
+TEST(list_ports, lists) {
+    struct ipc_port* port_list;
+    const size_t port_count = ipc_get_port_list(&port_list);
+
+    /* Header row: one label per column emitted for each port below */
+    trusty_unittest_printf("|%.64s|%.12s|%.12s|%.12s|%.12s|%.12s|\n", "NAME",
+                           "TA_CONNECT", "NS_CONNECT", "STATUS", "NB_BUFFER",
+                           "BUFFER_SZ");
+
+    for (size_t i = 0; i < port_count; ++i) {
+        const struct ipc_port* entry = &port_list[i];
+        const char* state_name;
+
+        trusty_unittest_printf("|%.64s|", entry->path);
+
+        /* Connection permissions are reported as TRUE/FALSE flags */
+        trusty_unittest_printf(
+                "%.12s|",
+                (entry->flags & IPC_PORT_ALLOW_TA_CONNECT) ? "TRUE" : "FALSE");
+        trusty_unittest_printf(
+                "%.12s|",
+                (entry->flags & IPC_PORT_ALLOW_NS_CONNECT) ? "TRUE" : "FALSE");
+
+        /* Any state other than INVALID or LISTENING prints as UNKNOWN */
+        if (entry->state == IPC_PORT_STATE_INVALID) {
+            state_name = "INVALID";
+        } else if (entry->state == IPC_PORT_STATE_LISTENING) {
+            state_name = "LISTENING";
+        } else {
+            state_name = "UNKNOWN";
+        }
+        trusty_unittest_printf("%.12s|", state_name);
+
+        /* Receive buffer count and size, then terminate the row */
+        trusty_unittest_printf("%.12u|", entry->num_recv_bufs);
+        trusty_unittest_printf("%.12zu|", entry->recv_buf_size);
+        trusty_unittest_printf("\n");
+    }
+    ipc_free_port_list(port_list);
+}
+
+PORT_TEST(list_ports, "com.android.kernel.list-ports");
diff --git a/app/list-ports/rules.mk b/app/list-ports/rules.mk
new file mode 100644
index 0000000..38345fc
--- /dev/null
+++ b/app/list-ports/rules.mk
@@ -0,0 +1,27 @@
+# Copyright (C) 2023 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_DIR := $(GET_LOCAL_DIR)
+
+MODULE := $(LOCAL_DIR)
+
+MODULE_DEPS += \
+	trusty/kernel/lib/unittest \
+
+MODULE_SRCS += \
+	$(LOCAL_DIR)/main.c \
+
+include make/module.mk
+
diff --git a/app/memorylatencybench/main.c b/app/memorylatencybench/main.c
index 937e629..7139896 100644
--- a/app/memorylatencybench/main.c
+++ b/app/memorylatencybench/main.c
@@ -22,42 +22,144 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <arch/defines.h>
 #include <trusty_benchmark.h>
 #include <uapi/err.h>
 
-#define BUF_SIZE 16384
+#define BLOCK_SIZE_BYTES (CACHE_LINE * 4)
+#define STRUCT_NPAD ((BLOCK_SIZE_BYTES) / sizeof(uintptr_t)) /* pad[] length */
+#define MAX_WORKING_SET_SZ 16777216
 
-typedef struct {
-    uint8_t buf[BUF_SIZE];
+static const uint64_t working_set_sizes[] = {
+        BLOCK_SIZE_BYTES,
+        512,
+        1024,
+        2048,
+        4096,
+        8192,
+        16384,
+        32768,
+        65536,
+        131072,
+        262144,
+        524288,
+        1048576,
+        2097152,
+        4194304,
+        8388608,
+        MAX_WORKING_SET_SZ,
+};
+
+typedef union memlatency_state_t {
+    union memlatency_state_t* next;
+    uintptr_t pad[STRUCT_NPAD];
 } memlatency_state_t;
 
-static memlatency_state_t* memlatency_state;
+static memlatency_state_t* memlatency_state_start;
+
+static size_t nb_blocks = MAX_WORKING_SET_SZ / sizeof(memlatency_state_t);
+
+static void get_param_name_cb_fixed(char* buf,
+                                    size_t buf_size,
+                                    size_t param_idx) {
+    snprintf(buf, buf_size,
+             "%" PRIu64 " Bytes working size in blocks of %zu Bytes",
+             working_set_sizes[param_idx], sizeof(memlatency_state_t));
+}
+
+static void get_formatted_value_cb(char* buf,
+                                   size_t buf_size,
+                                   int64_t value,
+                                   const char* metric_name) {
+    /* Every metric except the duration is printed as a bare integer */
+    if (strcmp(metric_name, "time_micro_seconds") != 0) {
+        snprintf(buf, buf_size, "%" PRId64, value);
+        return;
+    }
+    /* Input is split into thousands and a 3-digit zero-padded remainder */
+    snprintf(buf, buf_size, "%" PRId64 ".%03" PRId64, value / 1000,
+             value % 1000);
+}
 
 BENCH_SETUP(memlatency) {
-    memlatency_state = calloc(1, sizeof(memlatency_state_t));
-    if (memlatency_state == NULL) {
+    trusty_bench_get_param_name_cb = &get_param_name_cb_fixed;
+    trusty_bench_get_formatted_value_cb = &get_formatted_value_cb;
+    memlatency_state_start =
+            memalign(CACHE_LINE, nb_blocks * sizeof(memlatency_state_t));
+
+    if (memlatency_state_start == NULL) {
         TLOGE("Failed to Allocate memory for memlatency_state!");
         return ERR_NO_MEMORY;
     }
 
+    memset((uint8_t*)memlatency_state_start, 0,
+           nb_blocks * sizeof(memlatency_state_t));
+
+    for (size_t idx = 0; idx < nb_blocks - 1; ++idx) {
+        memlatency_state_start[idx].next = &memlatency_state_start[idx + 1];
+    }
+
+    static_assert(sizeof(memlatency_state_t) == BLOCK_SIZE_BYTES);
+
     return NO_ERROR;
 }
 
 BENCH_TEARDOWN(memlatency) {
-    free(memlatency_state);
-    memlatency_state = NULL;
+    free(memlatency_state_start);
+    memlatency_state_start = NULL;
 }
 
-BENCH(memlatency, latency, 20) {
-    int rc = NO_ERROR;
+BENCH(memlatency, latency_read, 20, working_set_sizes) {
+    uint64_t sz = working_set_sizes[bench_get_param_idx()];
+    uint64_t nb_blocks = sz / BLOCK_SIZE_BYTES;
+    uint64_t loops = 10 * (MAX_WORKING_SET_SZ / sz);
 
-    ASSERT_EQ(NO_ERROR, rc);
+    ASSERT_GT(nb_blocks, 0);
 
+    while (loops > 0) {
+        --loops;
+        volatile union memlatency_state_t* block = memlatency_state_start;
+
+        for (size_t idx = 0; idx < nb_blocks; idx++) {
+            /* To make sure we are not overwriting next block Address */
+            static_assert(sizeof(uintptr_t) == __SIZEOF_POINTER__);
+            block = block->next;
+        }
+    }
+
+    return NO_ERROR;
 test_abort:
-    return rc;
+    return ERR_GENERIC;
 }
 
-BENCH_RESULT(memlatency, latency, time_micro_seconds) {
+BENCH(memlatency, latency_write, 20, working_set_sizes) {
+    uint64_t sz = working_set_sizes[bench_get_param_idx()];
+    uint64_t nb_blocks = sz / BLOCK_SIZE_BYTES;
+    uint64_t loops = 10 * (MAX_WORKING_SET_SZ / sz);
+
+    ASSERT_GT(nb_blocks, 0);
+
+    while (loops > 0) {
+        --loops;
+        union memlatency_state_t* block = memlatency_state_start;
+
+        for (size_t idx = 0; idx < nb_blocks; idx++) {
+            /* To make sure we are not overwriting next block Address */
+            static_assert(sizeof(uintptr_t) == __SIZEOF_POINTER__);
+            (block + idx)->pad[1] = idx + sz;
+        }
+    }
+
+    return NO_ERROR;
+test_abort:
+    return ERR_GENERIC;
+}
+
+BENCH_RESULT(memlatency, latency_read, time_micro_seconds) {
+    return bench_get_duration_ns();
+}
+
+BENCH_RESULT(memlatency, latency_write, time_micro_seconds) {
     return bench_get_duration_ns();
 }
 
diff --git a/app/mmutest/mmutest.c b/app/mmutest/mmutest.c
index 1636591..d379792 100644
--- a/app/mmutest/mmutest.c
+++ b/app/mmutest/mmutest.c
@@ -677,10 +677,11 @@
 
 TEST(mmutest, pan) {
     if (!mmutest_arch_pan_supported()) {
-        trusty_unittest_printf("[  SKIPPED ] PAN is not supported\n");
-        return;
+        trusty_unittest_printf("[   INFO   ] PAN is not supported\n");
+        GTEST_SKIP();
     }
     EXPECT_EQ(true, mmutest_arch_pan_enabled());
+test_abort:;
 }
 
 TEST(mmutest, store_kernel) {
@@ -778,6 +779,9 @@
      */
     ret = vmm_alloc(aspace, "ns_conflict_ns", PAGE_SIZE, &ptr_ns,
                     PAGE_SIZE_SHIFT + 2, 0, ARCH_MMU_FLAG_NS);
+    if (ret == ERR_NOT_SUPPORTED) {
+        GTEST_SKIP();
+    }
     EXPECT_EQ(NO_ERROR, ret);
 
     ret = arch_mmu_query(&aspace->arch_aspace, (vaddr_t)ptr_ns, NULL,
diff --git a/app/pacbench/pacbench.c b/app/pacbench/pacbench.c
index 0081a49..575118b 100644
--- a/app/pacbench/pacbench.c
+++ b/app/pacbench/pacbench.c
@@ -32,6 +32,11 @@
 /* Extended loop count for faster functions */
 #define EXTRA_LOOPS 10000000u
 
+#define PACKBENCH_STR_REP2(s) s s
+#define PACKBENCH_STR_REP4(s) PACKBENCH_STR_REP2(s) PACKBENCH_STR_REP2(s)
+#define PACKBENCH_STR_REP8(s) PACKBENCH_STR_REP4(s) PACKBENCH_STR_REP4(s)
+#define PACKBENCH_STR_REP16(s) PACKBENCH_STR_REP8(s) PACKBENCH_STR_REP8(s)
+
 BENCH_SETUP(pac) {
     return NO_ERROR;
 }
@@ -45,29 +50,21 @@
  * the instruction functional, though this benchmark does not test the
  * instruction - see pactest instead.
  */
-BENCH(pac, pacia, RUNS) {
+BENCH_ALL_CPU(pac, pacia, RUNS) {
     uint64_t val = 0;
 
     for (uint64_t i = 0; i < LOOPS; i++) {
-        __asm__ volatile(
-                ".arch_extension pauth\n\t"
-                "PACIA %0, %1\n\tPACIA %0, %1\n\t"
-                "PACIA %0, %1\n\tPACIA %0, %1\n\t"
-                "PACIA %0, %1\n\tPACIA %0, %1\n\t"
-                "PACIA %0, %1\n\tPACIA %0, %1\n\t"
-                "PACIA %0, %1\n\tPACIA %0, %1\n\t"
-                "PACIA %0, %1\n\tPACIA %0, %1\n\t"
-                "PACIA %0, %1\n\tPACIA %0, %1\n\t"
-                "PACIA %0, %1\n\tPACIA %0, %1\n\t"
-                : "+r"(val)
-                : "r"(i));
+        __asm__ volatile(".arch_extension pauth\n\t" PACKBENCH_STR_REP16(
+                                 "PACIA %0, %1\n\t")
+                         : "+r"(val)
+                         : "r"(i));
     }
 
     return NO_ERROR;
 }
 
-BENCH_RESULT(pac, pacia, ns_per_pacia) {
-    return bench_get_duration_ns() / (LOOPS * INSTRUCTIONS_PER_LOOP);
+BENCH_RESULT(pac, pacia, ps_per_pacia) {
+    return (bench_get_duration_ns() * 1000u) / (LOOPS * INSTRUCTIONS_PER_LOOP);
 }
 
 BENCH_RESULT(pac, pacia, us_total) {
@@ -85,29 +82,21 @@
  * Note we cannot test AUTIA alone since it may generate an exception if it
  * fails.
  */
-BENCH(pac, pacautia, RUNS) {
+BENCH_ALL_CPU(pac, pacautia, RUNS) {
     uint64_t val = 0;
 
     for (uint64_t i = 0; i < LOOPS; i++) {
-        __asm__ volatile(
-                ".arch_extension pauth\n\t"
-                "PACIA %0, %1\n\tAUTIA %0, %1\n\t"
-                "PACIA %0, %1\n\tAUTIA %0, %1\n\t"
-                "PACIA %0, %1\n\tAUTIA %0, %1\n\t"
-                "PACIA %0, %1\n\tAUTIA %0, %1\n\t"
-                "PACIA %0, %1\n\tAUTIA %0, %1\n\t"
-                "PACIA %0, %1\n\tAUTIA %0, %1\n\t"
-                "PACIA %0, %1\n\tAUTIA %0, %1\n\t"
-                "PACIA %0, %1\n\tAUTIA %0, %1\n\t"
-                : "+r"(val)
-                : "r"(i));
+        __asm__ volatile(".arch_extension pauth\n\t" PACKBENCH_STR_REP16(
+                                 "PACIA %0, %1\n\tAUTIA %0, %1\n\t")
+                         : "+r"(val)
+                         : "r"(i));
     }
 
     return NO_ERROR;
 }
 
-BENCH_RESULT(pac, pacautia, ns_per_pacautia) {
-    return bench_get_duration_ns() / (LOOPS * INSTRUCTIONS_PER_LOOP);
+BENCH_RESULT(pac, pacautia, ps_per_pacautia) {
+    return (bench_get_duration_ns() * 1000u) / (LOOPS * INSTRUCTIONS_PER_LOOP);
 }
 
 BENCH_RESULT(pac, pacautia, us_total) {
@@ -124,29 +113,21 @@
  * the instruction functional, though this benchmark does not test the
  * instruction - see pactest instead.
  */
-BENCH(pac, pacib, RUNS) {
+BENCH_ALL_CPU(pac, pacib, RUNS) {
     uint64_t val = 0;
 
     for (uint64_t i = 0; i < LOOPS; i++) {
-        __asm__ volatile(
-                ".arch_extension pauth\n\t"
-                "PACIB %0, %1\n\tPACIB %0, %1\n\t"
-                "PACIB %0, %1\n\tPACIB %0, %1\n\t"
-                "PACIB %0, %1\n\tPACIB %0, %1\n\t"
-                "PACIB %0, %1\n\tPACIB %0, %1\n\t"
-                "PACIB %0, %1\n\tPACIB %0, %1\n\t"
-                "PACIB %0, %1\n\tPACIB %0, %1\n\t"
-                "PACIB %0, %1\n\tPACIB %0, %1\n\t"
-                "PACIB %0, %1\n\tPACIB %0, %1\n\t"
-                : "+r"(val)
-                : "r"(i));
+        __asm__ volatile(".arch_extension pauth\n\t" PACKBENCH_STR_REP16(
+                                 "PACIB %0, %1\n\t")
+                         : "+r"(val)
+                         : "r"(i));
     }
 
     return NO_ERROR;
 }
 
-BENCH_RESULT(pac, pacib, ns_per_pacib) {
-    return bench_get_duration_ns() / (LOOPS * INSTRUCTIONS_PER_LOOP);
+BENCH_RESULT(pac, pacib, ps_per_pacib) {
+    return (bench_get_duration_ns() * 1000u) / (LOOPS * INSTRUCTIONS_PER_LOOP);
 }
 
 BENCH_RESULT(pac, pacib, us_total) {
@@ -162,29 +143,21 @@
  * Even if PAC is supported by the hardware, Trusty doesn't use or enable this
  * key.
  */
-BENCH(pac, pacautib, RUNS) {
+BENCH_ALL_CPU(pac, pacautib, RUNS) {
     uint64_t val = 0;
 
     for (uint64_t i = 0; i < LOOPS; i++) {
-        __asm__ volatile(
-                ".arch_extension pauth\n\t"
-                "PACIB %0, %1\n\tAUTIB %0, %1\n\t"
-                "PACIB %0, %1\n\tAUTIB %0, %1\n\t"
-                "PACIB %0, %1\n\tAUTIB %0, %1\n\t"
-                "PACIB %0, %1\n\tAUTIB %0, %1\n\t"
-                "PACIB %0, %1\n\tAUTIB %0, %1\n\t"
-                "PACIB %0, %1\n\tAUTIB %0, %1\n\t"
-                "PACIB %0, %1\n\tAUTIB %0, %1\n\t"
-                "PACIB %0, %1\n\tAUTIB %0, %1\n\t"
-                : "+r"(val)
-                : "r"(i));
+        __asm__ volatile(".arch_extension pauth\n\t" PACKBENCH_STR_REP16(
+                                 "PACIB %0, %1\n\tAUTIB %0, %1\n\t")
+                         : "+r"(val)
+                         : "r"(i));
     }
 
     return NO_ERROR;
 }
 
-BENCH_RESULT(pac, pacautib, ns_per_pacautib) {
-    return bench_get_duration_ns() / (LOOPS * INSTRUCTIONS_PER_LOOP);
+BENCH_RESULT(pac, pacautib, ps_per_pacautib) {
+    return (bench_get_duration_ns() * 1000u) / (LOOPS * INSTRUCTIONS_PER_LOOP);
 }
 
 BENCH_RESULT(pac, pacautib, us_total) {
@@ -199,28 +172,21 @@
 /*
  * Simple arithmetic instruction test.
  */
-BENCH(pac, add, RUNS) {
+BENCH_ALL_CPU(pac, add, RUNS) {
     uint64_t val = 0;
 
     for (uint64_t i = 0; i < EXTRA_LOOPS; i++) {
-        __asm__ volatile(
-                "ADD %0, %0, %1\n\tADD %0, %0, %1\n\t"
-                "ADD %0, %0, %1\n\tADD %0, %0, %1\n\t"
-                "ADD %0, %0, %1\n\tADD %0, %0, %1\n\t"
-                "ADD %0, %0, %1\n\tADD %0, %0, %1\n\t"
-                "ADD %0, %0, %1\n\tADD %0, %0, %1\n\t"
-                "ADD %0, %0, %1\n\tADD %0, %0, %1\n\t"
-                "ADD %0, %0, %1\n\tADD %0, %0, %1\n\t"
-                "ADD %0, %0, %1\n\tADD %0, %0, %1\n\t"
-                : "+r"(val)
-                : "r"(i));
+        __asm__ volatile(PACKBENCH_STR_REP16(PACBENCH_ADD_INSTR)
+                         : "+r"(val)
+                         : "r"(i));
     }
 
     return NO_ERROR;
 }
 
-BENCH_RESULT(pac, add, ns_per_add) {
-    return bench_get_duration_ns() / (EXTRA_LOOPS * INSTRUCTIONS_PER_LOOP);
+BENCH_RESULT(pac, add, ps_per_add) {
+    return (bench_get_duration_ns() * 1000u) /
+           (EXTRA_LOOPS * INSTRUCTIONS_PER_LOOP);
 }
 
 BENCH_RESULT(pac, add, us_total) {
@@ -234,24 +200,17 @@
 /*
  * NOP instruction test.
  */
-BENCH(pac, nop, RUNS) {
+BENCH_ALL_CPU(pac, nop, RUNS) {
     for (uint64_t i = 0; i < EXTRA_LOOPS; i++) {
-        __asm__ volatile(
-                "NOP\n\tNOP\n\t"
-                "NOP\n\tNOP\n\t"
-                "NOP\n\tNOP\n\t"
-                "NOP\n\tNOP\n\t"
-                "NOP\n\tNOP\n\t"
-                "NOP\n\tNOP\n\t"
-                "NOP\n\tNOP\n\t"
-                "NOP\n\tNOP\n\t");
+        __asm__ volatile(PACKBENCH_STR_REP16(PACBENCH_NOP_INSTR));
     }
 
     return NO_ERROR;
 }
 
-BENCH_RESULT(pac, nop, ns_per_nop) {
-    return bench_get_duration_ns() / (EXTRA_LOOPS * INSTRUCTIONS_PER_LOOP);
+BENCH_RESULT(pac, nop, ps_per_nop) {
+    return (bench_get_duration_ns() * 1000u) /
+           (EXTRA_LOOPS * INSTRUCTIONS_PER_LOOP);
 }
 
 BENCH_RESULT(pac, nop, us_total) {
diff --git a/app/pacbench/pacbench_arm.h b/app/pacbench/pacbench_arm.h
new file mode 100644
index 0000000..0fdb3e0
--- /dev/null
+++ b/app/pacbench/pacbench_arm.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define PACBENCH_ADD_INSTR "ADD %0, %1\n\t"
+#define PACBENCH_NOP_INSTR "NOP\n\t"
diff --git a/app/pacbench/pacbench_arm64.h b/app/pacbench/pacbench_arm64.h
new file mode 100644
index 0000000..879a8bc
--- /dev/null
+++ b/app/pacbench/pacbench_arm64.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define PACBENCH_ADD_INSTR "ADD %0, %0, %1\n\t"
+#define PACBENCH_NOP_INSTR "NOP\n\t"
diff --git a/app/pacbench/pacbench_x86.h b/app/pacbench/pacbench_x86.h
new file mode 100644
index 0000000..60db7f0
--- /dev/null
+++ b/app/pacbench/pacbench_x86.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define PACBENCH_ADD_INSTR "ADD %1, %0\n\t"
+#define PACBENCH_NOP_INSTR "NOP\n\t"
diff --git a/app/pacbench/rules.mk b/app/pacbench/rules.mk
index 5bafadf..df78ce2 100644
--- a/app/pacbench/rules.mk
+++ b/app/pacbench/rules.mk
@@ -2,6 +2,8 @@
 
 MODULE := $(LOCAL_DIR)
 
+MODULE_CFLAGS += -include $(LOCAL_DIR)/pacbench_$(ARCH).h
+
 MODULE_DEPS += \
 	trusty/kernel/lib/unittest \
 
diff --git a/app/pactest/pactest.c b/app/pactest/pactest.c
index 2b1003e..92d541f 100644
--- a/app/pactest/pactest.c
+++ b/app/pactest/pactest.c
@@ -39,9 +39,78 @@
 #include <platform/random.h>
 #include <stdio.h>
 
-#define PACTEST_ADDRESS 0x1234567890abcdefu
+#define MASK(bits) ((1ull << (bits)) - 1)
+
+/*
+ * Test addresses for each translation table (TT).
+ * Address bit 55 is used to select between page translation tables to use.
+ * TT0 is used for user addresses, while TT1 is kernel addresses.
+ */
+#define PACTEST_TT0_ADDRESS \
+    ((0x1234567890abcdefu & MASK(MMU_USER_SIZE_SHIFT)) & ~(1ull << 55))
+#define PACTEST_TT1_ADDRESS \
+    ((0x1234567890abcdefu | (~0ull << MMU_KERNEL_SIZE_SHIFT)) | (1ull << 55))
 #define PACTEST_MODIFIER 0xfedcba0987654321u
 
+/* Helper function for parameterized calling of specific PAC instructions */
+static uint64_t pacxx(bool instr_not_data,
+                      bool key_a_not_b,
+                      uint64_t address,
+                      uint64_t modifier) {
+    if (key_a_not_b && instr_not_data) {
+        /* Key A, instruction variant */
+        __asm__(".arch_extension pauth\n"
+                "\tPACIA %0, %1"
+                : "+r"(address)
+                : "r"(modifier));
+    } else if (key_a_not_b) {
+        /* Key A, data variant */
+        __asm__(".arch_extension pauth\n"
+                "\tPACDA %0, %1"
+                : "+r"(address)
+                : "r"(modifier));
+    } else if (instr_not_data) {
+        /* Key B, instruction variant */
+        __asm__(".arch_extension pauth\n"
+                "\tPACIB %0, %1"
+                : "+r"(address)
+                : "r"(modifier));
+    } else {
+        /* Key B, data variant */
+        __asm__(".arch_extension pauth\n"
+                "\tPACDB %0, %1"
+                : "+r"(address)
+                : "r"(modifier));
+    }
+
+    return address;
+}
+
+/*
+ * Helper function for parameterized calling of specific PAC instructions.
+ * The instructions are implemented in assembly as they may generate exceptions
+ * (FEAT_FPAC) which need catching.
+ */
+static int autxx(bool instr_not_data,
+                 bool key_a_not_b,
+                 uint64_t address,
+                 uint64_t modifier,
+                 uint64_t* result) {
+    /* Key A selected: instruction variant -> AUTIA, data variant -> AUTDA */
+    if (key_a_not_b && instr_not_data) {
+        return pactest_autia(address, modifier, result);
+    }
+    if (key_a_not_b) {
+        return pactest_autda(address, modifier, result);
+    }
+
+    /* Key B selected: instruction variant -> AUTIB, data variant -> AUTDB */
+    if (instr_not_data) {
+        return pactest_autib(address, modifier, result);
+    }
+    return pactest_autdb(address, modifier, result);
+}
+
 static uint8_t get_nibble(uint64_t reg, uint8_t shift) {
     return (reg >> shift) & 0xf;
 }
@@ -65,8 +134,8 @@
 
 TEST(pactest, pauth_supported) {
     if (!arch_pac_address_supported()) {
-        trusty_unittest_printf("[  SKIPPED ] PAuth is not supported\n");
-        return;
+        trusty_unittest_printf("[   INFO   ] PAuth is not supported\n");
+        GTEST_SKIP();
     }
 
     const uint64_t isar1 = ARM64_READ_SYSREG(id_aa64isar1_el1);
@@ -100,6 +169,7 @@
     /* Log the support in case later trying to debug a test */
     trusty_unittest_printf("[   INFO   ] algo: %s\n", algo);
     trusty_unittest_printf("[   INFO   ] feat: %s%s\n", features, cpf);
+test_abort:;
 }
 
 TEST(pactest, fpac_supported) {
@@ -107,12 +177,13 @@
     int rc;
 
     if (!arch_pac_exception_supported()) {
-        trusty_unittest_printf("[  SKIPPED ] FPAC is not supported\n");
-        return;
+        trusty_unittest_printf("[   INFO   ] FPAC is not supported\n");
+        GTEST_SKIP();
     }
 
-    rc = pactest_autia(PACTEST_ADDRESS, PACTEST_MODIFIER, &val);
+    rc = pactest_autia(PACTEST_TT0_ADDRESS, PACTEST_MODIFIER, &val);
     EXPECT_EQ(rc, ERR_FAULT);
+test_abort:;
 }
 
 TEST(pactest, enabled) {
@@ -126,111 +197,56 @@
     EXPECT_EQ(sctlr_el1 & SCTLR_EL1_ENDB, 0);
 }
 
-TEST(pactest, instr_pacautia) {
-    const uint64_t sctlr_el1 = ARM64_READ_SYSREG(SCTLR_EL1);
-    uint64_t address = PACTEST_ADDRESS;
-    int rc;
-
-    if (arch_pac_address_supported() && (sctlr_el1 & SCTLR_EL1_ENIA) != 0) {
-        /* Test PACIA adds a PAC */
-        __asm__(".arch_extension pauth\n"
-                "\tPACIA %0, %1"
-                : "+r"(address)
-                : "r"(PACTEST_MODIFIER));
-        EXPECT_NE(PACTEST_ADDRESS, address);
-
-        uint64_t pac_address = address;
-
-        /* Check AUTIA returns the original pointer */
-        rc = pactest_autia(address, PACTEST_MODIFIER, &address);
-        EXPECT_EQ(rc, 0)
-        EXPECT_EQ(PACTEST_ADDRESS, address);
-
-        /* Check the pointer is invalidated  if the modifier is changed */
-        rc = pactest_autia(pac_address, ~PACTEST_MODIFIER, &address);
-        if (arch_pac_exception_supported()) {
-            EXPECT_EQ(rc, ERR_FAULT);
-        } else {
-            /* Address should have been invalidated */
-            EXPECT_EQ(rc, 0);
-            EXPECT_NE(address, PACTEST_ADDRESS);
-        }
-
-    } else {
-        /* Test PACIA does nothing */
-        __asm__(".arch_extension pauth\n"
-                "\tPACIA %0, %1"
-                : "+r"(address)
-                : "r"(PACTEST_MODIFIER));
-        EXPECT_EQ(address, PACTEST_ADDRESS);
-
-        /* Check AUTIA does nothing */
-        rc = pactest_autia(address, PACTEST_MODIFIER, &address);
-        EXPECT_EQ(rc, 0)
-        EXPECT_EQ(address, PACTEST_ADDRESS);
+TEST(pactest, keys) {
+    if (!arch_pac_address_supported()) {
+        GTEST_SKIP();
     }
-}
 
-/* This is exactly the same as the pacautia test, but for the b key */
+    const struct packeys* thread_keys = &get_current_thread()->arch.packeys;
+    const uint64_t keyi_lo = ARM64_READ_SYSREG(APIAKeyLo_EL1);
+    const uint64_t keyi_hi = ARM64_READ_SYSREG(APIAKeyHi_EL1);
 
-TEST(pactest, instr_pacautib) {
-    const uint64_t sctlr_el1 = ARM64_READ_SYSREG(SCTLR_EL1);
-    uint64_t address = PACTEST_ADDRESS;
-    int rc;
+    EXPECT_EQ(thread_keys->apia[0], keyi_lo);
+    EXPECT_EQ(thread_keys->apia[1], keyi_hi);
 
-    if (arch_pac_address_supported() && (sctlr_el1 & SCTLR_EL1_ENIB) != 0) {
-        /* Test PACIB adds a PAC */
-        __asm__(".arch_extension pauth\n"
-                "\tPACIB %0, %1"
-                : "+r"(address)
-                : "r"(PACTEST_MODIFIER));
-        EXPECT_NE(PACTEST_ADDRESS, address);
-
-        uint64_t pac_address = address;
-
-        /* Check AUTIB returns the original pointer */
-        rc = pactest_autib(address, PACTEST_MODIFIER, &address);
-        EXPECT_EQ(rc, 0)
-        EXPECT_EQ(PACTEST_ADDRESS, address);
-
-        /* Check the pointer is invalidated  if the modifier is changed */
-        rc = pactest_autib(pac_address, ~PACTEST_MODIFIER, &address);
-        if (arch_pac_exception_supported()) {
-            EXPECT_EQ(rc, ERR_FAULT);
-        } else {
-            /* Address should have been invalidated */
-            EXPECT_EQ(rc, 0);
-            EXPECT_NE(address, PACTEST_ADDRESS);
-        }
-
-    } else {
-        /* Test PACIB does nothing */
-        __asm__(".arch_extension pauth\n"
-                "\tPACIB %0, %1"
-                : "+r"(address)
-                : "r"(PACTEST_MODIFIER));
-        EXPECT_EQ(address, PACTEST_ADDRESS);
-
-        /* Check AUTIB does nothing */
-        rc = pactest_autib(address, PACTEST_MODIFIER, &address);
-        EXPECT_EQ(rc, 0)
-        EXPECT_EQ(address, PACTEST_ADDRESS);
-    }
+    /*
+     * Check the keys are neither all 0's nor all 1's.
+     * While these values are valid, it may indicate incorrect initialisation.
+     */
+    EXPECT_NE(UINT64_MAX, keyi_lo);
+    EXPECT_NE(UINT64_MAX, keyi_hi);
+    EXPECT_NE(0, keyi_lo);
+    EXPECT_NE(0, keyi_hi);
+test_abort:;
 }
 
 typedef struct {
     bool translation_table;
-    bool address_not_data;
+    bool instr_not_data;
+    bool key_a_not_b;
+    bool key_enabled;
 } pactest_t;
 
 static void get_params(pactest_t* p) {
     const bool* const* params = GetParam();
     p->translation_table = *params[0];
-    p->address_not_data = *params[1];
+    /* Invert for more logical test ordering: AI, AD, BI, BD */
+    p->instr_not_data = !*params[1];
+    p->key_a_not_b = !*params[2];
 }
 
 TEST_F_SETUP(pactest) {
+    uint64_t key_enabled_bit;
+
     get_params(_state);
+
+    if (_state->instr_not_data) {
+        key_enabled_bit = _state->key_a_not_b ? SCTLR_EL1_ENIA : SCTLR_EL1_ENIB;
+    } else {
+        key_enabled_bit = _state->key_a_not_b ? SCTLR_EL1_ENDA : SCTLR_EL1_ENDB;
+    }
+
+    _state->key_enabled = ARM64_READ_SYSREG(SCTLR_EL1) & key_enabled_bit;
 }
 
 TEST_F_TEARDOWN(pactest) {}
@@ -241,27 +257,75 @@
     pactest_t p;
     get_params(&p);
 
-    snprintf(buf, buf_size, "TT%u/%s", p.translation_table ? 1 : 0,
-             p.address_not_data ? "PACA" : "PACD");
+    snprintf(buf, buf_size, "TT%u/%s%s", p.translation_table ? 1 : 0,
+             p.instr_not_data ? "PACI" : "PACD", p.key_a_not_b ? "A" : "B");
 }
 
-INSTANTIATE_TEST_SUITE_P(pac_length,
+INSTANTIATE_TEST_SUITE_P(pac,
                          pactest,
-                         testing_Combine(testing_Bool(), testing_Bool()),
+                         testing_Combine(testing_Bool(),
+                                         testing_Bool(),
+                                         testing_Bool()),
                          user_param_to_string);
 
+TEST_P(pactest, instr) {
+    if (!arch_pac_address_supported()) {
+        GTEST_SKIP();
+    }
+
+    const uint64_t test_address = _state->translation_table
+                                          ? PACTEST_TT1_ADDRESS
+                                          : PACTEST_TT0_ADDRESS;
+    uint64_t address = test_address;
+    int rc;
+
+    if (_state->key_enabled) {
+        /* Test instruction adds a PAC */
+        address = pacxx(_state->instr_not_data, _state->key_a_not_b, address,
+                        PACTEST_MODIFIER);
+
+        /* Address should have been modified to include PAC */
+        EXPECT_NE(test_address, address);
+
+        uint64_t pac_address = address;
+
+        /* Check AUT returns the original pointer */
+        rc = autxx(_state->instr_not_data, _state->key_a_not_b, address,
+                   PACTEST_MODIFIER, &address);
+
+        EXPECT_EQ(rc, 0);
+        EXPECT_EQ(test_address, address);
+
+        /* Check the pointer is invalidated when the modifier is changed */
+        rc = autxx(_state->instr_not_data, _state->key_a_not_b, pac_address,
+                   ~PACTEST_MODIFIER, &address);
+        if (arch_pac_exception_supported()) {
+            EXPECT_EQ(rc, ERR_FAULT);
+        } else {
+            /* Address should have been invalidated */
+            EXPECT_EQ(rc, 0);
+            EXPECT_NE(address, test_address);
+        }
+    } else { /* Key disabled */
+        /* With the key disabled, PAC must leave the address unmodified */
+        address = pacxx(_state->instr_not_data, _state->key_a_not_b, address,
+                        PACTEST_MODIFIER);
+        EXPECT_EQ(test_address, address);
+
+        rc = autxx(_state->instr_not_data, _state->key_a_not_b, address,
+                   PACTEST_MODIFIER, &address);
+        EXPECT_EQ(rc, 0);
+        EXPECT_EQ(test_address, address);
+    }
+test_abort:;
+}
+
 TEST_P(pactest, pac_length) {
     if (!arch_pac_address_supported()) {
-        trusty_unittest_printf("[  SKIPPED ] PAuth is not supported\n");
-        return;
+        GTEST_SKIP();
     }
 
     uint8_t top = 0, bot = 64;
-    const uint64_t sctlr_el1 = ARM64_READ_SYSREG(SCTLR_EL1);
-
-    /* Enable all keys */
-    ARM64_WRITE_SYSREG(SCTLR_EL1, sctlr_el1 | SCTLR_EL1_ENIA | SCTLR_EL1_ENIB |
-                                          SCTLR_EL1_ENDA | SCTLR_EL1_ENDB);
 
     /*
      * Probe a number of times in order to ensure we find the top and bottom
@@ -283,16 +347,8 @@
             val |= 1ull << 55;
         }
 
-        /* Select instruction type */
-        if (_state->address_not_data) {
-            __asm__(".arch_extension pauth\n"
-                    "\tPACIZA %0"
-                    : "+r"(val));
-        } else {
-            __asm__(".arch_extension pauth\n"
-                    "\tPACDZA %0"
-                    : "+r"(val));
-        }
+        /* Call specific instruction variant */
+        val = pacxx(_state->instr_not_data, _state->key_a_not_b, val, 0);
 
         /* Remove un-changed bits and clear bit 55 */
         val ^= orig;
@@ -305,22 +361,27 @@
         }
     }
 
-    /* If this is not true, the PAC key not be functioning */
-    ASSERT_GT(top, bot);
+    if (_state->key_enabled) {
+        /* If this is not true, the PAC key may not be functioning */
+        ASSERT_GT(top, bot);
 
-    /* Count bit range, except bit 55 if it is in the range */
-    int bits = (top + 1) - bot;
-    if (bot < 55 && top > 55) {
-        bits--;
+        /* Count bit range, except bit 55 if it is in the range */
+        int bits = (top + 1) - bot;
+        if (bot < 55 && top > 55) {
+            bits--;
+        }
+
+        trusty_unittest_printf("[   INFO   ] PAC bits %" PRIu8 ":%" PRIu8
+                               " = %d effective bits\n",
+                               top, bot, bits);
+    } else {
+        trusty_unittest_printf("[   INFO   ] PAC key disabled\n");
+
+        ASSERT_EQ(top, 0);
+        ASSERT_EQ(bot, 64);
     }
 
-    trusty_unittest_printf("[   INFO   ] PAC bits %" PRIu8 ":%" PRIu8
-                           " = %d effective bits\n",
-                           top, bot, bits);
-
-test_abort:
-    /* Restore enabled keys */
-    ARM64_WRITE_SYSREG(SCTLR_EL1, sctlr_el1);
+test_abort:;
 }
 
 PORT_TEST(pactest, "com.android.kernel.pactest");
diff --git a/app/pactest/pactest.h b/app/pactest/pactest.h
index 638e72b..c7377cc 100644
--- a/app/pactest/pactest.h
+++ b/app/pactest/pactest.h
@@ -26,3 +26,7 @@
 int pactest_autia(uint64_t address, uint64_t modifier, uint64_t* result);
 
 int pactest_autib(uint64_t address, uint64_t modifier, uint64_t* result);
+
+int pactest_autda(uint64_t address, uint64_t modifier, uint64_t* result);
+
+int pactest_autdb(uint64_t address, uint64_t modifier, uint64_t* result);
diff --git a/app/pactest/pactest_arm64.S b/app/pactest/pactest_arm64.S
index 215a91a..10c9b4c 100644
--- a/app/pactest/pactest_arm64.S
+++ b/app/pactest/pactest_arm64.S
@@ -62,6 +62,41 @@
     mov	x0, #0
     ret
 
+
+/**
+ * int pactest_autda(uint64_t address, uint64_t modifier, uint64_t* result)
+ *  - Function to test autda instruction (data A-key).
+ *
+ * This checks the passed address is authenticated with the modifier and DA key,
+ * and returns the pointer in *result if FEAT_FPAC is not implemented.
+ *
+ * Returns ERR_FAULT if the PAC instruction faults (FEAT_FPAC).
+ * Return 0 if the PAC check does not fault, in which case *result is updated.
+ */
+FUNCTION(pactest_autda)
+set_fault_handler .Lpactest_fault
+    autda x0, x1
+    str x0, [x2]
+    mov	x0, #0
+    ret
+
+/**
+ * int pactest_autdb(uint64_t address, uint64_t modifier, uint64_t* result)
+ *  - Function to test autdb instruction (data B-key).
+ *
+ * This checks the passed address is authenticated with the modifier and DB key,
+ * and returns the pointer in *result if FEAT_FPAC is not implemented.
+ *
+ * Returns ERR_FAULT if the PAC instruction faults (FEAT_FPAC).
+ * Return 0 if the PAC check does not fault, in which case *result is updated.
+ */
+FUNCTION(pactest_autdb)
+set_fault_handler .Lpactest_fault
+    autdb x0, x1
+    str x0, [x2]
+    mov	x0, #0
+    ret
+
 .Lpactest_fault:
     bti jc
     mov x0, #ERR_FAULT
diff --git a/app/pactest/pactest_stub.c b/app/pactest/pactest_stub.c
index 0976bdc..c810d47 100644
--- a/app/pactest/pactest_stub.c
+++ b/app/pactest/pactest_stub.c
@@ -31,9 +31,7 @@
 
 TEST(pactest, DISABLED_keys) {}
 
-TEST(pactest, DISABLED_instr_pacautia) {}
-
-TEST(pactest, DISABLED_instr_pacautib) {}
+TEST(pactest, DISABLED_instr) {}
 
 TEST(pactest, DISABLED_pac_length) {}
 
diff --git a/app/smptest/smptest.c b/app/smptest/smptest.c
index 9eb9fcf..7ea3fee 100644
--- a/app/smptest/smptest.c
+++ b/app/smptest/smptest.c
@@ -21,25 +21,45 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include <kernel/mp.h>
 #include <kernel/thread.h>
 #include <lib/unittest/unittest.h>
 #include <lk/init.h>
 #include <stdbool.h>
 #include <stdio.h>
 
-#define SMPTEST_THREAD_COUNT (4)
+#define THREAD_DELAY_MS 1
 
-static thread_t* smptest_thread[SMPTEST_THREAD_COUNT];
-static int smptest_thread_unblock_count[SMPTEST_THREAD_COUNT];
-static int smptest_thread_done_count[SMPTEST_THREAD_COUNT];
+#define SMPTEST_CYCLES 16
 
-static int smptest(void* arg) {
-    uint i = (uintptr_t)arg;
+static struct smptest_thread {
+    thread_t* thread;
+
+    volatile bool started;
+    volatile uint unblock_count;
+    volatile uint error_count;
+    volatile uint done_count;
+
+} smptest_thread[SMP_MAX_CPUS];
+
+/* Check if a thread is blocked, using volatile to ensure re-read */
+static bool thread_is_blocked(volatile thread_t* thread) {
+    return thread->state == THREAD_BLOCKED;
+}
+
+static int smptest_thread_func(void* arg) {
+    const uint i = (uintptr_t)arg;
+    const uint expected_cpu = i;
+    struct smptest_thread* const smpt = &smptest_thread[i];
+
+    /* Note thread as started so main thread sees which CPUs are available */
+    smpt->started = true;
+
     uint cpu = arch_curr_cpu_num();
-
-    if (cpu != i) {
+    if (cpu != expected_cpu) {
         /* Warn if the thread starts on another CPU than it was pinned to */
         printf("%s: thread %d started on wrong cpu: %d\n", __func__, i, cpu);
+        smpt->error_count++;
     }
 
     while (true) {
@@ -48,135 +68,167 @@
         thread_block();
 
         cpu = arch_curr_cpu_num();
-        if (cpu != i) {
+        if (cpu != expected_cpu) {
             /* Don't update any state if the thread runs on the wrong CPU. */
             printf("%s: thread %d ran on wrong cpu: %d\n", __func__, i, cpu);
+            smpt->error_count++;
             continue;
         }
+
         /*
          * Update unblock count for this cpu so the main test thread can see
          * that it ran.
          */
-        smptest_thread_unblock_count[i]++;
+        smpt->unblock_count++;
         THREAD_UNLOCK(state1);
 
-        /* Sleep to simplify tracing and test CPU local timers */
-        thread_sleep(100);
+        /* Sleep to allow other threads to block */
+        thread_sleep(THREAD_DELAY_MS);
 
         THREAD_LOCK(state2);
-        if (i + 1 < SMPTEST_THREAD_COUNT) {
-            /* Wake up next CPU */
-            thread_unblock(smptest_thread[i + 1], false);
-        } else {
-            /* Print status from last CPU. */
-            printf("%s: %d %d\n", __func__, i, smptest_thread_unblock_count[i]);
+
+        /* Find and unblock the next started cpu, skipping unstarted ones */
+        for (uint next_cpu = i + 1; next_cpu < SMP_MAX_CPUS; next_cpu++) {
+            if (smptest_thread[next_cpu].started) {
+                thread_t* next = smptest_thread[next_cpu].thread;
+
+                /* Next CPU should be blocked; wake it up */
+                if (thread_is_blocked(next)) {
+                    thread_unblock(next, false);
+                } else {
+                    printf("%s: thread %u not blocked\n", __func__, next_cpu);
+                    smpt->error_count++;
+                }
+
+                break;
+            }
+        }
+
         /*
          * Update unblock count for this cpu so the main test thread can see
          * that it completed.
          */
-        smptest_thread_done_count[i]++;
+        smpt->done_count++;
         THREAD_UNLOCK(state2);
     }
     return 0;
 }
 
-static bool run_smp_test(struct unittest* test) {
-    int i;
-    int j;
+TEST(smptest, run) {
     bool wait_for_cpus = false;
 
-    for (i = 0; i < SMPTEST_THREAD_COUNT; i++) {
-        if (smptest_thread[i]->state != THREAD_BLOCKED) {
-            unittest_printf("smptest, thread %d not ready, wait\n", i);
+    for (uint i = 0; i < SMP_MAX_CPUS; i++) {
+        if (!thread_is_blocked(smptest_thread[i].thread)) {
+            unittest_printf("[   INFO   ] thread %d not ready\n", i);
             wait_for_cpus = true;
-            break;
         }
     }
-    if (wait_for_cpus) {
-        /*
-         * test-runner can start the test before all CPUs have finished booting.
-         * Wait another second for all the CPUs we need to be ready.
-         */
-        thread_sleep(1000);
-    }
-    for (i = 0; i < SMPTEST_THREAD_COUNT; i++) {
-        if (smptest_thread[i]->state != THREAD_BLOCKED) {
-            unittest_printf("smptest, thread %d not ready\n", i);
-            return false;
-        }
-    }
-    unittest_printf("smptest start\n");
-    for (i = 0; i < SMPTEST_THREAD_COUNT; i++) {
-        smptest_thread_unblock_count[i] = 0;
-        smptest_thread_done_count[i] = 0;
-    }
 
     /*
-     * Repeat the test at least once, in case the CPUs don't go back to the
-     * same state after the first wake-up
+     * test-runner can start the test before all CPUs have finished booting.
+     * Wait another second for all the CPUs we need to be ready if needed.
      */
-    for (j = 1; j <= 2; j++) {
+    if (wait_for_cpus) {
+        unittest_printf("[   INFO   ] waiting for threads to be ready\n");
+        thread_sleep(1000);
+    }
+
+    for (uint i = 0; i < SMP_MAX_CPUS; i++) {
+        ASSERT_EQ(!mp_is_cpu_active(i) ||
+                          thread_is_blocked(smptest_thread[i].thread),
+                  true, "thread %d not ready\n", i);
+    }
+
+    for (uint i = 0; i < SMP_MAX_CPUS; i++) {
+        smptest_thread[i].unblock_count = 0;
+        smptest_thread[i].error_count = 0;
+        smptest_thread[i].done_count = 0;
+    }
+
+    /*
+     * Repeat the test, in case the CPUs don't go back to the same state
+     * after the first wake-up
+     */
+    for (uint j = 1; j < SMPTEST_CYCLES; j++) {
         THREAD_LOCK(state);
         /*
-         * Wake up thread on CPU 0 to start a test run. It will wake up CPU 1,
-         * CPU 1 will wake up CPU 2 and CPU 2 will wake up CPU 3.
+         * Wake up thread on CPU 0 to start a test run. Each thread 'n' should
+         * wake-up thread 'n+1' until the last thread stops.
+         * Check thread is blocked before unblocking to avoid asserts.
          */
-        thread_unblock(smptest_thread[0], false);
+        if (thread_is_blocked(smptest_thread[0].thread)) {
+            thread_unblock(smptest_thread[0].thread, false);
+        }
+
         THREAD_UNLOCK(state);
 
-        /*
-         * Sleep 1 second to allow all CPUs to run. Each CPU sleeps 100 ms, so
-         * this leaves 600 ms of execution time.
-         */
-        thread_sleep(1000);
+        /* Sleep to allow all CPUs to run with some margin */
+        thread_sleep((THREAD_DELAY_MS + 5) * SMP_MAX_CPUS);
 
         /*
          * Check that every CPU-thread ran exactly once each time we woke up the
-         * thread on CPU 0.
+         * first thread.
          */
-        for (i = 0; i < SMPTEST_THREAD_COUNT; i++) {
-            int unblock_count = smptest_thread_unblock_count[i];
-            int done_count = smptest_thread_done_count[i];
-            if (unblock_count < j) {
-                unittest_printf("smptest cpu %d FAILED to run\n", i);
-                return false;
+        for (uint cpu = 0; cpu < SMP_MAX_CPUS; cpu++) {
+            const struct smptest_thread* const smpt = &smptest_thread[cpu];
+
+            /*
+             * Some cpus can still execute the thread body (e.g. if they are
+             * interrupted by some other jobs), so give them time to finish
+             * (up to 1 sec; after that, assume they got stuck).
+             */
+            for (int i = 0; i < 10; i++) {
+                if (smpt->unblock_count != j || smpt->done_count != j) {
+                    thread_sleep(100);
+                }
             }
-            if (done_count < j) {
-                unittest_printf("smptest cpu %d FAILED to complete\n", i);
-                return false;
+
+            const int unblock_count = smpt->unblock_count;
+            const int error_count = smpt->error_count;
+            const int done_count = smpt->done_count;
+
+            if (smpt->started) {
+                EXPECT_EQ(unblock_count, j, "cpu %d FAILED block count\n", cpu);
+                EXPECT_EQ(error_count, 0, "cpu %d FAILED error count\n", cpu);
+                EXPECT_EQ(done_count, j, "cpu %d FAILED done count\n", cpu);
+
+                if (j == SMPTEST_CYCLES - 1) { /* last cycle: report total */
+                    unittest_printf(
+                            "[   INFO   ] smptest cpu %d ran %d times\n", cpu,
+                            j);
+                }
+            } else {
+                EXPECT_EQ(mp_is_cpu_active(cpu), false,
+                          "cpu %d active but not running", cpu);
+                EXPECT_EQ(unblock_count, 0, "cpu %d FAILED block count\n", cpu);
+                EXPECT_EQ(error_count, 0, "cpu %d FAILED error count\n", cpu);
+                EXPECT_EQ(done_count, 0, "cpu %d FAILED done count\n", cpu);
             }
-            if (unblock_count > j || done_count > j) {
-                unittest_printf("smptest cpu %d FAILED to block\n", i);
-                return false;
-            }
-            unittest_printf("smptest cpu %d ran\n", i);
         }
     }
-    return true;
+
+test_abort:;
 }
 
-static struct unittest smp_unittest = {
-        .port_name = "com.android.kernel.smp-unittest",
-        .run_test = run_smp_test,
-};
+static void smptest_setup(uint level) {
+    /* Create a thread for each possible CPU */
+    for (uint cpu = 0; cpu < SMP_MAX_CPUS; cpu++) {
+        struct smptest_thread* smpt = &smptest_thread[cpu];
+        char thread_name[32];
 
-static void smptest_init(uint level) {
-    int i;
-    char thread_name[32];
-
-    for (i = 0; i < SMPTEST_THREAD_COUNT; i++) {
-        snprintf(thread_name, sizeof(thread_name), "smptest-%d", i);
-        smptest_thread[i] =
-                thread_create(thread_name, smptest, (void*)(uintptr_t)i,
-                              HIGH_PRIORITY, DEFAULT_STACK_SIZE);
-        thread_set_pinned_cpu(smptest_thread[i], i);
-    }
-    for (i = 0; i < SMPTEST_THREAD_COUNT; i++) {
-        thread_resume(smptest_thread[i]);
+        snprintf(thread_name, sizeof(thread_name), "smptest-%u", cpu);
+        smpt->thread = thread_create(thread_name, smptest_thread_func,
+                                     (void*)(uintptr_t)cpu, HIGH_PRIORITY,
+                                     DEFAULT_STACK_SIZE);
+        thread_set_pinned_cpu(smpt->thread, cpu);
     }
 
-    unittest_add(&smp_unittest);
+    /* Allow threads to run */
+    for (uint cpu = 0; cpu < SMP_MAX_CPUS; cpu++) {
+        thread_resume(smptest_thread[cpu].thread);
+    }
 }
 
-LK_INIT_HOOK(smptest, smptest_init, LK_INIT_LEVEL_APPS);
+LK_INIT_HOOK(smptest_hook, smptest_setup, LK_INIT_LEVEL_APPS);
+
+PORT_TEST(smptest, "com.android.kernel.smp-unittest");
diff --git a/app/stdcalltest/stdcalltest.c b/app/stdcalltest/stdcalltest.c
index 97348a2..b9aae5a 100644
--- a/app/stdcalltest/stdcalltest.c
+++ b/app/stdcalltest/stdcalltest.c
@@ -21,6 +21,12 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+/*
+ * This module registers smc handlers that are called by tests running in the
+ * client os. This api is currently only available if lib/sm is enabled.
+ */
+#if WITH_LIB_SM
+
 #include <arch/arch_ops.h>
 #include <arch/ops.h>
 #include <err.h>
@@ -196,3 +202,5 @@
     }
 }
 LK_INIT_HOOK(stdcalltest, stdcalltest_init, LK_INIT_LEVEL_APPS);
+
+#endif
diff --git a/app/trusty/user-tasks.mk b/app/trusty/user-tasks.mk
index ab42f35..4adb923 100644
--- a/app/trusty/user-tasks.mk
+++ b/app/trusty/user-tasks.mk
@@ -24,6 +24,7 @@
 #
 # Input variables
 #
+#   ALLHOSTMODULES             - list of all host modules (e.g. Rust proc-macros) to be built
 #   TRUSTY_BUILTIN_USER_TASKS  - list of compiled from source user tasks to be included into final image
 #   TRUSTY_PREBUILT_USER_TASKS - list of precompiled user tasks to be included into final image
 #   	These prebuilt task modules must include a manifest binary and app elf binary, e.g.:
@@ -42,7 +43,7 @@
 #                               still allow overriding via environment variable.
 #
 
-$(info Include Trusty user tasks support)
+$(call INFO_LOG,Include Trusty user tasks support)
 
 TRUSTY_APP_DIR := $(GET_LOCAL_DIR)
 
@@ -89,7 +90,15 @@
 TRUSTY_SDK_LICENSE_DIR := $(TRUSTY_SDK_DIR)/licenses
 TRUSTY_SDK_LICENSE := $(TRUSTY_SDK_DIR)/LICENSE
 TRUSTY_LIBRARY_BUILDDIR := $(BUILDDIR)/lib
-TRUSTY_HOST_LIBRARY_BUILDDIR := $(BUILDDIR)/host_lib
+
+# Host modules required by userspace are built in passing as needed, as the
+# userspace build system respects MODULE_RUST_HOST_LIB. But the kernel may also
+# need some host modules, e.g. for proc-macro crates. So we explicitly iterate
+# over the set of host modules required by the kernel, generating their build
+# rules with the userspace build system.
+$(foreach lib,$(ALLHOSTMODULES),\
+	$(eval $(call trusty-build-rule,$(lib))))
+
 
 # The license file construction assumes that all projects will contain the same
 # set of SDK modules and thus the same set of respective license files. If this
@@ -115,6 +124,7 @@
 	trusty/user/base/lib/libc-trusty \
 	trusty/user/base/lib/libcxxabi-trusty \
 	trusty/user/base/lib/libstdc++-trusty \
+	trusty/user/base/lib/line-coverage \
 	trusty/user/base/lib/rng \
 	trusty/user/base/lib/spi/client \
 	trusty/user/base/lib/spi/common \
@@ -145,7 +155,6 @@
 $(patsubst lib%,%,$(notdir $(1)))
 endef
 
-GLOBAL_HOST_RUSTFLAGS += -L $(RUST_HOST_LIBDIR) -L dependency=$(TRUSTY_HOST_LIBRARY_BUILDDIR)
 GLOBAL_USER_RUSTFLAGS += -L dependency=$(TRUSTY_LIBRARY_BUILDDIR)
 
 # We need the host library dir to pick up recursive dependencies that are proc
@@ -191,15 +200,23 @@
 
 # Rust crate tests have a -test suffix to their module name to distinguish from
 # the crate itself with the same path.
+ifeq ($(call TOBOOL,$(ARCH_$(ARCH)_SUPPORTS_RUST)),true)
 RUST_USER_TEST_MODULES := $(addsuffix -test,$(TRUSTY_RUST_USER_TESTS))
+endif
 
+ifneq (true,$(call TOBOOL,$(UNITTEST_COVERAGE_ENABLED)))
 # Default to including all user tests in the image if the set of builtin tests
 # is not selected.
 TRUSTY_BUILTIN_USER_TESTS ?= $(TRUSTY_USER_TESTS) $(RUST_USER_TEST_MODULES)
 TRUSTY_BUILTIN_USER_TASKS += $(TRUSTY_BUILTIN_USER_TESTS)
+endif
+
+# remove duplicates
+TRUSTY_BUILTIN_USER_TASKS := $(sort $(TRUSTY_BUILTIN_USER_TASKS))
 
 ALL_USER_TASKS := $(TRUSTY_BUILTIN_USER_TASKS) $(TRUSTY_LOADABLE_USER_TASKS)  \
 		  $(TRUSTY_USER_TESTS) $(TRUSTY_LOADABLE_USER_TESTS) $(TRUSTY_RUST_USER_TESTS)
+
 # sort and remove duplicates
 ALL_USER_TASKS := $(sort $(ALL_USER_TASKS))
 
@@ -213,6 +230,9 @@
 # should not matter for this library. (The main difference is which version of
 # libcxx they link against, and the builtins do not use C++.)
 TRUSTY_APP_LIBGCC := $(CLANG_BINDIR)/../runtimes_ndk_cxx/libclang_rt.builtins-$(STANDARD_ARCH_NAME)-android.a
+ifeq (true,$(call TOBOOL,$(UNITTEST_COVERAGE_ENABLED)))
+TRUSTY_APP_LIBCOV := -u__llvm_profile_runtime $(CLANG_BINDIR)/../runtimes_ndk_cxx/libclang_rt.profile-aarch64-android.a
+endif
 
 TRUSTY_APP_BASE_LDFLAGS := $(GLOBAL_SHARED_LDFLAGS) -z max-page-size=4096 -z separate-loadable-segments
 TRUSTY_APP_ALIGNMENT := 4096
@@ -238,6 +258,8 @@
 # Rules are the first time a library is required, so libraries may already be
 # processed before we get to them in the list of SDK libraries.
 #
+$(call INFO_LOG,Generate build rules for SDK libraries)
+
 $(foreach lib,$(TRUSTY_SDK_MODULES),\
 	$(if $(_MODULES_$(lib)),,$(eval $(call trusty-build-rule,$(lib)))))
 
@@ -260,6 +282,8 @@
 #
 # Generate build rules for each user task
 #
+$(call INFO_LOG,Generate build rules for user tasks)
+
 $(foreach t,$(ALL_USER_TASKS),\
    $(eval $(call trusty-build-rule,$(t))))
 
@@ -284,14 +308,24 @@
 TRUSTY_BUILTIN_USER_TASKS += $(TRUSTY_PREBUILT_USER_TASKS)
 
 # Build the SDK makefile
+$(call INFO_LOG,Build SDK makefile)
 $(eval $(call trusty-build-rule,trusty/user/base/sdk))
 
 # Ensure that includes and libs are installed
 all:: $(ALL_SDK_INCLUDES) $(ALL_SDK_LIBS) $(ALL_SDK_EXTRA_FILES) $(TRUSTY_SDK_LICENSE)
 
+ifeq (false,$(call TOBOOL,$(TRUSTY_APPLOADER_ENABLED)))
+
+ifneq ($(strip $(TRUSTY_LOADABLE_USER_TASKS) $(TRUSTY_LOADABLE_USER_TESTS)),)
+$(error Loadable apps ($(TRUSTY_LOADABLE_USER_TASKS) $(TRUSTY_LOADABLE_USER_TESTS)) requested but this build does not include the apploader service)
+endif
+
+else # TRUSTY_APPLOADER_ENABLED
+
 #
 # Generate loadable application packages
 #
+$(call INFO_LOG,Generate loadable application packages)
 define loadable-app-build-rule
 $(eval APP_NAME := $(notdir $(1)))\
 $(eval APP_TOP_MODULE := $(1))\
@@ -305,6 +339,7 @@
 #
 # Generate build rules for each application
 #
+$(call INFO_LOG,Generate loadable apps build rules)
 $(foreach t,$(TRUSTY_LOADABLE_USER_TASKS),\
    $(call loadable-app-build-rule,$(t)))
 
@@ -321,6 +356,7 @@
 #
 # Generate build rules for test application
 #
+$(call INFO_LOG,Generate test apps build rules)
 $(foreach t,$(TRUSTY_USER_TESTS),\
    $(call loadable-app-build-rule,$(t)))
 
@@ -329,20 +365,24 @@
 
 ifneq ($(strip $(TRUSTY_LOADABLE_TEST_APPS)),)
 
-TEST_PACKAGE_ZIP := $(BUILDDIR)/trusty_test_package.zip
+TEST_PACKAGE_ZIP ?= $(BUILDDIR)/trusty_test_package.zip
+TEST_PACKAGE_ZIP_ARGS ?= -q -u -r
 
 $(TEST_PACKAGE_ZIP): BUILDDIR := $(BUILDDIR)
+$(TEST_PACKAGE_ZIP): TEST_PACKAGE_ZIP_ARGS := $(TEST_PACKAGE_ZIP_ARGS)
 $(TEST_PACKAGE_ZIP): $(TRUSTY_LOADABLE_TEST_APPS)
 	@$(MKDIR)
-	@echo Creating Trusty test archive package
-	@echo "$^"
+	@$(call ECHO,KERNEL,creating Trusty test archive package,$^)
 	$(NOECHO)rm -f $@
-	$(NOECHO)(cd $(BUILDDIR) && zip -q -u -r $@ $(subst $(BUILDDIR)/,,$^))
+	$(NOECHO)(cd $(BUILDDIR) && zip $(TEST_PACKAGE_ZIP_ARGS)  $@ $(subst $(BUILDDIR)/,,$^))
+	@$(call ECHO_DONE_SILENT,KERNEL,creating Trusty test archive package,$^)
 
 EXTRA_BUILDDEPS += $(TEST_PACKAGE_ZIP)
 
 endif
 
+endif # TRUSTY_APPLOADER_ENABLED
+
 
 #
 # Build a rust-project.json for rust-analyzer
@@ -372,20 +412,6 @@
 ALLOW_FP_USE := $(TRUSTY_KERNEL_SAVED_ALLOW_FP_USE)
 SCS_ENABLED := $(TRUSTY_KERNEL_SAVED_SCS_ENABLED)
 
-
-#
-# Check for duplicate tasks
-#
-$(foreach _task,$(TRUSTY_BUILTIN_USER_TASKS),\
-  $(eval DUPLICATE_TASKS += $$(filter $$(TASKS_SEEN),$$(_task)))\
-  $(eval TASKS_SEEN += $$(_task))\
-)
-
-ifneq ($(strip $(DUPLICATE_TASKS)),)
-$(error Duplicate TRUSTY_BUILTIN_USER_TASKS: $(DUPLICATE_TASKS))
-endif
-
-
 #
 # Generate combined user task obj/bin if necessary
 #
@@ -403,10 +429,12 @@
 $(BUILTIN_TASK_OBJS): GLOBAL_COMPILEFLAGS := $(GLOBAL_COMPILEFLAGS)
 $(BUILTIN_TASK_OBJS): ARCH_COMPILEFLAGS := $(ARCH_$(ARCH)_COMPILEFLAGS)
 $(BUILTIN_TASK_OBJS): USER_TASK_OBJ_ASM:=$(TRUSTY_APP_DIR)/appobj.S
+$(BUILTIN_TASK_OBJS): LOG_NAME:=$(TRUSTY_APP_DIR)
 $(BUILTIN_TASK_OBJS): %.o: %.elf %.manifest $(USER_TASK_OBJ_ASM)
 	@$(MKDIR)
-	@echo converting $< to $@
+	@$(call ECHO,$(LOG_NAME),converting,$< to $@)
 	$(NOECHO)$(CC) -DUSER_TASK_ELF=\"$<\" -DMANIFEST_DATA=\"$(word 2,$^)\" $(GLOBAL_COMPILEFLAGS) $(ARCH_COMPILEFLAGS) -c $(USER_TASK_OBJ_ASM) -o $@
+	@$(call ECHO_DONE_SILENT,$(LOG_NAME),converting,$< to $@)
 
 EXTRA_OBJS += $(BUILTIN_TASK_OBJS)
 
@@ -414,8 +442,6 @@
 
 # Reset app variables
 BUILDDIR := $(TRUSTY_TOP_LEVEL_BUILDDIR)
-TASKS_SEEN :=
-DUPLICATE_TASKS :=
 TRUSTY_APP :=
 TRUSTY_APP_NAME :=
 TRUSTY_APP_BASE_LDFLAGS :=
@@ -423,6 +449,7 @@
 TRUSTY_APP_ALIGNMENT :=
 TRUSTY_APP_MEMBASE :=
 TRUSTY_APP_SYMTAB_ENABLED :=
+TRUSTY_APPLOADER_ENABLED :=
 TRUSTY_TOP_LEVEL_BUILDDIR :=
 TRUSTY_USERSPACE :=
 TRUSTY_USERSPACE_SAVED_ARCH :=
diff --git a/build-config-kerneltests b/build-config-kerneltests
index 6ff1621..832fdd6 100644
--- a/build-config-kerneltests
+++ b/build-config-kerneltests
@@ -15,19 +15,20 @@
 # This file lists kernel tests
 
 [
-    include("external/lk/build-config-kerneltests"),
+    include("external/lk/build-config-kerneltests", optional=True),
+    include("external/trusty/lk/build-config-kerneltests", optional=True),
     porttest("com.android.kernel.btitest"),
     porttest("com.android.kernel.cachetest"),
     porttest("com.android.kernel.console-unittest"),
-    porttest("com.android.kernel.device_tree.test"),
     porttest("com.android.kernel.dpc-unittest"),
     porttest("com.android.kernel.iovectest"),
     porttest("com.android.kernel.ktipc.test"),
     porttest("com.android.kernel.libctest"),
     porttest("com.android.kernel.libcxxtest"),
+    porttest("com.android.kernel.memorylatency.bench").type(BENCHMARK),
     porttest("com.android.kernel.memorytest"),
     porttest("com.android.kernel.mmutest"),
-    porttest("com.android.kernel.pacbench"),
+    porttest("com.android.kernel.pacbench").type(BENCHMARK),
     porttest("com.android.kernel.pactest"),
     porttest("com.android.kernel.smp-unittest").needs(smp4=True),
     porttest("com.android.kernel.uirq-unittest"),
diff --git a/include/shared/lk/trusty_bench_common.h b/include/shared/lk/trusty_bench_common.h
index 82a9296..a7cadb9 100644
--- a/include/shared/lk/trusty_bench_common.h
+++ b/include/shared/lk/trusty_bench_common.h
@@ -22,8 +22,11 @@
  */
 
 #pragma once
-#include <lib/unittest/unittest.h>
+#include <ctype.h>
 #include <stdio.h>
+
+#include <lib/unittest/unittest.h>
+
 #include "trusty_bench_option_cb.h"
 
 /**
@@ -75,6 +78,11 @@
  * @bench_result:       Function pointer holding the BENCH_RESULT body
  *                      Used to get the value to be aggregate for this metric
  *                      after each BENCH body run.
+ * @formatted_value_cb: A callback of
+ *                      trusty_bench_get_formatted_value_callback_t type for
+ *                      formatting the result value to a string
+ * @param_name_cb:      A callback of trusty_bench_get_param_name_callback_t
+ *                      type for formatting the param name
  */
 struct bench_metric_list_node {
     struct list_node node;
@@ -83,6 +91,8 @@
     size_t param_idx;
     size_t col_sz;
     int64_t (*bench_result)(void);
+    trusty_bench_get_formatted_value_callback_t formatted_value_cb;
+    trusty_bench_get_param_name_callback_t param_name_cb;
 };
 
 /*
@@ -98,6 +108,71 @@
 static size_t trusty_bench_table_total_width;
 
 /**
+ * trusty_bench_validate_numeric - Utility function to parse Google F1 SQL valid
+ * values except NaN and +/-inf
+ *
+ * @s: string to parse
+ *
+ * Return: true if s is a valid double
+ */
+static inline bool trusty_bench_validate_numeric(const char* s) {
+    bool found_dot = false;
+    /* ignore initial spaces and tabs */
+    while (*s != '\0' && (*s == ' ' || *s == '\t')) {
+        ++s;
+    }
+
+    /* can optionally start with sign */
+    if (*s == '+' || *s == '-') {
+        ++s;
+    }
+
+    /* Then digits, one optional dot and an optional exponent */
+    while (*s != '\0' && *s != ' ' && *s != '\t') {
+        switch (*s) {
+        case '.':
+            if (found_dot) {
+                return false;
+            }
+            found_dot = true;
+            break;
+        case 'E':
+        case 'e':
+            found_dot = true;  // dots are not allowed after the exponent marker
+
+            // Start Exponent. Ignore Sign.
+            ++s;
+            if (*s == '+' || *s == '-') {
+                ++s;
+            }
+            // Make sure there is an exponent after the E+/-.
+            // Let the loop do the increment. Cast: isdigit() needs uchar range.
+            if (!isdigit((unsigned char)*s)) {
+                return false;
+            }
+            break;
+        default:
+            // Note: Leading zeros are accepted by SQL SAFE_CAST parsing
+            if (!isdigit((unsigned char)*s)) {
+                return false;
+            }
+            break;
+        }
+        ++s;
+    }
+
+    /* ignore trailing spaces and tabs */
+    while (*s != '\0' && (*s == ' ' || *s == '\t')) {
+        ++s;
+    }
+    if (*s == '\0') {
+        return true;
+    }
+
+    return false;
+}
+
+/**
  * trusty_bench_sprint_col_stat -     print the value of one statistical
  * aggregate in a formatted column
  * @buffer:             Buffer in which to write the results. Preallocated.
@@ -115,5 +190,17 @@
     } else {
         trusty_bench_get_formatted_value_cb(buffer, buffer_len, val,
                                             metric_name);
+        EXPECT_EQ(trusty_bench_validate_numeric(buffer), true,
+                  "%s is not a valid double representation.\n", buffer);
     }
 }
+
+/* Number of CPUs on which to bench */
+static uint8_t trusty_bench_nb_cpu = 1;
+static size_t trusty_cur_bench_nb_params = 1;
+static size_t trusty_bench_multi_cpu_param_idx(size_t param_idx) {
+    return param_idx % trusty_cur_bench_nb_params;
+}
+static size_t trusty_bench_cpu_idx(size_t param_idx) {
+    return param_idx / trusty_cur_bench_nb_params;
+}
diff --git a/include/shared/lk/trusty_bench_json_print.h b/include/shared/lk/trusty_bench_json_print.h
index f8d6825..d3d4f1d 100644
--- a/include/shared/lk/trusty_bench_json_print.h
+++ b/include/shared/lk/trusty_bench_json_print.h
@@ -32,7 +32,7 @@
  * schema validation. Schema is available in the same folder as this file
  * "trusty_bench_json_schema.vXXX.json"
  */
-#define BENCH_SCHEMA_VERSION "2"
+#define BENCH_SCHEMA_VERSION "3"
 
 /**
  * trusty_bench_print_json_metric_list -  Prints a machine readable json of all
@@ -65,9 +65,12 @@
         first_iter = false;
         trusty_unittest_printf("{");
         trusty_unittest_printf("\"metric_name\": \"%s\", ", entry->name);
-        if (nb_params > 1) {
+        if (nb_params > 1 || trusty_bench_nb_cpu > 1) {
             trusty_unittest_printf("\"param_id\": %zu, ", entry->param_idx);
-            if (trusty_bench_get_param_name_cb) {
+            if (entry->param_name_cb) {
+                entry->param_name_cb(buf, sizeof(buf), entry->param_idx);
+                trusty_unittest_printf("\"param_name\": \"%s\", ", buf);
+            } else if (trusty_bench_get_param_name_cb) {
                 trusty_bench_get_param_name_cb(buf, sizeof(buf),
                                                entry->param_idx);
                 trusty_unittest_printf("\"param_name\": \"%s\", ", buf);
diff --git a/include/shared/lk/trusty_bench_json_schema.v2.json b/include/shared/lk/trusty_bench_json_schema.v3.json
similarity index 96%
rename from include/shared/lk/trusty_bench_json_schema.v2.json
rename to include/shared/lk/trusty_bench_json_schema.v3.json
index bbfd063..13b3f03 100644
--- a/include/shared/lk/trusty_bench_json_schema.v2.json
+++ b/include/shared/lk/trusty_bench_json_schema.v3.json
@@ -15,8 +15,8 @@
         },
         "schema_version": {
             "type": "number",
-            "minimum": 2,
-            "maximum": 2
+            "minimum": 3,
+            "maximum": 3
         },
         "results": {
             "type": "array",
@@ -29,6 +29,7 @@
     },
     "required": [
         "suite_name",
+        "bench_name",
         "results",
         "schema_version"
     ],
diff --git a/include/shared/lk/trusty_bench_print_tables.h b/include/shared/lk/trusty_bench_print_tables.h
index f348ddd..1eb76e9 100644
--- a/include/shared/lk/trusty_bench_print_tables.h
+++ b/include/shared/lk/trusty_bench_print_tables.h
@@ -29,10 +29,12 @@
 static size_t trusty_bench_max_column_width;
 
 /* Max Width ever needed for a metric cell in the table */
-static size_t trusty_bench_max_metric_name_width;
+static size_t trusty_bench_max_metric_name_width =
+        6 /* strlen("Metric") is the minimum needed*/;
 
 /* Max Width ever needed for a Param cell in the table */
-static size_t trusty_bench_max_param_name_width;
+static size_t trusty_bench_max_param_name_width =
+        5 /* strlen("Param") is the minimum needed*/;
 
 /* Max Width ever needed for a Metric Value cell in the table */
 static size_t trusty_bench_max_metric_digit_width;
@@ -97,7 +99,11 @@
                                             const char* param) {
     char buffer[64];
 
-    snprintf(buffer, sizeof(buffer), "RUNNING %s_%s_%s", suite, bench, param);
+    int nb_chars =
+            snprintf(buffer, sizeof(buffer), "RUNNING %s_%s", suite, bench);
+    if (*param != '\0' && nb_chars < (int)sizeof(buffer)) {
+        snprintf(buffer + nb_chars, sizeof(buffer) - nb_chars, "_%s", param);
+    }
     trusty_bench_print_border(BENCH_TITLE_WIDTH);
     trusty_bench_center_print(BENCH_TITLE_WIDTH - 1, buffer);
     trusty_unittest_printf("|\n");
@@ -173,8 +179,10 @@
         /* First must be bigger than the size of the param header if any */
         size_t column_width = 0;
 
-        if (nb_params > 1) {
-            if (trusty_bench_get_param_name_cb) {
+        if (nb_params > 1 || trusty_bench_nb_cpu > 1) {
+            if (entry->param_name_cb) {
+                entry->param_name_cb(buf, sizeof(buf), entry->param_idx);
+            } else if (trusty_bench_get_param_name_cb) {
                 trusty_bench_get_param_name_cb(buf, sizeof(buf),
                                                entry->param_idx);
             } else {
@@ -244,7 +252,9 @@
                          node) {
         char buf[BENCH_MAX_COL_SIZE];
 
-        if (trusty_bench_get_param_name_cb) {
+        if (entry->param_name_cb) {
+            entry->param_name_cb(buf, sizeof(buf), entry->param_idx);
+        } else if (trusty_bench_get_param_name_cb) {
             trusty_bench_get_param_name_cb(buf, sizeof(buf), entry->param_idx);
         } else {
             snprintf(buf, sizeof(buf), "%zu", entry->param_idx);
@@ -261,17 +271,25 @@
  * @val:                Value to print
  * @metric_name:        Metric for which the aggregate stat is to be printed.
  */
-static inline void trusty_bench_print_col_stat(size_t sz,
-                                               int64_t val,
-                                               const char* metric_name) {
-    if (trusty_bench_get_formatted_value_cb == NULL) {
+static inline void trusty_bench_print_col_stat(
+        size_t sz,
+        int64_t val,
+        const char* metric_name,
+        trusty_bench_get_formatted_value_callback_t value_format_cb) {
+    if (value_format_cb == NULL) {
+        value_format_cb = trusty_bench_get_formatted_value_cb;
+    }
+    if (value_format_cb == NULL) {
         trusty_unittest_printf("%*" PRId64 "|", (int)sz, val);
     } else {
         char buffer[32];
 
-        trusty_bench_get_formatted_value_cb(buffer, sizeof(buffer), val,
-                                            metric_name);
-        trusty_unittest_printf("%*s|", (int)sz, buffer);
+        value_format_cb(buffer, sizeof(buffer), val, metric_name);
+        if (trusty_bench_validate_numeric(buffer)) {
+            trusty_unittest_printf("%*s|", (int)sz, buffer);
+        } else {
+            trusty_unittest_printf("%*s|", (int)sz, "");
+        }
     }
 }
 
@@ -292,12 +310,16 @@
                                   true);
     list_for_every_entry(metric_list, entry, struct bench_metric_list_node,
                          node) {
-        if (idx == BENCH_AGGREGATE_COLD) {
+        if (entry->metric.cnt == 0) {
+            trusty_bench_center_print(trusty_bench_max_metric_digit_width, "-");
+            trusty_unittest_printf("|");
+        } else if (idx == BENCH_AGGREGATE_COLD) {
             trusty_bench_print_col_stat(entry->col_sz, entry->metric.cold,
-                                        entry->name);
+                                        entry->name, entry->formatted_value_cb);
         } else {
-            trusty_bench_print_col_stat(
-                    entry->col_sz, entry->metric.aggregates[idx], entry->name);
+            trusty_bench_print_col_stat(entry->col_sz,
+                                        entry->metric.aggregates[idx],
+                                        entry->name, entry->formatted_value_cb);
         }
     }
     trusty_unittest_printf("\n");
@@ -320,12 +342,13 @@
     trusty_bench_print_border(trusty_bench_table_total_width);
     trusty_bench_print_header(metric_list);
     trusty_bench_print_border(trusty_bench_table_total_width);
-    if (nb_params > 1) {
+    if (nb_params > 1 || trusty_bench_nb_cpu > 1) {
         trusty_bench_print_params(metric_list);
         trusty_bench_print_border(trusty_bench_table_total_width);
     }
-    trusty_bench_print_stat(metric_list, BENCH_AGGREGATE_AVG, "avg");
+
     trusty_bench_print_stat(metric_list, BENCH_AGGREGATE_MIN, "min");
+    trusty_bench_print_stat(metric_list, BENCH_AGGREGATE_AVG, "avg");
     trusty_bench_print_stat(metric_list, BENCH_AGGREGATE_MAX, "max");
     trusty_bench_print_stat(metric_list, BENCH_AGGREGATE_COLD, "cold");
 
@@ -352,23 +375,23 @@
                    4 * trusty_bench_max_metric_digit_width + 6;
 
     /* Need one column for params? */
-    if (nb_params > 1) {
+    if (nb_params > 1 || trusty_bench_nb_cpu > 1) {
         width += trusty_bench_max_param_name_width + 1;
     }
     trusty_bench_print_border(width);
     trusty_unittest_printf("|");
     trusty_bench_print_col_header(trusty_bench_max_metric_name_width, "Metric",
                                   false);
-    if (nb_params > 1) {
+    if (nb_params > 1 || trusty_bench_nb_cpu > 1) {
         trusty_bench_print_col_header(trusty_bench_max_param_name_width,
                                       "Param", false);
     }
     trusty_bench_print_col_header(trusty_bench_max_metric_digit_width, "Min",
                                   false);
-    trusty_bench_print_col_header(trusty_bench_max_metric_digit_width, "Max",
-                                  false);
     trusty_bench_print_col_header(trusty_bench_max_metric_digit_width, "Avg",
                                   false);
+    trusty_bench_print_col_header(trusty_bench_max_metric_digit_width, "Max",
+                                  false);
     trusty_bench_print_col_header(trusty_bench_max_metric_digit_width, "Cold",
                                   false);
     trusty_unittest_printf("\n");
@@ -384,10 +407,12 @@
         trusty_unittest_printf("|");
         trusty_bench_print_col_header(trusty_bench_max_metric_name_width,
                                       entry->name, false);
-        if (nb_params > 1) {
+        if (nb_params > 1 || trusty_bench_nb_cpu > 1) {
             char buf[BENCH_MAX_COL_SIZE];
 
-            if (trusty_bench_get_param_name_cb) {
+            if (entry->param_name_cb) {
+                entry->param_name_cb(buf, sizeof(buf), entry->param_idx);
+            } else if (trusty_bench_get_param_name_cb) {
                 trusty_bench_get_param_name_cb(buf, sizeof(buf),
                                                entry->param_idx);
             } else {
@@ -396,17 +421,30 @@
             trusty_bench_print_col_header(trusty_bench_max_param_name_width,
                                           buf, false);
         }
-        trusty_bench_print_col_stat(
-                trusty_bench_max_metric_digit_width,
-                entry->metric.aggregates[BENCH_AGGREGATE_MIN], entry->name);
-        trusty_bench_print_col_stat(
-                trusty_bench_max_metric_digit_width,
-                entry->metric.aggregates[BENCH_AGGREGATE_MAX], entry->name);
-        trusty_bench_print_col_stat(
-                trusty_bench_max_metric_digit_width,
-                entry->metric.aggregates[BENCH_AGGREGATE_AVG], entry->name);
-        trusty_bench_print_col_stat(trusty_bench_max_metric_digit_width,
-                                    entry->metric.cold, entry->name);
+
+        if (entry->metric.cnt) {
+            trusty_bench_print_col_stat(
+                    trusty_bench_max_metric_digit_width,
+                    entry->metric.aggregates[BENCH_AGGREGATE_MIN], entry->name,
+                    entry->formatted_value_cb);
+            trusty_bench_print_col_stat(
+                    trusty_bench_max_metric_digit_width,
+                    entry->metric.aggregates[BENCH_AGGREGATE_AVG], entry->name,
+                    entry->formatted_value_cb);
+            trusty_bench_print_col_stat(
+                    trusty_bench_max_metric_digit_width,
+                    entry->metric.aggregates[BENCH_AGGREGATE_MAX], entry->name,
+                    entry->formatted_value_cb);
+            trusty_bench_print_col_stat(trusty_bench_max_metric_digit_width,
+                                        entry->metric.cold, entry->name,
+                                        entry->formatted_value_cb);
+        } else {
+            for (int i = 0; i < 4; i++) {
+                trusty_bench_center_print(trusty_bench_max_metric_digit_width,
+                                          "-");
+                trusty_unittest_printf("|");
+            }
+        }
         trusty_unittest_printf("\n");
     }
     trusty_bench_print_border(width);
diff --git a/include/shared/lk/trusty_benchmark.h b/include/shared/lk/trusty_benchmark.h
index a0c3e24..d0c3bf8 100644
--- a/include/shared/lk/trusty_benchmark.h
+++ b/include/shared/lk/trusty_benchmark.h
@@ -50,6 +50,7 @@
  *  - Will cancel execution of the next BENCH body if any ASSERT_<op> fails.
  *    Test will be considered failed.
  *  - All ASSERT_<op> macros from trusty_unittest can be used
+ *  - GTEST_SKIP() may be called to skip the benchmark run.
  *
  * BENCH_TEARDOWN(suite_name)
  * {
@@ -122,6 +123,7 @@
 #include <errno.h>
 #include <inttypes.h>
 #include <stdarg.h>
+#include <stdlib.h>
 
 #include <lib/unittest/unittest.h>
 #include <trusty_log.h>
@@ -130,10 +132,131 @@
 #include "trusty_bench_option_cb.h"
 #include "trusty_bench_print_tables.h"
 #include "trusty_unittest.h"
+#ifdef TRUSTY_USERSPACE
+#ifdef WITH_PTHREAD
+#include <lib/thread/pthread.h>
+#endif
+#elif WITH_SMP
+#include <kernel/mp.h>
+#endif
+#include <uapi/err.h>
+
+/*
+ * Static dispatch helpers. NB_ARGS(...) yields its argument count (1 to 8).
+ */
+#define NB_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, N, ...) N
+#define NB_ARGS(...) NB_ARGS_HELPER(__VA_ARGS__, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+/* CAT() pastes its arguments after they have been macro-expanded. */
+#define CAT(a, ...) PRIMITIVE_CAT(a, __VA_ARGS__)
+#define PRIMITIVE_CAT(a, ...) a##__VA_ARGS__
+/* EVAL() expands to its arguments unchanged. */
+#define EVAL(...) __VA_ARGS__
 
 __BEGIN_CDECLS
 
 /**
+ * struct benchmark_internal_state - Store internals for current bench.
+ * @last_bench_body_duration:   nanoseconds duration of the last execution of
+ *                              the bench body.
+ * @cur_param_idx:              flattened cpu/param index of the current run.
+ */
+static struct benchmark_internal_state {
+    int64_t last_bench_body_duration;
+    size_t cur_param_idx;
+} bench_state;
+
+/**
+ * bench_get_duration_ns - convenience function to use in BENCH_RESULT to get
+ * the duration of last bench body execution.
+ *
+ * Return: The duration of the last completed BENCH body in nanoseconds.
+ */
+static inline int64_t bench_get_duration_ns(void) {
+    return bench_state.last_bench_body_duration;
+}
+
+/**
+ * bench_get_param_idx - get the index of the parameter BENCH_XXX is currently
+ * running for, i.e. the flattened run index with its cpu component removed.
+ * Return: bench_state.cur_param_idx modulo trusty_cur_bench_nb_params.
+ */
+static inline size_t bench_get_param_idx(void) {
+    return trusty_bench_multi_cpu_param_idx(bench_state.cur_param_idx);
+}
+
+/**
+ * bench_get_cpu_idx - get the index of the cpu BENCH_XXX is currently running
+ * for, i.e. the cpu component of the flattened run index.
+ * Return: bench_state.cur_param_idx divided by trusty_cur_bench_nb_params.
+ */
+static inline size_t bench_get_cpu_idx(void) {
+    return trusty_bench_cpu_idx(bench_state.cur_param_idx);
+}
+
+/*
+ * Helper functions to run tests on all CPUs
+ */
+#if defined(TRUSTY_USERSPACE) && defined(WITH_PTHREAD)
+static int trusty_bench_multi_cpus_setup(void) {
+    if (trusty_bench_nb_cpu <= 1) {
+        return NO_ERROR; /* single-cpu bench: leave affinity untouched */
+    }
+
+    /* The cpu index is the quotient of the flattened parameter index. */
+    cpu_set_t pinned_set;
+    CPU_ZERO(&pinned_set);
+    CPU_SET(bench_state.cur_param_idx / trusty_cur_bench_nb_params,
+            &pinned_set);
+    return pthread_setaffinity_np(pthread_self(), sizeof(pinned_set),
+                                  &pinned_set);
+}
+
+static int trusty_bench_multi_cpus_teardown(void) {
+    if (trusty_bench_nb_cpu <= 1) {
+        return NO_ERROR; /* single-cpu bench: nothing to restore */
+    }
+
+    /* Rebuild a set holding every cpu slot and hand it back to the thread. */
+    cpu_set_t all_cpus;
+    CPU_ZERO(&all_cpus);
+    for (int cpu = 0; cpu < SMP_MAX_CPUS; cpu++) {
+        CPU_SET(cpu, &all_cpus);
+    }
+    return pthread_setaffinity_np(pthread_self(), sizeof(all_cpus),
+                                  &all_cpus);
+}
+#elif !defined(TRUSTY_USERSPACE) && WITH_SMP
+static int trusty_bench_multi_cpus_setup(void) {
+    if (trusty_bench_nb_cpu <= 1) {
+        return NO_ERROR; /* single-cpu bench: no pinning requested */
+    }
+
+    /* The cpu index is the quotient of the flattened parameter index. */
+    const int pin_cpu = bench_state.cur_param_idx / trusty_cur_bench_nb_params;
+    if (pin_cpu >= SMP_MAX_CPUS || !mp_is_cpu_active(pin_cpu)) {
+        return EINVAL; /* cpu index out of range, or cpu not active */
+    }
+    thread_set_pinned_cpu(get_current_thread(), pin_cpu);
+    return NO_ERROR;
+}
+
+static int trusty_bench_multi_cpus_teardown(void) {
+    if (trusty_bench_nb_cpu > 1) { /* -1: presumably unpins (confirm) */
+        thread_set_pinned_cpu(get_current_thread(), -1);
+    }
+    return NO_ERROR;
+}
+#else
+static int trusty_bench_multi_cpus_setup(void) {
+    return NO_ERROR; /* fallback build: no cpu pinning to set up */
+}
+
+static int trusty_bench_multi_cpus_teardown(void) {
+    return NO_ERROR; /* fallback build: no cpu pinning to undo */
+}
+#endif
+
+/**
  * trusty_bench_update_metric -  Update the appropriate metric with the value
  * returned by BENCH_RESULT
  * @m:              The metric whose aggregate needs to be updated.
@@ -177,8 +300,8 @@
 }
 
 /**
- * trusty_bench_run_metrics -        Run All Metric Updaters after one iteration
- * of bench function for all param/metric in the last BENCH.
+ * trusty_bench_reset_metrics -        Run All Metric Updaters after one
+ * iteration of bench function for all param/metric in the last BENCH.
  * @metric_list:        List of metrics aggregated during all BENCH runs.
  * @param_idx:          Index of the current parameter in the param_array of
  *                      BENCH.
@@ -216,68 +339,94 @@
     static void suite_name##_teardown(void)
 
 /**
- * BENCH_RESULT -       Declare a metric name for the corresponding BENCH and
- *                      declare the functions to update it after every iteration
+ * BENCH_RESULT_INNER -       Declare a metric name for the corresponding BENCH
+ * and declare the functions to update it after every iteration
  * @suite_name:         Identifier of the current suite.
  * @bench_name:         Unique identifier of the Bench in the suite.
  * @metric_name:        Name of the metric to print in the result table.
+ * @formatted_value_cb:        [optional] A callback of
+ * trusty_bench_get_formatted_value_callback_t type for formatting the result
+ * value to a string
+ * @param_name_cb:              [optional] A callback of
+ * trusty_bench_get_param_name_callback_t type for formatting the param name
  */
-#define BENCH_RESULT(suite_name, bench_name, metric_name)                        \
-    static int64_t update_##suite_name##_##bench_name##_##metric_name(void);     \
-    static struct bench_metric_list_node                                         \
-            suite_name##_##bench_name##_##metric_name##_node = {                 \
-                    .node = LIST_INITIAL_CLEARED_VALUE,                          \
-                    .metric = {0, 0, 0, {INT32_MAX, 0, 0}},                      \
-                    .name = STRINGIFY(metric_name),                              \
-                    .param_idx = 0,                                              \
-                    .bench_result =                                              \
-                            update_##suite_name##_##bench_name##_##metric_name}; \
-    __attribute__((constructor)) void                                            \
-            suite_name##_##bench_name##_##metric_name##_add(void) {              \
-        list_add_tail(&suite_name##_##bench_name##_metric_list,                  \
-                      &suite_name##_##bench_name##_##metric_name##_node.node);   \
-    }                                                                            \
-                                                                                 \
+#define BENCH_RESULT_INNER(suite_name, bench_name, metric_name,                 \
+                           formatted_value_cb_, param_name_cb_)                 \
+    static int64_t update_##suite_name##_##bench_name##_##metric_name(void);    \
+    static struct bench_metric_list_node                                        \
+            suite_name##_##bench_name##_##metric_name##_node = {                \
+                    .node = LIST_INITIAL_CLEARED_VALUE,                         \
+                    .metric = {0, 0, 0, {INT32_MAX, 0, 0}},                     \
+                    .name = STRINGIFY(metric_name),                             \
+                    .param_idx = 0,                                             \
+                    .bench_result =                                             \
+                            update_##suite_name##_##bench_name##_##metric_name, \
+                    .formatted_value_cb = formatted_value_cb_,                  \
+                    .param_name_cb = param_name_cb_};                           \
+    __attribute__((constructor)) void                                           \
+            suite_name##_##bench_name##_##metric_name##_add(void) {             \
+        list_add_tail(&suite_name##_##bench_name##_metric_list,                 \
+                      &suite_name##_##bench_name##_##metric_name##_node.node);  \
+    }                                                                           \
+                                                                                \
     static int64_t update_##suite_name##_##bench_name##_##metric_name(void)
 
-/**
- * struct benchmark_internal_state - Store internals for current bench.
- * @last_bench_body_duration:   nanoseconds duration of the last execution of
- *                              the bench body.
- * @cur_param_idx:              index of current parameter in param_array.
- */
-static struct benchmark_internal_state {
-    int64_t last_bench_body_duration;
-    size_t cur_param_idx;
-} bench_state;
+/* Dispatch Mechanics for BENCH_RESULT */
+#define BENCH_RESULT_3(suite_name, bench_name, metric_name) \
+    BENCH_RESULT_INNER(suite_name, bench_name, metric_name, 0, 0)
+#define BENCH_RESULT_4(suite_name, bench_name, metric_name, \
+                       formatted_value_cb)                  \
+    BENCH_RESULT_INNER(suite_name, bench_name, metric_name, \
+                       formatted_value_cb, 0)
+#define BENCH_RESULT_5(suite_name, bench_name, metric_name, \
+                       formatted_value_cb, param_name_cb)   \
+    BENCH_RESULT_INNER(suite_name, bench_name, metric_name, \
+                       formatted_value_cb, param_name_cb)
 
 /**
- * bench_get_duration_ns - convenience function to use in BENCH_RESULT to get
- * the duration of last bench body execution.
- *
- * Return: The duration of the last completed BENCH body in nanoseconds.
+ * BENCH_RESULT -           Dispatch on the number of arguments (3, 4 or 5).
+ * @suite_name:             Identifier of the current suite.
+ * @bench_name:             Unique identifier of the Bench in the suite.
+ * @metric_name:        Name of the metric to print in the result table.
+ * @formatted_value_cb:        [optional] A callback of
+ * trusty_bench_get_formatted_value_callback_t type for formatting the result
+ * value to a string
+ * @param_name_cb:              [optional] A callback of
+ * trusty_bench_get_param_name_callback_t type for formatting the param name
  */
-static inline int64_t bench_get_duration_ns(void) {
-    return bench_state.last_bench_body_duration;
-}
+#define BENCH_RESULT(...) \
+    CAT(BENCH_RESULT_, EVAL(NB_ARGS(__VA_ARGS__)))(__VA_ARGS__)
 
 /**
- * bench_get_param_idx - convenience function to use to get the
- * index of the current parameter BENCH_XXX is running for.
- * Return: The index of the parameter BENCH_XXX is running for.
+ * PARAM_TEST_NODES_SIMPLE -    Create the unparameterized test node lists for
+ *                              BENCH
+ * @suite_name:                 Identifier of the current suite.
+ * @bench_name:                 Unique identifier of the Bench in the suite.
  */
-static inline size_t bench_get_param_idx(void) {
-    return bench_state.cur_param_idx;
-}
+#define PARAM_TEST_NODES_SIMPLE(suite_name, bench_name)                        \
+    static struct test_list_node suite_name##_##bench_name##_bench_##_node = { \
+            .node = LIST_INITIAL_CLEARED_VALUE,                                \
+            .suite = STRINGIFY(suite_name),                                    \
+            .name = STRINGIFY(bench_name),                                     \
+            .func = suite_name##_##bench_name##_bench_,                        \
+            .needs_param = 0,                                                  \
+    };                                                                         \
+    /* Register the node in _test_list from a constructor. */                  \
+    __attribute__((constructor)) void                                          \
+            suite_name##_##bench_name##_bench_##_add(void) {                   \
+        list_add_tail(&_test_list,                                             \
+                      &suite_name##_##bench_name##_bench_##_node.node);        \
+    }
 
 /**
- * PARAM_TEST_NODES -    Create the unparameterized test node lists for BENCH
- * @suite_name:         Identifier of the current suite.
- * @bench_name:         Unique identifier of the Bench in the suite.
- * @params:             identifier of the param Array for parametric benches or
- *                      "non_parametric" for simple ones.
+ * PARAM_TEST_NODES_PARAMETRIC -    Create the unparameterized test node lists
+ *                                  for BENCH
+ * @suite_name:                     Identifier of the current suite.
+ * @bench_name:                     Unique identifier of the Bench in the suite.
+ * @params:                         identifier of the param Array for parametric
+ * benches
  */
-#define PARAM_TEST_NODES(suite_name, bench_name, params)                  \
+#define PARAM_TEST_NODES_PARAMETRIC(suite_name, bench_name, params)       \
     static struct test_list_node                                          \
             suite_name##_##bench_name##_bench_##params##_node = {         \
                     .node = LIST_INITIAL_CLEARED_VALUE,                   \
@@ -327,6 +476,8 @@
             list_pool[idx].name = entry->name;
             list_pool[idx].param_idx = idx_param;
             list_pool[idx].bench_result = entry->bench_result;
+            list_pool[idx].formatted_value_cb = entry->formatted_value_cb;
+            list_pool[idx].param_name_cb = entry->param_name_cb;
             list_add_tail(parameterized_list, &(list_pool[idx].node));
             ++idx;
         }
@@ -355,7 +506,7 @@
 }
 
 /**
- * get_extended_test_name - Print Status of Currently Running Bench.
+ * get_extended_bench_name - Print Status of Currently Running Bench.
  *
  * @test_name_in:   Name of the Current Unparameterized Test.
  * @test_name_out:  Name of the Current Unparameterized Test.
@@ -365,8 +516,8 @@
  *                  If test_name_out allocation/print failed returns asprintf
  *                  return code
  */
-static inline int get_extended_test_name(const char* test_name_in,
-                                         char** test_name_out) {
+static inline int get_extended_bench_name(const char* test_name_in,
+                                          char** test_name_out) {
     int res = snprintf(NULL, 0, "%s_%zu", test_name_in,
                        bench_state.cur_param_idx);
     *test_name_out = NULL;
@@ -395,93 +546,110 @@
  * @params:                 An array T array_name[nb_params] of parameter
  * @metric_list:            List of metric nodes to update
  */
-#define BENCH_CORE(suite_name, bench_name, nb_runs, nb_params, params,        \
-                   metric_list)                                               \
-    reset_vertical_print_widths();                                            \
-    trusty_bench_print_title(STRINGIFY(suite_name), STRINGIFY(bench_name),    \
-                             STRINGIFY(params));                              \
-    static trusty_bench_print_callback_t trusty_bench_print_cb =              \
-            &BENCHMARK_PRINT_CB;                                              \
-    for (size_t idx_param = 0; idx_param < nb_params; ++idx_param) {          \
-        bench_state.cur_param_idx = idx_param;                                \
-        char* extended_test_name = NULL;                                      \
-        int res_alloc = get_extended_test_name(                               \
-                STRINGIFY(bench_name##_##params), &extended_test_name);       \
-        if (res_alloc < 0) {                                                  \
-            TLOGE("ERROR %d expanding test name\n", res_alloc);               \
-            _test_context.all_ok = false;                                     \
-            _test_context.tests_failed++;                                     \
-            continue;                                                         \
-        }                                                                     \
-        TEST_BEGIN_FUNC(STRINGIFY(suite_name), extended_test_name);           \
-                                                                              \
-        int rc = suite_name##_setup();                                        \
-                                                                              \
-        if (rc != NO_ERROR) {                                                 \
-            TLOGE("ERROR %d during benchmark setup\n", rc);                   \
-            _test_context.all_ok = false;                                     \
-            _test_context.tests_failed++;                                     \
-            continue;                                                         \
-        }                                                                     \
-        int64_t overhead = trusty_bench_get_overhead();                       \
-                                                                              \
-        /* Cold Run */                                                        \
-        int64_t start_time;                                                   \
-        int64_t end_time;                                                     \
-        start_time = get_current_time_ns();                                   \
-        int64_t res = suite_name##_##bench_name##_inner_##params();           \
-        end_time = get_current_time_ns();                                     \
-                                                                              \
-        if (res != NO_ERROR) {                                                \
-            TLOGE("ERROR During Cold Run%" PRId64 "\n", res);                 \
-            _test_context.all_ok = false;                                     \
-            _test_context.tests_failed++;                                     \
-            continue;                                                         \
-        }                                                                     \
-                                                                              \
-        bench_state.last_bench_body_duration = end_time - start_time;         \
-        if (overhead >= bench_state.last_bench_body_duration) {               \
-            TLOGE("Benchmark internal function is too fast %" PRId64          \
-                  "ns, while the benchmark overhead is %" PRId64 "ns.",       \
-                  overhead, bench_state.last_bench_body_duration);            \
-        }                                                                     \
-                                                                              \
-        bench_state.last_bench_body_duration -= overhead;                     \
-                                                                              \
-        if (!_test_context.hard_fail && _test_context.all_ok) {               \
-            trusty_bench_run_metrics(&metric_list, idx_param, true);          \
-        }                                                                     \
-                                                                              \
-        for (size_t idx_run = 0; idx_run < nb_runs; ++idx_run) {              \
-            if (!_test_context.hard_fail && _test_context.all_ok) {           \
-                start_time = get_current_time_ns();                           \
-                res = suite_name##_##bench_name##_inner_##params();           \
-                end_time = get_current_time_ns();                             \
-                bench_state.last_bench_body_duration = end_time - start_time; \
-                if (overhead >= bench_state.last_bench_body_duration) {       \
-                    TLOGE("Benchmark internal function is too fast %" PRId64  \
-                          "ns, while the benchmark overhead is %" PRId64      \
-                          "ns.",                                              \
-                          overhead, bench_state.last_bench_body_duration);    \
-                }                                                             \
-                                                                              \
-                bench_state.last_bench_body_duration -= overhead;             \
-                if (res != NO_ERROR) {                                        \
-                    TLOGE("ERROR %" PRId64 "\n", res);                        \
-                }                                                             \
-            }                                                                 \
-            if (!_test_context.hard_fail && _test_context.all_ok) {           \
-                trusty_bench_run_metrics(&metric_list, idx_param, false);     \
-            }                                                                 \
-        }                                                                     \
-        suite_name##_teardown();                                              \
-        TEST_END_FUNC();                                                      \
-        free(extended_test_name);                                             \
-        extended_test_name = NULL;                                            \
-    }                                                                         \
-    trusty_bench_print_cb(&metric_list, nb_params, STRINGIFY(suite_name),     \
-                          STRINGIFY(bench_name##_##params));                  \
-    trusty_bench_get_param_name_cb = NULL;                                    \
+#define BENCH_CORE(suite_name, bench_name, nb_runs, nb_params, params,          \
+                   metric_list)                                                 \
+    reset_vertical_print_widths();                                              \
+    trusty_bench_print_title(STRINGIFY(suite_name), STRINGIFY(bench_name),      \
+                             STRINGIFY(params));                                \
+    static trusty_bench_print_callback_t trusty_bench_print_cb =                \
+            &BENCHMARK_PRINT_CB;                                                \
+    trusty_cur_bench_nb_params = nb_params;                                     \
+    for (size_t idx_param = 0; idx_param < (nb_params * trusty_bench_nb_cpu);   \
+         ++idx_param) {                                                         \
+        bench_state.cur_param_idx = idx_param;                                  \
+        char* extended_test_name = NULL;                                        \
+        int res_alloc = get_extended_bench_name(                                \
+                STRINGIFY(bench_name##_##params), &extended_test_name);         \
+        if (res_alloc < 0) {                                                    \
+            TLOGE("ERROR %d expanding test name\n", res_alloc);                 \
+            _test_context.all_ok = false;                                       \
+            _test_context.tests_failed++;                                       \
+            continue;                                                           \
+        }                                                                       \
+        TEST_BEGIN_FUNC(STRINGIFY(suite_name), extended_test_name);             \
+                                                                                \
+        int rc = trusty_bench_multi_cpus_setup();                               \
+        if (rc != NO_ERROR) {                                                   \
+            _test_context.skipped = true;                                       \
+            _test_context.tests_skipped++;                                      \
+        } else {                                                                \
+            rc = suite_name##_setup();                                          \
+        }                                                                       \
+        /* The early exits below bypass the free() that ends the loop body. */  \
+        if (_test_context.skipped) {                                            \
+            trusty_unittest_print_status(" SKIPPED");                           \
+            free(extended_test_name);                                           \
+            continue;                                                           \
+        } else if (rc != NO_ERROR) {                                            \
+            TLOGE("ERROR %d during benchmark setup\n", rc);                     \
+            _test_context.all_ok = false;                                       \
+            _test_context.tests_failed++;                                       \
+            free(extended_test_name);                                           \
+            continue;                                                           \
+        }                                                                       \
+        int64_t overhead = trusty_bench_get_overhead();                         \
+                                                                                \
+        /* Cold Run */                                                          \
+        int64_t start_time = get_current_time_ns();                             \
+        int64_t res = suite_name##_##bench_name##_inner_##params();             \
+        int64_t end_time = get_current_time_ns();                               \
+        /* NOTE(review): a failed cold run skips both teardown calls. */        \
+        if (res != NO_ERROR) {                                                  \
+            TLOGE("ERROR During Cold Run%" PRId64 "\n", res);                   \
+            _test_context.all_ok = false;                                       \
+            _test_context.tests_failed++;                                       \
+            free(extended_test_name);                                           \
+            continue;                                                           \
+        }                                                                       \
+                                                                                \
+        bench_state.last_bench_body_duration = end_time - start_time;           \
+        if (5 * overhead >= bench_state.last_bench_body_duration) {             \
+            trusty_unittest_printf(                                             \
+                    "WARNING: Benchmark internal function is too fast %" PRId64 \
+                    "ns, while the benchmark overhead is %" PRId64 "ns.\n",     \
+                    bench_state.last_bench_body_duration, overhead);            \
+        }                                                                       \
+        /* Keep only the body time, net of the measurement overhead. */         \
+        bench_state.last_bench_body_duration -= overhead;                       \
+                                                                                \
+        if (!_test_context.hard_fail && _test_context.all_ok) {                 \
+            trusty_bench_run_metrics(&metric_list, idx_param, true);            \
+        }                                                                       \
+                                                                                \
+        for (size_t idx_run = 0; idx_run < nb_runs; ++idx_run) {                \
+            if (!_test_context.hard_fail && _test_context.all_ok) {             \
+                start_time = get_current_time_ns();                             \
+                res = suite_name##_##bench_name##_inner_##params();             \
+                end_time = get_current_time_ns();                               \
+                bench_state.last_bench_body_duration = end_time - start_time;   \
+                if (overhead >= bench_state.last_bench_body_duration) {         \
+                    TLOGE("Benchmark internal function is too fast %" PRId64    \
+                          "ns, while the benchmark overhead is %" PRId64        \
+                          "ns.\n",                                              \
+                          bench_state.last_bench_body_duration, overhead);      \
+                }                                                               \
+                                                                                \
+                bench_state.last_bench_body_duration -= overhead;               \
+                if (res != NO_ERROR) {                                          \
+                    TLOGE("ERROR %" PRId64 "\n", res);                          \
+                }                                                               \
+            }                                                                   \
+            if (!_test_context.hard_fail && _test_context.all_ok) {             \
+                trusty_bench_run_metrics(&metric_list, idx_param, false);       \
+            }                                                                   \
+        }                                                                       \
+        suite_name##_teardown();                                                \
+        rc = trusty_bench_multi_cpus_teardown();                                \
+        if (rc != NO_ERROR) {                                                   \
+            TLOGW("failed to reset CPU affinity: %d\n", rc);                    \
+        }                                                                       \
+        TEST_END_FUNC();                                                        \
+        free(extended_test_name);                                               \
+    }                                                                           \
+    trusty_bench_print_cb(&metric_list, (nb_params * trusty_bench_nb_cpu),      \
+                          STRINGIFY(suite_name),                                \
+                          STRINGIFY(bench_name##_##params));                    \
+    trusty_bench_get_param_name_cb = NULL;                                      \
     trusty_bench_get_formatted_value_cb = NULL
 
 /**
@@ -494,8 +662,8 @@
  * @params:                 An array T array_name[nb_params] of parameter
  * @nb_params:              Number of parameters in the parameter Array
  */
-#define BENCH_PARAMETERIZED_PTR(suite_name, bench_name, nb_runs, params,         \
-                                nb_params)                                       \
+#define BENCH_PARAMETERIZED_PTR(nb_cpu, suite_name, bench_name, nb_runs,         \
+                                params, nb_params)                               \
     static int suite_name##_##bench_name##_inner_##params(void);                 \
     static void suite_name##_##bench_name##_bench_##params(void);                \
     static struct list_node suite_name##_##bench_name##_metric_list =            \
@@ -505,10 +673,11 @@
                     suite_name##_##bench_name##_metric_##params##_list);         \
                                                                                  \
     static void suite_name##_##bench_name##_bench_##params(void) {               \
+        trusty_bench_nb_cpu = nb_cpu;                                            \
         struct bench_metric_list_node* metric_pool = set_param_metric(           \
                 &suite_name##_##bench_name##_metric_list,                        \
                 &suite_name##_##bench_name##_metric_##params##_list,             \
-                nb_params);                                                      \
+                (nb_params * trusty_bench_nb_cpu));                              \
         if (metric_pool == NULL) {                                               \
             _test_context.hard_fail = true;                                      \
             return;                                                              \
@@ -528,8 +697,8 @@
  * @nb_runs:                The number of execution of its body for each param
  * @params:                 An array T array_name[nb_params] of parameter
  */
-#define BENCH_PARAMETERIZED(suite_name, bench_name, nb_runs, params) \
-    BENCH_PARAMETERIZED_PTR(suite_name, bench_name, nb_runs, params, \
+#define BENCH_PARAMETERIZED(nb_cpu, suite_name, bench_name, nb_runs, params) \
+    BENCH_PARAMETERIZED_PTR(nb_cpu, suite_name, bench_name, nb_runs, params, \
                             countof(params))
 
 /**
@@ -538,30 +707,30 @@
  * @bench_name:         Unique identifier of the Bench in the suite.
  * @nb_runs:            The number of execution of its body.
  */
-#define BENCH_SIMPLE(suite_name, bench_name, nb_runs)                    \
-    static int suite_name##_##bench_name##_inner_non_parametric(void);   \
-    static void suite_name##_##bench_name##_bench_non_parametric(void);  \
-    static struct list_node suite_name##_##bench_name##_metric_list =    \
-            LIST_INITIAL_VALUE(suite_name##_##bench_name##_metric_list); \
-    static void suite_name##_##bench_name##_bench_non_parametric(void) { \
-        bench_state.cur_param_idx = 0;                                   \
-        BENCH_CORE(suite_name, bench_name, nb_runs, 1, non_parametric,   \
-                   suite_name##_##bench_name##_metric_list);             \
-    }                                                                    \
-                                                                         \
-    PARAM_TEST_NODES(suite_name, bench_name, non_parametric)             \
-    static int suite_name##_##bench_name##_inner_non_parametric(void)
-
-/*
- * A few helper macros for static dispatch of BENCH
- */
-#define NB_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, N, ...) N
-#define NB_ARGS(...) NB_ARGS_HELPER(__VA_ARGS__, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-
-#define CAT(a, ...) PRIMITIVE_CAT(a, __VA_ARGS__)
-#define PRIMITIVE_CAT(a, ...) a##__VA_ARGS__
-
-#define EVAL(...) __VA_ARGS__
+#define BENCH_SIMPLE(nb_cpu, suite_name, bench_name, nb_runs)                  \
+    static int suite_name##_##bench_name##_inner_(void);                       \
+    static void suite_name##_##bench_name##_bench_(void);                      \
+    static struct list_node suite_name##_##bench_name##_metric_list =          \
+            LIST_INITIAL_VALUE(suite_name##_##bench_name##_metric_list);       \
+    static struct list_node suite_name##_##bench_name##_metric_cpu##_list =    \
+            LIST_INITIAL_VALUE(suite_name##_##bench_name##_metric_cpu##_list); \
+    static void suite_name##_##bench_name##_bench_(void) {                     \
+        bench_state.cur_param_idx = 0;                                         \
+        trusty_bench_nb_cpu = nb_cpu;                                          \
+        struct bench_metric_list_node* metric_pool = set_param_metric(         \
+                &suite_name##_##bench_name##_metric_list,                      \
+                &suite_name##_##bench_name##_metric_cpu##_list,                \
+                trusty_bench_nb_cpu);                                          \
+        if (metric_pool == NULL) {                                             \
+            _test_context.hard_fail = true;                                    \
+            return;                                                            \
+        }                                                                      \
+        BENCH_CORE(suite_name, bench_name, nb_runs, 1, ,                       \
+                   suite_name##_##bench_name##_metric_cpu##_list);             \
+    }                                                                          \
+    /* User code following the macro forms the body of ..._inner_(). */       \
+    PARAM_TEST_NODES(suite_name, bench_name)                                   \
+    static int suite_name##_##bench_name##_inner_(void)
 
 /*
  * BENCH - Routing the BENCH macros depending on its number of parameters.
@@ -583,6 +752,31 @@
  * @nb_params:              [optional] if 4th parameter is a pointer, Number of
  *                          parameters in the parameter Array
  */
-#define BENCH(...) CAT(BENCH_, EVAL(NB_ARGS(__VA_ARGS__)))(__VA_ARGS__)
+#define BENCH(...) CAT(BENCH_, EVAL(NB_ARGS(__VA_ARGS__)))(1, __VA_ARGS__)
+
+/**
+ * BENCH_ALL_CPU - Like BENCH, but passes SMP_MAX_CPUS instead of 1 as the CPU
+ *                 count. Takes 3, 4 or 5 parameters; aliasing an array to
+ *                 a pointer lets other macros be reused across benches.
+ * @suite_name:             Identifier of the current suite.
+ * @bench_name:             Unique identifier of the Bench in the suite.
+ * @nb_runs:                The number of executions of its body for each param
+ * @params:                 [optional] An array T array_name[nb_params] of
+ *                          parameters, or a pointer T*; in the latter case a
+ *                          5th parameter is needed
+ * @nb_params:              [optional] if 4th parameter is a pointer, number of
+ *                          parameters in the parameter array
+ */
+#define BENCH_ALL_CPU(...) \
+    CAT(BENCH_, EVAL(NB_ARGS(__VA_ARGS__)))(SMP_MAX_CPUS, __VA_ARGS__)
+
+/*
+ * PARAM_TEST_NODES - Routing the PARAM_TEST_NODES macros depending on its
+ * number of parameters.
+ */
+#define PARAM_TEST_NODES_2 PARAM_TEST_NODES_SIMPLE
+#define PARAM_TEST_NODES_3 PARAM_TEST_NODES_PARAMETRIC
+#define PARAM_TEST_NODES(...) \
+    CAT(PARAM_TEST_NODES_, EVAL(NB_ARGS(__VA_ARGS__)))(__VA_ARGS__)
 
 __END_CDECLS
diff --git a/include/shared/lk/trusty_unittest.h b/include/shared/lk/trusty_unittest.h
index 9efc7a5..ed22548 100644
--- a/include/shared/lk/trusty_unittest.h
+++ b/include/shared/lk/trusty_unittest.h
@@ -96,6 +96,7 @@
 /**
  * struct test_context - struct representing the state of a test run.
  * @tests_total:    Number of conditions checked
+ * @tests_skipped:  Number of tests skipped
  * @tests_disabled: Number of disabled tests skipped
  * @tests_failed:   Number of conditions failed
  * @inst_name:      Name of the current parameter instantiation
@@ -104,10 +105,12 @@
  * @test_name:      Name of current test case
  * @test_param:     The current test parameter
  * @all_ok:         State of current test case
+ * @skipped:        Current test was skipped.
  * @hard_fail:      Type of test failure (when @all_ok is false)
  */
 struct test_context {
     unsigned int tests_total;
+    unsigned int tests_skipped;
     unsigned int tests_disabled;
     unsigned int tests_failed;
     const char* inst_name;
@@ -116,6 +119,7 @@
     const char* test_name;
     const void* test_param;
     bool all_ok;
+    bool skipped;
     bool hard_fail;
 };
 
@@ -216,12 +220,15 @@
     _test_context.test_name = test_name;
     _test_context.all_ok = true;
     _test_context.hard_fail = false;
+    _test_context.skipped = false;
     _test_context.tests_total++;
     trusty_unittest_print_status("RUN     ");
 }
 
 static inline void TEST_END_FUNC(void) {
-    if (_test_context.all_ok) {
+    if (_test_context.skipped) {
+        trusty_unittest_print_status(" SKIPPED");
+    } else if (_test_context.all_ok) {
         trusty_unittest_print_status("      OK");
     } else {
         trusty_unittest_print_status(" FAILED ");
@@ -248,7 +255,7 @@
         TEST_BEGIN_FUNC(STRINGIFY(suite_name), STRINGIFY(test_name));        \
         {                                                                    \
             pre;                                                             \
-            if (!_test_context.hard_fail) {                                  \
+            if (!_test_context.hard_fail && !_test_context.skipped) {        \
                 suite_name##_##test_name##_inner arg;                        \
             }                                                                \
             post;                                                            \
@@ -594,6 +601,10 @@
                 "[  PASSED  ] %d tests.\n",
                 _test_context.tests_total - _test_context.tests_failed);
     }
+    if (_test_context.tests_skipped) {
+        trusty_unittest_printf("[  SKIPPED ] %d tests.\n",
+                               _test_context.tests_skipped);
+    }
     if (_test_context.tests_disabled) {
         trusty_unittest_printf("[ DISABLED ] %d tests.\n",
                                _test_context.tests_disabled);
@@ -609,6 +620,23 @@
     return RUN_ALL_SUITE_TESTS(NULL);
 }
 
+/**
+ * GTEST_SKIP() - Skip current test
+ *
+ * This will skip the current test without triggering a failure. It uses the
+ * same test_abort label as the ASSERT_... macros. Calling this after a test has
+ * failed or calling ASSERT_.../EXPECT_... macros after GTEST_SKIP has jumped
+ * to test_abort is not supported. Use as a statement ending in a semicolon.
+ */
+#define GTEST_SKIP()                       \
+    do {                                   \
+        if (!_test_context.skipped) {      \
+            _test_context.skipped = true;  \
+            _test_context.tests_skipped++; \
+        }                                  \
+        goto test_abort;                   \
+    } while (0)
+
 #define ASSERT_EXPECT_TEST(op, op_pre, op_sep, op_args, is_hard_fail,        \
                            fail_action, vals_type, vals_format_placeholder,  \
                            print_cast, print_op, val1, val2, extra_msg...)   \
diff --git a/include/trusty_log.h b/include/trusty_log.h
index da390eb..75911a0 100644
--- a/include/trusty_log.h
+++ b/include/trusty_log.h
@@ -22,24 +22,24 @@
 
 #include <debug.h>
 
-#define _tlog(level, fmt, x...)                                  \
-    do {                                                         \
-        dprintf(level, "%s: %d: " fmt, TLOG_TAG, __LINE__, ##x); \
-    } while (0)
+#define _tlog(level, fmt, x...) dprintf(level, fmt, ##x)
+#define _vtlog(level, fmt, args) vdprintf(level, fmt, args)
 
-#define TLOG(fmt, x...) _tlog(ALWAYS, fmt, ##x)
+#define TLOG(fmt, x...) _tlog(ALWAYS, "%s: %d: " fmt, TLOG_TAG, __LINE__, ##x)
 
 /* debug  */
-#define TLOGD(fmt, x...) _tlog(SPEW, fmt, ##x)
+#define TLOGD(fmt, x...) _tlog(SPEW, "%s: %d: " fmt, TLOG_TAG, __LINE__, ##x)
 
 /* info */
-#define TLOGI(fmt, x...) _tlog(SPEW, fmt, ##x)
+#define TLOGI(fmt, x...) _tlog(SPEW, "%s: %d: " fmt, TLOG_TAG, __LINE__, ##x)
 
 /* warning */
-#define TLOGW(fmt, x...) _tlog(INFO, fmt, ##x)
+#define TLOGW(fmt, x...) _tlog(INFO, "%s: %d: " fmt, TLOG_TAG, __LINE__, ##x)
 
 /* error */
-#define TLOGE(fmt, x...) _tlog(CRITICAL, fmt, ##x)
+#define TLOGE(fmt, x...) \
+    _tlog(CRITICAL, "%s: %d: " fmt, TLOG_TAG, __LINE__, ##x)
 
 /* critical */
-#define TLOGC(fmt, x...) _tlog(CRITICAL, fmt, ##x)
+#define TLOGC(fmt, x...) \
+    _tlog(CRITICAL, "%s: %d: " fmt, TLOG_TAG, __LINE__, ##x)
diff --git a/kerneltests-inc.mk b/kerneltests-inc.mk
index ab69590..8dd8b09 100644
--- a/kerneltests-inc.mk
+++ b/kerneltests-inc.mk
@@ -26,6 +26,7 @@
 	trusty/kernel/app/cachetest \
 	trusty/kernel/app/consoletest \
 	trusty/kernel/app/dpctest \
+	trusty/kernel/app/list-ports \
 	trusty/kernel/app/memorytest \
 	trusty/kernel/app/memorylatencybench \
 	trusty/kernel/app/mmutest \
@@ -47,4 +48,4 @@
 
 endif
 
-include external/lk/kerneltests-inc.mk
+include $(LKROOT)/kerneltests-inc.mk
diff --git a/lib/arm_ffa/arm_ffa.c b/lib/arm_ffa/arm_ffa.c
index c0b8675..160ed7f 100644
--- a/lib/arm_ffa/arm_ffa.c
+++ b/lib/arm_ffa/arm_ffa.c
@@ -22,6 +22,8 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#define LOCAL_TRACE 0
+
 #include <assert.h>
 #include <err.h>
 #include <interface/arm_ffa/arm_ffa.h>
@@ -37,11 +39,12 @@
 #include <trace.h>
 
 static bool arm_ffa_init_is_success = false;
-uint16_t ffa_local_id;
-size_t ffa_buf_size;
-bool supports_ns_bit = false;
-void* ffa_tx;
-void* ffa_rx;
+static uint16_t ffa_local_id;
+static size_t ffa_buf_size;
+static void* ffa_tx;
+static void* ffa_rx;
+static bool supports_ns_bit = false;
+static bool supports_rx_release = false;
 
 static mutex_t ffa_rxtx_buffer_lock = MUTEX_INITIAL_VALUE(ffa_rxtx_buffer_lock);
 
@@ -145,6 +148,96 @@
     }
 }
 
+/*
+ * Call with ffa_rxtx_buffer_lock acquired and the ffa_tx buffer already
+ * populated with struct ffa_mtd. Transmit in a single fragment. On an
+ * FFA_MEM_RETRIEVE_RESP reply, r1 and r2 are stored through @total_len and
+ * @fragment_len respectively; either pointer may be NULL.
+ */
+static status_t arm_ffa_call_mem_retrieve_req(uint32_t* total_len,
+                                              uint32_t* fragment_len) {
+    struct smc_ret8 smc_ret;
+    struct ffa_mtd* req = ffa_tx;
+    size_t len;
+
+    DEBUG_ASSERT(is_mutex_held(&ffa_rxtx_buffer_lock));
+
+    len = offsetof(struct ffa_mtd, emad[0]) +
+          req->emad_count * sizeof(struct ffa_emad);
+
+    smc_ret = smc8(SMC_FC_FFA_MEM_RETRIEVE_REQ, len, len, 0, 0, 0, 0, 0);
+    switch (smc_ret.r0) {
+    case SMC_FC_FFA_MEM_RETRIEVE_RESP:
+        if (total_len) {
+            *total_len = (uint32_t)smc_ret.r1;
+        }
+        if (fragment_len) {
+            *fragment_len = (uint32_t)smc_ret.r2;
+        }
+        return NO_ERROR;
+    case SMC_FC_FFA_ERROR:
+        switch ((int)smc_ret.r2) {
+        case FFA_ERROR_NOT_SUPPORTED:
+            return ERR_NOT_SUPPORTED;
+        case FFA_ERROR_INVALID_PARAMETERS:
+            return ERR_INVALID_ARGS;
+        case FFA_ERROR_NO_MEMORY:
+            return ERR_NO_MEMORY;
+        case FFA_ERROR_DENIED:
+            return ERR_BAD_STATE;
+        case FFA_ERROR_ABORTED:
+            return ERR_CANCELLED;
+        default:
+            TRACEF("Unknown error: 0x%x\n", (unsigned int)smc_ret.r2);
+            return ERR_NOT_VALID;
+        }
+    default:
+        TRACEF("Unexpected function id returned 0x%08lx\n", smc_ret.r0);
+        return ERR_NOT_VALID;
+    }
+}
+
+/* Call with ffa_rxtx_buffer_lock acquired; @fragment_len must not be NULL. */
+static status_t arm_ffa_call_mem_frag_rx(uint64_t handle,
+                                         uint32_t offset,
+                                         uint32_t* fragment_len) {
+    struct smc_ret8 smc_ret;
+
+    DEBUG_ASSERT(is_mutex_held(&ffa_rxtx_buffer_lock));
+    smc_ret = smc8(SMC_FC_FFA_MEM_FRAG_RX, (uint32_t)handle, handle >> 32,
+                   offset, 0, 0, 0, 0);
+
+    /* FRAG_RX is followed by FRAG_TX on successful completion. */
+    switch (smc_ret.r0) {
+    case SMC_FC_FFA_MEM_FRAG_TX: {
+        uint64_t handle_out = smc_ret.r1 + ((uint64_t)smc_ret.r2 << 32);
+        if (handle != handle_out) {
+            TRACEF("Handle for response doesn't match the request, %" PRIu64
+                   " != %" PRIu64 "\n",
+                   handle, handle_out);
+            return ERR_NOT_VALID;
+        }
+        *fragment_len = (uint32_t)smc_ret.r3;
+        return NO_ERROR;
+    }
+    case SMC_FC_FFA_ERROR:
+        switch ((int)smc_ret.r2) {
+        case FFA_ERROR_NOT_SUPPORTED:
+            return ERR_NOT_SUPPORTED;
+        case FFA_ERROR_INVALID_PARAMETERS:
+            return ERR_INVALID_ARGS;
+        case FFA_ERROR_ABORTED:
+            return ERR_CANCELLED;
+        default:
+            TRACEF("Unexpected error %d\n", (int)smc_ret.r2);
+            return ERR_NOT_VALID;
+        }
+    default:
+        TRACEF("Unexpected function id returned 0x%08lx\n", smc_ret.r0);
+        return ERR_NOT_VALID;
+    }
+}
+
 static status_t arm_ffa_call_mem_relinquish(
         uint64_t handle,
         uint32_t flags,
@@ -242,6 +335,45 @@
     }
 }
 
+static status_t arm_ffa_call_rx_release(void) {
+    struct smc_ret8 smc_ret;
+
+    DEBUG_ASSERT(is_mutex_held(&ffa_rxtx_buffer_lock));
+
+    smc_ret = smc8(SMC_FC_FFA_RX_RELEASE, 0, 0, 0, 0, 0, 0, 0);
+    switch (smc_ret.r0) {
+    case SMC_FC_FFA_SUCCESS:
+    case SMC_FC64_FFA_SUCCESS:
+        return NO_ERROR;
+
+    case SMC_FC_FFA_ERROR:
+        switch ((int)smc_ret.r2) {
+        case FFA_ERROR_NOT_SUPPORTED:
+            return ERR_NOT_SUPPORTED;
+        case FFA_ERROR_DENIED:
+            return ERR_BAD_STATE;
+        default:
+            return ERR_NOT_VALID;
+        }
+    default:
+        return ERR_NOT_VALID;
+    }
+}
+
+static status_t arm_ffa_rx_release_is_implemented(bool* is_implemented) {
+    bool is_implemented_val;
+    status_t res = arm_ffa_call_features(SMC_FC_FFA_RX_RELEASE,
+                                         &is_implemented_val, NULL, NULL);
+    if (res != NO_ERROR) {
+        TRACEF("Failed to query for feature FFA_RX_RELEASE, err = %d\n", res);
+        return res;
+    }
+    if (is_implemented) {
+        *is_implemented = is_implemented_val;
+    }
+    return NO_ERROR;
+}
+
 static status_t arm_ffa_rxtx_map_is_implemented(bool* is_implemented,
                                                 size_t* buf_size_log2) {
     ffa_features2_t features2;
@@ -324,6 +456,310 @@
     return NO_ERROR;
 }
 
+/* Fills the FF-A tx buffer with a zeroed memory transaction descriptor that
+   names one receiver (ourselves), ready for FFA_MEM_RETRIEVE_REQ. */
+static void arm_ffa_populate_receive_req_tx_buffer(uint16_t sender_id,
+                                                   uint64_t handle,
+                                                   uint64_t tag) {
+    struct ffa_mtd* mtd = ffa_tx;
+    DEBUG_ASSERT(is_mutex_held(&ffa_rxtx_buffer_lock));
+
+    memset(mtd, 0, sizeof(*mtd));
+
+    /* The tag must match the one the sender used for this transaction. */
+    mtd->tag = tag;
+    mtd->handle = handle;
+    mtd->sender_id = sender_id;
+
+    /*
+     * Only retrieval on our own behalf is supported for now.
+     * TODO: Also support stream endpoints. Possibly more than one.
+     */
+    memset(&mtd->emad[0], 0, sizeof(mtd->emad[0]));
+    mtd->emad[0].mapd.endpoint_id = ffa_local_id;
+    mtd->emad_count = 1;
+}
+
+/* Calls arm_ffa_call_mem_retrieve_req() and validates the reported lengths.
+   Populate the tx buffer before calling; sender_id and handle are unused. */
+static status_t arm_ffa_mem_retrieve(uint16_t sender_id,
+                                     uint64_t handle,
+                                     uint32_t* len,
+                                     uint32_t* fragment_len) {
+    status_t res = NO_ERROR;
+
+    DEBUG_ASSERT(is_mutex_held(&ffa_rxtx_buffer_lock));
+    DEBUG_ASSERT(len);
+
+    uint32_t len_out, fragment_len_out;
+    res = arm_ffa_call_mem_retrieve_req(&len_out, &fragment_len_out);
+    if (res != NO_ERROR) {
+        TRACEF("FF-A memory retrieve request failed, err = %d\n", res);
+        return res;
+    }
+    LTRACEF("total_len: %u, fragment_len: %u\n", len_out, fragment_len_out);
+    if (fragment_len_out > len_out) {
+        TRACEF("Fragment length larger than total length %u > %u\n",
+               fragment_len_out, len_out);
+        return ERR_IO;
+    }
+
+    /* Check that the first fragment fits in our buffer */
+    if (fragment_len_out > ffa_buf_size) {
+        TRACEF("Fragment length %u larger than buffer size\n",
+               fragment_len_out);
+        return ERR_IO;
+    }
+
+    if (fragment_len) {
+        *fragment_len = fragment_len_out;
+    }
+    if (len) {
+        *len = len_out;
+    }
+
+    return NO_ERROR;
+}
+
+status_t arm_ffa_mem_address_range_get(struct arm_ffa_mem_frag_info* frag_info,
+                                       size_t index,
+                                       paddr_t* addr,
+                                       size_t* size) {
+    uint32_t page_count;
+    size_t frag_idx;
+
+    DEBUG_ASSERT(frag_info);
+
+    if (index < frag_info->start_index ||
+        index >= frag_info->start_index + frag_info->count) {
+        return ERR_OUT_OF_RANGE;
+    }
+
+    frag_idx = index - frag_info->start_index;
+
+    page_count = frag_info->address_ranges[frag_idx].page_count;
+    LTRACEF("address %p, page_count 0x%x\n",
+            (void*)frag_info->address_ranges[frag_idx].address,
+            frag_info->address_ranges[frag_idx].page_count);
+    if (page_count < 1 || ((size_t)page_count > (SIZE_MAX / FFA_PAGE_SIZE))) {
+        TRACEF("bad page count 0x%x at %zu\n", page_count, index);
+        return ERR_IO;
+    }
+
+    if (addr) {
+        *addr = (paddr_t)frag_info->address_ranges[frag_idx].address;
+    }
+    if (size) {
+        *size = (size_t)page_count * FFA_PAGE_SIZE;
+    }
+
+    return NO_ERROR;
+}
+
+status_t arm_ffa_mem_retrieve_start(uint16_t sender_id,
+                                    uint64_t handle,
+                                    uint64_t tag,
+                                    uint32_t* address_range_count,
+                                    uint* arch_mmu_flags,
+                                    struct arm_ffa_mem_frag_info* frag_info) {
+    status_t res;
+    struct ffa_mtd* mtd;
+    struct ffa_emad* emad;
+    struct ffa_comp_mrd* comp_mrd;
+    uint32_t computed_len;
+    uint32_t header_size;
+
+    uint32_t total_len;
+    uint32_t fragment_len;
+
+    DEBUG_ASSERT(frag_info);
+
+    mutex_acquire(&ffa_rxtx_buffer_lock);
+    arm_ffa_populate_receive_req_tx_buffer(sender_id, handle, tag);
+    res = arm_ffa_mem_retrieve(sender_id, handle, &total_len, &fragment_len);
+
+    if (res != NO_ERROR) {
+        TRACEF("FF-A memory retrieve failed err=%d\n", res);
+        return res;
+    }
+
+    if (fragment_len <
+        offsetof(struct ffa_mtd, emad) + sizeof(struct ffa_emad)) {
+        TRACEF("Fragment too short for memory transaction descriptor\n");
+        return ERR_IO;
+    }
+
+    mtd = ffa_rx;
+    emad = mtd->emad;
+
+    /*
+     * We don't retrieve the memory on behalf of anyone else, so we only
+     * expect one endpoint memory access descriptor (emad) in the response.
+     */
+    if (mtd->emad_count != 1) {
+        TRACEF("unexpected response count %d != 1\n", mtd->emad_count);
+        return ERR_IO;
+    }
+
+    LTRACEF("comp_mrd_offset: %u\n", emad->comp_mrd_offset);
+    if (emad->comp_mrd_offset + sizeof(*comp_mrd) > fragment_len) {
+        TRACEF("Fragment length %u too short for comp_mrd_offset %u\n",
+               fragment_len, emad->comp_mrd_offset);
+        return ERR_IO;
+    }
+
+    comp_mrd = ffa_rx + emad->comp_mrd_offset;
+
+    uint32_t address_range_count_out = comp_mrd->address_range_count;
+    frag_info->address_ranges = comp_mrd->address_range_array;
+    LTRACEF("address_range_count: %u\n", address_range_count_out);
+
+    computed_len = emad->comp_mrd_offset +
+                   offsetof(struct ffa_comp_mrd, address_range_array) +
+                   sizeof(struct ffa_cons_mrd) * comp_mrd->address_range_count;
+    if (total_len != computed_len) {
+        TRACEF("Reported length %u != computed length %u\n", total_len,
+               computed_len);
+        return ERR_IO;
+    }
+
+    header_size = emad->comp_mrd_offset +
+                  offsetof(struct ffa_comp_mrd, address_range_array);
+    frag_info->count =
+            (fragment_len - header_size) / sizeof(struct ffa_cons_mrd);
+    LTRACEF("Descriptors in fragment %u\n", frag_info->count);
+
+    if (frag_info->count * sizeof(struct ffa_cons_mrd) + header_size !=
+        fragment_len) {
+        TRACEF("fragment length %u, contains partial descriptor\n",
+               fragment_len);
+        return ERR_IO;
+    }
+
+    frag_info->received_len = fragment_len;
+    frag_info->start_index = 0;
+
+    uint arch_mmu_flags_out = 0;
+
+    switch (mtd->flags & FFA_MTD_FLAG_TYPE_MASK) {
+    case FFA_MTD_FLAG_TYPE_SHARE_MEMORY:
+        /*
+         * If memory is shared, assume it is not safe to execute out of. This
+         * specifically indicates that another party may have access to the
+         * memory.
+         */
+        arch_mmu_flags_out |= ARCH_MMU_FLAG_PERM_NO_EXECUTE;
+        break;
+    case FFA_MTD_FLAG_TYPE_LEND_MEMORY:
+        break;
+    case FFA_MTD_FLAG_TYPE_DONATE_MEMORY:
+        TRACEF("Unexpected donate memory transaction type is not supported\n");
+        return ERR_NOT_IMPLEMENTED;
+    default:
+        TRACEF("Unknown memory transaction type: 0x%x\n", mtd->flags);
+        return ERR_NOT_VALID;
+    }
+
+    switch (mtd->memory_region_attributes & ~FFA_MEM_ATTR_NONSECURE) {
+    case FFA_MEM_ATTR_DEVICE_NGNRE:
+        arch_mmu_flags_out |= ARCH_MMU_FLAG_UNCACHED_DEVICE;
+        break;
+    case FFA_MEM_ATTR_NORMAL_MEMORY_UNCACHED:
+        arch_mmu_flags_out |= ARCH_MMU_FLAG_UNCACHED;
+        break;
+    case (FFA_MEM_ATTR_NORMAL_MEMORY_CACHED_WB | FFA_MEM_ATTR_INNER_SHAREABLE):
+        arch_mmu_flags_out |= ARCH_MMU_FLAG_CACHED;
+        break;
+    default:
+        TRACEF("Invalid memory attributes, 0x%x\n",
+               mtd->memory_region_attributes);
+        return ERR_NOT_VALID;
+    }
+
+    if (!(emad->mapd.memory_access_permissions & FFA_MEM_PERM_RW)) {
+        arch_mmu_flags_out |= ARCH_MMU_FLAG_PERM_RO;
+    }
+    if (emad->mapd.memory_access_permissions & FFA_MEM_PERM_NX) {
+        /*
+         * Don't allow executable mappings if the stage 2 page tables don't
+         * allow it. The hardware allows the stage 2 NX bit to only apply to
+         * EL1, not EL0, but neither FF-A nor LK can currently express this, so
+         * disallow both if FFA_MEM_PERM_NX is set.
+         */
+        arch_mmu_flags_out |= ARCH_MMU_FLAG_PERM_NO_EXECUTE;
+    }
+
+    if (!supports_ns_bit ||
+        (mtd->memory_region_attributes & FFA_MEM_ATTR_NONSECURE)) {
+        arch_mmu_flags_out |= ARCH_MMU_FLAG_NS;
+        /* Regardless of origin, we don't want to execute out of NS memory. */
+        arch_mmu_flags_out |= ARCH_MMU_FLAG_PERM_NO_EXECUTE;
+    }
+
+    if (arch_mmu_flags) {
+        *arch_mmu_flags = arch_mmu_flags_out;
+    }
+    if (address_range_count) {
+        *address_range_count = address_range_count_out;
+    }
+
+    return res;
+}
+
+/* This assumes that the fragment is completely composed of memory
+   region descriptors (struct ffa_cons_mrd) */
+status_t arm_ffa_mem_retrieve_next_frag(
+        uint64_t handle,
+        struct arm_ffa_mem_frag_info* frag_info) {
+    status_t res;
+    uint32_t fragment_len;
+
+    mutex_acquire(&ffa_rxtx_buffer_lock);
+
+    res = arm_ffa_call_mem_frag_rx(handle, frag_info->received_len,
+                                   &fragment_len);
+
+    if (res != NO_ERROR) {
+        TRACEF("Failed to get memory retrieve fragment, err = %d\n", res);
+        return res;
+    }
+
+    frag_info->received_len += fragment_len;
+    frag_info->start_index += frag_info->count;
+
+    frag_info->count = fragment_len / sizeof(struct ffa_cons_mrd);
+    if (frag_info->count * sizeof(struct ffa_cons_mrd) != fragment_len) {
+        TRACEF("fragment length %u, contains partial descriptor\n",
+               fragment_len);
+        return ERR_IO;
+    }
+
+    frag_info->address_ranges = ffa_rx;
+
+    return NO_ERROR;
+}
+
+status_t arm_ffa_rx_release(void) {
+    status_t res;
+    ASSERT(is_mutex_held(&ffa_rxtx_buffer_lock));
+
+    if (!supports_rx_release) {
+        res = NO_ERROR;
+    } else {
+        res = arm_ffa_call_rx_release();
+    }
+
+    mutex_release(&ffa_rxtx_buffer_lock);
+
+    if (res == ERR_NOT_SUPPORTED) {
+        TRACEF("Tried to release rx buffer when the operation is not supported!\n");
+    } else if (res != NO_ERROR) {
+        TRACEF("Failed to release rx buffer, err = %d\n", res);
+        return res;
+    }
+    return NO_ERROR;
+}
+
 status_t arm_ffa_mem_relinquish(uint64_t handle) {
     status_t res;
 
@@ -372,6 +808,18 @@
         return res;
     }
 
+    res = arm_ffa_rx_release_is_implemented(&is_implemented);
+    if (res != NO_ERROR) {
+        TRACEF("Error checking if FFA_RX_RELEASE is implemented (err=%d)\n",
+               res);
+        return res;
+    }
+    if (is_implemented) {
+        supports_rx_release = true;
+    } else {
+        TRACEF("FFA_RX_RELEASE is not implemented\n");
+    }
+
     res = arm_ffa_rxtx_map_is_implemented(&is_implemented, &buf_size_log2);
     if (res != NO_ERROR) {
         TRACEF("Error checking if FFA_RXTX_MAP is implemented (err=%d)\n", res);
diff --git a/lib/arm_ffa/include/lib/arm_ffa/arm_ffa.h b/lib/arm_ffa/include/lib/arm_ffa/arm_ffa.h
index 04ad3b9..aaaeb1c 100644
--- a/lib/arm_ffa/include/lib/arm_ffa/arm_ffa.h
+++ b/lib/arm_ffa/include/lib/arm_ffa/arm_ffa.h
@@ -23,6 +23,7 @@
 
 #pragma once
 
+#include <arch/ops.h>
 #include <stdbool.h>
 
 /**
@@ -35,16 +36,6 @@
  */
 bool arm_ffa_is_init(void);
 
-/*
- * TODO: Temporary share variables with lib/sm/shared_mem.c while
- * implementation is being moved to lib/arm_ffa.
- */
-extern uint16_t ffa_local_id;
-extern size_t ffa_buf_size;
-extern bool supports_ns_bit;
-extern void* ffa_tx;
-extern void* ffa_rx;
-
 /**
  * arm_ffa_mem_relinquish() - Relinquish Trusty's access to a memory region.
  * @handle:        Handle of object to relinquish.
@@ -53,3 +44,74 @@
  * reclaim the memory (if it has not been retrieved by anyone else).
  */
 status_t arm_ffa_mem_relinquish(uint64_t handle);
+
+struct ffa_cons_mrd;
+
+/**
+ * struct arm_ffa_mem_frag_info - A fragment of an FF-A shared memory object.
+ * @received_len: Total length received so far, including this fragment.
+ * @start_index: Index, in the whole object, of this fragment's first range.
+ * @count: Number of elements in the address range buffer.
+ * @address_ranges: The array of address ranges.
+ */
+struct arm_ffa_mem_frag_info {
+    uint32_t received_len;
+    size_t start_index;
+    uint32_t count;
+    struct ffa_cons_mrd* address_ranges;
+};
+
+/**
+ * arm_ffa_mem_address_range_get() - Gets one address range from the buffer.
+ * @buffer: Buffer that describes a part of an FF-A shared memory object.
+ * @index: The index of the address range to retrieve.
+ * @addr: [out] Start of the retrieved address range.
+ * @size: [out] Size of the retrieved address range.
+ *
+ * Return: 0 on success, LK error code on failure.
+ */
+status_t arm_ffa_mem_address_range_get(struct arm_ffa_mem_frag_info* buffer,
+                                       size_t index,
+                                       paddr_t* addr,
+                                       size_t* size);
+/**
+ * arm_ffa_mem_retrieve_start() - Retrieve a memory region from the
+ *                                SPMC/hypervisor for access from Trusty.
+ * @sender_id: Id of the memory owner.
+ * @handle: The handle identifying the memory region in the transaction.
+ * @tag: The tag identifying the transaction.
+ * @address_range_count: [out] The number of address ranges retrieved.
+ * @arch_mmu_flags: [out] The MMU flags of the received memory.
+ * @frag_info: [out] The shared memory object fragment.
+ *
+ * Only expects one descriptor in the returned memory access descriptor array,
+ * since we don't retrieve memory on behalf of anyone else.
+ *
+ * Grabs RXTX buffer lock. The lock must be subsequently released through
+ * `arm_ffa_rx_release()`.
+ *
+ * Return: 0 on success, LK error code on failure.
+ */
+status_t arm_ffa_mem_retrieve_start(uint16_t sender_id,
+                                    uint64_t handle,
+                                    uint64_t tag,
+                                    uint32_t* address_range_count,
+                                    uint* arch_mmu_flags,
+                                    struct arm_ffa_mem_frag_info* frag_info);
+/**
+ * arm_ffa_mem_retrieve_next_frag() - Performs an FF-A call to retrieve the
+ *                                    next fragment of a shared memory object.
+ * @handle: The handle of the FF-A memory object to retrieve.
+ * @frag_info: [out] the retrieved fragment of the memory object.
+ *
+ * Return: 0 on success, LK error code on failure.
+ */
+status_t arm_ffa_mem_retrieve_next_frag(
+        uint64_t handle,
+        struct arm_ffa_mem_frag_info* frag_info);
+/**
+ * arm_ffa_rx_release() - Relinquish ownership of the RX buffer.
+ *
+ * Return: 0 on success, LK error code on failure.
+ */
+status_t arm_ffa_rx_release(void);
diff --git a/lib/backtrace/backtrace.c b/lib/backtrace/backtrace.c
index fa10216..31b89ce 100644
--- a/lib/backtrace/backtrace.c
+++ b/lib/backtrace/backtrace.c
@@ -27,6 +27,7 @@
 #include <lib/backtrace/backtrace.h>
 #include <lib/backtrace/symbolize.h>
 #include <lib/trusty/trusty_app.h>
+#include <trusty/uuid.h>
 
 /*
  * Traces on release builds look like this for:
@@ -86,11 +87,9 @@
 static void print_stack_address(struct thread* thread, uintptr_t addr) {
 #if TEST_BUILD
     /*
-     * For security reasons, never print absolute addresses in
-     * release builds
+     * For security, never print absolute addresses in release builds.
      */
-    printf("0x%" PRI0xPTR, addr);
-    return;
+    printf("0x%" PRI0xPTR " ", addr);
 #endif
 
     if (is_on_user_stack(thread, addr)) {
@@ -331,7 +330,9 @@
     printf("\nBacktrace for thread: %s\n", thread->name);
     struct trusty_app *app = current_trusty_app();
     if (app) {
-        printf("(app: %s)\n", app->props.app_name);
+        char uuid_str[UUID_STR_SIZE];
+        uuid_to_str(&app->props.uuid, uuid_str);
+        printf("(app: %s uuid: %s)\n", app->props.app_name, uuid_str);
     }
 
     dump_backtrace_etc(thread, &frame);
diff --git a/lib/backtrace/rules.mk b/lib/backtrace/rules.mk
index 497ed26..22b4bc2 100644
--- a/lib/backtrace/rules.mk
+++ b/lib/backtrace/rules.mk
@@ -34,6 +34,7 @@
 MODULE := $(LOCAL_DIR)
 
 MODULE_DEPS := \
+	trusty/kernel/lib/libc-ext \
 	trusty/kernel/lib/trusty \
 
 MODULE_SRCS := \
diff --git a/lib/extmem/include/lib/extmem/extmem.h b/lib/extmem/include/lib/extmem/extmem.h
index 2ddfba9..5f652c0 100644
--- a/lib/extmem/include/lib/extmem/extmem.h
+++ b/lib/extmem/include/lib/extmem/extmem.h
@@ -230,7 +230,7 @@
 /**
  * ext_mem_get_vmm_obj - Lookup shared memory object.
  * @client_id:      Id of external entity where the memory originated.
- * @mem_obj_id:     Id of shared memory opbject to lookup and return.
+ * @mem_obj_id:     Id of shared memory object to lookup and return.
  * @tag:            Tag of the memory. If a non-FF-A object, use 0.
  * @size:           Size hint for object. Caller expects an object at least this
  *                  big.
diff --git a/lib/libc-trusty/rules.mk b/lib/libc-trusty/rules.mk
index 964b48b..f13cbe2 100644
--- a/lib/libc-trusty/rules.mk
+++ b/lib/libc-trusty/rules.mk
@@ -32,15 +32,6 @@
 MODULE_DEPS += lib/heap
 endif
 
-# Generate a random 32-bit seed for the RNG
-KERNEL_LIBC_RANDSEED_HEX := $(shell xxd -l4 -g0 -p /dev/urandom)
-KERNEL_LIBC_RANDSEED := 0x$(KERNEL_LIBC_RANDSEED_HEX)U
-
-MODULE_DEFINES += \
-	KERNEL_LIBC_RANDSEED=$(KERNEL_LIBC_RANDSEED) \
-
-$(info KERNEL_LIBC_RANDSEED = $(KERNEL_LIBC_RANDSEED))
-
 # Clang currently generates incorrect code when it simplifies calls to libc
 # and then inlines them.  The simplification pass does not set a calling
 # convention on the new call, leading to problems when inlining.
@@ -48,7 +39,7 @@
 MODULE_DISABLE_LTO := true
 
 MUSL_DIR := external/trusty/musl
-LK_DIR := external/lk
+LK_DIR := $(LKROOT)
 LIBC_TRUSTY_DIR := trusty/user/base/lib/libc-trusty
 
 MODULE_INCLUDES += \
@@ -115,7 +106,6 @@
 	$(LK_DIR)/lib/libc/eabi_unwind_stubs.c \
 	$(LK_DIR)/lib/libc/io_handle.c \
 	$(LK_DIR)/lib/libc/printf.c \
-	$(LK_DIR)/lib/libc/rand.c \
 	$(LK_DIR)/lib/libc/stdio.c \
 	$(LK_DIR)/lib/libc/strtol.c \
 	$(LK_DIR)/lib/libc/strtoll.c \
@@ -126,6 +116,9 @@
 	$(LK_DIR)/lib/libc/atexit.c \
 	$(LK_DIR)/lib/libc/pure_virtual.cpp
 
+MODULE_DEPS += \
+	$(LK_DIR)/lib/libc/rand
+
 # These stubs are only needed because binder uses libutils which uses pthreads mutex directly
 MODULE_SRCS += \
 	$(LIBC_TRUSTY_DIR)/pthreads.c
diff --git a/lib/memlog/memlog.c b/lib/memlog/memlog.c
index 92a0879..390a2ee 100644
--- a/lib/memlog/memlog.c
+++ b/lib/memlog/memlog.c
@@ -166,7 +166,7 @@
     status_t result;
     struct log_rb* rb;
 
-    if (!IS_PAGE_ALIGNED(sz)) {
+    if (!IS_PAGE_ALIGNED(sz) || sz == 0) {
         return SM_ERR_INVALID_PARAMETERS;
     }
 
diff --git a/lib/metrics/rules.mk b/lib/metrics/rules.mk
index 0edb8e5..c3eaad3 100644
--- a/lib/metrics/rules.mk
+++ b/lib/metrics/rules.mk
@@ -27,7 +27,7 @@
 	$(LOCAL_DIR)/metrics.c \
 
 MODULE_DEPS += \
-	external/lk/lib/dpc \
+	$(LKROOT)/lib/dpc \
 	trusty/kernel/lib/trusty \
 	trusty/user/base/interface/metrics/ \
 
diff --git a/lib/mmutest/mmutest_arm.S b/lib/mmutest/mmutest_arm.S
index 12f5228..fc13647 100644
--- a/lib/mmutest/mmutest_arm.S
+++ b/lib/mmutest/mmutest_arm.S
@@ -99,7 +99,7 @@
 	set_fault_handler	.Lmmutest_setup_fault
 	ldrtne	r1, [r0]
 
-	neg	r1, r1
+	mvn	r1, r1
 
 	set_fault_handler	.Lmmutest_fault
 	streq	r1, [r0]
diff --git a/lib/mmutest/mmutest_arm64.S b/lib/mmutest/mmutest_arm64.S
index da10fa4..2e08c08 100644
--- a/lib/mmutest/mmutest_arm64.S
+++ b/lib/mmutest/mmutest_arm64.S
@@ -92,7 +92,7 @@
 
 	set_fault_handler	.Lmmutest_setup_fault
 	ldr	w3, [x0]
-	neg	w3, w3
+	mvn	w3, w3
 
 	set_fault_handler	.Lmmutest_fault
 	str	w3, [x0]
@@ -104,7 +104,7 @@
 .Lmmutest_arch_store_uint32_user:
 	set_fault_handler	.Lmmutest_setup_fault
 	ldtr	w3, [x0]
-	neg	w3, w3
+	mvn	w3, w3
 
 	set_fault_handler	.Lmmutest_fault
 	sttr	w3, [x0]
diff --git a/lib/mmutest/mmutest_pan_x86.c b/lib/mmutest/mmutest_pan_x86.c
new file mode 100644
index 0000000..9f5ab5a
--- /dev/null
+++ b/lib/mmutest/mmutest_pan_x86.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2023, Google Inc. All rights reserved
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <arch/x86.h>
+#include <lib/mmutest/mmutest.h>
+
+bool mmutest_arch_pan_supported(void) {
+    return check_smap_avail();
+}
+
+bool mmutest_arch_pan_enabled(void) {
+    return (x86_get_cr4() & X86_CR4_SMAP) != 0;
+}
diff --git a/lib/mmutest/mmutest_x86.S b/lib/mmutest/mmutest_x86.S
new file mode 100644
index 0000000..25f248a
--- /dev/null
+++ b/lib/mmutest/mmutest_x86.S
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2015, Google Inc. All rights reserved
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <asm.h>
+#include <arch/asm_macros.h>
+#include <err.h>
+
+/**
+ * int mmutest_arch_rodata_pnx(void) - Test that rodata section is mapped pnx
+ *
+ * Returns ERR_FAULT if rodata is not executable.
+ * Return 0 if rodata is executable.
+ */
+.section .rodata
+	set_fault_handler	.Lmmutest_fault
+FUNCTION(mmutest_arch_rodata_pnx)
+	xor	%eax, %eax
+
+	ret
+
+/**
+ * int mmutest_arch_data_pnx(void) - Test that data section is mapped pnx
+ *
+ * Returns ERR_FAULT if data is not executable.
+ * Return 0 if data is executable.
+ */
+.section .data
+	set_fault_handler	.Lmmutest_fault
+FUNCTION(mmutest_arch_data_pnx)
+	xor	%eax, %eax
+
+	ret
+.section .text
+
+/**
+ * int mmutest_arch_rodata_ro(void) - Test that rodata section is mapped read-only
+ *
+ * Returns ERR_FAULT if rodata is not writable
+ * Returns 1 if write to rodata is silently dropped
+ * Returns 0 if rodata is writable
+ */
+FUNCTION(mmutest_arch_rodata_ro)
+	leaq	.Ltest_rodata_long(%rip), %rax
+
+	set_fault_handler	.Lmmutest_fault
+	movl	$0, (%rax)
+
+	movl	(%rax), %eax
+	ret
+
+.section .rodata
+	.balign	4
+.Ltest_rodata_long:
+	.long	0x1
+
+.section .text
+
+/**
+ * int mmutest_arch_store_uint32(uint32_t *ptr, bool user) - Test if ptr is writable
+ * @ptr:  Memory location to test
+ * @user: Use unprivileged store (x86: returns ERR_NOT_IMPLEMENTED)
+ *
+ * Returns ERR_FAULT if ptr is not writable
+ * Returns ERR_GENERIC if ptr is not readable
+ * Returns 2 if write does not fault, but data is lost on readback from memory
+ * Returns 1 if write does not fault, but data is lost on readback from cache
+ * Returns 0 if ptr is writable
+ */
+FUNCTION(mmutest_arch_store_uint32)
+	test	%sil, %sil
+	jnz	.Lmmutest_arch_store_uint32_user
+
+	set_fault_handler	.Lmmutest_setup_fault
+	movl	(%rdi), %edx
+	not	%edx
+
+	set_fault_handler	.Lmmutest_fault
+	movl	%edx, (%rdi)
+	mfence
+	movl	(%rdi), %ecx
+
+	jmp	.Lmmutest_arch_store_uint32_str_done
+
+.Lmmutest_arch_store_uint32_user:
+	/* TODO: call helper functions to write then read from user-space */
+	movl	$ERR_NOT_IMPLEMENTED, %eax
+	ret
+
+.Lmmutest_arch_store_uint32_str_done:
+
+	cmp	%edx, %ecx
+	jne	.Lmmutest_arch_store_uint32_cache_read_mismatch
+
+	push	%rdi
+	push	%rsi
+	push	%rdx
+	movq	$4, %rsi
+	call	arch_clean_invalidate_cache_range
+	pop	%rdx
+	pop	%rsi
+	pop	%rdi
+
+	test	%sil, %sil
+	jnz	.Lmmutest_arch_store_uint32_memory_reload_user
+
+	movl	(%rdi), %ecx
+	jmp	.Lmmutest_arch_store_uint32_memory_reload_done
+
+.Lmmutest_arch_store_uint32_memory_reload_user:
+	/* TODO: call helper function to read from user-space */
+
+.Lmmutest_arch_store_uint32_memory_reload_done:
+	cmp	%edx, %ecx
+	jne	.Lmmutest_arch_store_uint32_memory_mismatch
+
+	xor	%eax, %eax
+	ret
+
+.Lmmutest_arch_store_uint32_cache_read_mismatch:
+	movl	$1, %eax
+	ret
+
+.Lmmutest_arch_store_uint32_memory_mismatch:
+	movl	$2, %eax
+	ret
+
+/**
+ * int mmutest_arch_nop(int ret) - Return ret
+ *
+ * Returns ret if run from executable page.
+ * Does not return if run from non-executable page.
+ */
+FUNCTION(mmutest_arch_nop)
+	ret
+FUNCTION(mmutest_arch_nop_end)
+
+.Lmmutest_setup_fault:
+	movl	$ERR_GENERIC, %eax
+	ret
+
+.Lmmutest_fault:
+	movl	$ERR_FAULT, %eax
+	ret
diff --git a/lib/shared/binder_discover/binder_discover.cpp b/lib/shared/binder_discover/binder_discover.cpp
index 3cb4274..ec6df76 100644
--- a/lib/shared/binder_discover/binder_discover.cpp
+++ b/lib/shared/binder_discover/binder_discover.cpp
@@ -89,8 +89,8 @@
             android::RpcTransportCtxFactoryTipcTrusty::make());
     android::status_t status = sess->setupPreconnectedClient({}, [=]() {
         int srv_fd = connect(port, IPC_CONNECT_WAIT_FOR_PORT);
-        return srv_fd >= 0 ? android::base::unique_fd(srv_fd)
-                           : android::base::unique_fd();
+        return srv_fd >= 0 ? android::binder::unique_fd(srv_fd)
+                           : android::binder::unique_fd();
     });
     if (status != android::OK) {
         return status;
diff --git a/lib/shared/binder_discover/rules.mk b/lib/shared/binder_discover/rules.mk
index cb99920..cec24ed 100644
--- a/lib/shared/binder_discover/rules.mk
+++ b/lib/shared/binder_discover/rules.mk
@@ -32,7 +32,7 @@
 
 ifeq ($(call TOBOOL,$(TRUSTY_USERSPACE)),true)
 MODULE_LIBRARY_EXPORTED_DEPS := \
-	external/lk/lib/binary_search_tree \
+	$(LKROOT)/lib/binary_search_tree \
 	frameworks/native/libs/binder/trusty \
 	trusty/user/base/lib/libstdc++-trusty \
 
diff --git a/lib/shared/device_tree/client/device_tree.cpp b/lib/shared/device_tree/client/device_tree.cpp
index e53d9ae..b4dd890 100644
--- a/lib/shared/device_tree/client/device_tree.cpp
+++ b/lib/shared/device_tree/client/device_tree.cpp
@@ -23,6 +23,7 @@
 #include <lib/shared/device_tree/device_tree.h>
 #include <lib/shared/ibinder/macros.h>
 #include <stdio.h>
+#include <trusty_log.h>
 #include <uapi/err.h>
 
 #include <com/android/trusty/device_tree/IDeviceTree.h>
diff --git a/lib/shared/device_tree/service/device_tree_service.cpp b/lib/shared/device_tree/service/device_tree_service.cpp
index f38f0da..197fdb8 100644
--- a/lib/shared/device_tree/service/device_tree_service.cpp
+++ b/lib/shared/device_tree/service/device_tree_service.cpp
@@ -18,6 +18,7 @@
 
 #include <lk/compiler.h>
 #include <lk/trace.h>
+#include <trusty_log.h>
 
 #include <libfdt.h>
 
diff --git a/lib/sm/include/lib/sm.h b/lib/sm/include/lib/sm.h
index 1153672..d9f8dd4 100644
--- a/lib/sm/include/lib/sm.h
+++ b/lib/sm/include/lib/sm.h
@@ -83,6 +83,9 @@
 /* Version */
 long smc_sm_api_version(struct smc32_args* args);
 
+/* SMP mode */
+long smc_get_smp_max_cpus(struct smc32_args* args);
+
 /* Interrupt controller irq/fiq support */
 long smc_intc_get_next_irq(struct smc32_args* args);
 /* return 0 to enter ns-fiq handler, return non-0 to return */
diff --git a/lib/sm/include/lib/sm/smcall.h b/lib/sm/include/lib/sm/smcall.h
index 0301187..0b1c539 100644
--- a/lib/sm/include/lib/sm/smcall.h
+++ b/lib/sm/include/lib/sm/smcall.h
@@ -139,6 +139,14 @@
 
 #define SMC_FC_FIQ_RESUME SMC_FASTCALL_NR(SMC_ENTITY_SECURE_MONITOR, 12)
 
+/**
+ * SMC_FC_GET_SMP_MAX_CPUS - Find max number of cpus supported by Trusty.
+ *
+ * This call must be made before booting secondary cpus, as Trusty may
+ * support fewer cpus and crash if execution switches to an unsupported one.
+ */
+#define SMC_FC_GET_SMP_MAX_CPUS SMC_FASTCALL_NR(SMC_ENTITY_SECURE_MONITOR, 13)
+
 /* TRUSTED_OS entity calls */
 #define SMC_SC_VIRTIO_GET_DESCR SMC_STDCALL_NR(SMC_ENTITY_TRUSTED_OS, 20)
 #define SMC_SC_VIRTIO_START SMC_STDCALL_NR(SMC_ENTITY_TRUSTED_OS, 21)
diff --git a/lib/sm/shared_mem.c b/lib/sm/shared_mem.c
index b6dd7e1..6abc437 100644
--- a/lib/sm/shared_mem.c
+++ b/lib/sm/shared_mem.c
@@ -45,8 +45,6 @@
     struct ext_mem_obj ext_mem_obj;
 };
 
-static mutex_t sm_mem_ffa_lock = MUTEX_INITIAL_VALUE(sm_mem_ffa_lock);
-
 static void sm_mem_obj_compat_destroy(struct vmm_obj* vmm_obj) {
     struct ext_mem_obj* obj = containerof(vmm_obj, struct ext_mem_obj, vmm_obj);
     free(obj);
@@ -127,12 +125,10 @@
 
     DEBUG_ASSERT(obj);
 
-    mutex_acquire(&sm_mem_ffa_lock);
     ret = arm_ffa_mem_relinquish(obj->ext_mem_obj.id);
     if (ret != NO_ERROR) {
         TRACEF("Failed to relinquish the shared memory (%d)\n", ret);
     }
-    mutex_release(&sm_mem_ffa_lock);
 
     free(obj);
 }
@@ -172,320 +168,96 @@
     return obj;
 }
 
-/**
- * ffa_mem_retrieve_req - Call SPM/Hypervisor to retrieve memory region.
- * @sender_id:  FF-A vm id of sender.
- * @handle:     FF-A allocated handle.
+/* sm_mem_get_vmm_obj - Looks up a shared memory object using FF-A.
+ * @client_id:      Id of external entity where the memory originated.
+ * @mem_obj_id:     Id of shared memory object to lookup and return.
+ * @tag:            Tag of the memory.
+ * @size:           Size hint for object. Caller expects an object at least this
+ *                  big.
+ * @objp:           Pointer to return object in.
+ * @obj_ref:        Reference to *@objp.
  *
- * Helper function to start retrieval. Does not process result.
- *
- * Return: &struct smc_ret8.
+ * Return: 0 on success. ERR_NOT_FOUND if @mem_obj_id does not exist.
  */
-static struct smc_ret8 ffa_mem_retrieve_req(uint16_t sender_id,
-                                            uint64_t handle,
-                                            uint64_t tag) {
-    struct ffa_mtd* req = ffa_tx;
-
-    DEBUG_ASSERT(is_mutex_held(&sm_mem_ffa_lock));
-
-    req->sender_id = sender_id;
-
-    /* Accept any memory region attributes. */
-    req->memory_region_attributes = 0;
-
-    req->reserved_3 = 0;
-    req->flags = 0;
-    req->handle = handle;
-
-    /* We must use the same tag as the one used by the sender to retrieve. */
-    req->tag = tag;
-    req->reserved_24_27 = 0;
-
-    /*
-     * We only support retrieving memory for ourselves for now.
-     * TODO: Also support stream endpoints. Possibly more than one.
-     */
-    req->emad_count = 1;
-    req->emad[0].mapd.endpoint_id = ffa_local_id;
-
-    /* Accept any memory access permissions. */
-    req->emad[0].mapd.memory_access_permissions = 0;
-    req->emad[0].mapd.flags = 0;
-
-    /*
-     * Set composite memory region descriptor offset to 0 to indicate that the
-     * relayer should allocate the address ranges. Other values will not work
-     * for relayers that use identity maps (e.g. EL3).
-     */
-    req->emad[0].comp_mrd_offset = 0;
-    req->emad[0].reserved_8_15 = 0;
-
-    size_t len = offsetof(struct ffa_mtd, emad[1]);
-
-    /* Start FFA_MEM_RETRIEVE_REQ. */
-    return smc8(SMC_FC_FFA_MEM_RETRIEVE_REQ, len, len, 0, 0, 0, 0, 0);
-}
-
-/**
- * ffa_mem_retrieve - Call SPM/Hypervisor to retrieve memory region.
- * @sender_id:  FF-A vm id of sender.
- * @handle:     FF-A allocated handle.
- * @objp:       Pointer to return object in.
- * @obj_ref:    Reference to *@objp.
- *
- * Return: 0 on success, lk error code on failure.
- */
-static int ffa_mem_retrieve(uint16_t sender_id,
-                            uint64_t handle,
-                            uint64_t tag,
-                            struct vmm_obj** objp,
-                            struct obj_ref* obj_ref) {
-    struct smc_ret8 smc_ret;
-    struct ffa_mtd* resp = ffa_rx;
-    struct ffa_emad* emad = resp->emad;
+static status_t sm_mem_get_vmm_obj(ext_mem_client_id_t client_id,
+                                   ext_mem_obj_id_t mem_obj_id,
+                                   uint64_t tag,
+                                   size_t size,
+                                   struct vmm_obj** objp,
+                                   struct obj_ref* obj_ref) {
+    int ret;
+    struct arm_ffa_mem_frag_info frag_info;
+    uint32_t address_range_count;
+    uint arch_mmu_flags;
     struct sm_mem_obj* obj;
     struct obj_ref tmp_obj_ref = OBJ_REF_INITIAL_VALUE(tmp_obj_ref);
-    int ret;
-    uint arch_mmu_flags;
-    struct ffa_comp_mrd* comp_mrd;
 
-    DEBUG_ASSERT(is_mutex_held(&sm_mem_ffa_lock));
     DEBUG_ASSERT(objp);
     DEBUG_ASSERT(obj_ref);
 
-    if (!ffa_tx) {
-        TRACEF("no FF-A buffer\n");
-        return ERR_NOT_READY;
+    if ((client_id & 0xffff) != client_id) {
+        TRACEF("Invalid client ID\n");
+        return ERR_INVALID_ARGS;
     }
 
-    smc_ret = ffa_mem_retrieve_req(sender_id, handle, tag);
-    if ((uint32_t)smc_ret.r0 != SMC_FC_FFA_MEM_RETRIEVE_RESP) {
-        TRACEF("bad reply: 0x%lx 0x%lx 0x%lx\n", smc_ret.r0, smc_ret.r1,
-               smc_ret.r2);
-        return ERR_IO;
+    ret = arm_ffa_mem_retrieve_start((uint16_t)client_id, mem_obj_id, tag,
+                                     &address_range_count, &arch_mmu_flags,
+                                     &frag_info);
+
+    if (ret != NO_ERROR) {
+        TRACEF("Failed to get FF-A memory buffer, err=%d\n", ret);
+        goto err_mem_get_access;
     }
-    size_t total_len = (uint32_t)smc_ret.r1;
-    size_t fragment_len = (uint32_t)smc_ret.r2;
-
-    /*
-     * We don't retrieve the memory on behalf of anyone else, so we only
-     * expect one receiver address range descriptor.
-     */
-    if (resp->emad_count != 1) {
-        TRACEF("unexpected response count %d != 1\n", resp->emad_count);
-    }
-
-    switch (resp->flags & FFA_MTD_FLAG_TYPE_MASK) {
-    case FFA_MTD_FLAG_TYPE_SHARE_MEMORY:
-    case FFA_MTD_FLAG_TYPE_LEND_MEMORY:
-        break;
-    default:
-        /* Donate or an unknown sharing type */
-        TRACEF("Unknown transfer kind: 0x%x\n",
-               resp->flags & FFA_MTD_FLAG_TYPE_MASK);
-        return ERR_IO;
-    }
-
-    /* Check that the first fragment contains the entire header. */
-    size_t header_size = offsetof(struct ffa_mtd, emad[1]);
-    if (fragment_len < header_size) {
-        TRACEF("fragment length %zd too short\n", fragment_len);
-        return ERR_IO;
-    }
-
-    /* Check that the first fragment fits in our buffer */
-    if (fragment_len > ffa_buf_size) {
-        TRACEF("fragment length %zd larger than buffer size\n", fragment_len);
-        return ERR_IO;
-    }
-
-    size_t comp_mrd_offset = emad->comp_mrd_offset;
-
-    /*
-     * We have already checked that fragment_len is larger than *resp. Since
-     * *comp_mrd is smaller than that (verified here), the fragment_len -
-     * sizeof(*comp_mrd) subtraction below will never underflow.
-     */
-    STATIC_ASSERT(sizeof(*resp) >= sizeof(*comp_mrd));
-
-    if (comp_mrd_offset > fragment_len - sizeof(*comp_mrd)) {
-        TRACEF("fragment length %zd too short for comp_mrd_offset %zd\n",
-               fragment_len, comp_mrd_offset);
-        return ERR_IO;
-    }
-    comp_mrd = (void*)resp + comp_mrd_offset;
-
-    /*
-     * Set arch_mmu_flags based on mem_attr returned.
-     */
-    switch (resp->memory_region_attributes & ~FFA_MEM_ATTR_NONSECURE) {
-    case FFA_MEM_ATTR_DEVICE_NGNRE:
-        arch_mmu_flags = ARCH_MMU_FLAG_UNCACHED_DEVICE;
-        break;
-    case FFA_MEM_ATTR_NORMAL_MEMORY_UNCACHED:
-        arch_mmu_flags = ARCH_MMU_FLAG_UNCACHED;
-        break;
-    case (FFA_MEM_ATTR_NORMAL_MEMORY_CACHED_WB | FFA_MEM_ATTR_INNER_SHAREABLE):
-        arch_mmu_flags = ARCH_MMU_FLAG_CACHED;
-        break;
-    default:
-        TRACEF("unsupported memory attributes, 0x%x\n",
-               resp->memory_region_attributes);
-        return ERR_NOT_SUPPORTED;
-    }
-
-    if (!supports_ns_bit || (resp->memory_region_attributes & FFA_MEM_ATTR_NONSECURE)) {
-        arch_mmu_flags |= ARCH_MMU_FLAG_NS;
-    } else {
-        LTRACEF("secure memory path triggered\n");
-    }
-
-    if (!(emad->mapd.memory_access_permissions & FFA_MEM_PERM_RW)) {
-        arch_mmu_flags |= ARCH_MMU_FLAG_PERM_RO;
-    }
-    if (emad->mapd.memory_access_permissions & FFA_MEM_PERM_NX) {
-        /*
-         * Don't allow executable mappings if the stage 2 page tables don't
-         * allow it. The hardware allows the stage 2 NX bit to only apply to
-         * EL1, not EL0, but neither FF-A nor LK can currently express this, so
-         * disallow both if FFA_MEM_PERM_NX is set.
-         */
-        arch_mmu_flags |= ARCH_MMU_FLAG_PERM_NO_EXECUTE;
-    }
-
-    if ((resp->flags & FFA_MTD_FLAG_TYPE_MASK) ==
-        FFA_MTD_FLAG_TYPE_SHARE_MEMORY) {
-        /*
-         * If memory is shared, assume it is not safe to execute out of. This
-         * specifically indicates that another party may have access to the
-         * memory.
-         */
-        arch_mmu_flags |= ARCH_MMU_FLAG_PERM_NO_EXECUTE;
-    }
-
-    /*
-     * Regardless of origin, we don't want to execute out of NS memory.
-     */
-    if (arch_mmu_flags & ARCH_MMU_FLAG_NS) {
-        arch_mmu_flags |= ARCH_MMU_FLAG_PERM_NO_EXECUTE;
-    }
-
-    /*
-     * Check that the overall length of the message matches the expected length
-     * for the number of entries specified in the header.
-     */
-    uint32_t address_range_descriptor_count = comp_mrd->address_range_count;
-    size_t expected_len =
-            comp_mrd_offset +
-            offsetof(struct ffa_comp_mrd,
-                     address_range_array[address_range_descriptor_count]);
-    if (total_len != expected_len) {
-        TRACEF("length mismatch smc %zd != computed %zd for count %d\n",
-               total_len, expected_len, address_range_descriptor_count);
-        return ERR_IO;
-    }
-
-    header_size = comp_mrd_offset + sizeof(*comp_mrd);
-
-    struct ffa_cons_mrd* desc = comp_mrd->address_range_array;
-
-    /*
-     * Compute full descriptor count and size of partial descriptor in first
-     * fragment.
-     */
-    size_t desc_count = (fragment_len - header_size) / sizeof(*desc);
-    if (desc_count * sizeof(*desc) + header_size != fragment_len) {
-        TRACEF("fragment length %zd, contains partial descriptor\n",
-               fragment_len);
-        return ERR_IO;
-    }
-
-    /* The first fragment should not be larger than the whole message */
-    if (desc_count > address_range_descriptor_count) {
-        TRACEF("bad fragment length %zd > %zd\n", fragment_len, total_len);
-        return ERR_IO;
-    }
-
-    LTRACEF("handle %" PRId64 ", desc count %d\n", handle,
-            address_range_descriptor_count);
-
-    /* Allocate a new shared memory object. */
-    obj = sm_mem_alloc_obj(sender_id, handle, tag,
-                           address_range_descriptor_count, arch_mmu_flags,
-                           &tmp_obj_ref);
+    obj = sm_mem_alloc_obj(client_id, mem_obj_id, tag, address_range_count,
+                           arch_mmu_flags, &tmp_obj_ref);
     if (!obj) {
-        return ERR_NO_MEMORY;
+        TRACEF("Failed to allocate a shared memory object\n");
+        ret = ERR_NO_MEMORY;
+        goto err_mem_alloc_obj;
     }
 
-    for (uint ri = 0, di = 0; ri < address_range_descriptor_count; ri++, di++) {
-        if (di >= desc_count) {
-            mutex_release(&sm_mem_ffa_lock);
-            /* Drop lock to allow interleaving large object retrieval */
-            mutex_acquire(&sm_mem_ffa_lock);
-            /*
-             * All descriptors in this fragment has been consumed.
-             * Fetch next fragment from the SPM/Hypervisor.
-             */
-            smc_ret = smc8(SMC_FC_FFA_MEM_FRAG_RX, (uint32_t)handle,
-                           handle >> 32, fragment_len, 0, 0, 0, 0);
-            if ((uint32_t)smc_ret.r0 != SMC_FC_FFA_MEM_FRAG_TX) {
-                TRACEF("bad reply: 0x%lx 0x%lx 0x%lx\n", smc_ret.r0, smc_ret.r1,
-                       smc_ret.r2);
-                ret = ERR_IO;
-                goto err_mem_frag_rx;
-            }
-            fragment_len += (uint32_t)smc_ret.r3;
-
-            desc = ffa_rx;
-            di = 0;
-
-            /*
-             * Compute descriptor count in this fragment.
-             */
-            desc_count = ((uint32_t)smc_ret.r3) / sizeof(*desc);
-            if ((uint32_t)smc_ret.r3 != desc_count * sizeof(*desc)) {
-                TRACEF("fragment length %ld, contains partial descriptor\n",
-                       smc_ret.r3);
-                ret = ERR_IO;
-                goto err_bad_data;
+    for (uint32_t i = 0; i < address_range_count; i++) {
+        if (frag_info.start_index + frag_info.count <= i) {
+            arm_ffa_rx_release();
+            ret = arm_ffa_mem_retrieve_next_frag(mem_obj_id, &frag_info);
+            if (ret != NO_ERROR) {
+                TRACEF("Failed to get next fragment, err=%d\n", ret);
+                goto err_mem_next_frag;
             }
         }
-
-        /* Copy one descriptor into object */
-        obj->ext_mem_obj.page_runs[ri].paddr = desc[di].address;
-        if (desc[di].page_count < 1 ||
-            ((size_t)desc[di].page_count > (SIZE_MAX / FFA_PAGE_SIZE))) {
-            TRACEF("bad page count 0x%x at %d/%d %d/%zd\n", desc[di].page_count,
-                   ri, address_range_descriptor_count, di, desc_count);
-            ret = ERR_IO;
-            goto err_bad_data;
+        ret = arm_ffa_mem_address_range_get(
+                &frag_info, i, &obj->ext_mem_obj.page_runs[i].paddr,
+                &obj->ext_mem_obj.page_runs[i].size);
+        if (ret != NO_ERROR) {
+            TRACEF("Failed to get address range, err=%d\n", ret);
+            goto err_mem_address_range;
         }
-        obj->ext_mem_obj.page_runs[ri].size =
-                (size_t)desc[di].page_count * FFA_PAGE_SIZE;
-        LTRACEF("added ns memory at 0x%" PRIxPADDR ", size %zd, %d/%d %d/%zd\n",
-                obj->ext_mem_obj.page_runs[ri].paddr,
-                obj->ext_mem_obj.page_runs[ri].size, ri,
-                address_range_descriptor_count, di, desc_count);
     }
 
     /* No lock needed as the object is not yet visible to anyone else */
     obj_ref_transfer(obj_ref, &tmp_obj_ref);
     *objp = &obj->ext_mem_obj.vmm_obj;
 
+    arm_ffa_rx_release();
+
     return 0;
 
-err_mem_frag_rx:
-err_bad_data:
+err_mem_address_range:
+err_mem_next_frag:
     DEBUG_ASSERT(obj_ref_active(&tmp_obj_ref));
     vmm_obj_del_ref(&obj->ext_mem_obj.vmm_obj, &tmp_obj_ref);
 
+err_mem_alloc_obj:
+err_mem_get_access:
+    arm_ffa_rx_release();
     return ret;
 }
 
 /*
  * ext_mem_get_vmm_obj - Lookup or create shared memory object.
  * @client_id:  Id of external entity where the memory originated.
- * @mem_obj_id: Id of shared memory opbject to lookup and return.
+ * @mem_obj_id: Id of shared memory object to lookup and return.
+ * @tag:        Value to identify the transaction.
  * @size:       Size hint for object.
  * @objp:       Pointer to return object in.
  * @obj_ref:    Reference to *@objp.
@@ -499,22 +271,17 @@
                              size_t size,
                              struct vmm_obj** objp,
                              struct obj_ref* obj_ref) {
-    int ret;
-
-    if (client_id == 0 && tag == 0 &&
-        sm_get_api_version() < TRUSTY_API_VERSION_MEM_OBJ) {
-        /* If client is not running under a hypervisor allow using old api. */
+    if (sm_get_api_version() >= TRUSTY_API_VERSION_MEM_OBJ) {
+        return sm_mem_get_vmm_obj(client_id, mem_obj_id, tag, size, objp,
+                                  obj_ref);
+    } else if (!client_id && !tag) {
+        /* If client is not running under a hypervisor allow using
+           old api. */
         return sm_mem_compat_get_vmm_obj(client_id, mem_obj_id, size, objp,
                                          obj_ref);
+    } else {
+        return ERR_NOT_SUPPORTED;
     }
-
-    mutex_acquire(&sm_mem_ffa_lock);
-
-    ret = ffa_mem_retrieve((uint16_t)client_id, mem_obj_id, tag, objp, obj_ref);
-
-    mutex_release(&sm_mem_ffa_lock);
-
-    return ret;
 }
 
 /**
diff --git a/lib/sm/sm.c b/lib/sm/sm.c
index bfe64a1..81db962 100644
--- a/lib/sm/sm.c
+++ b/lib/sm/sm.c
@@ -102,6 +102,10 @@
     return api_version;
 }
 
+long smc_get_smp_max_cpus(struct smc32_args* args) {
+    return SMP_MAX_CPUS;
+}
+
 uint32_t sm_get_api_version(void) {
     return sm_api_version;
 }
diff --git a/lib/sm/smcall.c b/lib/sm/smcall.c
index 3287d8a..5837145 100644
--- a/lib/sm/smcall.c
+++ b/lib/sm/smcall.c
@@ -149,6 +149,7 @@
 #endif
         [SMC_FUNCTION(SMC_FC_GET_VERSION_STR)] = smc_get_version_str,
         [SMC_FUNCTION(SMC_FC_API_VERSION)] = smc_sm_api_version,
+        [SMC_FUNCTION(SMC_FC_GET_SMP_MAX_CPUS)] = smc_get_smp_max_cpus,
 };
 
 static long smc_fastcall_secure_monitor(struct smc32_args* args) {
diff --git a/lib/trusty/include/lib/trusty/ipc.h b/lib/trusty/include/lib/trusty/ipc.h
index 8fb640e..3b839ca 100644
--- a/lib/trusty/include/lib/trusty/ipc.h
+++ b/lib/trusty/include/lib/trusty/ipc.h
@@ -198,4 +198,21 @@
  */
 int ipc_port_check_access(uint32_t port_flags, const uuid_t* uuid);
 
+#if TEST_BUILD
+/**
+ * ipc_get_port_list() - List all ports used by application
+ * @out_port_list: receives the port list; free with ipc_free_port_list()
+ *
+ * Return: The size of the list if positive. An Error if negative.
+ */
+int ipc_get_port_list(struct ipc_port** out_port_list);
+
+/**
+ * ipc_free_port_list() - Free the given pointer to port list
+ * @out_port_list: pointer to list of ports.
+ *
+ */
+void ipc_free_port_list(struct ipc_port* out_port_list);
+#endif
+
 __END_CDECLS
diff --git a/lib/trusty/include/lib/trusty/tipc_virtio_dev.h b/lib/trusty/include/lib/trusty/tipc_virtio_dev.h
index 8aaa77d..add1356 100644
--- a/lib/trusty/include/lib/trusty/tipc_virtio_dev.h
+++ b/lib/trusty/include/lib/trusty/tipc_virtio_dev.h
@@ -30,6 +30,8 @@
 
 struct tipc_dev;
 
+struct trusty_virtio_bus;
+
 /*
  * This ID has to match to the value defined in virtio_ids.h on Linux side
  */
@@ -84,7 +86,8 @@
 /*
  *  Create TIPC device and register it witth virtio subsystem
  */
-status_t create_tipc_device(const struct tipc_vdev_descr* descr,
+status_t create_tipc_device(struct trusty_virtio_bus* vb,
+                            const struct tipc_vdev_descr* descr,
                             size_t descr_sz,
                             const uuid_t* uuid,
                             struct tipc_dev** dev_ptr);
diff --git a/lib/trusty/include/lib/trusty/trusty_app.h b/lib/trusty/include/lib/trusty/trusty_app.h
index 505be84..c06e355 100644
--- a/lib/trusty/include/lib/trusty/trusty_app.h
+++ b/lib/trusty/include/lib/trusty/trusty_app.h
@@ -137,7 +137,7 @@
 struct trusty_thread {
     vaddr_t stack_start;
     size_t stack_size;
-#if USER_SCS_ENABLED
+#if USER_SCS_SUPPORTED
     vaddr_t shadow_stack_base;
     size_t shadow_stack_size;
 #endif
diff --git a/lib/trusty/ipc.c b/lib/trusty/ipc.c
index 7e28edd..c9c765f 100644
--- a/lib/trusty/ipc.c
+++ b/lib/trusty/ipc.c
@@ -193,6 +193,43 @@
     return ret;
 }
 
+#if TEST_BUILD
+/*
+ * ipc_get_port_list() - Copy every entry of ipc_port_list into a new array.
+ * @out_port_list: receives a calloc'ed array with a memcpy() of each port;
+ *                 NULL if the allocation failed. Release the array with
+ *                 ipc_free_port_list().
+ *
+ * Return: number of entries copied, or ERR_NO_MEMORY if the array could not
+ *         be allocated.
+ */
+int ipc_get_port_list(struct ipc_port** out_port_list) {
+    struct ipc_port* port;
+
+    mutex_acquire(&ipc_port_lock);
+    int len = list_length(&ipc_port_list);
+    *out_port_list = calloc(len, sizeof(struct ipc_port));
+    /* Test the allocation result, not the caller-supplied out pointer. */
+    if (*out_port_list == NULL && len > 0) {
+        /* Do not return with ipc_port_lock still held. */
+        mutex_release(&ipc_port_lock);
+        return ERR_NO_MEMORY;
+    }
+    struct ipc_port* current_port = *out_port_list;
+    list_for_every_entry(&ipc_port_list, port, struct ipc_port, node) {
+        memcpy(current_port, port, sizeof(struct ipc_port));
+        ++current_port;
+    }
+    mutex_release(&ipc_port_lock);
+    return len;
+}
+
+/* ipc_free_port_list() - Free an array returned by ipc_get_port_list(). */
+void ipc_free_port_list(struct ipc_port* out_port_list) {
+    free(out_port_list);
+}
+#endif
+
 static void add_to_waiting_for_port_list_locked(struct ipc_chan* client) {
     DEBUG_ASSERT(client);
     DEBUG_ASSERT(!list_in_list(&client->node));
diff --git a/lib/trusty/ipc_msg.c b/lib/trusty/ipc_msg.c
index a90288e..1842811 100644
--- a/lib/trusty/ipc_msg.c
+++ b/lib/trusty/ipc_msg.c
@@ -192,7 +192,7 @@
 static ssize_t kern_msg_write_locked(struct ipc_msg_queue* mq,
                                      struct msg_item* item,
                                      const struct ipc_msg_kern* msg) {
-    ssize_t ret;
+    ssize_t ret = NO_ERROR;
     uint8_t* buf = msg_queue_get_buf(mq, item);
 
     if (msg->num_handles) {
@@ -205,11 +205,14 @@
             return ERR_INVALID_ARGS;
     }
 
-    /* copy message body */
-    ret = kern_iovec_to_membuf(
-            buf, mq->item_sz, (const struct iovec_kern*)msg->iov, msg->num_iov);
-    if (ret < 0)
-        return ret;
+    /* copy any message body */
+    if (likely(msg->num_iov)) {
+        ret = kern_iovec_to_membuf(buf, mq->item_sz,
+                                   (const struct iovec_kern*)msg->iov,
+                                   msg->num_iov);
+        if (ret < 0)
+            return ret;
+    }
 
     /* copy attached handles */
     for (uint i = 0; i < msg->num_handles; i++) {
@@ -393,7 +396,7 @@
                                 int32_t msg_id,
                                 uint32_t offset,
                                 struct ipc_msg_kern* kmsg) {
-    int ret;
+    int ret = 0;
     struct msg_item* item;
 
     item = msg_check_read_item(mq, msg_id, offset);
@@ -403,10 +406,12 @@
     const uint8_t* buf = msg_queue_get_buf(mq, item) + offset;
     size_t bytes_left = item->len - offset;
 
-    ret = membuf_to_kern_iovec((const struct iovec_kern*)kmsg->iov,
-                               kmsg->num_iov, buf, bytes_left);
-    if (ret < 0)
-        return ret;
+    if (likely(kmsg->num_iov)) {
+        ret = membuf_to_kern_iovec((const struct iovec_kern*)kmsg->iov,
+                                   kmsg->num_iov, buf, bytes_left);
+        if (ret < 0)
+            return ret;
+    }
 
     uint hcnt = MIN(kmsg->num_handles, item->num_handles);
     for (uint i = 0; i < hcnt; i++) {
diff --git a/lib/trusty/is_ns_client.c b/lib/trusty/is_ns_client.c
new file mode 100644
index 0000000..5c80e1e
--- /dev/null
+++ b/lib/trusty/is_ns_client.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2014-2015, Google, Inc. All rights reserved
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <lib/trusty/ipc.h>
+
+/*
+ * is_ns_client() - Returns true if uuid is associated with NS client.
+ * Only the zero_uuid object itself (compared by address) is treated as NS.
+ */
+bool is_ns_client(const uuid_t* uuid) {
+    const bool is_zero_uuid_object = (uuid == &zero_uuid);
+
+    return is_zero_uuid_object;
+}
diff --git a/lib/trusty/rules.mk b/lib/trusty/rules.mk
index c877bb8..ef1b0cb 100644
--- a/lib/trusty/rules.mk
+++ b/lib/trusty/rules.mk
@@ -72,8 +72,10 @@
 endif
 
 ifneq (true,$(call TOBOOL,$(WITH_CUSTOM_TRUSTY_IPC_CONFIG)))
-MODULE_SRCS += \
-	$(LOCAL_DIR)/tipc_config.c
+MODULE_SRCS += $(LOCAL_DIR)/is_ns_client.c
+ifeq (true, $(call TOBOOL,$(WITH_TRUSTY_VIRTIO_IPC_DEV)))
+MODULE_SRCS += $(LOCAL_DIR)/tipc_config.c
+endif
 endif
 endif
 
@@ -81,6 +83,7 @@
 	$(LOCAL_DIR)/include \
 
 MODULE_DEPS += \
+	lib/binary_search_tree \
 	lib/syscall \
 	trusty/kernel/lib/app_manifest \
 	trusty/kernel/lib/backtrace \
diff --git a/lib/trusty/smcall.c b/lib/trusty/smcall.c
index 99886b3..db8eff6 100644
--- a/lib/trusty/smcall.c
+++ b/lib/trusty/smcall.c
@@ -136,11 +136,11 @@
         break;
 
     case SMC_SC_VDEV_RESET:
-        res = virtio_device_reset(args->params[0]);
+        res = virtio_device_reset(args->client_id, args->params[0]);
         break;
 
     case SMC_SC_VDEV_KICK_VQ:
-        res = virtio_kick_vq(args->params[0], args->params[1]);
+        res = virtio_kick_vq(args->client_id, args->params[0], args->params[1]);
         break;
 
     case SMC_SC_CREATE_QL_TIPC_DEV:
@@ -183,7 +183,7 @@
 
     switch (args->params[0]) {
     case SMC_NC_VDEV_KICK_VQ:
-        res = virtio_kick_vq(args->params[1], args->params[2]);
+        res = virtio_kick_vq(args->client_id, args->params[1], args->params[2]);
         break;
 
     default:
diff --git a/lib/trusty/tipc_config.c b/lib/trusty/tipc_config.c
index bddd6e3..76c2527 100644
--- a/lib/trusty/tipc_config.c
+++ b/lib/trusty/tipc_config.c
@@ -28,29 +28,29 @@
 #include <lib/trusty/tipc_virtio_dev.h>
 #include <lk/init.h>
 
+#include "trusty_virtio.h"
+
 /* Default TIPC device (/dev/trusty-ipc-dev0) */
 DECLARE_TIPC_DEVICE_DESCR(_descr0, 0, 32, 32, "dev0");
 
-/*
- *  Returns true if uuid is associated with NS client.
- */
-bool is_ns_client(const uuid_t* uuid) {
-    if (uuid == &zero_uuid)
-        return true;
-
-    return false;
-}
-
-static void tipc_init(uint level) {
+static status_t tipc_init(struct trusty_virtio_bus* vb) {
     status_t res;
 
-    res = create_tipc_device(&_descr0, sizeof(_descr0), &zero_uuid, NULL);
+    res = create_tipc_device(vb, &_descr0, sizeof(_descr0), &zero_uuid, NULL);
     if (res != NO_ERROR) {
         TRACEF("WARNING: failed (%d) to register tipc device\n", res);
     }
+    return res;
 }
 
-LK_INIT_HOOK_FLAGS(tipc_init,
-                   tipc_init,
+static void register_tipc_init(uint level) {
+    static struct trusty_virtio_bus_notifier vb_notifier = {
+            .on_create = tipc_init,
+    };
+    trusty_virtio_register_bus_notifier(&vb_notifier);
+}
+
+LK_INIT_HOOK_FLAGS(register_tipc_init,
+                   register_tipc_init,
                    LK_INIT_LEVEL_APPS - 2,
                    LK_INIT_FLAG_PRIMARY_CPU);
diff --git a/lib/trusty/tipc_dev_ql.c b/lib/trusty/tipc_dev_ql.c
index 4d9e0f3..24ccda4 100644
--- a/lib/trusty/tipc_dev_ql.c
+++ b/lib/trusty/tipc_dev_ql.c
@@ -347,7 +347,7 @@
     if (ns_payload_len <= sizeof(req.hdr))
         return set_status(dev, opcode, ERR_INVALID_ARGS, 0);
 
-    if (ns_payload_len >= sizeof(req))
+    if (ns_payload_len - sizeof(req.hdr) >= sizeof(req.body))
         return set_status(dev, opcode, ERR_INVALID_ARGS, 0);
 
     /* copy out and zero terminate */
@@ -642,7 +642,7 @@
 
     /* check for minimum size */
     if (cmd_sz < sizeof(cmd_hdr)) {
-        TRACEF("message is too short (%zd)\n", (size_t)cmd_sz);
+        TRACEF("message is too short (%zu)\n", (size_t)cmd_sz);
         goto err_invalid;
     }
 
diff --git a/lib/trusty/tipc_virtio_dev.c b/lib/trusty/tipc_virtio_dev.c
index d64148f..fe29117 100644
--- a/lib/trusty/tipc_virtio_dev.c
+++ b/lib/trusty/tipc_virtio_dev.c
@@ -90,6 +90,8 @@
     struct vqueue vqs[TIPC_VQ_NUM];
     struct tipc_ept epts[TIPC_ADDR_MAX_NUM];
     unsigned long inuse[BITMAP_NUM_WORDS(TIPC_ADDR_MAX_NUM)];
+    struct vqueue_mapped_list send_mapped;
+    struct vqueue_mapped_list receive_mapped;
 
     event_t have_handles;
     struct handle_list handle_list;
@@ -101,6 +103,7 @@
 
     bool tx_stop;
     bool rx_stop;
+    bool reuse_mapping;
 };
 
 struct tipc_shm {
@@ -126,6 +129,10 @@
     TIPC_CTRL_MSGTYPE_CONN_RSP,
     TIPC_CTRL_MSGTYPE_DISC_REQ,
     TIPC_CTRL_MSGTYPE_RELEASE,
+    TIPC_CTRL_MSGTYPE_REUSE_MSGBUF_REQ,
+    TIPC_CTRL_MSGTYPE_REUSE_MSGBUF_RSP,
+    TIPC_CTRL_MSGTYPE_UNMAP_REQ,
+    TIPC_CTRL_MSGTYPE_UNMAP_RSP,
 };
 
 /*
@@ -164,6 +171,15 @@
     uint64_t id;
 } __PACKED;
 
+struct tipc_unmap_req_body {
+    uint64_t id;
+} __PACKED;
+
+struct tipc_unmap_rsp_body {
+    int32_t result;
+    uint64_t id;
+} __PACKED;
+
 typedef int (*tipc_data_cb_t)(uint8_t* dst, size_t sz, void* ctx);
 
 struct tipc_ext_mem {
@@ -474,6 +490,48 @@
     return NO_ERROR;
 }
 
+static int handle_reuse_msgbuf_req(struct tipc_dev* dev, uint32_t remote) {
+    struct {
+        struct tipc_ctrl_msg_hdr hdr;
+    } msg;
+
+    /* the request has no body: receiving it always turns mapping reuse on */
+    dev->reuse_mapping = true;
+
+    /* send a body-less response on the control address */
+    msg.hdr.type = TIPC_CTRL_MSGTYPE_REUSE_MSGBUF_RSP;
+    msg.hdr.body_len = 0;
+
+    return tipc_send_buf(dev, TIPC_CTRL_ADDR, TIPC_CTRL_ADDR, &msg, sizeof(msg),
+                         true);
+}
+
+/*
+ * handle_unmap_req() - Unmap a memory id from the send and receive mapped
+ * lists and reply with TIPC_CTRL_MSGTYPE_UNMAP_RSP (result plus the same id).
+ */
+static int handle_unmap_req(struct tipc_dev* dev,
+                            uint32_t remote,
+                            const volatile struct tipc_unmap_req_body* ns_req) {
+    struct vqueue_mapped_list* mapped[2] = {&dev->send_mapped,
+                                            &dev->receive_mapped};
+    struct {
+        struct tipc_ctrl_msg_hdr hdr;
+        struct tipc_unmap_rsp_body body;
+    } msg;
+
+    /* fetch the volatile id once so the unmap and the reply cannot differ */
+    msg.body.id = ns_req->id;
+    msg.body.result = vqueue_unmap_memid(msg.body.id, mapped, 2);
+
+    /* send response; the echoed id lets the host reclaim the memory */
+    msg.hdr.type = TIPC_CTRL_MSGTYPE_UNMAP_RSP;
+    msg.hdr.body_len = sizeof(msg.body);
+
+    return tipc_send_buf(dev, TIPC_CTRL_ADDR, TIPC_CTRL_ADDR, &msg, sizeof(msg),
+                         true);
+}
+
 static int handle_ctrl_msg(struct tipc_dev* dev,
                            uint32_t remote,
                            const volatile void* ns_data,
@@ -510,6 +568,16 @@
             break;
         return handle_disc_req(dev, remote, ns_msg_body);
 
+    case TIPC_CTRL_MSGTYPE_REUSE_MSGBUF_REQ:
+        if (msg_body_len != 0)
+            break;
+        return handle_reuse_msgbuf_req(dev, remote);
+
+    case TIPC_CTRL_MSGTYPE_UNMAP_REQ:
+        if (msg_body_len != sizeof(struct tipc_unmap_req_body))
+            break;
+        return handle_unmap_req(dev, remote, ns_msg_body);
+
     default:
         break;
     }
@@ -684,7 +752,8 @@
 
     /* map in_iovs, Non-secure, no-execute, cached, read-only */
     uint map_flags = ARCH_MMU_FLAG_PERM_NO_EXECUTE | ARCH_MMU_FLAG_PERM_RO;
-    int ret = vqueue_map_iovs(dev->vd.client_id, &buf->in_iovs, map_flags);
+    int ret = vqueue_map_iovs(dev->vd.client_id, &buf->in_iovs, map_flags,
+                              &dev->receive_mapped);
     if (ret) {
         TRACEF("failed to map iovs %d\n", ret);
         return ret;
@@ -726,7 +795,9 @@
     }
 
 done:
-    vqueue_unmap_iovs(&buf->in_iovs);
+    if (!dev->reuse_mapping) {
+        vqueue_unmap_iovs(&buf->in_iovs, &dev->receive_mapped);
+    }
 
     return ret;
 }
@@ -1262,7 +1333,8 @@
 
     /* map in provided buffers (no-execute, read-write) */
     uint map_flags = ARCH_MMU_FLAG_PERM_NO_EXECUTE;
-    ret = vqueue_map_iovs(dev->vd.client_id, &buf.out_iovs, map_flags);
+    ret = vqueue_map_iovs(dev->vd.client_id, &buf.out_iovs, map_flags,
+                          &dev->send_mapped);
     if (ret == NO_ERROR) {
         struct tipc_hdr* hdr = buf.out_iovs.iovs[0].iov_base;
 
@@ -1287,7 +1359,8 @@
             ret += sizeof(struct tipc_hdr);
         }
 
-        vqueue_unmap_iovs(&buf.out_iovs);
+        if (!dev->reuse_mapping)
+            vqueue_unmap_iovs(&buf.out_iovs, &dev->send_mapped);
     }
 
 done:
@@ -1333,7 +1406,8 @@
         .kick_vqueue = tipc_vdev_kick_vq,
 };
 
-status_t create_tipc_device(const struct tipc_vdev_descr* descr,
+status_t create_tipc_device(struct trusty_virtio_bus* vb,
+                            const struct tipc_vdev_descr* descr,
                             size_t size,
                             const uuid_t* uuid,
                             struct tipc_dev** dev_ptr) {
@@ -1356,7 +1430,16 @@
     handle_list_init(&dev->handle_list);
     event_init(&dev->have_handles, false, EVENT_FLAG_AUTOUNSIGNAL);
 
-    ret = virtio_register_device(&dev->vd);
+    bst_root_initialize(&dev->send_mapped.list);
+    mutex_init(&dev->send_mapped.lock);
+    dev->send_mapped.in_direction = false;
+    bst_root_initialize(&dev->receive_mapped.list);
+    mutex_init(&dev->receive_mapped.lock);
+    dev->receive_mapped.in_direction = true;
+    dev->reuse_mapping = false;
+
+    ret = virtio_register_device(vb, &dev->vd);
+
     if (ret != NO_ERROR)
         goto err_register;
 
diff --git a/lib/trusty/trusty_app.c b/lib/trusty/trusty_app.c
index 54b2210..18bd8b4 100644
--- a/lib/trusty/trusty_app.c
+++ b/lib/trusty/trusty_app.c
@@ -134,8 +134,8 @@
 static struct list_node allowed_mmio_ranges_list =
         LIST_INITIAL_VALUE(allowed_mmio_ranges_list);
 
-#define PRINT_TRUSTY_APP_UUID(tid, u)                                                              \
-    dprintf(SPEW,                                                                                  \
+#define PRINT_TRUSTY_APP_UUID(level, tid, u)                                                       \
+    dprintf((level),                                                                               \
             "trusty_app %d uuid: 0x%08xx 0x%04xx 0x%04xx 0x%02x%02x 0x%02x%02x%02x%02x%02x%02x\n", \
             tid, (u)->time_low, (u)->time_mid, (u)->time_hi_and_version,                           \
             (u)->clock_seq_and_node[0], (u)->clock_seq_and_node[1],                                \
@@ -514,7 +514,7 @@
     thread_sleep_until_ns(trusty_thread->app->min_start_time);
 
     user_addr_t shadow_stack_base = 0;
-#if USER_SCS_ENABLED
+#if USER_SCS_SUPPORTED
     shadow_stack_base = trusty_thread->shadow_stack_base;
 #endif
 
@@ -540,7 +540,7 @@
 
     vmm_free_region(trusty_thread->app->aspace, stack_bot);
 
-#if USER_SCS_ENABLED
+#if USER_SCS_SUPPORTED
     if (trusty_thread->shadow_stack_base) {
         /*
          * revert the adjustment of shadow_stack_base to reconstruct pointer
@@ -585,7 +585,7 @@
         goto err_stack;
     }
 
-#if USER_SCS_ENABLED
+#if USER_SCS_SUPPORTED
     vaddr_t shadow_stack_base = 0;
     if (shadow_stack_size) {
         err = vmm_alloc(
@@ -611,7 +611,7 @@
     trusty_thread->entry = entry;
     trusty_thread->stack_start = stack_bot + stack_size; /* stack grows down */
     trusty_thread->stack_size = stack_size;
-#if USER_SCS_ENABLED
+#if USER_SCS_SUPPORTED
     /* make shadow stack hit guard page if too small */
     size_t adjustment =
             round_up(shadow_stack_size, PAGE_SIZE) - shadow_stack_size;
@@ -636,7 +636,7 @@
     return trusty_thread;
 
 err_thread:
-#if USER_SCS_ENABLED
+#if USER_SCS_SUPPORTED
     if (shadow_stack_size) {
         vmm_free_region(trusty_app->aspace, shadow_stack_base);
     }
@@ -1042,7 +1042,7 @@
             trusty_app->props.priority = manifest_entry.value.priority;
             break;
         case APP_MANIFEST_CONFIG_KEY_MIN_SHADOW_STACK_SIZE:
-#if !USER_SCS_ENABLED
+#if !USER_SCS_SUPPORTED
             if (manifest_entry.value.min_shadow_stack_size) {
                 dprintf(CRITICAL,
                         "Shadow call stack requested by app %u, %s. Kernel "
@@ -1079,13 +1079,14 @@
         return ERR_NOT_VALID;
     }
 
-    PRINT_TRUSTY_APP_UUID(trusty_app->app_id, &trusty_app->props.uuid);
-
     if (trusty_app_find_by_uuid_locked(&trusty_app->props.uuid)) {
+        PRINT_TRUSTY_APP_UUID(CRITICAL, trusty_app->app_id,
+                              &trusty_app->props.uuid);
         dprintf(CRITICAL, "app already registered\n");
         return ERR_ALREADY_EXISTS;
     }
 
+    PRINT_TRUSTY_APP_UUID(SPEW, trusty_app->app_id, &trusty_app->props.uuid);
     dprintf(SPEW, "trusty_app %u name: %s priority: %u\n", trusty_app->app_id,
             trusty_app->props.app_name, trusty_app->props.priority);
 
diff --git a/lib/trusty/trusty_virtio.c b/lib/trusty/trusty_virtio.c
index 82b2a74..615b9ff 100644
--- a/lib/trusty/trusty_virtio.c
+++ b/lib/trusty/trusty_virtio.c
@@ -31,9 +31,13 @@
 
 #include <arch/arch_ops.h>
 #include <inttypes.h>
+#include <kernel/event.h>
 #include <kernel/mutex.h>
+#include <kernel/spinlock.h>
 #include <kernel/vm.h>
+#include <lib/binary_search_tree.h>
 #include <lk/init.h>
+#include <lk/reflist.h>
 
 #include <remoteproc/remoteproc.h>
 #include "trusty_virtio.h"
@@ -59,15 +63,238 @@
     size_t descr_size;
     volatile int state;
     struct list_node vdev_list;
+    struct bst_node node;
+    ext_mem_client_id_t client_id;
+    struct obj refobj;
+    /*
+     * This is a reference to refobj in the same virtio bus and gets deleted
+     * after the first VIRTIO_STOP or a failed GET_DESCR SMC. It's needed to
+     * ensure that refobj has at least one reference even if there are no
+     * pending virtio SMCs and should only be deleted when a VM exits. After
+     * it's deleted, no further SMCs can get references to the bus.
+     */
+    struct obj_ref tree_node_ref;
+    /*
+     * The last reference to the bus may get dropped from an interrupt-free
+     * context which can't free the bus so this event is used to signal that the
+     * bus may be freed.
+     */
+    event_t free_bus_event;
 };
 
-static struct trusty_virtio_bus _virtio_bus = {
-        .vdev_cnt = 0,
-        .descr_size = 0,
-        .next_dev_id = 0,
-        .state = VIRTIO_BUS_STATE_UNINITIALIZED,
-        .vdev_list = LIST_INITIAL_VALUE(_virtio_bus.vdev_list),
-};
+static spin_lock_t virtio_buses_tree_lock = SPIN_LOCK_INITIAL_VALUE;
+static struct bst_root virtio_buses_tree = BST_ROOT_INITIAL_VALUE;
+
+/*
+ * BST comparator keyed on client_id: yields 1 when a's id is lower than b's,
+ * -1 when it is higher and 0 when both ids match.
+ */
+static int compare_client_ids(struct bst_node* a, struct bst_node* b) {
+    DEBUG_ASSERT(a);
+    DEBUG_ASSERT(b);
+    ext_mem_client_id_t lhs =
+            containerof(a, struct trusty_virtio_bus, node)->client_id;
+    ext_mem_client_id_t rhs =
+            containerof(b, struct trusty_virtio_bus, node)->client_id;
+
+    if (lhs == rhs) {
+        return 0;
+    }
+    return lhs < rhs ? 1 : -1;
+}
+
+static void signal_client_bus_free(struct obj* obj) {
+    struct trusty_virtio_bus* vb =
+            containerof_null_safe(obj, struct trusty_virtio_bus, refobj);
+    DEBUG_ASSERT(vb);
+    /*
+     * This function may be called with interrupts disabled, so signal that the
+     * bus may be freed instead of freeing it directly here.
+     */
+    event_signal(&vb->free_bus_event, false);
+}
+
+static void release_bus_ref_locked(struct trusty_virtio_bus* vb,
+                                   struct obj_ref* ref) {
+    DEBUG_ASSERT(vb);
+    DEBUG_ASSERT(ref);
+    obj_del_ref(&vb->refobj, ref, signal_client_bus_free);
+}
+
+static void release_bus_ref(struct trusty_virtio_bus* vb, struct obj_ref* ref) {
+    DEBUG_ASSERT(ref);
+    DEBUG_ASSERT(vb);
+    spin_lock_saved_state_t state;
+    spin_lock_irqsave(&virtio_buses_tree_lock, state);
+
+    release_bus_ref_locked(vb, ref);
+
+    spin_unlock_irqrestore(&virtio_buses_tree_lock, state);
+}
+
+static struct trusty_virtio_bus* alloc_new_bus(ext_mem_client_id_t client_id,
+                                               struct obj_ref* ref) {
+    DEBUG_ASSERT(ref);
+    struct trusty_virtio_bus* new_bus = (struct trusty_virtio_bus*)calloc(
+            1, sizeof(struct trusty_virtio_bus));
+    if (!new_bus) {
+        return NULL;
+    }
+    new_bus->state = VIRTIO_BUS_STATE_UNINITIALIZED;
+    new_bus->vdev_list =
+            (struct list_node)LIST_INITIAL_VALUE(new_bus->vdev_list);
+    new_bus->node = (struct bst_node)BST_NODE_INITIAL_VALUE;
+    new_bus->client_id = client_id;
+    obj_ref_init(&new_bus->tree_node_ref);
+    /*
+     * Initialize the refobj with the caller's reference and only add
+     * tree_node_ref after we've added the bus to the tree
+     */
+    obj_init(&new_bus->refobj, ref);
+    event_init(&new_bus->free_bus_event, 0, EVENT_FLAG_AUTOUNSIGNAL);
+    return new_bus;
+}
+
+static status_t create_new_bus(ext_mem_client_id_t client_id,
+                               struct trusty_virtio_bus** vb,
+                               struct obj_ref* ref) {
+    DEBUG_ASSERT(vb);
+    DEBUG_ASSERT(ref);
+    struct obj_ref tmp_ref = OBJ_REF_INITIAL_VALUE(tmp_ref);
+    struct trusty_virtio_bus* new_bus = alloc_new_bus(client_id, &tmp_ref);
+    if (!new_bus) {
+        LTRACEF("Could not allocate memory for virtio bus for client %" PRId64
+                "\n",
+                client_id);
+        return ERR_NO_MEMORY;
+    }
+    spin_lock_saved_state_t state;
+    spin_lock_irqsave(&virtio_buses_tree_lock, state);
+
+    bool inserted =
+            bst_insert(&virtio_buses_tree, &new_bus->node, compare_client_ids);
+
+    if (inserted) {
+        /* Add tree_node_ref if the bus was inserted */
+        obj_add_ref(&new_bus->refobj, &new_bus->tree_node_ref);
+        /* Transfer the local reference to the parameter */
+        obj_ref_transfer(ref, &tmp_ref);
+    } else {
+        /* If the bus was not inserted delete the caller's reference */
+        release_bus_ref_locked(new_bus, &tmp_ref);
+    }
+    spin_unlock_irqrestore(&virtio_buses_tree_lock, state);
+
+    if (!inserted) {
+        DEBUG_ASSERT(!obj_has_ref(&new_bus->refobj));
+        free(new_bus);
+        return ERR_ALREADY_EXISTS;
+    }
+    *vb = new_bus;
+    return NO_ERROR;
+}
+
+static struct trusty_virtio_bus* get_client_bus_locked(
+        ext_mem_client_id_t client_id) {
+    struct trusty_virtio_bus bus = {
+            .node = BST_NODE_INITIAL_VALUE,
+            .client_id = client_id,
+    };
+    struct bst_node* node =
+            bst_search(&virtio_buses_tree, &bus.node, compare_client_ids);
+    return containerof_null_safe(node, struct trusty_virtio_bus, node);
+}
+
+static struct trusty_virtio_bus* get_client_bus(ext_mem_client_id_t client_id,
+                                                struct obj_ref* ref) {
+    DEBUG_ASSERT(ref);
+    spin_lock_saved_state_t state;
+    spin_lock_irqsave(&virtio_buses_tree_lock, state);
+
+    struct trusty_virtio_bus* vb = get_client_bus_locked(client_id);
+    if (vb) {
+        obj_add_ref(&vb->refobj, ref);
+    }
+    spin_unlock_irqrestore(&virtio_buses_tree_lock, state);
+
+    return vb;
+}
+
+/*
+ * Frees the client bus if it's in the virtio tree and deletes a reference to
+ * the bus held by the caller.
+ */
+static void remove_client_bus(struct trusty_virtio_bus* vb,
+                              struct obj_ref* ref) {
+    DEBUG_ASSERT(vb);
+    DEBUG_ASSERT(ref);
+
+    spin_lock_saved_state_t state;
+    spin_lock_irqsave(&virtio_buses_tree_lock, state);
+    /*
+     * Check if the bus is still in the tree or if another call to
+     * remove_client_bus beat us
+     */
+    bool bus_in_tree = obj_ref_active(&vb->tree_node_ref);
+
+    if (bus_in_tree) {
+        /*
+         * Remove the bus from the virtio tree to prevent further calls to
+         * get_client_bus_locked from succeeding
+         */
+        bst_delete(&virtio_buses_tree, &vb->node);
+        release_bus_ref_locked(vb, &vb->tree_node_ref);
+    }
+    release_bus_ref_locked(vb, ref);
+    /*
+     * If there are other calls to remove_client_bus, we need to drop this lock
+     * before waiting on the free bus event because they may delete other
+     * references to the bus
+     */
+    spin_unlock_irqrestore(&virtio_buses_tree_lock, state);
+    if (bus_in_tree) {
+        /* Blocks until the last reference to the bus is dropped */
+        event_wait(&vb->free_bus_event);
+        /*
+         * Only the first call to remove_client_bus will find the bus in the
+         * tree and end up freeing the bus
+         */
+        DEBUG_ASSERT(!obj_has_ref(&vb->refobj));
+        free(vb);
+    }
+}
+
+static mutex_t virtio_bus_notifier_lock =
+        MUTEX_INITIAL_VALUE(virtio_bus_notifier_lock);
+static struct list_node virtio_bus_notifier_list =
+        LIST_INITIAL_VALUE(virtio_bus_notifier_list);
+
+void trusty_virtio_register_bus_notifier(struct trusty_virtio_bus_notifier* n) {
+    mutex_acquire(&virtio_bus_notifier_lock);
+    list_add_tail(&virtio_bus_notifier_list, &n->node);
+    mutex_release(&virtio_bus_notifier_lock);
+}
+
+/* Runs each registered on_create notifier; stops at the first failure. */
+static status_t on_create_virtio_bus(struct trusty_virtio_bus* vb) {
+    DEBUG_ASSERT(vb);
+    status_t status = NO_ERROR;
+    struct trusty_virtio_bus_notifier* notifier;
+
+    mutex_acquire(&virtio_bus_notifier_lock);
+    list_for_every_entry(&virtio_bus_notifier_list, notifier,
+                         struct trusty_virtio_bus_notifier, node) {
+        if (notifier->on_create) {
+            status = notifier->on_create(vb);
+            if (status != NO_ERROR) {
+                LTRACEF("call to on_create notifier failed (%d)\n", status);
+                break;
+            }
+        }
+    }
+    mutex_release(&virtio_bus_notifier_lock);
+    return status;
+}
 
 static status_t map_descr(ext_mem_client_id_t client_id,
                           ext_mem_obj_id_t buf_id,
@@ -110,9 +337,9 @@
 /*
  *     Register virtio device
  */
-status_t virtio_register_device(struct vdev* vd) {
+status_t virtio_register_device(struct trusty_virtio_bus* vb, struct vdev* vd) {
     status_t ret = ERR_BAD_STATE;
-    struct trusty_virtio_bus* vb = &_virtio_bus;
+    DEBUG_ASSERT(vb);
 
     if (vb->state == VIRTIO_BUS_STATE_UNINITIALIZED) {
         ret = validate_vdev(vd);
@@ -128,8 +355,8 @@
 /*
  *
  */
-static void finalize_vdev_registry(void) {
-    struct trusty_virtio_bus* vb = &_virtio_bus;
+static void finalize_vdev_registry(struct trusty_virtio_bus* vb) {
+    DEBUG_ASSERT(vb);
 
     if (vb->state == VIRTIO_BUS_STATE_UNINITIALIZED) {
         struct vdev* vd;
@@ -159,16 +386,42 @@
                                uint buf_mmu_flags) {
     status_t ret;
     struct vdev* vd;
-    struct trusty_virtio_bus* vb = &_virtio_bus;
+    struct trusty_virtio_bus* vb = NULL;
+    struct obj_ref tmp_ref = OBJ_REF_INITIAL_VALUE(tmp_ref);
 
     LTRACEF("descr_buf: %u bytes @ 0x%" PRIx64 "\n", buf_sz, buf_id);
 
-    finalize_vdev_registry();
+    ret = create_new_bus(client_id, &vb, &tmp_ref);
+    if (ret == ERR_ALREADY_EXISTS) {
+        LTRACEF("Client %" PRId64 " may only call the VIRTIO_GET_DESCR once\n",
+                client_id);
+        return ERR_NOT_ALLOWED;
+    } else if (ret != NO_ERROR) {
+        LTRACEF("Could not create virtio bus for client %" PRId64 "\n",
+                client_id);
+        return ret;
+    }
+
+    /* on_create notifiers must only be called if virtio bus is uninitialized */
+    if (vb->state == VIRTIO_BUS_STATE_UNINITIALIZED) {
+        ret = on_create_virtio_bus(vb);
+        /* If on_create notifiers failed remove the new virtio bus */
+        if (ret != NO_ERROR) {
+            goto err_failed_on_create;
+        }
+    }
+    /*
+     * finalize_vdev_registry in the first call to this function switches the
+     * bus state to idle so it should never be uninitialized after this point
+     */
+    finalize_vdev_registry(vb);
+    ASSERT(vb->state != VIRTIO_BUS_STATE_UNINITIALIZED);
 
     if ((size_t)buf_sz < vb->descr_size) {
         LTRACEF("buffer (%zu bytes) is too small (%zu needed)\n",
                 (size_t)buf_sz, vb->descr_size);
-        return ERR_NOT_ENOUGH_BUFFER;
+        ret = ERR_NOT_ENOUGH_BUFFER;
+        goto err_buffer;
     }
 
     /* map in NS memory */
@@ -176,7 +429,7 @@
     ret = map_descr(client_id, buf_id, &va, vb->descr_size, buf_mmu_flags);
     if (ret != NO_ERROR) {
         LTRACEF("failed (%d) to map in descriptor buffer\n", (int)ret);
-        return ret;
+        goto err_failed_map;
     }
     memset(va, 0, vb->descr_size);
 
@@ -200,7 +453,15 @@
 
     unmap_descr(va, vb->descr_size);
 
+    release_bus_ref(vb, &tmp_ref);
+
     return vb->descr_size;
+
+err_failed_map:
+err_buffer:
+err_failed_on_create:
+    remove_client_bus(vb, &tmp_ref);
+    return ret;
 }
 
 /*
@@ -215,7 +476,12 @@
     void* descr_va;
     void* ns_descr_va = NULL;
     struct vdev* vd;
-    struct trusty_virtio_bus* vb = &_virtio_bus;
+    struct obj_ref tmp_ref = OBJ_REF_INITIAL_VALUE(tmp_ref);
+    struct trusty_virtio_bus* vb = get_client_bus(client_id, &tmp_ref);
+    if (!vb) {
+        LTRACEF("Could not get virtio bus for client %" PRId64 "\n", client_id);
+        return ERR_BAD_STATE;
+    }
 
     LTRACEF("%u bytes @ 0x%" PRIx64 "\n", descr_sz, ns_descr_id);
 
@@ -223,7 +489,8 @@
         if (client_id != vd->client_id) {
             LTRACEF("mismatched client id 0x%" PRIx64 " != 0x%" PRIx64 "\n",
                     client_id, vd->client_id);
-            return ERR_INVALID_ARGS;
+            ret = ERR_INVALID_ARGS;
+            goto err_invalid_args;
         }
     }
 
@@ -233,7 +500,8 @@
     if (oldstate != VIRTIO_BUS_STATE_IDLE) {
         /* bus should be in initializing state */
         LTRACEF("unexpected state state (%d)\n", oldstate);
-        return ERR_BAD_STATE;
+        ret = ERR_BAD_STATE;
+        goto err_bad_state;
     }
 
     if ((size_t)descr_sz != vb->descr_size) {
@@ -270,6 +538,7 @@
     free(descr_va);
 
     vb->state = VIRTIO_BUS_STATE_ACTIVE;
+    release_bus_ref(vb, &tmp_ref);
 
     return NO_ERROR;
 
@@ -278,6 +547,9 @@
 err_alloc_descr:
 err_bad_params:
     vb->state = oldstate;
+err_bad_state:
+err_invalid_args:
+    release_bus_ref(vb, &tmp_ref);
     return ret;
 }
 
@@ -285,9 +557,15 @@
                      ext_mem_obj_id_t descr_id,
                      ns_size_t descr_sz,
                      uint descr_mmu_flags) {
+    status_t ret;
     int oldstate;
     struct vdev* vd;
-    struct trusty_virtio_bus* vb = &_virtio_bus;
+    struct obj_ref tmp_ref = OBJ_REF_INITIAL_VALUE(tmp_ref);
+    struct trusty_virtio_bus* vb = get_client_bus(client_id, &tmp_ref);
+    if (!vb) {
+        LTRACEF("Could not get virtio bus for client %" PRId64 "\n", client_id);
+        return ERR_BAD_STATE;
+    }
 
     LTRACEF("%u bytes @ 0x%" PRIx64 "\n", descr_sz, descr_id);
 
@@ -295,15 +573,18 @@
         if (client_id != vd->client_id) {
             LTRACEF("mismatched client id 0x%" PRIx64 " != 0x%" PRIx64 "\n",
                     client_id, vd->client_id);
-            return ERR_INVALID_ARGS;
+            ret = ERR_INVALID_ARGS;
+            goto err_invalid_args;
         }
     }
 
     oldstate = atomic_cmpxchg(&vb->state, VIRTIO_BUS_STATE_ACTIVE,
                               VIRTIO_BUS_STATE_DEACTIVATING);
 
-    if (oldstate != VIRTIO_BUS_STATE_ACTIVE)
-        return ERR_BAD_STATE;
+    if (oldstate != VIRTIO_BUS_STATE_ACTIVE) {
+        ret = ERR_BAD_STATE;
+        goto err_bad_state;
+    }
 
     /* reset all devices */
     list_for_every_entry(&vb->vdev_list, vd, struct vdev, node) {
@@ -311,22 +592,39 @@
     }
 
     vb->state = VIRTIO_BUS_STATE_IDLE;
+    remove_client_bus(vb, &tmp_ref);
 
     return NO_ERROR;
+
+err_bad_state:
+    /* Remove the bus even if it was not in the active state */
+    remove_client_bus(vb, &tmp_ref);
+    return ret;
+
+err_invalid_args:
+    release_bus_ref(vb, &tmp_ref);
+    return ret;
 }
 
 /*
  *  Reset virtio device with specified device id
  */
-status_t virtio_device_reset(uint devid) {
+status_t virtio_device_reset(ext_mem_client_id_t client_id, uint devid) {
     struct vdev* vd;
     status_t ret = ERR_NOT_FOUND;
-    struct trusty_virtio_bus* vb = &_virtio_bus;
+    struct obj_ref tmp_ref = OBJ_REF_INITIAL_VALUE(tmp_ref);
+    struct trusty_virtio_bus* vb = get_client_bus(client_id, &tmp_ref);
+    if (!vb) {
+        LTRACEF("Could not get virtio bus for client %" PRId64 "\n", client_id);
+        return ERR_BAD_STATE;
+    }
 
     LTRACEF("dev=%d\n", devid);
 
-    if (vb->state != VIRTIO_BUS_STATE_ACTIVE)
-        return ERR_BAD_STATE;
+    if (vb->state != VIRTIO_BUS_STATE_ACTIVE) {
+        ret = ERR_BAD_STATE;
+        goto err_bad_state;
+    }
 
     list_for_every_entry(&vb->vdev_list, vd, struct vdev, node) {
         if (vd->devid == devid) {
@@ -334,23 +632,32 @@
             break;
         }
     }
+err_bad_state:
+    release_bus_ref(vb, &tmp_ref);
     return ret;
 }
 
 /*
  *  Kick vq for virtio device with specified device id
  */
-status_t virtio_kick_vq(uint devid, uint vqid) {
+status_t virtio_kick_vq(ext_mem_client_id_t client_id, uint devid, uint vqid) {
     struct vdev* vd;
     status_t ret = ERR_NOT_FOUND;
-    struct trusty_virtio_bus* vb = &_virtio_bus;
+    struct obj_ref tmp_ref = OBJ_REF_INITIAL_VALUE(tmp_ref);
+    struct trusty_virtio_bus* vb = get_client_bus(client_id, &tmp_ref);
+    if (!vb) {
+        LTRACEF("Could not get virtio bus for client %" PRId64 "\n", client_id);
+        return ERR_BAD_STATE;
+    }
 
 #if WITH_CHATTY_LTRACE
     LTRACEF("dev=%d\n", devid);
 #endif
 
-    if (vb->state != VIRTIO_BUS_STATE_ACTIVE)
-        return ERR_BAD_STATE;
+    if (vb->state != VIRTIO_BUS_STATE_ACTIVE) {
+        ret = ERR_BAD_STATE;
+        goto err_bad_state;
+    }
 
     list_for_every_entry(&vb->vdev_list, vd, struct vdev, node) {
         if (vd->devid == devid) {
@@ -358,5 +665,7 @@
             break;
         }
     }
+err_bad_state:
+    release_bus_ref(vb, &tmp_ref);
     return ret;
 }
diff --git a/lib/trusty/trusty_virtio.h b/lib/trusty/trusty_virtio.h
index f7a6938..6de968b 100644
--- a/lib/trusty/trusty_virtio.h
+++ b/lib/trusty/trusty_virtio.h
@@ -54,12 +54,14 @@
     const struct vdev_ops* ops;
 };
 
+struct trusty_virtio_bus;
+
 typedef uint64_t ns_paddr_t;
 
 /*
  * Register virtio device
  */
-status_t virtio_register_device(struct vdev* vd);
+status_t virtio_register_device(struct trusty_virtio_bus* vb, struct vdev* vd);
 
 /*
  * Retrieve device description to be shared with NS side
@@ -88,11 +90,18 @@
 /*
  *  Reset virtio device with specified device id
  */
-status_t virtio_device_reset(uint devid);
+status_t virtio_device_reset(ext_mem_client_id_t client_id, uint devid);
 
 /*
  *  Kick vq for specified device
  */
-status_t virtio_kick_vq(uint devid, uint vqid);
+status_t virtio_kick_vq(ext_mem_client_id_t client_id, uint devid, uint vqid);
+
+struct trusty_virtio_bus_notifier {
+    struct list_node node;
+    status_t (*on_create)(struct trusty_virtio_bus* vb);
+};
+
+void trusty_virtio_register_bus_notifier(struct trusty_virtio_bus_notifier* n);
 
 __END_CDECLS
diff --git a/lib/trusty/vqueue.c b/lib/trusty/vqueue.c
index 8f4cd59..437fd14 100644
--- a/lib/trusty/vqueue.c
+++ b/lib/trusty/vqueue.c
@@ -211,11 +211,64 @@
     return ret;
 }
 
+struct vqueue_mem_obj {
+    ext_mem_client_id_t client_id;
+    ext_mem_obj_id_t id;
+    void* iov_base;
+    size_t size;
+    struct bst_node node;
+};
+
+static struct vqueue_mem_obj* vqueue_mem_obj_from_bst_node(
+        struct bst_node* node) {
+    return containerof(node, struct vqueue_mem_obj, node);
+}
+
+/* Returns 1 if a's id is lower than b's, -1 if higher, 0 if equal. */
+static int vqueue_mem_obj_cmp(struct bst_node* a_bst, struct bst_node* b_bst) {
+    ext_mem_obj_id_t lhs = vqueue_mem_obj_from_bst_node(a_bst)->id;
+    ext_mem_obj_id_t rhs = vqueue_mem_obj_from_bst_node(b_bst)->id;
+    return (lhs < rhs) - (lhs > rhs);
+}
+
+static void vqueue_mem_obj_initialize(struct vqueue_mem_obj* obj,
+                                      ext_mem_client_id_t client_id,
+                                      ext_mem_obj_id_t id,
+                                      void* iov_base,
+                                      size_t size) {
+    obj->client_id = client_id;
+    obj->id = id;
+    obj->iov_base = iov_base;
+    obj->size = size;
+    bst_node_initialize(&obj->node);
+}
+
+static bool vqueue_mem_insert(struct bst_root* objs,
+                              struct vqueue_mem_obj* obj) {
+    return bst_insert(objs, &obj->node, vqueue_mem_obj_cmp);
+}
+
+static struct vqueue_mem_obj* vqueue_mem_lookup(struct bst_root* objs,
+                                                ext_mem_obj_id_t id) {
+    /* search key: vqueue_mem_obj_cmp only reads .id, the rest is zeroed */
+    struct vqueue_mem_obj ref_obj = {.id = id};
+    return bst_search_type(objs, &ref_obj, vqueue_mem_obj_cmp,
+                           struct vqueue_mem_obj, node);
+}
+
+static inline void vqueue_mem_delete(struct bst_root* objs,
+                                     struct vqueue_mem_obj* obj) {
+    bst_delete(objs, &obj->node);
+}
+
 int vqueue_map_iovs(ext_mem_client_id_t client_id,
                     struct vqueue_iovs* vqiovs,
-                    u_int flags) {
+                    u_int flags,
+                    struct vqueue_mapped_list* mapped_list) {
     uint i;
     int ret;
+    size_t size;
+    struct vqueue_mem_obj* obj;
 
     DEBUG_ASSERT(vqiovs);
     DEBUG_ASSERT(vqiovs->shared_mem_id);
@@ -223,14 +276,61 @@
     DEBUG_ASSERT(vqiovs->used <= vqiovs->cnt);
 
     for (i = 0; i < vqiovs->used; i++) {
+        /* see if it's already been mapped */
+        mutex_acquire(&mapped_list->lock);
+        obj = vqueue_mem_lookup(&mapped_list->list, vqiovs->shared_mem_id[i]);
+        mutex_release(&mapped_list->lock);
+
+        if (obj && obj->client_id == client_id &&
+            vqiovs->iovs[i].iov_len <= obj->size) {
+            LTRACEF("iov restored %s id= %lu (base= %p, size= %lu)\n",
+                    mapped_list->in_direction ? "IN" : "OUT",
+                    (unsigned long)vqiovs->shared_mem_id[i], obj->iov_base,
+                    (unsigned long)obj->size);
+            vqiovs->iovs[i].iov_base = obj->iov_base;
+            continue; /* use the previously mapped */
+        } else if (obj) {
+            /* otherwise, we need to drop old mapping and remap  */
+            TRACEF("iov needs remapped for id= %lu\n",
+                   (unsigned long)vqiovs->shared_mem_id[i]);
+            mutex_acquire(&mapped_list->lock);
+            vqueue_mem_delete(&mapped_list->list, obj);
+            mutex_release(&mapped_list->lock);
+            free(obj);
+        }
+
+        /* allocate since it may be reused instead of unmapped after use */
+        obj = calloc(1, sizeof(struct vqueue_mem_obj));
+        if (unlikely(!obj)) {
+            TRACEF("calloc failure for vqueue_mem_obj for iov\n");
+            ret = ERR_NO_MEMORY;
+            goto err;
+        }
+
+        /* map it */
         vqiovs->iovs[i].iov_base = NULL;
+        size = round_up(vqiovs->iovs[i].iov_len, PAGE_SIZE);
         ret = ext_mem_map_obj_id(vmm_get_kernel_aspace(), "vqueue-buf",
                                  client_id, vqiovs->shared_mem_id[i], 0, 0,
-                                 round_up(vqiovs->iovs[i].iov_len, PAGE_SIZE),
-                                 &vqiovs->iovs[i].iov_base, PAGE_SIZE_SHIFT, 0,
-                                 flags);
-        if (ret)
+                                 size, &vqiovs->iovs[i].iov_base,
+                                 PAGE_SIZE_SHIFT, 0, flags);
+        if (ret) {
+            free(obj);
             goto err;
+        }
+
+        vqueue_mem_obj_initialize(obj, client_id, vqiovs->shared_mem_id[i],
+                                  vqiovs->iovs[i].iov_base, size);
+
+        mutex_acquire(&mapped_list->lock);
+        if (unlikely(!vqueue_mem_insert(&mapped_list->list, obj)))
+            panic("Unhandled duplicate entry in ext_mem for iov\n");
+        mutex_release(&mapped_list->lock);
+
+        LTRACEF("iov saved %s id= %lu (base= %p, size= %lu)\n",
+                mapped_list->in_direction ? "IN" : "OUT",
+                (unsigned long)vqiovs->shared_mem_id[i],
+                vqiovs->iovs[i].iov_base, (unsigned long)size);
     }
 
     return NO_ERROR;
@@ -245,7 +345,10 @@
     return ret;
 }
 
-void vqueue_unmap_iovs(struct vqueue_iovs* vqiovs) {
+void vqueue_unmap_iovs(struct vqueue_iovs* vqiovs,
+                       struct vqueue_mapped_list* mapped_list) {
+    struct vqueue_mem_obj* obj;
+
     DEBUG_ASSERT(vqiovs);
     DEBUG_ASSERT(vqiovs->shared_mem_id);
     DEBUG_ASSERT(vqiovs->iovs);
@@ -257,9 +360,66 @@
         vmm_free_region(vmm_get_kernel_aspace(),
                         (vaddr_t)vqiovs->iovs[i].iov_base);
         vqiovs->iovs[i].iov_base = NULL;
+
+        /* remove from list since it has been unmapped */
+        mutex_acquire(&mapped_list->lock);
+        obj = vqueue_mem_lookup(&mapped_list->list, vqiovs->shared_mem_id[i]);
+        if (obj) {
+            /* iovs[i].iov_base is already NULL here; log the saved base */
+            LTRACEF("iov removed %s id= %lu (base= %p, size= %lu)\n",
+                    mapped_list->in_direction ? "IN" : "OUT",
+                    (unsigned long)vqiovs->shared_mem_id[i], obj->iov_base,
+                    (unsigned long)vqiovs->iovs[i].iov_len);
+            vqueue_mem_delete(&mapped_list->list, obj);
+            free(obj);
+        } else {
+            TRACEF("iov mapping not found for id= %lu (base= %p, size= %lu)\n",
+                   (unsigned long)vqiovs->shared_mem_id[i],
+                   vqiovs->iovs[i].iov_base,
+                   (unsigned long)vqiovs->iovs[i].iov_len);
+        }
+        mutex_release(&mapped_list->lock);
     }
 }
 
+int vqueue_unmap_memid(ext_mem_obj_id_t id,
+                       struct vqueue_mapped_list* mapped_list[],
+                       int list_cnt) {
+    /* stays NULL when list_cnt <= 0 or when no list holds @id */
+    struct vqueue_mapped_list* mapped = NULL;
+    struct vqueue_mem_obj* obj;
+    struct vqueue_iovs fake_vqiovs;
+    ext_mem_obj_id_t fake_shared_mem_id[1] = {id};
+    struct iovec_kern fake_iovs[1];
+
+    /* determine which list this entry is in; look up under the list lock */
+    for (int i = 0; i < list_cnt && !mapped; i++) {
+        mutex_acquire(&mapped_list[i]->lock);
+        obj = vqueue_mem_lookup(&mapped_list[i]->list, id);
+        if (obj) {
+            /* copy the mapping out while the list lock is still held */
+            fake_iovs[0].iov_base = obj->iov_base;
+            fake_iovs[0].iov_len = obj->size;
+            mapped = mapped_list[i];
+        }
+        mutex_release(&mapped_list[i]->lock);
+    }
+    if (!mapped) {
+        return ERR_NOT_FOUND;
+    }
+
+    /* fake a one-entry vqueue_iovs struct to use the common interface */
+    memset(&fake_vqiovs, 0, sizeof(fake_vqiovs));
+    fake_vqiovs.iovs = fake_iovs;
+    fake_vqiovs.shared_mem_id = fake_shared_mem_id;
+    fake_vqiovs.used = 1;
+    fake_vqiovs.cnt = 1;
+
+    /* unmap; vqueue_unmap_iovs also removes the entry from the mapped list */
+    vqueue_unmap_iovs(&fake_vqiovs, mapped);
+    return NO_ERROR;
+}
+
 static int _vqueue_add_buf_locked(struct vqueue* vq,
                                   struct vqueue_buf* buf,
                                   uint32_t len) {
diff --git a/lib/trusty/vqueue.h b/lib/trusty/vqueue.h
index 6c75e67..05c095f 100644
--- a/lib/trusty/vqueue.h
+++ b/lib/trusty/vqueue.h
@@ -26,6 +26,7 @@
 
 #include <arch/ops.h>
 #include <kernel/event.h>
+#include <kernel/mutex.h>
 #include <lib/extmem/extmem.h>
 #include <lib/trusty/uio.h>
 #include <stdint.h>
@@ -74,6 +75,12 @@
     struct vqueue_iovs out_iovs;
 };
 
+struct vqueue_mapped_list {
+    struct bst_root list;
+    mutex_t lock;
+    bool in_direction;
+};
+
 int vqueue_init(struct vqueue* vq,
                 uint32_t id,
                 ext_mem_client_id_t client_id,
@@ -90,8 +97,14 @@
 
 int vqueue_map_iovs(ext_mem_client_id_t client_id,
                     struct vqueue_iovs* vqiovs,
-                    u_int flags);
-void vqueue_unmap_iovs(struct vqueue_iovs* vqiovs);
+                    u_int flags,
+                    struct vqueue_mapped_list* mapped_list);
+void vqueue_unmap_iovs(struct vqueue_iovs* vqiovs,
+                       struct vqueue_mapped_list* mapped_list);
+
+int vqueue_unmap_memid(ext_mem_obj_id_t id,
+                       struct vqueue_mapped_list* mapped_list[],
+                       int list_cnt);
 
 int vqueue_add_buf(struct vqueue* vq, struct vqueue_buf* buf, uint32_t len);
 
diff --git a/lib/ubsan/exemptlist b/lib/ubsan/exemptlist
index a5773da..a8395a0 100644
--- a/lib/ubsan/exemptlist
+++ b/lib/ubsan/exemptlist
@@ -71,3 +71,13 @@
 # Exempt libfdt from implicit integer sign changes
 [implicit-integer-sign-change]
 src:external/dtc/libfdt/*
+
+# Exempt some libbinder functions from CFI because they make indirect
+# calls from C++ into Rust and the latter does not support CFI.
+# TODO(b/181755948): Remove these lines when that changes.
+[cfi-icall]
+# This is the file path as seen by clang
+src:frameworks/native/libs/binder/trusty/binder_rpc_unstable/../../libbinder_rpc_unstable.cpp
+src:frameworks/native/libs/binder/ndk/ibinder.cpp
+src:frameworks/native/libs/binder/ndk/parcel.cpp
+src:frameworks/native/libs/binder/trusty/rust/binder_rpc_server/sys/cpp/RpcServerTrustyRust.cpp
diff --git a/make/generic_compile.mk b/make/generic_compile.mk
index 036026e..dde729c 100644
--- a/make/generic_compile.mk
+++ b/make/generic_compile.mk
@@ -28,6 +28,7 @@
 # GENERIC_FLAGS : list of flags for the compiler
 # GENERIC_CFLAGS : list of flags for the compiler, when compiling C files.
 # GENERIC_CPPFLAGS : list of flags for the compiler, when compiling C++ files.
+# GENERIC_SRCDEPS : extra source dependencies
 
 # Validate arguments.
 ifeq ($(GENERIC_CC), )
@@ -63,29 +64,31 @@
 $(GENERIC_OBJS): CFLAGS := $(GENERIC_CFLAGS)
 $(GENERIC_OBJS): CPPFLAGS := $(GENERIC_CPPFLAGS)
 $(GENERIC_OBJS): ASMFLAGS := $(GENERIC_ASMFLAGS)
+$(GENERIC_OBJS): LOG_NAME := $(GENERIC_LOG_NAME)
 
-$(GENERIC_C_OBJS): $(GENERIC_OBJ_DIR)/%.o: %.c
-	@echo building $@
+$(GENERIC_C_OBJS): $(GENERIC_OBJ_DIR)/%.o: %.c $(GENERIC_SRCDEPS)
+	@$(call ECHO,$(LOG_NAME),building,$@)
 	@$(MKDIR)
 	$(NOECHO)$(CC) $(FLAGS) $(CFLAGS) -c $< -MMD -o $@
+	@$(call ECHO_DONE_SILENT,$(LOG_NAME),building,$@)
 
-$(GENERIC_CC_OBJS): $(GENERIC_OBJ_DIR)/%.o: %.cc
-	@echo building $@
+$(GENERIC_CC_OBJS): $(GENERIC_OBJ_DIR)/%.o: %.cc $(GENERIC_SRCDEPS)
+	@$(call ECHO,$(LOG_NAME),building,$@)
 	@$(MKDIR)
 	$(NOECHO)$(CC) $(FLAGS) $(CPPFLAGS) -c $< -MMD -o $@
+	@$(call ECHO_DONE_SILENT,$(LOG_NAME),building,$@)
 
-$(GENERIC_CPP_OBJS): $(GENERIC_OBJ_DIR)/%.o: %.cpp
-	@echo building $@
+$(GENERIC_CPP_OBJS): $(GENERIC_OBJ_DIR)/%.o: %.cpp $(GENERIC_SRCDEPS)
+	@$(call ECHO,$(LOG_NAME),building,$@)
 	@$(MKDIR)
 	$(NOECHO)$(CC) $(FLAGS) $(CPPFLAGS) -c $< -MMD -o $@
+	@$(call ECHO_DONE_SILENT,$(LOG_NAME),building,$@)
 
-$(GENERIC_ASM_OBJS): $(GENERIC_OBJ_DIR)/%.o: %.S
-	@echo building $@
+$(GENERIC_ASM_OBJS): $(GENERIC_OBJ_DIR)/%.o: %.S $(GENERIC_SRCDEPS)
+	@$(call ECHO,$(LOG_NAME),building,$@)
 	@$(MKDIR)
 	$(NOECHO)$(CC) $(FLAGS) $(ASMFLAGS) -c $< -MMD -o $@
-
-# Ensure recompilation on header file change.
--include $(GENERIC_OBJS:.o=.d)
+	@$(call ECHO_DONE_SILENT,$(LOG_NAME),building,$@)
 
 # Cleanup inputs
 GENERIC_CC :=
@@ -95,6 +98,8 @@
 GENERIC_CFLAGS :=
 GENERIC_CPPFLAGS :=
 GENERIC_ASMFLAGS :=
+GENERIC_SRCDEPS :=
+GENERIC_LOG_NAME :=
 # Cleanup internal
 GENERIC_C_SRCS :=
 GENERIC_C_OBJS :=
diff --git a/make/host_lib.mk b/make/host_lib.mk
index b304a36..16e95bb 100644
--- a/make/host_lib.mk
+++ b/make/host_lib.mk
@@ -30,6 +30,7 @@
 # HOST_LIB_NAME : name of the library (required)
 # HOST_LIB_SRCS : list of source files (required)
 # HOST_LIB_FLAGS : list of flags for the compiler
+# HOST_LIB_VARIANT : suffix for the host lib to support build variants
 # HOST_INCLUDE_DIRS : list of include directories that all of the host tool/test depends on
 
 # output
@@ -44,29 +45,44 @@
 $(error HOST_LIB_SRCS must be specified)
 endif
 
+# Build a static archive variant if requested
+ifeq (true, $(call TOBOOL,$(HOST_STATIC_LINK)))
+HOST_LIB_FLAGS += -static
+HOST_LIB_VARIANT += -static
+endif
+
+HOST_LIB_ARCHIVE := $(BUILDDIR)/host_libs/lib$(HOST_LIB_NAME)$(HOST_LIB_VARIANT).a
+
+# Guard against multiple rules for the same target, which produces make warnings
+ifndef HEADER_GUARD_HOST_LIB_$(BUILDDIR)_$(HOST_LIB_NAME)_$(HOST_LIB_VARIANT)
+HEADER_GUARD_HOST_LIB_$(BUILDDIR)_$(HOST_LIB_NAME)_$(HOST_LIB_VARIANT):=1
+
 # Compile library sources.
 GENERIC_CC := $(HOST_CC)
 GENERIC_SRCS := $(HOST_LIB_SRCS)
-GENERIC_OBJ_DIR := $(BUILDDIR)/host_libs/obj/$(HOST_LIB_NAME)
-GENERIC_FLAGS := $(HOST_LIB_FLAGS) -O1 -g -Wall -Wextra -Wno-unused-parameter -Werror $(HOST_SANITIZER_FLAGS) $(addprefix -I, $(HOST_INCLUDE_DIRS))
+GENERIC_OBJ_DIR := $(BUILDDIR)/host_libs/obj/$(HOST_LIB_NAME)$(HOST_LIB_VARIANT)
+GENERIC_FLAGS := -O1 -g -Wall -Wextra -Wno-unused-parameter -Werror $(HOST_SANITIZER_FLAGS) $(HOST_LIB_FLAGS) $(addprefix -I, $(HOST_INCLUDE_DIRS))
 GENERIC_CFLAGS := -std=c11 -D_POSIX_C_SOURCE=200809 -Wno-missing-field-initializers
-GENERIC_CPPFLAGS := -std=c++17 $(HOST_LIBCXX_CPPFLAGS)
+GENERIC_CPPFLAGS := -std=c++20 $(HOST_LIBCXX_CPPFLAGS)
+GENERIC_LOG_NAME := $(HOST_LIB_NAME)
 include make/generic_compile.mk
 
 # Build static library
-HOST_LIB_ARCHIVE := $(BUILDDIR)/host_libs/lib$(HOST_LIB_NAME).a
+$(HOST_LIB_ARCHIVE): HOST_LIB_NAME := $(HOST_LIB_NAME)
 $(HOST_LIB_ARCHIVE): $(GENERIC_OBJS)
-	@echo linking $@
+	@$(call ECHO,$(HOST_LIB_NAME),aring,$@)
 	@$(MKDIR)
 	$(NOECHO)$(AR) crs $@ $^
+	@$(call ECHO_DONE_SILENT,$(HOST_LIB_NAME),aring,$@)
 
+endif
 HOST_LIB_ARCHIVES += $(HOST_LIB_ARCHIVE)
 
 # cleanup input variables
 HOST_LIB_NAME :=
 HOST_LIB_SRCS :=
 HOST_LIB_FLAGS :=
+HOST_LIB_VARIANT :=
 # cleanup internal variables
 HOST_LIB_ARCHIVE :=
 GENERIC_OBJS :=
-
diff --git a/make/host_test.mk b/make/host_test.mk
index 3a7e166..e377d1d 100644
--- a/make/host_test.mk
+++ b/make/host_test.mk
@@ -84,19 +84,22 @@
 GENERIC_CC := $(HOST_CC)
 GENERIC_SRCS := $(HOST_SRCS)
 GENERIC_OBJ_DIR := $(HOST_TEST_BUILDDIR)/host_tests/obj/$(HOST_TEST)
-GENERIC_FLAGS := $(addprefix -I, $(HOST_INCLUDE_DIRS)) $(HOST_FLAGS) -O1 -g -Wall -Wextra -Wno-unused-parameter -Werror $(HOST_SANITIZER_FLAGS)
+GENERIC_FLAGS := -O1 -g -Wall -Wextra -Wno-unused-parameter -Werror $(HOST_SANITIZER_FLAGS) $(HOST_FLAGS) $(addprefix -I, $(HOST_INCLUDE_DIRS))
 GENERIC_CFLAGS := -std=c11 -D_POSIX_C_SOURCE=200809 -Wno-missing-field-initializers
-GENERIC_CPPFLAGS := -std=c++17 -Wno-c99-designator $(HOST_LIBCXX_CPPFLAGS)
+GENERIC_CPPFLAGS := -std=c++20 -Wno-c99-designator $(HOST_LIBCXX_CPPFLAGS)
+GENERIC_LOG_NAME := $(HOST_TEST)
 include make/generic_compile.mk
 
 # Link
 HOST_TEST_BIN := $(HOST_TEST_BUILDDIR)/host_tests/$(HOST_TEST)
 $(HOST_TEST_BIN): CC := $(HOST_CC)
 $(HOST_TEST_BIN): LDFLAGS := -g $(HOST_SANITIZER_FLAGS) $(HOST_LDFLAGS) $(HOST_LIBCXX_LDFLAGS) $(addprefix -l, $(HOST_LIBS))
+$(HOST_TEST_BIN): HOST_TEST := $(HOST_TEST)
 $(HOST_TEST_BIN): $(GENERIC_OBJS) $(HOST_LIB_ARCHIVES)
-	@echo linking $@
+	@$(call ECHO,$(HOST_TEST),linking,$@)
 	@$(MKDIR)
 	$(NOECHO)$(CC) $^ $(LDFLAGS) -o $@
+	@$(call ECHO_DONE_SILENT,$(HOST_TEST),linking,$@)
 
 # Build host test by default
 all:: $(HOST_TEST_BIN)
diff --git a/make/host_tool.mk b/make/host_tool.mk
index e1a276a..dbcf26a 100644
--- a/make/host_tool.mk
+++ b/make/host_tool.mk
@@ -30,6 +30,9 @@
 # HOST_LIBS : list of host-provided libraries to link against
 # HOST_DEPS : list of libraries to build and link against. Recursive
 #             dependencies are not supported.
+# HOST_SRCDEPS : extra source dependencies
+# HOST_STATIC_LINK : statically link the host tool
+# HOST_COVERAGE_ENABLED : true/false; enables LLVM source-based code coverage when true
 
 # Validate arguments.
 ifeq ($(HOST_TOOL_NAME), )
@@ -41,8 +44,18 @@
 endif
 
 HOST_CC := $(CLANG_BINDIR)/clang
-# ASAN is not compatable with GDB.
+
+ifeq (false, $(call TOBOOL,$(HOST_STATIC_LINK)))
+# ASAN is not compatible with GDB or static linking.
 HOST_SANITIZER_FLAGS := -fsanitize=address -fno-omit-frame-pointer
+else
+HOST_FLAGS += -static
+HOST_LDFLAGS += -static
+HOST_SANITIZER_FLAGS :=
+# b/319927400: There is a bug that causes a linker conflict if pthread is
+# linked _after_ libc.  Add pthread explicitly to avoid this possibility.
+HOST_LDFLAGS += -lpthread
+endif
 
 # We should use the prebuilt linker rather than the host linker
 HOST_LDFLAGS += -B$(CLANG_BINDIR) -fuse-ld=lld
@@ -65,6 +78,18 @@
 
 HOST_INCLUDE_DIRS += $(GLOBAL_UAPI_INCLUDES) $(GLOBAL_SHARED_INCLUDES) $(GLOBAL_USER_INCLUDES)
 
+# Enable LLVM Source-based Code Coverage
+# https://clang.llvm.org/docs/SourceBasedCodeCoverage.html
+ifeq (true,$(call TOBOOL,$(HOST_COVERAGE_ENABLED)))
+HOST_FLAGS += \
+	-fprofile-instr-generate=$(HOST_TOOL_NAME).profraw \
+	-fcoverage-mapping
+
+HOST_LDFLAGS += \
+	-fprofile-instr-generate=$(HOST_TOOL_NAME).profraw \
+	-fcoverage-mapping
+endif
+
 # Compile tool library dependencies
 HOST_LIB_ARCHIVES :=
 include $(addsuffix /rules.mk, $(HOST_DEPS))
@@ -73,19 +98,23 @@
 GENERIC_CC := $(HOST_CC)
 GENERIC_SRCS := $(HOST_SRCS)
 GENERIC_OBJ_DIR := $(BUILDDIR)/host_tools/obj/$(HOST_TOOL_NAME)
-GENERIC_FLAGS := $(addprefix -I, $(HOST_INCLUDE_DIRS)) $(HOST_FLAGS) -O1 -g -Wall -Wextra -Wno-unused-parameter -Werror $(HOST_SANITIZER_FLAGS)
+GENERIC_FLAGS := -O1 -g -Wall -Wextra -Wno-unused-parameter -Werror $(HOST_SANITIZER_FLAGS) $(HOST_FLAGS) $(addprefix -I, $(HOST_INCLUDE_DIRS))
 GENERIC_CFLAGS := -std=c11 -D_POSIX_C_SOURCE=200809 -Wno-missing-field-initializers
-GENERIC_CPPFLAGS := -std=c++17 $(HOST_LIBCXX_CPPFLAGS)
+GENERIC_CPPFLAGS := -std=c++20 $(HOST_LIBCXX_CPPFLAGS)
+GENERIC_SRCDEPS := $(HOST_SRCDEPS)
+GENERIC_LOG_NAME := $(HOST_TOOL_NAME)
 include make/generic_compile.mk
 
 # Link
 HOST_TOOL_BIN := $(BUILDDIR)/host_tools/$(HOST_TOOL_NAME)
 $(HOST_TOOL_BIN): CC := $(HOST_CC)
 $(HOST_TOOL_BIN): LDFLAGS := -g $(HOST_SANITIZER_FLAGS) $(HOST_LDFLAGS) $(HOST_LIBCXX_LDFLAGS) $(addprefix -l, $(HOST_LIBS))
+$(HOST_TOOL_BIN): HOST_TOOL_NAME := $(HOST_TOOL_NAME)
 $(HOST_TOOL_BIN): $(GENERIC_OBJS) $(HOST_LIB_ARCHIVES)
-	@echo linking $@
+	@$(call ECHO,$(HOST_TOOL_NAME),linking,$@)
 	@$(MKDIR)
 	$(NOECHO)$(CC) $^ $(LDFLAGS) -o $@
+	@$(call ECHO_DONE_SILENT,$(HOST_TOOL_NAME),linking,$@)
 
 EXTRA_BUILDDEPS += $(HOST_TOOL_BIN)
 
@@ -97,6 +126,8 @@
 HOST_LDFLAGS :=
 HOST_LIBS :=
 HOST_DEPS :=
+HOST_SRCDEPS :=
+HOST_STATIC_LINK :=
 # Cleanup internal
 HOST_CC :=
 HOST_SANITIZER_FLAGS :=
diff --git a/make/loadable_app.mk b/make/loadable_app.mk
index a2e9054..e2ec170 100644
--- a/make/loadable_app.mk
+++ b/make/loadable_app.mk
@@ -53,10 +53,12 @@
 LOADABLE_APP := $(patsubst %.elf,%.app,$(APP_ELF))
 
 $(INITIAL_APP): LOADABLE_APP_TOOL := $(LOADABLE_APP_TOOL)
+$(INITIAL_APP): LOG_NAME := $(APP_TOP_MODULE)
 $(INITIAL_APP): $(APP_ELF) $(APP_MANIFEST) $(LOADABLE_APP_TOOL)
 	@$(MKDIR)
-	@echo building $@ from $<
+	@$(call ECHO,$(LOG_NAME),building app,$@)
 	$(NOECHO)$(LOADABLE_APP_TOOL) -m build $@ $< $(word 2,$^)
+	@$(call ECHO_DONE_SILENT,$(LOG_NAME),building app,$@)
 
 ifneq ($(APPLOADER_ENCRYPT_KEY_ID_FOR_$(APP_TOP_MODULE)),)
 APP_ENCRYPT_KEY_ID := $(APPLOADER_ENCRYPT_KEY_ID_FOR_$(APP_TOP_MODULE))
@@ -69,11 +71,13 @@
 $(ENCRYPTED_APP): LOADABLE_APP_TOOL := $(LOADABLE_APP_TOOL)
 $(ENCRYPTED_APP): APP_ENCRYPT_KEY_FILE := $(APP_ENCRYPT_KEY_FILE)
 $(ENCRYPTED_APP): APP_ENCRYPT_KEY_ID := $(APP_ENCRYPT_KEY_ID)
+$(ENCRYPTED_APP): LOG_NAME := $(APP_TOP_MODULE)
 $(ENCRYPTED_APP): $(INITIAL_APP) $(APP_ENCRYPT_KEY_FILE) $(LOADABLE_APP_TOOL)
 	@$(MKDIR)
-	@echo building $@ from $<
+	@$(call ECHO,$(LOG_NAME),building app,$@)
 	$(NOECHO)$(LOADABLE_APP_TOOL) -m encrypt $@ $< \
 		$(APP_ENCRYPT_KEY_FILE) $(APP_ENCRYPT_KEY_ID)
+	@$(call ECHO_DONE_SILENT,$(LOG_NAME),building app,$@)
 
 UNSIGNED_APP := $(ENCRYPTED_APP)
 else
@@ -96,20 +100,24 @@
 $(LOADABLE_APP): LOADABLE_APP_TOOL := $(LOADABLE_APP_TOOL)
 $(LOADABLE_APP): APP_SIGN_KEY_FILE := $(APP_SIGN_KEY_FILE)
 $(LOADABLE_APP): APP_SIGN_KEY_ID := $(APP_SIGN_KEY_ID)
+$(LOADABLE_APP): LOG_NAME := $(APP_TOP_MODULE)
 $(LOADABLE_APP): $(UNSIGNED_APP) $(APP_SIGN_KEY_FILE) $(LOADABLE_APP_TOOL)
 	@$(MKDIR)
-	@echo building $@ from $<
+	@$(call ECHO,$(LOG_NAME),building app,$@)
 	$(NOECHO)$(LOADABLE_APP_TOOL) -m sign $@ $< \
 		$(APP_SIGN_KEY_FILE) $(APP_SIGN_KEY_ID)
+	@$(call ECHO_DONE_SILENT,$(LOG_NAME),building app,$@)
 else
 # If we don't have a signature file, just use the unsigned file as the output
 # This is needed because modules that import loadable apps, e.g.,
 # app-mgmt-test, need the app files to exist
 # Note: apploader will refuse to load the unsigned application
+$(LOADABLE_APP): LOG_NAME := $(APP_TOP_MODULE)
 $(LOADABLE_APP): $(UNSIGNED_APP)
 	@$(MKDIR)
-	@echo copying $< to $@
+	@$(call ECHO,$(LOG_NAME),building app,$@)
 	@cp $< $@
+	@$(call ECHO_DONE_SILENT,$(LOG_NAME),building app,$@)
 
 $(warning Loadable application is not signed: $(LOADABLE_APP))
 endif
diff --git a/platform/generic-arm64/platform.c b/platform/generic-arm64/platform.c
index f36d61c..22dff25 100644
--- a/platform/generic-arm64/platform.c
+++ b/platform/generic-arm64/platform.c
@@ -53,7 +53,7 @@
 #if GIC_VERSION < 4
 #define GICR_SIZE (0x20000 * SMP_MAX_CPUS)
 #else
-#define GICR_SIZE (0x30000 * SMP_MAX_CPUS)
+#define GICR_SIZE (0x40000 * SMP_MAX_CPUS)
 #endif
 #endif
 
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..cefaa42
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1,5 @@
+# Android Format Style
+
+edition = "2021"
+use_small_heuristics = "Max"
+newline_style = "Unix"
diff --git a/services/smc/smc_service.c b/services/smc/smc_service.c
index 247fa3a..8f1a5b2 100644
--- a/services/smc/smc_service.c
+++ b/services/smc/smc_service.c
@@ -93,7 +93,7 @@
     struct smc_channel_ctx* channel_ctx = ctx;
     int rc;
     struct smc_msg request;
-    struct smc_msg response = {0};
+    struct smc_response response = {0};
     struct smc_regs ret;
     uint32_t smc_nr;
 
@@ -108,7 +108,13 @@
     if (rc != NO_ERROR) {
         TRACEF("%s: failed (%d) client not allowed to call SMC number %x\n",
                __func__, rc, smc_nr);
-        response.params[0] = (ulong)ERR_ACCESS_DENIED;
+        /*
+         * Callers of smc_read_response should not consume the struct smc_msg
+         * out parameter if the status code is negative, but we write the error
+         * code into the message anyway out of an overabundance of caution.
+         */
+        response.msg.params[0] = (ulong)ERR_ACCESS_DENIED;
+        response.rc = ERR_ACCESS_DENIED;
         goto send_response;
     }
 
@@ -117,7 +123,9 @@
     if (rc != NO_ERROR) {
         TRACEF("%s: failed (%d) invalid request for SMC number %x\n", __func__,
                rc, smc_nr);
-        response.params[0] = (ulong)ERR_INVALID_ARGS;
+        /* same reasoning as the ERR_ACCESS_DENIED case above */
+        response.msg.params[0] = (ulong)ERR_INVALID_ARGS;
+        response.rc = ERR_INVALID_ARGS;
         goto send_response;
     }
 
@@ -129,10 +137,15 @@
     };
     ret = smc(&args);
 
-    response.params[0] = ret.r0;
-    response.params[1] = ret.r1;
-    response.params[2] = ret.r2;
-    response.params[3] = ret.r3;
+    response.msg.params[0] = ret.r0;
+    response.msg.params[1] = ret.r1;
+    response.msg.params[2] = ret.r2;
+    response.msg.params[3] = ret.r3;
+
+    if ((int32_t)ret.r0 == SM_ERR_UNDEFINED_SMC) {
+        TRACEF("%s: unknown or failed smcall: %x\n", __func__, smc_nr);
+        response.rc = ERR_GENERIC;
+    }
 
 send_response:
     rc = ktipc_send(channel, &response, sizeof(response));
@@ -183,7 +196,7 @@
 const static struct ktipc_port smc_service_port = {
         .name = SMC_SERVICE_PORT,
         .uuid = &kernel_uuid,
-        .msg_max_size = sizeof(struct smc_msg),
+        .msg_max_size = sizeof(struct smc_response),
         .msg_queue_len = 1,
         .acl = &smc_service_port_acl,
         .priv = NULL,