libbrillo: lock SecureAllocator object memory

In order to make sure that the SecureAllocator doesn't leak
memory to other processes or get swapped out, do the following:

 1. Use page-aligned memory so that it can be locked (therefore, use mmap()
    instead of malloc()).

 2. Always allocate memory in multiples of pages: this adds a memory overhead
    of ~1 page for each object. Moreover, the extra memory is not available
    for the allocated object to expand into: the container expects that the
    memory allocated to it matches the size set in reserve().

 3. Mark the pages as undumpable and unmergeable. We also use MADV_WIPEONFORK
    to allow child processes to function normally: otherwise they would either
    leak secure memory through copy-on-write (MADV_DOFORK) or segfault when
    touching secure memory mappings that were not copied to the forked process
    (MADV_DONTFORK). MADV_WIPEONFORK instead wipes the anonymous private
    mapping in the forked process and ensures that the forked process cannot
    access the secure object.

    [This CL depends on MADV_WIPEONFORK backport to all kernels < 4.14.]
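
A minimal sketch of the hardening sequence above (simplified from the
allocator in this CL; error handling and the allocator plumbing are
omitted):

  // Allocate a page-multiple, page-aligned buffer and harden the mapping.
  size_t aligned_size = 2 * 4096;  // must be a multiple of the page size
  void* p = mmap(nullptr, aligned_size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  mlock(p, aligned_size);                      // keep out of swap
  madvise(p, aligned_size, MADV_DONTDUMP);     // keep out of core dumps
  madvise(p, aligned_size, MADV_UNMERGEABLE);  // keep out of KSM merging
  madvise(p, aligned_size, MADV_WIPEONFORK);   // reads as zeros in children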

BUG=chromium:728047
TEST=unittests.

Cq-Depend: chromium:1372458,chromium:1372580,chromium:1372369
Cq-Depend: chromium:1372809,chromium:1372808,chromium:1504816
Change-Id: Ibb67e3c9582cb79905ad5d52b1843814266cd33d
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform2/+/1340437
Tested-by: Sarthak Kukreti <sarthakkukreti@chromium.org>
Commit-Queue: Sarthak Kukreti <sarthakkukreti@chromium.org>
Reviewed-by: Eric Caruso <ejcaruso@chromium.org>
Reviewed-by: Gwendal Grignou <gwendal@chromium.org>
Cr-Mirrored-From: https://chromium.googlesource.com/chromiumos/platform2
Cr-Mirrored-Commit: d0b5f425c2c2868307bb27bbab11aef8199810b2
diff --git a/brillo/secure_allocator.h b/brillo/secure_allocator.h
index 0cbb8d9..9f05ded 100644
--- a/brillo/secure_allocator.h
+++ b/brillo/secure_allocator.h
@@ -5,13 +5,51 @@
 #ifndef LIBBRILLO_BRILLO_SECURE_ALLOCATOR_H_
 #define LIBBRILLO_BRILLO_SECURE_ALLOCATOR_H_
 
+#include <errno.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <limits>
 #include <memory>
 
+#include <base/callback_helpers.h>
+#include <base/logging.h>
 #include <brillo/brillo_export.h>
 
 namespace brillo {
 // SecureAllocator is a stateless derivation of std::allocator that clears
-// the contents of the object on deallocation.
+// the contents of the object on deallocation. Additionally, to prevent the
+// memory from being leaked, we use the following defensive mechanisms:
+//
+// 1. Use page-aligned memory so that it can be locked (therefore, use mmap()
+//    instead of malloc()). Note that mlock()s are not inherited over fork().
+//
+// 2. Always allocate memory in multiples of pages: this adds a memory overhead
+//    of ~1 page for each object. Moreover, the extra memory is not available
+//    for the allocated object to expand into: the container expects that the
+//    memory allocated to it matches the size set in reserve().
+// TODO(sarthakkukreti): Figure out if it is possible to propagate the real
+// capacity to the container without an intrusive change to the STL.
+// [Example: allow __recommend() override in allocators for containers.]
+//
+// 3. Mark the memory segments as undumpable and unmergeable.
+//
+// 4. Use MADV_WIPEONFORK:
+//    this results in a new anonymous vma instead of copying over the contents
+//    of the secure object after a fork(). By default [MADV_DOFORK], the vma
+//    is marked as copy-on-write, and the first process that writes to the
+//    secure object after fork() gets a new copy. This may break the security
+//    guarantees set up above. An alternative is to use MADV_DONTFORK, which
+//    results in the memory mapping not being copied to the child process at
+//    all: the child process would then get segmentation faults on attempts
+//    to access virtual addresses in the secure object's address range.
+//
+//    With MADV_WIPEONFORK, the child processes can access the secure object
+//    memory safely, but the contents of the secure object appear as zero to
+//    the child process. Note that threads share the virtual address space,
+//    so secure objects remain accessible across threads.
+// TODO(sarthakkukreti): Figure out patterns to pass secure data over fork().
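+//
+// Illustrative fork() behavior under MADV_WIPEONFORK (hypothetical usage,
+// not part of this API):
+//
+//   brillo::SecureBlob secret(...);  // backed by SecureAllocator pages
+//   if (fork() == 0) {
+//     // Child: the mapping exists, so access does not fault, but every
+//     // byte of the secure object's storage reads back as zero.
+//   }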
 template <typename T>
 class BRILLO_PRIVATE SecureAllocator : public std::allocator<T> {
  public:
@@ -19,21 +57,113 @@
   using typename std::allocator<T>::size_type;
   using typename std::allocator<T>::value_type;
 
-  // Implicit std::allocator constructors.
+  // Constructors that wrap over std::allocator.
+  // Make sure that the allocator's static members are only initialized once.
+  SecureAllocator() noexcept : std::allocator<T>() {}
+  SecureAllocator(const SecureAllocator& other) noexcept
+      : std::allocator<T>(other) {}
+
+  template <class U>
+  SecureAllocator(const SecureAllocator<U>& other) noexcept
+      : std::allocator<T>(other) {}
 
   template <typename U> struct rebind {
     typedef SecureAllocator<U> other;
   };
 
-  // Allocation/deallocation: use the std::allocation functions but make sure
-  // that on deallocation, the contents of the element are cleared out.
-  pointer allocate(size_type  n, pointer = {}) {
-    return std::allocator<T>::allocate(n);
+  // Return cached max_size. Deprecated in C++17, removed in C++20.
+  size_type max_size() const { return max_size_; }
+
+  // Allocation: allocate ceil(size/pagesize) for holding the data.
+  pointer allocate(size_type n, pointer hint = nullptr) {
+    pointer buffer = nullptr;
+    // Check if n can be theoretically allocated.
+    CHECK_LT(n, max_size());
+
+    // If n == 0, there is nothing to allocate.
+    if (n == 0)
+      return nullptr;
+
+    // Calculate the page-aligned buffer size.
+    size_type buffer_size = CalculatePageAlignedBufferSize(n);
+
+    // Memory locking granularity is per-page: mmap ceil(size/page size) pages.
+    buffer = reinterpret_cast<pointer>(
+        mmap(nullptr, buffer_size, PROT_READ | PROT_WRITE,
+             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+    if (buffer == MAP_FAILED)
+      return nullptr;
+
+    // Lock buffer into physical memory.
+    if (mlock(buffer, buffer_size)) {
+      munmap(buffer, buffer_size);
+      return nullptr;
+    }
+
+    // Deallocate buffer on the return path if any operation fails.
+    base::ScopedClosureRunner buffer_cleanup_closure_runner(
+        base::Bind(&SecureAllocator<T>::deallocate, base::Unretained(this),
+                   buffer, buffer_size));
+
+    // Mark memory as non-dumpable so it is excluded from core dumps.
+    if (madvise(buffer, buffer_size, MADV_DONTDUMP))
+      return nullptr;
+
+    // Mark memory as non-mergeable with another page, even if the contents
+    // are the same.
+    if (madvise(buffer, buffer_size, MADV_UNMERGEABLE)) {
+      // MADV_UNMERGEABLE is only available if the kernel has been configured
+      // with CONFIG_KSM set. If the CONFIG_KSM flag has not been set, then
+      // pages are not mergeable so this madvise option is not necessary.
+      //
+      // If CONFIG_KSM is not set, madvise() fails with EINVAL. Since this
+      // error value is expected in some cases, we don't return a nullptr.
+      if (errno != EINVAL)
+        return nullptr;
+    }
+
+    // Make this mapping available to child processes but don't copy data from
+    // the secure object's pages during fork. With MADV_DONTFORK, the
+    // vma is not mapped in the child process which leads to segmentation
+    // faults if the child process tries to access this address: for example,
+    // if the parent process creates a secure object, fork()s, and the child
+    // process tries to call the destructor at that virtual address.
+    if (madvise(buffer, buffer_size, MADV_WIPEONFORK))
+      return nullptr;
+
+    ignore_result(buffer_cleanup_closure_runner.Release());
+
+    // Allocation was successful.
+    return buffer;
+  }
+
+  // Destroy object before deallocation. Deprecated in C++17, removed in C++20.
+  // After destroying the object, clear the contents of where the object was
+  // stored.
+  template <class U>
+  void destroy(U* p) {
+    // Nothing to destroy if the pointer is null.
+    if (!p)
+      return;
+    std::allocator<U>::destroy(p);
+    clear_contents(p, sizeof(U));
   }
 
   virtual void deallocate(pointer p, size_type n) {
-    clear_contents(p, n * sizeof(value_type));
-    std::allocator<T>::deallocate(p, n);
+    // Check if n can be theoretically deallocated.
+    CHECK_LT(n, max_size());
+
+    // If n == 0 or p is null, there is nothing to deallocate.
+    if (n == 0 || !p)
+      return;
+
+    // Calculate the page-aligned buffer size.
+    size_type buffer_size = CalculatePageAlignedBufferSize(n);
+
+    clear_contents(p, buffer_size);
+    munlock(p, buffer_size);
+    munmap(p, buffer_size);
   }
 
  protected:
@@ -54,8 +184,53 @@
   }
 
 #undef __attribute_no_opt
+
+ private:
+  // Calculate the page-aligned buffer size.
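+  // Worked example (assuming a 4096-byte page): for n = 4100 elements of
+  // uint64_t, real_size = 32800, page_aligned_remainder = 32800 % 4096 = 32,
+  // padding = 4096 - 32 = 4064, and the returned buffer size is
+  // 32800 + 4064 = 36864 bytes, i.e. 9 whole pages.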
+  size_t CalculatePageAlignedBufferSize(size_type n) {
+    size_type real_size = n * sizeof(value_type);
+    size_type page_aligned_remainder = real_size % page_size_;
+    size_type padding =
+        page_aligned_remainder != 0 ? page_size_ - page_aligned_remainder : 0;
+    return real_size + padding;
+  }
+
+  static size_t CalculatePageSize() {
+    long ret = sysconf(_SC_PAGESIZE);  // NOLINT [runtime/int]
+
+    // sysconf() returns -1 on failure; the page size must be positive.
+    CHECK_GT(ret, 0L);
+    return ret;
+  }
+
+  // Since the allocator uses the page size and max size repeatedly, compute
+  // these values once and cache them.
+  static size_t GetMaxSizeForType(size_t page_size) {
+    // Calculate the maximum page-aligned size that can theoretically be
+    // allocated, then convert it to a count of value_type elements.
+    size_t max_theoretical_size = std::numeric_limits<size_type>::max();
+    size_t max_page_aligned_size =
+        max_theoretical_size - (max_theoretical_size % page_size);
+
+    return max_page_aligned_size / sizeof(value_type);
+  }
+
+  // Page size on system.
+  static const size_type page_size_;
+  // Max number of elements of this type that can theoretically be allocated.
+  static const size_type max_size_;
 };
 
+// In-class initialization is only allowed for static const members with
+// integral constexpr initializers; define SecureAllocator's static members
+// out of class here.
+template <typename T>
+const typename SecureAllocator<T>::size_type SecureAllocator<T>::page_size_ =
+    SecureAllocator<T>::CalculatePageSize();
+
+template <typename T>
+const typename SecureAllocator<T>::size_type SecureAllocator<T>::max_size_ =
+    SecureAllocator<T>::GetMaxSizeForType(SecureAllocator<T>::page_size_);
+
 }  // namespace brillo
 
 #endif  // LIBBRILLO_BRILLO_SECURE_ALLOCATOR_H_
diff --git a/brillo/secure_blob.h b/brillo/secure_blob.h
index e06646d..8099c56 100644
--- a/brillo/secure_blob.h
+++ b/brillo/secure_blob.h
@@ -16,6 +16,9 @@
 namespace brillo {
 
 using Blob = std::vector<uint8_t>;
+
+// Define types based on the SecureAllocator for instantiation.
+// ------------------------------------------------------------
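+//
+// Illustrative usage of the SecureVector type defined below (hypothetical
+// example, not part of this CL):
+//
+//   brillo::SecureVector key(32);  // storage is mlock()ed, excluded from
+//                                  // core dumps, and zeroed on deallocation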
 // Define SecureVector as a vector using a SecureAllocator.
 // Over time, the goal is to remove the differentiating functions
 // from SecureBlob (to_string(), char_data()) till it converges with
diff --git a/brillo/secure_blob_test.cc b/brillo/secure_blob_test.cc
index 75f3cfb..7242e86 100644
--- a/brillo/secure_blob_test.cc
+++ b/brillo/secure_blob_test.cc
@@ -234,16 +234,18 @@
  public:
   using typename SecureAllocator<T>::pointer;
   using typename SecureAllocator<T>::size_type;
+  using typename SecureAllocator<T>::value_type;
 
   int GetErasedCount() { return erased_count; }
 
  protected:
   void clear_contents(pointer p, size_type n) override {
     SecureAllocator<T>::clear_contents(p, n);
+    unsigned char* v = reinterpret_cast<unsigned char*>(p);
     for (int i = 0; i < n; i++) {
-      EXPECT_EQ(p[i], 0);
+      EXPECT_EQ(v[i], 0);
+      erased_count++;
     }
-    erased_count++;
   }
 
  private:
@@ -259,7 +261,54 @@
 
   // Deallocate memory; the mock class should check for cleared data.
   e.deallocate(test_string_addr, 15);
-  EXPECT_EQ(e.GetErasedCount(), 1);
+  // The deallocation should have traversed the complete page.
+  EXPECT_EQ(e.GetErasedCount(), 4096);
 }
 
+TEST(SecureAllocator, MultiPageCorrectness) {
+  // Make sure that the contents are cleared on deallocation.
+  TestSecureAllocator<uint64_t> e;
+
+  // Allocate 4100*8 bytes.
+  uint64_t *test_array = e.allocate(4100);
+
+  // Check that the space was correctly allocated for uint64_t.
+  for (int i = 0; i < 4100; i++)
+      test_array[i] = 0xF0F0F0F0F0F0F0F0;
+
+  // Deallocate memory; the mock class should check for cleared data.
+  e.deallocate(test_array, 4100);
+  // 36864 bytes (9 pages) is the smallest multiple of the page size that
+  // fits the 4100 * 8 = 32800 bytes requested.
+  EXPECT_EQ(e.GetErasedCount(), 36864);
+}
+
+// Death tests fork a new process and check how it proceeds. Take advantage
+// of this to check whether the contents of a SecureBlob are passed on to
+// forked children.
+#if GTEST_IS_THREADSAFE
+// Check if the contents of the container are zeroed out.
+void CheckPropagationOnFork(const brillo::SecureBlob& forked_blob,
+                            const Blob& reference) {
+  LOG(INFO) << forked_blob.to_string();
+  for (int i = 0; i < forked_blob.size(); i++) {
+    CHECK_NE(reference[i], forked_blob[i]);
+    CHECK_EQ(forked_blob[i], 0);
+  }
+  exit(0);
+}
+
+TEST(SecureAllocatorDeathTest, ErasureOnFork) {
+  Blob reference = BlobFromString("Test String");
+  SecureBlob erasable_blob(reference.begin(), reference.end());
+
+  EXPECT_EXIT(CheckPropagationOnFork(erasable_blob, reference),
+              ::testing::ExitedWithCode(0), "");
+
+  // In the original process, check that the SecureBlob has not changed.
+  for (int i = 0; i < erasable_blob.size(); i++)
+    EXPECT_EQ(erasable_blob[i], reference[i]);
+}
+#endif  // GTEST_IS_THREADSAFE
+
 }  // namespace brillo