ART: Fast copy stack mask

StackMap::SetStackMask currently copies a BitVector into a
MemoryRegion bit by bit. This patch adds BitVector::CopyTo, which
copies the data with memcpy, and uses it in SetStackMask.

This is a resubmission of CL I28d45a590b35a4a854cca2f57db864cf8a081487,
with a fix for a broken test that it revealed.

Change-Id: Ib65aa614d3ab7b5c99c6719fdc8e436466a4213d
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 98e14ea..666fb60 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -136,7 +136,7 @@
 
   ArenaBitVector sp_mask2(&arena, 0, true);
   sp_mask2.SetBit(3);
-  sp_mask1.SetBit(8);
+  sp_mask2.SetBit(8);
   stream.BeginStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0);
   stream.AddDexRegisterEntry(Kind::kInRegister, 18);     // Short location.
   stream.AddDexRegisterEntry(Kind::kInFpuRegister, 3);   // Short location.
@@ -148,7 +148,7 @@
   stream.FillIn(region);
 
   CodeInfo code_info(region);
-  ASSERT_EQ(1u, code_info.GetStackMaskSize());
+  ASSERT_EQ(2u, code_info.GetStackMaskSize());
   ASSERT_EQ(2u, code_info.GetNumberOfStackMaps());
 
   uint32_t number_of_location_catalog_entries =
diff --git a/runtime/base/bit_vector.h b/runtime/base/bit_vector.h
index 17835f5..afa8dc1 100644
--- a/runtime/base/bit_vector.h
+++ b/runtime/base/bit_vector.h
@@ -21,6 +21,7 @@
 #include <iterator>
 
 #include "base/bit_utils.h"
+#include "globals.h"
 
 namespace art {
 
@@ -229,6 +230,19 @@
   // Number of bits set in range [0, end) in storage. (No range check.)
   static uint32_t NumSetBits(const uint32_t* storage, uint32_t end);
 
+  // Fill `len` bytes at `dst` from storage, zero-padding the tail. All set bits must fit in `dst`.
+  void CopyTo(void* dst, size_t len) const {
+    DCHECK_LE(static_cast<size_t>(GetHighestBitSet() + 1), len * kBitsPerByte);
+    size_t vec_len = GetSizeOf();
+    if (vec_len < len) {  // Storage is shorter than `dst`: copy all of it, then zero the rest.
+      void* dst_padding = reinterpret_cast<uint8_t*>(dst) + vec_len;
+      memcpy(dst, storage_, vec_len);
+      memset(dst_padding, 0, len - vec_len);
+    } else {  // Storage covers `dst`: copy only the first `len` bytes.
+      memcpy(dst, storage_, len);
+    }
+  }
+
   void Dump(std::ostream& os, const char* prefix) const;
 
  private:
diff --git a/runtime/base/bit_vector_test.cc b/runtime/base/bit_vector_test.cc
index c51b9b0..19c01f2 100644
--- a/runtime/base/bit_vector_test.cc
+++ b/runtime/base/bit_vector_test.cc
@@ -211,4 +211,62 @@
   }
 }
 
+TEST(BitVector, CopyTo) {
+  {
+    // Test copying an empty BitVector. Padding should fill `buf` with zeroes.
+    BitVector bv(0, true, Allocator::GetMallocAllocator());
+    uint32_t buf;
+
+    bv.CopyTo(&buf, sizeof(buf));
+    EXPECT_EQ(0u, bv.GetSizeOf());
+    EXPECT_EQ(0u, buf);
+  }
+
+  {
+    // Test copying when `bv.storage_` and `buf` are of equal lengths.
+    BitVector bv(0, true, Allocator::GetMallocAllocator());
+    uint32_t buf;
+
+    bv.SetBit(0);
+    bv.SetBit(17);
+    bv.SetBit(26);
+    EXPECT_EQ(sizeof(buf), bv.GetSizeOf());
+
+    bv.CopyTo(&buf, sizeof(buf));
+    EXPECT_EQ(0x04020001u, buf);  // Bits 0, 17 and 26.
+  }
+
+  {
+    // Test copying when the `bv.storage_` is longer than `buf`. As long as
+    // `buf` is long enough to hold all set bits, copying should succeed.
+    BitVector bv(0, true, Allocator::GetMallocAllocator());
+    uint8_t buf[5];
+
+    bv.SetBit(18);
+    bv.SetBit(39);
+    EXPECT_LT(sizeof(buf), bv.GetSizeOf());
+
+    bv.CopyTo(buf, sizeof(buf));
+    EXPECT_EQ(0x00u, buf[0]);
+    EXPECT_EQ(0x00u, buf[1]);
+    EXPECT_EQ(0x04u, buf[2]);  // Bit 18 is bit 2 of byte 2.
+    EXPECT_EQ(0x00u, buf[3]);
+    EXPECT_EQ(0x80u, buf[4]);  // Bit 39 is bit 7 of byte 4.
+  }
+
+  {
+    // Test zero padding when `bv.storage_` is shorter than `buf`.
+    BitVector bv(0, true, Allocator::GetMallocAllocator());
+    uint32_t buf[2];
+
+    bv.SetBit(18);
+    bv.SetBit(31);
+    EXPECT_GT(sizeof(buf), bv.GetSizeOf());
+
+    bv.CopyTo(buf, sizeof(buf));
+    EXPECT_EQ(0x80040000U, buf[0]);  // Bits 18 and 31.
+    EXPECT_EQ(0x00000000U, buf[1]);  // Zero padding past the end of the vector's storage.
+  }
+}
+
 }  // namespace art
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index eefdaa7..ba0b6d6 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -671,9 +671,7 @@
 
   void SetStackMask(const CodeInfo& info, const BitVector& sp_map) {
     MemoryRegion region = GetStackMask(info);
-    for (size_t i = 0; i < region.size_in_bits(); i++) {
-      region.StoreBit(i, sp_map.IsBitSet(i));
-    }
+    sp_map.CopyTo(region.start(), region.size());  // Bulk copy; zero-pads a shorter sp_map.
   }
 
   bool HasDexRegisterMap(const CodeInfo& info) const {