VIXL Release 1.6

Refer to the README.md and LICENCE files for details.

Change-Id: Ieefe83cf5cf5e1ab8c924b0e7dc03af6a55053ae
Signed-off-by: Alexandre Rames <alexandre.rames@arm.com>
diff --git a/.ycm_extra_conf.py b/.ycm_extra_conf.py
new file mode 100644
index 0000000..0525e4f
--- /dev/null
+++ b/.ycm_extra_conf.py
@@ -0,0 +1,35 @@
+# Vim YouCompleteMe completion configuration.
+#
+# See doc/topics/ycm.md for details.
+
+import os
+import platform
+
+repo_root = os.path.dirname(os.path.abspath(__file__))
+
+# Paths in the compilation flags must be absolute to allow ycm to find them from
+# any working directory.
+def AbsolutePath(path):
+  return os.path.join(repo_root, path)
+
+flags = [
+  '-I', AbsolutePath('src'),
+  '-Wall',
+  '-Werror',
+  '-Wextra',
+  '-pedantic',
+  '-Wwrite-strings',
+  '-std=c++',
+  '-x', 'c++'
+]
+
+if platform.machine() != 'aarch64':
+  flags.append('-DUSE_SIMULATOR')
+
+
+def FlagsForFile(filename, **kwargs):
+  return {
+    'flags': flags,
+    'do_cache': True
+  }
+
diff --git a/Android.mk b/Android.mk
index 5ebdf78..018337b 100644
--- a/Android.mk
+++ b/Android.mk
@@ -58,6 +58,7 @@
 
 vixl_src_files := \
 	src/a64/assembler-a64.cc \
+	src/code-buffer.cc  \
 	src/a64/cpu-a64.cc \
 	src/a64/debugger-a64.cc \
 	src/a64/decoder-a64.cc \
diff --git a/README.md b/README.md
index 19f2ffd..d4baa86 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-VIXL: AArch64 Runtime Code Generation Library Version 1.5
+VIXL: AArch64 Runtime Code Generation Library Version 1.6
 =========================================================
 
 Contents:
@@ -150,7 +150,17 @@
 A short introduction to using VIXL can be found [here](doc/getting-started.md).
 Example source code is provided in the `examples` directory. You can build all
 the examples with `scons examples` from the root directory, or use
-`scons list_targets=yes` to get a detailed list of available build targets.
+`scons --help` to get a detailed list of available build targets.
+
+
+Using VIXL
+----------
+
+In addition to the [getting started](doc/getting-started.md) page and the
+examples, you can find documentation and guides on various topics that may be
+of help [here](doc/topics/index.md).
+
+
 
 
 
diff --git a/SConstruct b/SConstruct
index ad0f899..4b9ab8e 100644
--- a/SConstruct
+++ b/SConstruct
@@ -26,6 +26,7 @@
 
 import os
 import os.path
+import platform
 import subprocess
 import sys
 
@@ -33,10 +34,24 @@
 sys.path.insert(0, os.path.join(root_dir, 'tools'))
 import util
 
+
+Help('''
+Build system for the VIXL project.
+See README.md for documentation and details about the build system.
+Some common build targets are:
+    scons            # Build the VIXL library and test utility.
+    scons examples   # Build all the examples.
+    scons benchmarks # Build all the benchmarks.
+    scons all        # Build everything.
+
+''')
+
+
 # Global configuration.
 PROJ_SRC_DIR   = 'src'
 PROJ_SRC_FILES = '''
 src/utils-vixl.cc
+src/code-buffer.cc
 src/a64/assembler-a64.cc
 src/a64/macro-assembler-a64.cc
 src/a64/instructions-a64.cc
@@ -60,6 +75,8 @@
 examples/swap-int32.cc
 examples/check-bounds.cc
 examples/getting-started.cc
+examples/non-const-visitor.cc
+examples/custom-disassembler.cc
 '''.split()
 # List target specific files.
 # Target names are used as dictionary entries.
@@ -114,9 +131,9 @@
 args = Variables()
 args.Add(EnumVariable('mode', 'Build mode', 'release',
                       allowed_values = ['release', 'debug']))
-args.Add(EnumVariable('simulator', 'build for the simulator', 'on',
+sim_default = 'off' if platform.machine() == 'aarch64' else 'on'
+args.Add(EnumVariable('simulator', 'build for the simulator', sim_default,
                       allowed_values = ['on', 'off']))
-args.Add(BoolVariable('list_targets', 'List top level targets available.', 0))
 
 # Configure the environment.
 create_variant(RELEASE_OBJ_DIR, TARGET_SRC_DIR)
@@ -124,7 +141,7 @@
 env = Environment(variables=args)
 
 # Commandline help.
-Help(args.GenerateHelpText(env))
+Help(args.GenerateHelpText(env) + '\n')
 
 # Abort if any invalid argument was passed.
 # This check must happened after an environment is created.
@@ -237,10 +254,7 @@
 # Create a simple alias to build everything with the current options.
 create_alias('all', targets)
 
-if env['list_targets']:
-  print 'Available targets:'
-  print '\t' + '\n\t'.join(target_alias_names)
-  sys.exit(0);
+Help('Available top level targets:\n' + '\t' + '\n\t'.join(target_alias_names) + '\n')
 
 # By default, only build the cctests.
 Default(libvixl, cctest)
diff --git a/doc/changelog.md b/doc/changelog.md
index cbb1de6..bc249a2 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -1,6 +1,20 @@
 VIXL Change Log
 ===============
 
+* 1.6
+    + Make literal pool management the responsibility of the macro assembler.
+    + Move code buffer management out of the Assembler.
+    + Support `ldrsw` for literals.
+    + Support binding a label to a specific offset.
+    + Add macro assembler support for load/store pair with arbitrary offset.
+    + Support Peek and Poke for CPURegLists.
+    + Fix disassembly of branch targets.
+    + Fix Decoder visitor insertion order.
+    + Separate Decoder visitors into const and non-const variants.
+    + Fix simulator for branches to tagged addresses.
+    + Add a VIM YouCompleteMe configuration file.
+    + Other small bug fixes and build system improvements.
+
 * 1.5
     + Tagged pointer support.
     + Implement support for exclusive access instructions.
diff --git a/doc/supported-instructions.md b/doc/supported-instructions.md
index 26d80cf..a5bde8b 100644
--- a/doc/supported-instructions.md
+++ b/doc/supported-instructions.md
@@ -507,9 +507,16 @@
 
 ### ldr ###
 
-Load double precision floating point literal to FP register.
+Load integer or FP register from literal pool.
 
-    void ldr(const FPRegister& ft, double imm)
+    void ldr(const CPURegister& rt, RawLiteral* literal)
+
+
+### ldr ###
+
+Load integer or FP register from pc + imm19 << 2.
+
+    void ldr(const CPURegister& rt, int imm19)
 
 
 ### ldr ###
@@ -520,20 +527,6 @@
              LoadStoreScalingOption option = PreferScaledOffset)
 
 
-### ldr ###
-
-Load literal to register.
-
-    void ldr(const Register& rt, uint64_t imm)
-
-
-### ldr ###
-
-Load single precision floating point literal to FP register.
-
-    void ldr(const FPRegister& ft, float imm)
-
-
 ### ldrb ###
 
 Load byte.
@@ -568,6 +561,20 @@
 
 ### ldrsw ###
 
+Load word with sign extension from literal pool.
+
+    void ldrsw(const Register& rt, RawLiteral* literal)
+
+
+### ldrsw ###
+
+Load word with sign extension from pc + imm19 << 2.
+
+    void ldrsw(const Register& rt, int imm19)
+
+
+### ldrsw ###
+
 Load word with sign extension.
 
     void ldrsw(const Register& rt, const MemOperand& src,
@@ -1578,4 +1585,11 @@
     inline void dci(Instr raw_inst)
 
 
+### place ###
+
+Place a literal at the current PC.
+
+    void place(RawLiteral* literal)
+
+
 
diff --git a/doc/topics/extending-the-disassembler.md b/doc/topics/extending-the-disassembler.md
new file mode 100644
index 0000000..d30770d
--- /dev/null
+++ b/doc/topics/extending-the-disassembler.md
@@ -0,0 +1,54 @@
+Extending the disassembler
+==========================
+
+The output of the disassembler can be extended and customized. This may be
+useful for example to add comments and annotations to the disassembly or print
+aliases for register names.
+
+The general procedure to achieve this is to create a sub-class of
+`Disassembler` and override the appropriate virtual functions.
+
+The `Disassembler` class provides virtual methods that implement how specific
+disassembly elements are printed. See
+[src/a64/disasm-a64.h](/src/a64/disasm-a64.h) for details. At the time of
+writing, these are the following (the signatures must match the declarations
+in that header exactly, otherwise the sub-class method will not override the
+default one):
+
+    virtual void AppendRegisterNameToOutput(const Instruction* instr,
+                                            const CPURegister& reg);
+    virtual void AppendPCRelativeOffsetToOutput(const Instruction* instr,
+                                                int64_t offset);
+    virtual void AppendAddressToOutput(const Instruction* instr,
+                                       const void* addr);
+    virtual void AppendCodeAddressToOutput(const Instruction* instr,
+                                           const void* addr);
+    virtual void AppendDataAddressToOutput(const Instruction* instr,
+                                           const void* addr);
+
+They can be overridden for example to use different register names and annotate
+code addresses.
+
+More complex modifications can be performed by overriding the visitor functions
+of the disassembler. The VIXL `Decoder` uses a visitor pattern implementation,
+so the `Disassembler` (as a sub-class of `DecoderVisitor`) must provide a
+visitor function for each sub-type of instructions. The complete list of
+visitors is defined by the macro `VISITOR_LIST` in
+[src/a64/decoder-a64.h](/src/a64/decoder-a64.h).
+
+The [/examples/custom-disassembler.h](/examples/custom-disassembler.h) and
+[/examples/custom-disassembler.cc](/examples/custom-disassembler.cc) example
+files show how the methods can be overridden to use different register names,
+annotate code addresses, and add comments:
+
+    VIXL disasm:   add x10, x16, x17
+    custom disasm: add x10, ip0, ip1 // add/sub to x10
+
+    VIXL disasm:   cbz x10, #+0x28 (addr 0x7fff8843bf6c)
+    custom disasm: cbz x10, #+0x28 (addr 0x7fff8843bf6c ; function: foo)
+
+
+One can refer to the implementation of visitor functions for the `Disassembler`
+(in [src/a64/disasm-a64.cc](/src/a64/disasm-a64.cc)) or even for the `Simulator`
+(in [src/a64/simulator-a64.cc](/src/a64/simulator-a64.cc)) to see how to extract
+information from instructions.
diff --git a/doc/topics/index.md b/doc/topics/index.md
new file mode 100644
index 0000000..d41074d
--- /dev/null
+++ b/doc/topics/index.md
@@ -0,0 +1,8 @@
+We will try to add documentation for topics that may be useful to VIXL users. If
+you think of any topic that may be useful and is not listed here, please contact
+us at <vixl@arm.com>.
+
+You can also have a look at the ['getting started' page](../getting-started.md).
+
+* [Extending and customizing the disassembler](extending-the-disassembler.md)
+* [Using VIM YouCompleteMe with VIXL](ycm.md)
diff --git a/doc/topics/ycm.md b/doc/topics/ycm.md
new file mode 100644
index 0000000..36371cf
--- /dev/null
+++ b/doc/topics/ycm.md
@@ -0,0 +1,9 @@
+VIM YouCompleteMe for VIXL
+==========================
+
+[YouCompleteMe](https://github.com/Valloric/YouCompleteMe) is a code completion
+engine for VIM. VIXL includes a `.ycm_extra_conf.py` to configure YCM to work in
+the VIXL repository.
+
+All you need to do to get things working is to [install YCM](https://github.com/Valloric/YouCompleteMe#full-installation-guide),
+preferably with semantic completion for C-family languages.
diff --git a/examples/abs.cc b/examples/abs.cc
index 187ef43..319b8f2 100644
--- a/examples/abs.cc
+++ b/examples/abs.cc
@@ -43,6 +43,7 @@
 
 
 #ifndef TEST_EXAMPLES
+#ifdef USE_SIMULATOR
 int main(void) {
   // Create and initialize the assembler and the simulator.
   byte assm_buf[BUF_SIZE];
@@ -64,4 +65,8 @@
 
   return 0;
 }
-#endif
+#else
+// Without the simulator there is nothing to test.
+int main(void) { return 0; }
+#endif  // USE_SIMULATOR
+#endif  // TEST_EXAMPLES
diff --git a/examples/add3-double.cc b/examples/add3-double.cc
index 088144f..4b5b889 100644
--- a/examples/add3-double.cc
+++ b/examples/add3-double.cc
@@ -44,6 +44,7 @@
 
 
 #ifndef TEST_EXAMPLES
+#ifdef USE_SIMULATOR
 int main(void) {
   // Create and initialize the assembler and the simulator.
   byte assm_buf[BUF_SIZE];
@@ -69,4 +70,8 @@
 
   return 0;
 }
-#endif
+#else
+// Without the simulator there is nothing to test.
+int main(void) { return 0; }
+#endif  // USE_SIMULATOR
+#endif  // TEST_EXAMPLES
diff --git a/examples/add4-double.cc b/examples/add4-double.cc
index b2a2ed8..bce23d1 100644
--- a/examples/add4-double.cc
+++ b/examples/add4-double.cc
@@ -52,6 +52,7 @@
 
 
 #ifndef TEST_EXAMPLES
+#ifdef USE_SIMULATOR
 int main(void) {
   // Create and initialize the assembler and the simulator.
   byte assm_buf[BUF_SIZE];
@@ -79,4 +80,8 @@
 
   return 0;
 }
-#endif
+#else
+// Without the simulator there is nothing to test.
+int main(void) { return 0; }
+#endif  // USE_SIMULATOR
+#endif  // TEST_EXAMPLES
diff --git a/examples/check-bounds.cc b/examples/check-bounds.cc
index 393edf3..8530f10 100644
--- a/examples/check-bounds.cc
+++ b/examples/check-bounds.cc
@@ -58,6 +58,7 @@
 
 
 #ifndef TEST_EXAMPLES
+#ifdef USE_SIMULATOR
 void run_function(Simulator *simulator, Instruction * function,
                   uint64_t value, uint64_t low, uint64_t high) {
   simulator->set_xreg(0, value);
@@ -93,4 +94,8 @@
 
   return 0;
 }
-#endif
+#else
+// Without the simulator there is nothing to test.
+int main(void) { return 0; }
+#endif  // USE_SIMULATOR
+#endif  // TEST_EXAMPLES
diff --git a/examples/custom-disassembler.cc b/examples/custom-disassembler.cc
new file mode 100644
index 0000000..ce381af
--- /dev/null
+++ b/examples/custom-disassembler.cc
@@ -0,0 +1,162 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "examples.h"
+#include "custom-disassembler.h"
+
+
+#define BUF_SIZE (4096)
+#define __ masm->
+
+
+void CustomDisassembler::AppendRegisterNameToOutput(
+    const Instruction* instr,
+    const CPURegister& reg) {
+  // Give a few `IsRegister()` registers custom names; see each case below.
+  if (reg.IsRegister()) {
+    switch (reg.code()) {
+      case 16:
+        AppendToOutput(reg.Is64Bits() ? "ip0" : "wip0");
+        return;
+      case 17:
+        AppendToOutput(reg.Is64Bits() ? "ip1" : "wip1");
+        return;
+      case 30:
+        AppendToOutput(reg.Is64Bits() ? "lr" : "w30");
+        return;
+      case kSPRegInternalCode:
+        AppendToOutput(reg.Is64Bits() ? "x_stack_pointer" : "w_stack_pointer");
+        return;
+      case 31:
+        AppendToOutput(reg.Is64Bits() ? "x_zero_reg" : "w_zero_reg");
+        return;
+      default:
+        // No custom name for this register: use the default one below.
+        break;
+    }
+  }
+  // Print other register names as usual.
+  Disassembler::AppendRegisterNameToOutput(instr, reg);
+}
+
+
+static const char* FakeLookupAddressDescription(const void* address) {
+  // Fake table lookup: the address is ignored, and only the first and the
+  // third lookup pretend to find a function entry.
+  USE(address);
+  static int lookup_count = 0;
+  const int lookup_index = lookup_count++;
+  if (lookup_index == 0) {
+    return "function: foo";
+  }
+  if (lookup_index == 2) {
+    return "function: bar";
+  }
+  return NULL;
+}
+
+
+void CustomDisassembler::AppendCodeAddressToOutput(
+    const Instruction* instr, const void* addr) {
+  USE(instr);
+  const char* address_desc = FakeLookupAddressDescription(addr);
+  // Print the raw address and - if available - its description.
+  AppendToOutput("(addr %p", addr);
+  if (address_desc != NULL) {
+    Disassembler::AppendToOutput(" ; %s", address_desc);
+  }
+  AppendToOutput(")");
+}
+
+
+void CustomDisassembler::VisitAddSubShifted(const Instruction* instr) {
+  vixl::Disassembler::VisitAddSubShifted(instr);
+  if (instr->Rd() == vixl::x10.code()) {
+    AppendToOutput(" // add/sub to x10");
+  }
+  ProcessOutput(instr);
+}
+
+
+void GenerateCustomDisassemblerTestCode(MacroAssembler* masm) {
+  // Generate some code to illustrate how the modified disassembler changes the
+  // disassembly output.
+  Label begin, end;
+  __ Bind(&begin);
+  __ Add(x10, x16, x17);
+  __ Cbz(x10, &end);
+  __ Add(x11, ip0, ip1);
+  __ Add(w5, w6, w30);
+  __ Tbz(x10, 2, &begin);
+  __ Tbnz(x10, 3, &begin);
+  __ Br(x30);
+  __ Br(lr);
+  __ Fadd(d30, d16, d17);
+  __ Push(xzr, xzr);
+  __ Pop(x16, x20);
+  __ Bind(&end);
+}
+
+
+void TestCustomDisassembler() {
+  // Create and initialize the assembler.
+  byte assm_buf[BUF_SIZE];
+  MacroAssembler masm(assm_buf, BUF_SIZE);
+
+  // Generate the code.
+  Label code_start, code_end;
+  masm.Bind(&code_start);
+  GenerateCustomDisassemblerTestCode(&masm);
+  masm.Bind(&code_end);
+  masm.FinalizeCode();
+  Instruction* instr_start = masm.GetLabelAddress<Instruction*>(&code_start);
+  Instruction* instr_end = masm.GetLabelAddress<Instruction*>(&code_end);
+
+  // Instantiate a standard disassembler, our custom disassembler, and register
+  // them with a decoder.
+  Decoder decoder;
+  Disassembler disasm;
+  CustomDisassembler custom_disasm;
+  decoder.AppendVisitor(&disasm);
+  decoder.AppendVisitor(&custom_disasm);
+
+  // Iterate through the instructions to show the difference in the disassembly.
+  Instruction* instr;
+  for (instr = instr_start; instr < instr_end; instr += kInstructionSize) {
+    decoder.Decode(instr);
+    printf("\n");
+    printf("VIXL disasm:   %s\n", disasm.GetOutput());
+    printf("custom disasm: %s\n", custom_disasm.GetOutput());
+  }
+}
+
+
+#ifndef TEST_EXAMPLES
+int main() {
+  TestCustomDisassembler();
+  return 0;
+}
+#endif
diff --git a/examples/custom-disassembler.h b/examples/custom-disassembler.h
new file mode 100644
index 0000000..12d1a7f
--- /dev/null
+++ b/examples/custom-disassembler.h
@@ -0,0 +1,54 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_EXAMPLES_CUSTOM_DISASSEMBLER_H_
+#define VIXL_EXAMPLES_CUSTOM_DISASSEMBLER_H_
+
+#include "a64/disasm-a64.h"
+
+using namespace vixl;
+
+void TestCustomDisassembler();
+
+class CustomDisassembler: public Disassembler {
+ public:
+  CustomDisassembler() : Disassembler() { }
+  virtual ~CustomDisassembler() { }
+
+  virtual void VisitAddSubShifted(const Instruction* instr);
+
+ protected:
+  // We print custom register names.
+  virtual void AppendRegisterNameToOutput(const Instruction* instr,
+                                          const CPURegister& reg);
+
+  // We fake looking up addresses in a table and printing useful names.
+  virtual void AppendCodeAddressToOutput(const Instruction* instr,
+                                         const void* addr);
+};
+
+
+#endif
diff --git a/examples/debugger.cc b/examples/debugger.cc
index 64db258..4c69522 100644
--- a/examples/debugger.cc
+++ b/examples/debugger.cc
@@ -26,6 +26,7 @@
 
 #include "examples.h"
 
+
 // This is an interactive example, not to be used for testing.
 #ifndef TEST_EXAMPLES
 
@@ -48,6 +49,7 @@
 }
 
 
+#ifdef USE_SIMULATOR
 int main(void) {
   // Create and initialize the assembler and the debugger.
   byte assm_buf[BUF_SIZE];
@@ -67,4 +69,8 @@
 
   return 0;
 }
-#endif
+#else
+// Without the simulator there is nothing to test.
+int main(void) { return 0; }
+#endif  // USE_SIMULATOR
+#endif  // TEST_EXAMPLES
diff --git a/examples/factorial-rec.cc b/examples/factorial-rec.cc
index 6cd1103..6a11518 100644
--- a/examples/factorial-rec.cc
+++ b/examples/factorial-rec.cc
@@ -55,6 +55,7 @@
 
 
 #ifndef TEST_EXAMPLES
+#ifdef USE_SIMULATOR
 int main(void) {
   // Create and initialize the assembler and the simulator.
   byte assm_buf[BUF_SIZE];
@@ -76,4 +77,8 @@
 
   return 0;
 }
-#endif
+#else
+// Without the simulator there is nothing to test.
+int main(void) { return 0; }
+#endif  // USE_SIMULATOR
+#endif  // TEST_EXAMPLES
diff --git a/examples/factorial.cc b/examples/factorial.cc
index 9a077f4..b71c2d0 100644
--- a/examples/factorial.cc
+++ b/examples/factorial.cc
@@ -53,6 +53,7 @@
 
 
 #ifndef TEST_EXAMPLES
+#ifdef USE_SIMULATOR
 int main(void) {
   // Create and initialize the assembler and the simulator.
   byte assm_buf[BUF_SIZE];
@@ -74,4 +75,8 @@
 
   return 0;
 }
-#endif
+#else
+// Without the simulator there is nothing to test.
+int main(void) { return 0; }
+#endif  // USE_SIMULATOR
+#endif  // TEST_EXAMPLES
diff --git a/examples/getting-started.cc b/examples/getting-started.cc
index a04945c..d62aea6 100644
--- a/examples/getting-started.cc
+++ b/examples/getting-started.cc
@@ -41,6 +41,7 @@
 
 
 #ifndef TEST_EXAMPLES
+#ifdef USE_SIMULATOR
 int main() {
   byte assm_buf[BUF_SIZE];
   MacroAssembler masm(assm_buf, BUF_SIZE);
@@ -58,4 +59,8 @@
 
   return 0;
 }
-#endif
+#else
+// Without the simulator there is nothing to test.
+int main(void) { return 0; }
+#endif  // USE_SIMULATOR
+#endif  // TEST_EXAMPLES
diff --git a/examples/non-const-visitor.cc b/examples/non-const-visitor.cc
new file mode 100644
index 0000000..9e07cc1
--- /dev/null
+++ b/examples/non-const-visitor.cc
@@ -0,0 +1,129 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "examples.h"
+#include "non-const-visitor.h"
+
+#define BUF_SIZE (4096)
+#define __ masm->
+
+
+void GenerateNonConstVisitorTestCode(MacroAssembler* masm) {
+  // int64_t foo(int64_t a, int64_t b)
+  //  Argument locations:
+  //    a -> x0
+  //    b -> x1
+  __ Sub(x0, x0, x1);
+  // The return value is in x0.
+  __ Ret();
+}
+
+
+int64_t RunNonConstVisitorTestGeneratedCode(const Instruction* start_instr) {
+#ifdef USE_SIMULATOR
+  Decoder simulator_decoder;
+  Simulator simulator(&simulator_decoder);
+
+  int64_t a = 5;
+  int64_t b = 2;
+  simulator.set_xreg(0, a);
+  simulator.set_xreg(1, b);
+  simulator.RunFrom(start_instr);
+  int64_t res = simulator.xreg(0);
+  printf("foo(%ld, %ld) = %ld\n", a, b, res);
+
+  return res;
+#else
+  // Without the simulator there is nothing to test.
+  USE(start_instr);
+  return 0;
+#endif
+}
+
+
+#ifndef TEST_EXAMPLES
+#ifdef USE_SIMULATOR
+int main(void) {
+  // Create and initialize the assembler.
+  byte assm_buf[BUF_SIZE];
+  MacroAssembler masm(assm_buf, BUF_SIZE);
+
+  // Generate the code.
+  Label code_start, code_end;
+  masm.Bind(&code_start);
+  GenerateNonConstVisitorTestCode(&masm);
+  masm.Bind(&code_end);
+  masm.FinalizeCode();
+  Instruction* instr_start = masm.GetLabelAddress<Instruction*>(&code_start);
+  Instruction* instr_end = masm.GetLabelAddress<Instruction*>(&code_end);
+
+  // Run the code a first time.
+  RunNonConstVisitorTestGeneratedCode(instr_start);
+
+  // Instantiate a decoder, disassembler, and our custom modifying visitor.
+  Decoder decoder;
+  PrintDisassembler disasm(stdout);
+  SwitchAddSubRegisterSources modifying_visitor;
+
+  // Register visitors in such a way that when visiting instructions, the
+  // decoder will first disassemble the original instruction, modify it, and
+  // then disassemble the modified instruction.
+  decoder.AppendVisitor(&disasm);
+  decoder.AppendVisitor(&modifying_visitor);
+  decoder.AppendVisitor(&disasm);
+
+  // Iterate through the instructions.
+  Instruction* instr;
+  for (instr = instr_start; instr < instr_end; instr += kInstructionSize) {
+    printf("---\n");
+    decoder.Decode(instr);
+  }
+
+  // Run the modified code and observe the different output from before.
+  RunNonConstVisitorTestGeneratedCode(instr_start);
+
+  return 0;
+}
+#else
+// Without the simulator there is nothing to test.
+int main(void) { return 0; }
+#endif  // USE_SIMULATOR
+#endif  // TEST_EXAMPLES
+
+
+// This is only used by the testing code.
+void ModifyNonConstVisitorTestGeneratedCode(Instruction* start,
+                                            Instruction* end) {
+  Decoder decoder;
+  SwitchAddSubRegisterSources modifying_visitor;
+  decoder.AppendVisitor(&modifying_visitor);
+
+  Instruction* instr;
+  for (instr = start; instr < end; instr += kInstructionSize) {
+    printf("---\n");
+    decoder.Decode(instr);
+  }
+}
diff --git a/examples/non-const-visitor.h b/examples/non-const-visitor.h
new file mode 100644
index 0000000..ca396d8
--- /dev/null
+++ b/examples/non-const-visitor.h
@@ -0,0 +1,120 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_EXAMPLES_NON_CONST_VISITOR_H_
+#define VIXL_EXAMPLES_NON_CONST_VISITOR_H_
+
+using namespace vixl;
+
+class SwitchAddSubRegisterSources : public DecoderVisitor {
+ public:
+  SwitchAddSubRegisterSources()
+      : DecoderVisitor(DecoderVisitor::kNonConstVisitor) {}
+
+  // Our visitor swaps the two source registers of the add and sub instructions
+  // dispatched to `VisitAddSubShifted` only; all other instructions are left
+  // untouched. Visitors are listed by `VISITOR_LIST` in a64/decoder-a64.h.
+  virtual void VisitAddSubShifted(const Instruction* instr) {
+    int rn = instr->Rn();
+    int rm = instr->Rm();
+    // Only non-const visitors are allowed to discard constness of the visited
+    // instruction.
+    Instruction* mutable_instr = MutableInstruction(instr);
+    Instr instr_bits = mutable_instr->InstructionBits();
+
+    // Swap the fields: `rn` goes into the Rm field and `rm` into the Rn one.
+    instr_bits &= ~(Rn_mask | Rm_mask);
+    instr_bits |= (rn << Rm_offset) | (rm << Rn_offset);
+
+    // Write the modified encoding back over the visited instruction.
+    mutable_instr->SetInstructionBits(instr_bits);
+  }
+
+  // Define the remaining visitors to do nothing.
+#define UNUSED_VISITOR_LIST(V)      \
+  V(PCRelAddressing)                \
+  V(AddSubImmediate)                \
+  V(LogicalImmediate)               \
+  V(MoveWideImmediate)              \
+  V(Bitfield)                       \
+  V(Extract)                        \
+  V(UnconditionalBranch)            \
+  V(UnconditionalBranchToRegister)  \
+  V(CompareBranch)                  \
+  V(TestBranch)                     \
+  V(ConditionalBranch)              \
+  V(System)                         \
+  V(Exception)                      \
+  V(LoadStorePairPostIndex)         \
+  V(LoadStorePairOffset)            \
+  V(LoadStorePairPreIndex)          \
+  V(LoadStorePairNonTemporal)       \
+  V(LoadLiteral)                    \
+  V(LoadStoreUnscaledOffset)        \
+  V(LoadStorePostIndex)             \
+  V(LoadStorePreIndex)              \
+  V(LoadStoreRegisterOffset)        \
+  V(LoadStoreUnsignedOffset)        \
+  V(LoadStoreExclusive)             \
+  V(LogicalShifted)                 \
+  V(AddSubExtended)                 \
+  V(AddSubWithCarry)                \
+  V(ConditionalCompareRegister)     \
+  V(ConditionalCompareImmediate)    \
+  V(ConditionalSelect)              \
+  V(DataProcessing1Source)          \
+  V(DataProcessing2Source)          \
+  V(DataProcessing3Source)          \
+  V(FPCompare)                      \
+  V(FPConditionalCompare)           \
+  V(FPConditionalSelect)            \
+  V(FPImmediate)                    \
+  V(FPDataProcessing1Source)        \
+  V(FPDataProcessing2Source)        \
+  V(FPDataProcessing3Source)        \
+  V(FPIntegerConvert)               \
+  V(FPFixedPointConvert)            \
+  V(Unallocated)                    \
+  V(Unimplemented)
+#define DEFINE_UNUSED_VISITOR(Name)                                  \
+  virtual void Visit##Name(const Instruction* i) {                   \
+    USE(i); /* Prevents compiler warnings about unused variables. */ \
+  }
+  UNUSED_VISITOR_LIST(DEFINE_UNUSED_VISITOR)
+#undef DEFINE_UNUSED_VISITOR
+#undef UNUSED_VISITOR_LIST
+};
+
+
+void GenerateNonConstVisitorTestCode(MacroAssembler* masm);
+
+int64_t RunNonConstVisitorTestGeneratedCode(const Instruction* start_instr);
+
+void ModifyNonConstVisitorTestGeneratedCode(Instruction* start,
+                                            Instruction* end);
+
+
+#endif
diff --git a/examples/sum-array.cc b/examples/sum-array.cc
index f503136..bf4fdd0 100644
--- a/examples/sum-array.cc
+++ b/examples/sum-array.cc
@@ -59,6 +59,7 @@
 
 
 #ifndef TEST_EXAMPLES
+#ifdef USE_SIMULATOR
 int main(void) {
   // Create and initialize the assembler and the simulator.
   byte assm_buf[BUF_SIZE];
@@ -87,4 +88,8 @@
 
   return 0;
 }
-#endif
+#else
+// Without the simulator there is nothing to test.
+int main(void) { return 0; }
+#endif  // USE_SIMULATOR
+#endif  // TEST_EXAMPLES
diff --git a/examples/swap-int32.cc b/examples/swap-int32.cc
index 3090b55..617ca50 100644
--- a/examples/swap-int32.cc
+++ b/examples/swap-int32.cc
@@ -61,6 +61,7 @@
 
 
 #ifndef TEST_EXAMPLES
+#ifdef USE_SIMULATOR
 int main(void) {
   // Create and initialize the assembler and the simulator.
   byte assm_buf[BUF_SIZE];
@@ -92,4 +93,8 @@
 
   return 0;
 }
-#endif
+#else
+// Without the simulator there is nothing to test.
+int main(void) { return 0; }
+#endif  // USE_SIMULATOR
+#endif  // TEST_EXAMPLES
diff --git a/examples/swap4.cc b/examples/swap4.cc
index dc1b0bd..04037b0 100644
--- a/examples/swap4.cc
+++ b/examples/swap4.cc
@@ -47,6 +47,7 @@
 
 
 #ifndef TEST_EXAMPLES
+#ifdef USE_SIMULATOR
 int main(void) {
   // Create and initialize the assembler and the simulator.
   byte assm_buf[BUF_SIZE];
@@ -86,4 +87,8 @@
 
   return 0;
 }
-#endif
+#else
+// Without the simulator there is nothing to test.
+int main(void) { return 0; }
+#endif  // USE_SIMULATOR
+#endif  // TEST_EXAMPLES
diff --git a/src/a64/assembler-a64.cc b/src/a64/assembler-a64.cc
index b9a8418..f169bd6 100644
--- a/src/a64/assembler-a64.cc
+++ b/src/a64/assembler-a64.cc
@@ -268,7 +268,7 @@
 
 
 // MemOperand
-MemOperand::MemOperand(Register base, ptrdiff_t offset, AddrMode addrmode)
+MemOperand::MemOperand(Register base, int64_t offset, AddrMode addrmode)
   : base_(base), regoffset_(NoReg), offset_(offset), addrmode_(addrmode) {
   VIXL_ASSERT(base.Is64Bits() && !base.IsZero());
 }
@@ -360,50 +360,55 @@
 
 
 // Assembler
-Assembler::Assembler(byte* buffer, unsigned buffer_size,
+Assembler::Assembler(byte* buffer, size_t capacity,
                      PositionIndependentCodeOption pic)
-    : buffer_size_(buffer_size), literal_pool_monitor_(0), pic_(pic) {
-
-  buffer_ = reinterpret_cast<Instruction*>(buffer);
-  pc_ = buffer_;
-  Reset();
+    : pic_(pic) {
+#ifdef DEBUG
+  buffer_monitor_ = 0;
+#endif
+  buffer_ = new CodeBuffer(buffer, capacity);
 }
 
 
+Assembler::Assembler(size_t capacity, PositionIndependentCodeOption pic)
+    : pic_(pic) {
+#ifdef DEBUG
+  buffer_monitor_ = 0;
+#endif
+  buffer_ = new CodeBuffer(capacity);
+}
+
 Assembler::~Assembler() {
-  VIXL_ASSERT(finalized_ || (pc_ == buffer_));
-  VIXL_ASSERT(literals_.empty());
+  VIXL_ASSERT(buffer_monitor_ == 0);
+  delete buffer_;
 }
 
 
 void Assembler::Reset() {
-#ifdef DEBUG
-  VIXL_ASSERT((pc_ >= buffer_) && (pc_ < buffer_ + buffer_size_));
-  VIXL_ASSERT(literal_pool_monitor_ == 0);
-  memset(buffer_, 0, pc_ - buffer_);
-  finalized_ = false;
-#endif
-  pc_ = buffer_;
-  literals_.clear();
-  next_literal_pool_check_ = pc_ + kLiteralPoolCheckInterval;
+  buffer_->Reset();
 }
 
 
 void Assembler::FinalizeCode() {
-  EmitLiteralPool();
-#ifdef DEBUG
-  finalized_ = true;
-#endif
+  buffer_->SetClean();
 }
 
 
 void Assembler::bind(Label* label) {
-  label->Bind(pc_ - buffer_);
-  VIXL_ASSERT(GetLabelAddress<Instruction*>(label) == pc_);
+  BindToOffset(label, buffer_->CursorOffset());
+}
+
+
+void Assembler::BindToOffset(Label* label, ptrdiff_t offset) {
+  VIXL_ASSERT((offset >= 0) && (offset <= buffer_->CursorOffset()));
+  VIXL_ASSERT(offset % kInstructionSize == 0);
+
+  label->Bind(offset);
 
   while (label->IsLinked()) {
-    Instruction * link = buffer_ + label->GetAndRemoveNextLink();
-    link->SetImmPCOffsetTarget(buffer_ + label->location());
+    Instruction* link =
+        GetOffsetAddress<Instruction*>(label->GetAndRemoveNextLink());
+    link->SetImmPCOffsetTarget(GetLabelAddress<Instruction*>(label));
   }
 }
 
@@ -411,33 +416,83 @@
 // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
 //
 // The offset is calculated by aligning the PC and label addresses down to a
-// multiple of element_size, then calculating the (scaled) offset between them.
-// This matches the semantics of adrp, for example.
-template <int element_size>
+// multiple of 1 << element_shift, then calculating the (scaled) offset between
+// them. This matches the semantics of adrp, for example.
+template <int element_shift>
 ptrdiff_t Assembler::LinkAndGetOffsetTo(Label* label) {
+  VIXL_STATIC_ASSERT(element_shift < (sizeof(ptrdiff_t) * 8));
+
   if (label->IsBound()) {
-    uintptr_t pc_offset = reinterpret_cast<uintptr_t>(pc_) / element_size;
-    uintptr_t label_offset = GetLabelAddress<uintptr_t>(label) / element_size;
+    uintptr_t pc_offset = GetCursorAddress<uintptr_t>() >> element_shift;
+    uintptr_t label_offset =
+        GetLabelAddress<uintptr_t>(label) >> element_shift;
     return label_offset - pc_offset;
   } else {
-    label->AddLink(pc_ - buffer_);
+    label->AddLink(buffer_->CursorOffset());
     return 0;
   }
 }
 
 
 ptrdiff_t Assembler::LinkAndGetByteOffsetTo(Label* label) {
-  return LinkAndGetOffsetTo<1>(label);
+  return LinkAndGetOffsetTo<0>(label);
 }
 
 
 ptrdiff_t Assembler::LinkAndGetInstructionOffsetTo(Label* label) {
-  return LinkAndGetOffsetTo<kInstructionSize>(label);
+  return LinkAndGetOffsetTo<kInstructionSizeLog2>(label);
 }
 
 
 ptrdiff_t Assembler::LinkAndGetPageOffsetTo(Label* label) {
-  return LinkAndGetOffsetTo<kPageSize>(label);
+  return LinkAndGetOffsetTo<kPageSizeLog2>(label);
+}
+
+
+void Assembler::place(RawLiteral* literal) {
+  VIXL_ASSERT(!literal->IsPlaced());
+
+  // Patch instructions using this literal.
+  if (literal->IsUsed()) {
+    Instruction* target = GetCursorAddress<Instruction*>();
+    ptrdiff_t offset = literal->last_use();
+
+    while (offset != 0) {
+      Instruction* ldr = GetOffsetAddress<Instruction*>(offset);
+      offset = ldr->ImmLLiteral();
+      ldr->SetImmLLiteral(target);
+    }
+  }
+
+  // "bind" the literal.
+  literal->set_offset(CursorOffset());
+  // Copy the data into the pool.
+  if (literal->size() == kXRegSizeInBytes) {
+    dc64(literal->raw_value64());
+  } else {
+    VIXL_ASSERT(literal->size() == kWRegSizeInBytes);
+    dc32(literal->raw_value32());
+  }
+}
+
+
+ptrdiff_t Assembler::LinkAndGetWordOffsetTo(RawLiteral* literal) {
+  VIXL_ASSERT(IsWordAligned(CursorOffset()));
+
+  if (literal->IsPlaced()) {
+    // The literal is "behind", the offset will be negative.
+    VIXL_ASSERT((literal->offset() - CursorOffset()) <= 0);
+    return (literal->offset() - CursorOffset()) >> kLiteralEntrySizeLog2;
+  }
+
+  ptrdiff_t offset = 0;
+  // Link all uses together.
+  if (literal->IsUsed()) {
+    offset = (literal->last_use() - CursorOffset()) >> kLiteralEntrySizeLog2;
+  }
+  literal->set_last_use(CursorOffset());
+
+  return offset;
 }
 
 
@@ -1274,20 +1329,27 @@
 }
 
 
-void Assembler::ldr(const Register& rt, uint64_t imm) {
-  LoadLiteral(rt, imm, rt.Is64Bits() ? LDR_x_lit : LDR_w_lit);
+void Assembler::ldrsw(const Register& rt, RawLiteral* literal) {
+  VIXL_ASSERT(rt.Is64Bits());
+  VIXL_ASSERT(literal->size() == kWRegSizeInBytes);
+  ldrsw(rt, LinkAndGetWordOffsetTo(literal));
 }
 
 
-void Assembler::ldr(const FPRegister& ft, double imm) {
-  VIXL_ASSERT(ft.Is64Bits());
-  LoadLiteral(ft, double_to_rawbits(imm), LDR_d_lit);
+void Assembler::ldr(const CPURegister& rt, RawLiteral* literal) {
+  VIXL_ASSERT(literal->size() == static_cast<size_t>(rt.SizeInBytes()));
+  ldr(rt, LinkAndGetWordOffsetTo(literal));
 }
 
 
-void Assembler::ldr(const FPRegister& ft, float imm) {
-  VIXL_ASSERT(ft.Is32Bits());
-  LoadLiteral(ft, float_to_rawbits(imm), LDR_s_lit);
+void Assembler::ldrsw(const Register& rt, int imm19) {
+  Emit(LDRSW_x_lit | ImmLLiteral(imm19) | Rt(rt));
+}
+
+
+void Assembler::ldr(const CPURegister& rt, int imm19) {
+  LoadLiteralOp op = LoadLiteralOpFor(rt);
+  Emit(op | ImmLLiteral(imm19) | Rt(rt));
 }
 
 
@@ -1961,10 +2023,11 @@
   Emit(BRK | ImmException(code));
 }
 
-
+// TODO(all): The third parameter should be passed by reference but gcc 4.8.2
+// reports a bogus uninitialised warning then.
 void Assembler::Logical(const Register& rd,
                         const Register& rn,
-                        const Operand& operand,
+                        const Operand operand,
                         LogicalOp op) {
   VIXL_ASSERT(rd.size() == rn.size());
   if (operand.IsImmediate()) {
@@ -2161,7 +2224,7 @@
                           LoadStoreOp op,
                           LoadStoreScalingOption option) {
   Instr memop = op | Rt(rt) | RnSP(addr.base());
-  ptrdiff_t offset = addr.offset();
+  int64_t offset = addr.offset();
   LSDataSize size = CalcLSDataSize(op);
 
   if (addr.IsImmediateOffset()) {
@@ -2226,25 +2289,20 @@
 }
 
 
-bool Assembler::IsImmLSUnscaled(ptrdiff_t offset) {
+bool Assembler::IsImmLSUnscaled(int64_t offset) {
   return is_int9(offset);
 }
 
 
-bool Assembler::IsImmLSScaled(ptrdiff_t offset, LSDataSize size) {
+bool Assembler::IsImmLSScaled(int64_t offset, LSDataSize size) {
   bool offset_is_size_multiple = (((offset >> size) << size) == offset);
   return offset_is_size_multiple && is_uint12(offset >> size);
 }
 
 
-void Assembler::LoadLiteral(const CPURegister& rt,
-                            uint64_t imm,
-                            LoadLiteralOp op) {
-  VIXL_ASSERT(is_int32(imm) || is_uint32(imm) || (rt.Is64Bits()));
-
-  BlockLiteralPoolScope scope(this);
-  RecordLiteral(imm, rt.SizeInBytes());
-  Emit(op | ImmLLiteral(0) | Rt(rt));
+bool Assembler::IsImmLSPair(int64_t offset, LSDataSize size) {
+  bool offset_is_size_multiple = (((offset >> size) << size) == offset);
+  return offset_is_size_multiple && is_int7(offset >> size);
 }
 
 
@@ -2582,112 +2640,13 @@
 }
 
 
-void Assembler::RecordLiteral(int64_t imm, unsigned size) {
-  literals_.push_front(new Literal(pc_, imm, size));
-}
-
-
-// Check if a literal pool should be emitted. Currently a literal is emitted
-// when:
-//  * the distance to the first literal load handled by this pool is greater
-//    than the recommended distance and the literal pool can be emitted without
-//    generating a jump over it.
-//  * the distance to the first literal load handled by this pool is greater
-//    than twice the recommended distance.
-// TODO: refine this heuristic using real world data.
-void Assembler::CheckLiteralPool(LiteralPoolEmitOption option) {
-  if (IsLiteralPoolBlocked()) {
-    // Literal pool emission is forbidden, no point in doing further checks.
-    return;
+LoadLiteralOp Assembler::LoadLiteralOpFor(const CPURegister& rt) {
+  if (rt.IsRegister()) {
+    return rt.Is64Bits() ? LDR_x_lit : LDR_w_lit;
+  } else {
+    VIXL_ASSERT(rt.IsFPRegister());
+    return rt.Is64Bits() ? LDR_d_lit : LDR_s_lit;
   }
-
-  if (literals_.empty()) {
-    // No literal pool to emit.
-    next_literal_pool_check_ += kLiteralPoolCheckInterval;
-    return;
-  }
-
-  intptr_t distance = pc_ - literals_.back()->pc_;
-  if ((distance < kRecommendedLiteralPoolRange) ||
-      ((option == JumpRequired) &&
-       (distance < (2 * kRecommendedLiteralPoolRange)))) {
-    // We prefer not to have to jump over the literal pool.
-    next_literal_pool_check_ += kLiteralPoolCheckInterval;
-    return;
-  }
-
-  EmitLiteralPool(option);
-}
-
-
-void Assembler::EmitLiteralPool(LiteralPoolEmitOption option) {
-  // Exit early if there are no literals to emit.
-  if (literals_.empty()) return;
-
-  // Prevent recursive calls while emitting the literal pool.
-  BlockLiteralPoolScope scope(this);
-
-  Label marker;
-  Label start_of_pool;
-  Label end_of_pool;
-
-  if (option == JumpRequired) {
-    b(&end_of_pool);
-  }
-
-  // Leave space for a literal pool marker. This is populated later, once the
-  // size of the pool is known.
-  bind(&marker);
-  nop();
-
-  // Now populate the literal pool.
-  bind(&start_of_pool);
-  std::list<Literal*>::iterator it;
-  for (it = literals_.begin(); it != literals_.end(); it++) {
-    // Update the load-literal instruction to point to this pool entry.
-    Instruction* load_literal = (*it)->pc_;
-    load_literal->SetImmLLiteral(pc_);
-    // Copy the data into the pool.
-    uint64_t value= (*it)->value_;
-    unsigned size = (*it)->size_;
-    VIXL_ASSERT((size == kXRegSizeInBytes) || (size == kWRegSizeInBytes));
-    VIXL_ASSERT((pc_ + size) <= (buffer_ + buffer_size_));
-    memcpy(pc_, &value, size);
-    pc_ += size;
-    delete *it;
-  }
-  literals_.clear();
-  bind(&end_of_pool);
-
-  // The pool size should always be a multiple of four bytes because that is the
-  // scaling applied by the LDR(literal) instruction, even for X-register loads.
-  VIXL_ASSERT((SizeOfCodeGeneratedSince(&start_of_pool) % 4) == 0);
-  uint64_t pool_size = SizeOfCodeGeneratedSince(&start_of_pool) / 4;
-
-  // Literal pool marker indicating the size in words of the literal pool.
-  // We use a literal load to the zero register, the offset indicating the
-  // size in words. This instruction can encode a large enough offset to span
-  // the entire pool at its maximum size.
-  Instr marker_instruction = LDR_x_lit | ImmLLiteral(pool_size) | Rt(xzr);
-  memcpy(GetLabelAddress<void*>(&marker),
-         &marker_instruction,
-         kInstructionSize);
-
-  next_literal_pool_check_ = pc_ + kLiteralPoolCheckInterval;
-}
-
-
-// Return the size in bytes, required by the literal pool entries. This does
-// not include any marker or branch over the literal pool itself.
-size_t Assembler::LiteralPoolSize() {
-  size_t size = 0;
-
-  std::list<Literal*>::iterator it;
-  for (it = literals_.begin(); it != literals_.end(); it++) {
-    size += (*it)->size_;
-  }
-
-  return size;
 }
 
 
diff --git a/src/a64/assembler-a64.h b/src/a64/assembler-a64.h
index b01fb20..2cf2d54 100644
--- a/src/a64/assembler-a64.h
+++ b/src/a64/assembler-a64.h
@@ -32,6 +32,7 @@
 
 #include "globals-vixl.h"
 #include "utils-vixl.h"
+#include "code-buffer.h"
 #include "a64/instructions-a64.h"
 
 namespace vixl {
@@ -168,6 +169,11 @@
     return type_ == kFPRegister;
   }
 
+  bool IsW() const { return IsValidRegister() && Is32Bits(); }
+  bool IsX() const { return IsValidRegister() && Is64Bits(); }
+  bool IsS() const { return IsValidFPRegister() && Is32Bits(); }
+  bool IsD() const { return IsValidFPRegister() && Is64Bits(); }
+
   const Register& W() const;
   const Register& X() const;
   const FPRegister& S() const;
@@ -191,12 +197,12 @@
 
 class Register : public CPURegister {
  public:
-  explicit Register() : CPURegister() {}
+  Register() : CPURegister() {}
   inline explicit Register(const CPURegister& other)
       : CPURegister(other.code(), other.size(), other.type()) {
     VIXL_ASSERT(IsValidRegister());
   }
-  explicit Register(unsigned code, unsigned size)
+  Register(unsigned code, unsigned size)
       : CPURegister(code, size, kRegister) {}
 
   bool IsValid() const {
@@ -536,7 +542,7 @@
 class MemOperand {
  public:
   explicit MemOperand(Register base,
-                      ptrdiff_t offset = 0,
+                      int64_t offset = 0,
                       AddrMode addrmode = Offset);
   explicit MemOperand(Register base,
                       Register regoffset,
@@ -552,7 +558,7 @@
 
   const Register& base() const { return base_; }
   const Register& regoffset() const { return regoffset_; }
-  ptrdiff_t offset() const { return offset_; }
+  int64_t offset() const { return offset_; }
   AddrMode addrmode() const { return addrmode_; }
   Shift shift() const { return shift_; }
   Extend extend() const { return extend_; }
@@ -565,7 +571,7 @@
  private:
   Register base_;
   Register regoffset_;
-  ptrdiff_t offset_;
+  int64_t offset_;
   AddrMode addrmode_;
   Shift shift_;
   Extend extend_;
@@ -680,32 +686,80 @@
 };
 
 
-// TODO: Obtain better values for these, based on real-world data.
-const int kLiteralPoolCheckInterval = 4 * KBytes;
-const int kRecommendedLiteralPoolRange = 2 * kLiteralPoolCheckInterval;
+// A literal is a 32-bit or 64-bit piece of data stored in the instruction
+// stream and loaded through a pc relative load. The same literal can be
+// referred to by multiple instructions but a literal can only reside at one
+// place in memory. A literal can be used by a load before or after being
+// placed in memory.
+//
+// Internally an offset of 0 is associated with a literal which has been
+// neither used nor placed. Then two possibilities arise:
+//  1) the literal is placed, the offset (stored as offset + 1) is used to
+//     resolve any subsequent load using the literal.
+//  2) the literal is not placed and offset is the offset of the last load
+//     using the literal (stored as -offset - 1). If multiple loads refer to
+//     this literal then the last load holds the offset of the preceding load
+//     and all loads form a chain. Once the literal is placed all the loads in
+//     the chain are resolved and future loads fall back to possibility 1.
+class RawLiteral {
+ public:
+  RawLiteral() : size_(0), offset_(0), raw_value_(0) {}
 
+  size_t size() {
+    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
+    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
+    VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes));
+    return size_;
+  }
+  uint64_t raw_value64() {
+    VIXL_ASSERT(size_ == kXRegSizeInBytes);
+    return raw_value_;
+  }
+  uint32_t raw_value32() {
+    VIXL_ASSERT(size_ == kWRegSizeInBytes);
+    VIXL_ASSERT(is_uint32(raw_value_) || is_int32(raw_value_));
+    return static_cast<uint32_t>(raw_value_);
+  }
+  bool IsUsed() { return offset_ < 0; }
+  bool IsPlaced() { return offset_ > 0; }
 
-// Control whether a branch over the literal pool should also be emitted. This
-// is needed if the literal pool has to be emitted in the middle of the JITted
-// code.
-enum LiteralPoolEmitOption {
-  JumpRequired,
-  NoJumpRequired
+ protected:
+  ptrdiff_t offset() {
+    VIXL_ASSERT(IsPlaced());
+    return offset_ - 1;
+  }
+  void set_offset(ptrdiff_t offset) {
+    VIXL_ASSERT(offset >= 0);
+    VIXL_ASSERT(IsWordAligned(offset));
+    VIXL_ASSERT(!IsPlaced());
+    offset_ = offset + 1;
+  }
+  ptrdiff_t last_use() {
+    VIXL_ASSERT(IsUsed());
+    return -offset_ - 1;
+  }
+  void set_last_use(ptrdiff_t offset) {
+    VIXL_ASSERT(offset >= 0);
+    VIXL_ASSERT(IsWordAligned(offset));
+    VIXL_ASSERT(!IsPlaced());
+    offset_ = -offset - 1;
+  }
+
+  size_t size_;
+  ptrdiff_t offset_;
+  uint64_t raw_value_;
+
+  friend class Assembler;
 };
 
 
-// Literal pool entry.
-class Literal {
+template <typename T>
+class Literal : public RawLiteral {
  public:
-  Literal(Instruction* pc, uint64_t imm, unsigned size)
-      : pc_(pc), value_(imm), size_(size) {}
-
- private:
-  Instruction* pc_;
-  int64_t value_;
-  unsigned size_;
-
-  friend class Assembler;
+  explicit Literal(T value) {
+    size_ = sizeof(value);
+    memcpy(&raw_value_, &value, sizeof(value));
+  }
 };
 
 
@@ -750,7 +804,9 @@
 // Assembler.
 class Assembler {
  public:
-  Assembler(byte* buffer, unsigned buffer_size,
+  Assembler(size_t capacity,
+            PositionIndependentCodeOption pic = PositionIndependentCode);
+  Assembler(byte* buffer, size_t capacity,
             PositionIndependentCodeOption pic = PositionIndependentCode);
 
   // The destructor asserts that one of the following is true:
@@ -763,9 +819,6 @@
 
   // Start generating code from the beginning of the buffer, discarding any code
   // and data that has already been emitted into the buffer.
-  //
-  // In order to avoid any accidental transfer of state, Reset ASSERTs that the
-  // constant pool is not blocked.
   void Reset();
 
   // Finalize a code buffer of generated instructions. This function must be
@@ -776,13 +829,47 @@
   // Bind a label to the current PC.
   void bind(Label* label);
 
+  // Bind a label to a specified offset from the start of the buffer.
+  void BindToOffset(Label* label, ptrdiff_t offset);
+
+  // Place a literal at the current PC.
+  void place(RawLiteral* literal);
+
+  ptrdiff_t CursorOffset() const {
+    return buffer_->CursorOffset();
+  }
+
+  ptrdiff_t BufferEndOffset() const {
+    return static_cast<ptrdiff_t>(buffer_->capacity());
+  }
+
+  // Return the address of an offset in the buffer.
+  template <typename T>
+  inline T GetOffsetAddress(ptrdiff_t offset) {
+    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
+    return buffer_->GetOffsetAddress<T>(offset);
+  }
+
   // Return the address of a bound label.
   template <typename T>
   inline T GetLabelAddress(const Label * label) {
     VIXL_ASSERT(label->IsBound());
     VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
-    VIXL_STATIC_ASSERT(sizeof(*buffer_) == 1);
-    return reinterpret_cast<T>(buffer_ + label->location());
+    return GetOffsetAddress<T>(label->location());
+  }
+
+  // Return the address of the cursor.
+  template <typename T>
+  inline T GetCursorAddress() {
+    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
+    return GetOffsetAddress<T>(CursorOffset());
+  }
+
+  // Return the address of the start of the buffer.
+  template <typename T>
+  inline T GetStartAddress() {
+    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
+    return GetOffsetAddress<T>(0);
   }
 
   // Instruction set functions.
@@ -1324,14 +1411,17 @@
   void stnp(const CPURegister& rt, const CPURegister& rt2,
             const MemOperand& dst);
 
-  // Load literal to register.
-  void ldr(const Register& rt, uint64_t imm);
+  // Load integer or FP register from literal pool.
+  void ldr(const CPURegister& rt, RawLiteral* literal);
 
-  // Load double precision floating point literal to FP register.
-  void ldr(const FPRegister& ft, double imm);
+  // Load word with sign extension from literal pool.
+  void ldrsw(const Register& rt, RawLiteral* literal);
 
-  // Load single precision floating point literal to FP register.
-  void ldr(const FPRegister& ft, float imm);
+  // Load integer or FP register from pc + imm19 << 2.
+  void ldr(const CPURegister& rt, int imm19);
+
+  // Load word with sign extension from pc + imm19 << 2.
+  void ldrsw(const Register& rt, int imm19);
 
   // Store exclusive byte.
   void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
@@ -1618,25 +1708,26 @@
   inline void dci(Instr raw_inst) { Emit(raw_inst); }
 
   // Emit 32 bits of data into the instruction stream.
-  inline void dc32(uint32_t data) { EmitData(&data, sizeof(data)); }
+  inline void dc32(uint32_t data) {
+    VIXL_ASSERT(buffer_monitor_ > 0);
+    buffer_->Emit32(data);
+  }
 
   // Emit 64 bits of data into the instruction stream.
-  inline void dc64(uint64_t data) { EmitData(&data, sizeof(data)); }
+  inline void dc64(uint64_t data) {
+    VIXL_ASSERT(buffer_monitor_ > 0);
+    buffer_->Emit64(data);
+  }
 
   // Copy a string into the instruction stream, including the terminating NULL
-  // character. The instruction pointer (pc_) is then aligned correctly for
+  // character. The instruction pointer is then aligned correctly for
   // subsequent instructions.
-  void EmitStringData(const char * string) {
+  void EmitString(const char * string) {
     VIXL_ASSERT(string != NULL);
+    VIXL_ASSERT(buffer_monitor_ > 0);
 
-    size_t len = strlen(string) + 1;
-    EmitData(string, len);
-
-    // Pad with NULL characters until pc_ is aligned.
-    const char pad[] = {'\0', '\0', '\0', '\0'};
-    VIXL_STATIC_ASSERT(sizeof(pad) == kInstructionSize);
-    Instruction* next_pc = AlignUp(pc_, kInstructionSize);
-    EmitData(&pad, next_pc - pc_);
+    buffer_->EmitString(string);
+    buffer_->Align();
   }
 
   // Code generation helpers.
@@ -1912,43 +2003,39 @@
     return scale << FPScale_offset;
   }
 
-  // Size of the code generated in bytes
-  size_t SizeOfCodeGenerated() const {
-    VIXL_ASSERT((pc_ >= buffer_) && (pc_ < (buffer_ + buffer_size_)));
-    return pc_ - buffer_;
-  }
-
   // Size of the code generated since label to the current position.
   size_t SizeOfCodeGeneratedSince(Label* label) const {
-    size_t pc_offset = SizeOfCodeGenerated();
-
     VIXL_ASSERT(label->IsBound());
-    VIXL_ASSERT(pc_offset >= static_cast<size_t>(label->location()));
-    VIXL_ASSERT(pc_offset < buffer_size_);
-
-    return pc_offset - label->location();
+    return buffer_->OffsetFrom(label->location());
   }
 
+  size_t BufferCapacity() const { return buffer_->capacity(); }
 
-  inline void BlockLiteralPool() {
-    literal_pool_monitor_++;
-  }
+  size_t RemainingBufferSpace() const { return buffer_->RemainingBytes(); }
 
-  inline void ReleaseLiteralPool() {
-    if (--literal_pool_monitor_ == 0) {
-      // Has the literal pool been blocked for too long?
-      VIXL_ASSERT(literals_.empty() ||
-             (pc_ < (literals_.back()->pc_ + kMaxLoadLiteralRange)));
+  void EnsureSpaceFor(size_t amount) {
+    if (buffer_->RemainingBytes() < amount) {
+      size_t capacity = buffer_->capacity();
+      size_t size = buffer_->CursorOffset();
+      do {
+        // TODO(all): refine.
+        capacity *= 2;
+      } while ((capacity - size) <  amount);
+      buffer_->Grow(capacity);
     }
   }
 
-  inline bool IsLiteralPoolBlocked() {
-    return literal_pool_monitor_ != 0;
+#ifdef DEBUG
+  void AcquireBuffer() {
+    VIXL_ASSERT(buffer_monitor_ >= 0);
+    buffer_monitor_++;
   }
 
-  void CheckLiteralPool(LiteralPoolEmitOption option = JumpRequired);
-  void EmitLiteralPool(LiteralPoolEmitOption option = NoJumpRequired);
-  size_t LiteralPoolSize();
+  void ReleaseBuffer() {
+    buffer_monitor_--;
+    VIXL_ASSERT(buffer_monitor_ >= 0);
+  }
+#endif
 
   inline PositionIndependentCodeOption pic() {
     return pic_;
@@ -1959,22 +2046,30 @@
            (pic() == PositionDependentCode);
   }
 
- protected:
-  inline const Register& AppropriateZeroRegFor(const CPURegister& reg) const {
+  static inline const Register& AppropriateZeroRegFor(const CPURegister& reg) {
     return reg.Is64Bits() ? xzr : wzr;
   }
 
 
+ protected:
   void LoadStore(const CPURegister& rt,
                  const MemOperand& addr,
                  LoadStoreOp op,
                  LoadStoreScalingOption option = PreferScaledOffset);
-  static bool IsImmLSUnscaled(ptrdiff_t offset);
-  static bool IsImmLSScaled(ptrdiff_t offset, LSDataSize size);
+  static bool IsImmLSUnscaled(int64_t offset);
+  static bool IsImmLSScaled(int64_t offset, LSDataSize size);
 
+  void LoadStorePair(const CPURegister& rt,
+                     const CPURegister& rt2,
+                     const MemOperand& addr,
+                     LoadStorePairOp op);
+  static bool IsImmLSPair(int64_t offset, LSDataSize size);
+
+  // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
+  // reports a bogus uninitialised warning then.
   void Logical(const Register& rd,
                const Register& rn,
-               const Operand& operand,
+               const Operand operand,
                LogicalOp op);
   void LogicalImmediate(const Register& rd,
                         const Register& rn,
@@ -2035,6 +2130,7 @@
     const CPURegister& rt, const CPURegister& rt2);
   static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
     const CPURegister& rt, const CPURegister& rt2);
+  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
 
 
  private:
@@ -2053,10 +2149,6 @@
                                 const Operand& operand,
                                 FlagsUpdate S,
                                 Instr op);
-  void LoadStorePair(const CPURegister& rt,
-                     const CPURegister& rt2,
-                     const MemOperand& addr,
-                     LoadStorePairOp op);
   void LoadStorePairNonTemporal(const CPURegister& rt,
                                 const CPURegister& rt2,
                                 const MemOperand& addr,
@@ -2088,8 +2180,6 @@
                                const FPRegister& fa,
                                FPDataProcessing3SourceOp op);
 
-  void RecordLiteral(int64_t imm, unsigned size);
-
   // Link the current (not-yet-emitted) instruction to the specified label, then
   // return an offset to be encoded in the instruction. If the label is not yet
   // bound, an offset of 0 is returned.
@@ -2098,79 +2188,102 @@
   ptrdiff_t LinkAndGetPageOffsetTo(Label * label);
 
   // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
-  template <int element_size>
+  template <int element_shift>
   ptrdiff_t LinkAndGetOffsetTo(Label* label);
 
-  // Emit the instruction at pc_.
+  // Literal load offsets are in words (32-bit).
+  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);
+
+  // Emit the instruction in buffer_.
   void Emit(Instr instruction) {
-    VIXL_STATIC_ASSERT(sizeof(*pc_) == 1);
     VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
-    VIXL_ASSERT((pc_ + sizeof(instruction)) <= (buffer_ + buffer_size_));
-
-#ifdef DEBUG
-    finalized_ = false;
-#endif
-
-    memcpy(pc_, &instruction, sizeof(instruction));
-    pc_ += sizeof(instruction);
-    CheckBufferSpace();
+    VIXL_ASSERT(buffer_monitor_ > 0);
+    buffer_->Emit32(instruction);
   }
 
-  // Emit data inline in the instruction stream.
-  void EmitData(void const * data, unsigned size) {
-    VIXL_STATIC_ASSERT(sizeof(*pc_) == 1);
-    VIXL_ASSERT((pc_ + size) <= (buffer_ + buffer_size_));
-
-#ifdef DEBUG
-    finalized_ = false;
-#endif
-
-    // TODO: Record this 'instruction' as data, so that it can be disassembled
-    // correctly.
-    memcpy(pc_, data, size);
-    pc_ += size;
-    CheckBufferSpace();
-  }
-
-  inline void CheckBufferSpace() {
-    VIXL_ASSERT(pc_ < (buffer_ + buffer_size_));
-    if (pc_ > next_literal_pool_check_) {
-      CheckLiteralPool();
-    }
-  }
-
-  // The buffer into which code and relocation info are generated.
-  Instruction* buffer_;
-  // Buffer size, in bytes.
-  size_t buffer_size_;
-  Instruction* pc_;
-  std::list<Literal*> literals_;
-  Instruction* next_literal_pool_check_;
-  unsigned literal_pool_monitor_;
-
+  // Buffer where the code is emitted.
+  CodeBuffer* buffer_;
   PositionIndependentCodeOption pic_;
 
-  friend class Label;
-  friend class BlockLiteralPoolScope;
-
 #ifdef DEBUG
-  bool finalized_;
+  int64_t buffer_monitor_;
 #endif
 };
 
-class BlockLiteralPoolScope {
+
+// All Assembler emits MUST acquire/release the underlying code buffer. The
+// helper scope below will do so and optionally ensure the buffer is big enough
+// to receive the emit. It is possible to request the scope not to perform any
+// checks (kNoCheck) if for example it is known in advance the buffer size is
+// adequate or there is some other size checking mechanism in place.
+class CodeBufferCheckScope {
  public:
-  explicit BlockLiteralPoolScope(Assembler* assm) : assm_(assm) {
-    assm_->BlockLiteralPool();
+  // Tell whether or not the scope needs to ensure the associated CodeBuffer
+  // has enough space for the requested size.
+  enum CheckPolicy {
+    kNoCheck,
+    kCheck
+  };
+
+  // Tell whether or not the scope should assert the amount of code emitted
+  // within the scope is consistent with the requested amount.
+  enum AssertPolicy {
+    kNoAssert,    // No assert required.
+    kExactSize,   // The code emitted must be exactly size bytes.
+    kMaximumSize  // The code emitted must be at most size bytes.
+  };
+
+  CodeBufferCheckScope(Assembler* assm,
+                       size_t size,
+                       CheckPolicy check_policy = kCheck,
+                       AssertPolicy assert_policy = kMaximumSize)
+      : assm_(assm) {
+    if (check_policy == kCheck) assm->EnsureSpaceFor(size);
+#ifdef DEBUG
+    assm->bind(&start_);
+    size_ = size;
+    assert_policy_ = assert_policy;
+    assm->AcquireBuffer();
+#else
+    USE(assert_policy);
+#endif
   }
 
-  ~BlockLiteralPoolScope() {
-    assm_->ReleaseLiteralPool();
+  // This is a shortcut for CodeBufferCheckScope(assm, 0, kNoCheck, kNoAssert).
+  explicit CodeBufferCheckScope(Assembler* assm) : assm_(assm) {
+#ifdef DEBUG
+    size_ = 0;
+    assert_policy_ = kNoAssert;
+    assm->AcquireBuffer();
+#endif
   }
 
- private:
+  ~CodeBufferCheckScope() {
+#ifdef DEBUG
+    assm_->ReleaseBuffer();
+    switch (assert_policy_) {
+      case kNoAssert: break;
+      case kExactSize:
+        VIXL_ASSERT(assm_->SizeOfCodeGeneratedSince(&start_) == size_);
+        break;
+      case kMaximumSize:
+        VIXL_ASSERT(assm_->SizeOfCodeGeneratedSince(&start_) <= size_);
+        break;
+      default:
+        VIXL_UNREACHABLE();
+    }
+#endif
+  }
+
+ protected:
   Assembler* assm_;
+#ifdef DEBUG
+  Label start_;
+  size_t size_;
+  AssertPolicy assert_policy_;
+#endif
 };
+
 }  // namespace vixl
 
 #endif  // VIXL_A64_ASSEMBLER_A64_H_
diff --git a/src/a64/debugger-a64.cc b/src/a64/debugger-a64.cc
index a90e962..9406ec6 100644
--- a/src/a64/debugger-a64.cc
+++ b/src/a64/debugger-a64.cc
@@ -548,19 +548,19 @@
 }
 
 
-void Debugger::PrintInstructions(void* address, int64_t count) {
+void Debugger::PrintInstructions(const void* address, int64_t count) {
   if (count == 0) {
     return;
   }
 
-  Instruction* from = Instruction::Cast(address);
+  const Instruction* from = Instruction::CastConst(address);
   if (count < 0) {
     count = -count;
     from -= (count - 1) * kInstructionSize;
   }
-  Instruction* to = from + count * kInstructionSize;
+  const Instruction* to = from + count * kInstructionSize;
 
-  for (Instruction* current = from;
+  for (const Instruction* current = from;
        current < to;
        current = current->NextInstruction()) {
     printer_->Decode(current);
@@ -644,7 +644,7 @@
 }
 
 
-void Debugger::VisitException(Instruction* instr) {
+void Debugger::VisitException(const Instruction* instr) {
   switch (instr->Mask(ExceptionMask)) {
     case BRK:
       DoBreakpoint(instr);
@@ -761,27 +761,27 @@
 }
 
 
-void Debugger::DoBreakpoint(Instruction* instr) {
+void Debugger::DoBreakpoint(const Instruction* instr) {
   VIXL_ASSERT(instr->Mask(ExceptionMask) == BRK);
 
-  printf("Hit breakpoint at pc=%p.\n", reinterpret_cast<void*>(instr));
+  printf("Hit breakpoint at pc=%p.\n", reinterpret_cast<const void*>(instr));
   set_debug_parameters(debug_parameters() | DBG_BREAK | DBG_ACTIVE);
   // Make the shell point to the brk instruction.
   set_pc(instr);
 }
 
 
-void Debugger::DoUnreachable(Instruction* instr) {
+void Debugger::DoUnreachable(const Instruction* instr) {
   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
               (instr->ImmException() == kUnreachableOpcode));
 
   fprintf(stream_, "Hit UNREACHABLE marker at pc=%p.\n",
-          reinterpret_cast<void*>(instr));
+          reinterpret_cast<const void*>(instr));
   abort();
 }
 
 
-void Debugger::DoTrace(Instruction* instr) {
+void Debugger::DoTrace(const Instruction* instr) {
   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
               (instr->ImmException() == kTraceOpcode));
 
@@ -808,7 +808,7 @@
 }
 
 
-void Debugger::DoLog(Instruction* instr) {
+void Debugger::DoLog(const Instruction* instr) {
   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
               (instr->ImmException() == kLogOpcode));
 
@@ -1009,7 +1009,7 @@
 
 uint8_t* IdentifierToken::ToAddress(Debugger* debugger) const {
   VIXL_ASSERT(CanAddressMemory());
-  Instruction* pc_value = debugger->pc();
+  const Instruction* pc_value = debugger->pc();
   uint8_t* address = NULL;
   memcpy(&address, &pc_value, sizeof(address));
   return address;
@@ -1370,7 +1370,7 @@
     } else if (strcmp(identifier, "sysregs") == 0) {
       debugger->PrintSystemRegisters(true);
     } else if (strcmp(identifier, "pc") == 0) {
-      printf("pc = %16p\n", reinterpret_cast<void*>(debugger->pc()));
+      printf("pc = %16p\n", reinterpret_cast<const void*>(debugger->pc()));
     } else {
       printf(" ** Unknown identifier to print: %s **\n", identifier);
     }
diff --git a/src/a64/debugger-a64.h b/src/a64/debugger-a64.h
index 8e3d60b..d30cb6c 100644
--- a/src/a64/debugger-a64.h
+++ b/src/a64/debugger-a64.h
@@ -106,10 +106,10 @@
 
 class Debugger : public Simulator {
  public:
-  Debugger(Decoder* decoder, FILE* stream = stdout);
+  explicit Debugger(Decoder* decoder, FILE* stream = stdout);
 
   virtual void Run();
-  void VisitException(Instruction* instr);
+  void VisitException(const Instruction* instr);
 
   inline int log_parameters() {
     // The simulator can control disassembly, so make sure that the Debugger's
@@ -154,7 +154,7 @@
     pending_request_ = logging || debugging;
   }
 
-  void PrintInstructions(void* address, int64_t count = 1);
+  void PrintInstructions(const void* address, int64_t count = 1);
   void PrintMemory(const uint8_t* address,
                    const FormatToken* format,
                    int64_t count = 1);
@@ -171,10 +171,10 @@
   void LogProcessorState();
   char* ReadCommandLine(const char* prompt, char* buffer, int length);
   void RunDebuggerShell();
-  void DoBreakpoint(Instruction* instr);
-  void DoUnreachable(Instruction* instr);
-  void DoTrace(Instruction* instr);
-  void DoLog(Instruction* instr);
+  void DoBreakpoint(const Instruction* instr);
+  void DoUnreachable(const Instruction* instr);
+  void DoTrace(const Instruction* instr);
+  void DoLog(const Instruction* instr);
 
   int  log_parameters_;
   int  debug_parameters_;
diff --git a/src/a64/decoder-a64.cc b/src/a64/decoder-a64.cc
index b9ab1d3..419285d 100644
--- a/src/a64/decoder-a64.cc
+++ b/src/a64/decoder-a64.cc
@@ -29,8 +29,8 @@
 #include "a64/decoder-a64.h"
 
 namespace vixl {
-// Top-level instruction decode function.
-void Decoder::Decode(Instruction *instr) {
+
+void Decoder::DecodeInstruction(const Instruction *instr) {
   if (instr->Bits(28, 27) == 0) {
     VisitUnallocated(instr);
   } else {
@@ -109,20 +109,17 @@
 }
 
 void Decoder::AppendVisitor(DecoderVisitor* new_visitor) {
-  visitors_.remove(new_visitor);
-  visitors_.push_front(new_visitor);
+  visitors_.push_back(new_visitor);
 }
 
 
 void Decoder::PrependVisitor(DecoderVisitor* new_visitor) {
-  visitors_.remove(new_visitor);
-  visitors_.push_back(new_visitor);
+  visitors_.push_front(new_visitor);
 }
 
 
 void Decoder::InsertVisitorBefore(DecoderVisitor* new_visitor,
                                   DecoderVisitor* registered_visitor) {
-  visitors_.remove(new_visitor);
   std::list<DecoderVisitor*>::iterator it;
   for (it = visitors_.begin(); it != visitors_.end(); it++) {
     if (*it == registered_visitor) {
@@ -139,7 +136,6 @@
 
 void Decoder::InsertVisitorAfter(DecoderVisitor* new_visitor,
                                  DecoderVisitor* registered_visitor) {
-  visitors_.remove(new_visitor);
   std::list<DecoderVisitor*>::iterator it;
   for (it = visitors_.begin(); it != visitors_.end(); it++) {
     if (*it == registered_visitor) {
@@ -160,7 +156,7 @@
 }
 
 
-void Decoder::DecodePCRelAddressing(Instruction* instr) {
+void Decoder::DecodePCRelAddressing(const Instruction* instr) {
   VIXL_ASSERT(instr->Bits(27, 24) == 0x0);
   // We know bit 28 is set, as <b28:b27> = 0 is filtered out at the top level
   // decode.
@@ -169,7 +165,7 @@
 }
 
 
-void Decoder::DecodeBranchSystemException(Instruction* instr) {
+void Decoder::DecodeBranchSystemException(const Instruction* instr) {
   VIXL_ASSERT((instr->Bits(27, 24) == 0x4) ||
               (instr->Bits(27, 24) == 0x5) ||
               (instr->Bits(27, 24) == 0x6) ||
@@ -270,7 +266,7 @@
 }
 
 
-void Decoder::DecodeLoadStore(Instruction* instr) {
+void Decoder::DecodeLoadStore(const Instruction* instr) {
   VIXL_ASSERT((instr->Bits(27, 24) == 0x8) ||
               (instr->Bits(27, 24) == 0x9) ||
               (instr->Bits(27, 24) == 0xC) ||
@@ -388,7 +384,7 @@
 }
 
 
-void Decoder::DecodeLogical(Instruction* instr) {
+void Decoder::DecodeLogical(const Instruction* instr) {
   VIXL_ASSERT(instr->Bits(27, 24) == 0x2);
 
   if (instr->Mask(0x80400000) == 0x00400000) {
@@ -407,7 +403,7 @@
 }
 
 
-void Decoder::DecodeBitfieldExtract(Instruction* instr) {
+void Decoder::DecodeBitfieldExtract(const Instruction* instr) {
   VIXL_ASSERT(instr->Bits(27, 24) == 0x3);
 
   if ((instr->Mask(0x80400000) == 0x80000000) ||
@@ -432,7 +428,7 @@
 }
 
 
-void Decoder::DecodeAddSubImmediate(Instruction* instr) {
+void Decoder::DecodeAddSubImmediate(const Instruction* instr) {
   VIXL_ASSERT(instr->Bits(27, 24) == 0x1);
   if (instr->Bit(23) == 1) {
     VisitUnallocated(instr);
@@ -442,7 +438,7 @@
 }
 
 
-void Decoder::DecodeDataProcessing(Instruction* instr) {
+void Decoder::DecodeDataProcessing(const Instruction* instr) {
   VIXL_ASSERT((instr->Bits(27, 24) == 0xA) ||
               (instr->Bits(27, 24) == 0xB));
 
@@ -557,7 +553,7 @@
 }
 
 
-void Decoder::DecodeFP(Instruction* instr) {
+void Decoder::DecodeFP(const Instruction* instr) {
   VIXL_ASSERT((instr->Bits(27, 24) == 0xE) ||
               (instr->Bits(27, 24) == 0xF));
 
@@ -684,14 +680,14 @@
 }
 
 
-void Decoder::DecodeAdvSIMDLoadStore(Instruction* instr) {
+void Decoder::DecodeAdvSIMDLoadStore(const Instruction* instr) {
   // TODO: Implement Advanced SIMD load/store instruction decode.
   VIXL_ASSERT(instr->Bits(29, 25) == 0x6);
   VisitUnimplemented(instr);
 }
 
 
-void Decoder::DecodeAdvSIMDDataProcessing(Instruction* instr) {
+void Decoder::DecodeAdvSIMDDataProcessing(const Instruction* instr) {
   // TODO: Implement Advanced SIMD data processing instruction decode.
   VIXL_ASSERT(instr->Bits(27, 25) == 0x7);
   VisitUnimplemented(instr);
@@ -699,7 +695,7 @@
 
 
 #define DEFINE_VISITOR_CALLERS(A)                                              \
-  void Decoder::Visit##A(Instruction *instr) {                                 \
+  void Decoder::Visit##A(const Instruction *instr) {                           \
     VIXL_ASSERT(instr->Mask(A##FMask) == A##Fixed);                            \
     std::list<DecoderVisitor*>::iterator it;                                   \
     for (it = visitors_.begin(); it != visitors_.end(); it++) {                \
diff --git a/src/a64/decoder-a64.h b/src/a64/decoder-a64.h
index 1e7c9b1..36ed47a 100644
--- a/src/a64/decoder-a64.h
+++ b/src/a64/decoder-a64.h
@@ -88,112 +88,152 @@
 // must provide implementations for all of these functions.
 class DecoderVisitor {
  public:
-  #define DECLARE(A) virtual void Visit##A(Instruction* instr) = 0;
-  VISITOR_LIST(DECLARE)
-  #undef DECLARE
+  enum VisitorConstness {
+    kConstVisitor,
+    kNonConstVisitor
+  };
+  explicit DecoderVisitor(VisitorConstness constness = kConstVisitor)
+      : constness_(constness) {}
 
   virtual ~DecoderVisitor() {}
 
- private:
-  // Visitors are registered in a list.
-  std::list<DecoderVisitor*> visitors_;
+  #define DECLARE(A) virtual void Visit##A(const Instruction* instr) = 0;
+  VISITOR_LIST(DECLARE)
+  #undef DECLARE
 
-  friend class Decoder;
+  bool IsConstVisitor() const { return constness_ == kConstVisitor; }
+  Instruction* MutableInstruction(const Instruction* instr) {
+    VIXL_ASSERT(!IsConstVisitor());
+    return const_cast<Instruction*>(instr);
+  }
+
+ private:
+  VisitorConstness constness_;
 };
 
 
-class Decoder: public DecoderVisitor {
+class Decoder {
  public:
   Decoder() {}
 
-  // Top-level instruction decoder function. Decodes an instruction and calls
-  // the visitor functions registered with the Decoder class.
-  void Decode(Instruction *instr);
+  // Top-level wrappers around the actual decoding function.
+  void Decode(const Instruction* instr) {
+    std::list<DecoderVisitor*>::iterator it;
+    for (it = visitors_.begin(); it != visitors_.end(); it++) {
+      VIXL_ASSERT((*it)->IsConstVisitor());
+    }
+    DecodeInstruction(instr);
+  }
+  void Decode(Instruction* instr) {
+    DecodeInstruction(const_cast<const Instruction*>(instr));
+  }
 
   // Register a new visitor class with the decoder.
   // Decode() will call the corresponding visitor method from all registered
   // visitor classes when decoding reaches the leaf node of the instruction
   // decode tree.
-  // Visitors are called in the order.
-  // A visitor can only be registered once.
-  // Registering an already registered visitor will update its position.
+  // Visitors are called in order.
+  // A visitor can be registered multiple times.
   //
   //   d.AppendVisitor(V1);
   //   d.AppendVisitor(V2);
-  //   d.PrependVisitor(V2);            // Move V2 at the start of the list.
-  //   d.InsertVisitorBefore(V3, V2);
-  //   d.AppendVisitor(V4);
-  //   d.AppendVisitor(V4);             // No effect.
+  //   d.PrependVisitor(V2);
+  //   d.AppendVisitor(V3);
   //
   //   d.Decode(i);
   //
-  // will call in order visitor methods in V3, V2, V1, V4.
+  // will call in order visitor methods in V2, V1, V2, V3.
   void AppendVisitor(DecoderVisitor* visitor);
   void PrependVisitor(DecoderVisitor* visitor);
+  // These helpers register `new_visitor` before or after the first instance of
+  // `registered_visitor` in the list.
+  // So if
+  //   V1, V2, V1, V2
+  // are registered in this order in the decoder, calls to
+  //   d.InsertVisitorAfter(V3, V1);
+  //   d.InsertVisitorBefore(V4, V2);
+  // will yield the order
+  //   V1, V3, V4, V2, V1, V2
+  //
+  // For more complex modifications of the order of registered visitors, one can
+  // directly access and modify the list of visitors via the `visitors()`
+  // accessor.
   void InsertVisitorBefore(DecoderVisitor* new_visitor,
                            DecoderVisitor* registered_visitor);
   void InsertVisitorAfter(DecoderVisitor* new_visitor,
                           DecoderVisitor* registered_visitor);
 
-  // Remove a previously registered visitor class from the list of visitors
-  // stored by the decoder.
+  // Remove all instances of a previously registered visitor class from the list
+  // of visitors stored by the decoder.
   void RemoveVisitor(DecoderVisitor* visitor);
 
-  #define DECLARE(A) void Visit##A(Instruction* instr);
+  #define DECLARE(A) void Visit##A(const Instruction* instr);
   VISITOR_LIST(DECLARE)
   #undef DECLARE
 
+
+  std::list<DecoderVisitor*>* visitors() { return &visitors_; }
+
  private:
+  // Decodes an instruction and calls the visitor functions registered with the
+  // Decoder class.
+  void DecodeInstruction(const Instruction* instr);
+
   // Decode the PC relative addressing instruction, and call the corresponding
   // visitors.
   // On entry, instruction bits 27:24 = 0x0.
-  void DecodePCRelAddressing(Instruction* instr);
+  void DecodePCRelAddressing(const Instruction* instr);
 
   // Decode the add/subtract immediate instruction, and call the correspoding
   // visitors.
   // On entry, instruction bits 27:24 = 0x1.
-  void DecodeAddSubImmediate(Instruction* instr);
+  void DecodeAddSubImmediate(const Instruction* instr);
 
   // Decode the branch, system command, and exception generation parts of
   // the instruction tree, and call the corresponding visitors.
   // On entry, instruction bits 27:24 = {0x4, 0x5, 0x6, 0x7}.
-  void DecodeBranchSystemException(Instruction* instr);
+  void DecodeBranchSystemException(const Instruction* instr);
 
   // Decode the load and store parts of the instruction tree, and call
   // the corresponding visitors.
   // On entry, instruction bits 27:24 = {0x8, 0x9, 0xC, 0xD}.
-  void DecodeLoadStore(Instruction* instr);
+  void DecodeLoadStore(const Instruction* instr);
 
   // Decode the logical immediate and move wide immediate parts of the
   // instruction tree, and call the corresponding visitors.
   // On entry, instruction bits 27:24 = 0x2.
-  void DecodeLogical(Instruction* instr);
+  void DecodeLogical(const Instruction* instr);
 
   // Decode the bitfield and extraction parts of the instruction tree,
   // and call the corresponding visitors.
   // On entry, instruction bits 27:24 = 0x3.
-  void DecodeBitfieldExtract(Instruction* instr);
+  void DecodeBitfieldExtract(const Instruction* instr);
 
   // Decode the data processing parts of the instruction tree, and call the
   // corresponding visitors.
   // On entry, instruction bits 27:24 = {0x1, 0xA, 0xB}.
-  void DecodeDataProcessing(Instruction* instr);
+  void DecodeDataProcessing(const Instruction* instr);
 
   // Decode the floating point parts of the instruction tree, and call the
   // corresponding visitors.
   // On entry, instruction bits 27:24 = {0xE, 0xF}.
-  void DecodeFP(Instruction* instr);
+  void DecodeFP(const Instruction* instr);
 
   // Decode the Advanced SIMD (NEON) load/store part of the instruction tree,
   // and call the corresponding visitors.
   // On entry, instruction bits 29:25 = 0x6.
-  void DecodeAdvSIMDLoadStore(Instruction* instr);
+  void DecodeAdvSIMDLoadStore(const Instruction* instr);
 
   // Decode the Advanced SIMD (NEON) data processing part of the instruction
   // tree, and call the corresponding visitors.
   // On entry, instruction bits 27:25 = 0x7.
-  void DecodeAdvSIMDDataProcessing(Instruction* instr);
+  void DecodeAdvSIMDDataProcessing(const Instruction* instr);
+
+ private:
+  // Visitors are registered in a list.
+  std::list<DecoderVisitor*> visitors_;
 };
+
 }  // namespace vixl
 
 #endif  // VIXL_A64_DECODER_A64_H_
diff --git a/src/a64/disasm-a64.cc b/src/a64/disasm-a64.cc
index 71e6356..c267580 100644
--- a/src/a64/disasm-a64.cc
+++ b/src/a64/disasm-a64.cc
@@ -57,7 +57,7 @@
 }
 
 
-void Disassembler::VisitAddSubImmediate(Instruction* instr) {
+void Disassembler::VisitAddSubImmediate(const Instruction* instr) {
   bool rd_is_zr = RdIsZROrSP(instr);
   bool stack_op = (rd_is_zr || RnIsZROrSP(instr)) &&
                   (instr->ImmAddSub() == 0) ? true : false;
@@ -102,7 +102,7 @@
 }
 
 
-void Disassembler::VisitAddSubShifted(Instruction* instr) {
+void Disassembler::VisitAddSubShifted(const Instruction* instr) {
   bool rd_is_zr = RdIsZROrSP(instr);
   bool rn_is_zr = RnIsZROrSP(instr);
   const char *mnemonic = "";
@@ -149,7 +149,7 @@
 }
 
 
-void Disassembler::VisitAddSubExtended(Instruction* instr) {
+void Disassembler::VisitAddSubExtended(const Instruction* instr) {
   bool rd_is_zr = RdIsZROrSP(instr);
   const char *mnemonic = "";
   Extend mode = static_cast<Extend>(instr->ExtendMode());
@@ -187,7 +187,7 @@
 }
 
 
-void Disassembler::VisitAddSubWithCarry(Instruction* instr) {
+void Disassembler::VisitAddSubWithCarry(const Instruction* instr) {
   bool rn_is_zr = RnIsZROrSP(instr);
   const char *mnemonic = "";
   const char *form = "'Rd, 'Rn, 'Rm";
@@ -222,7 +222,7 @@
 }
 
 
-void Disassembler::VisitLogicalImmediate(Instruction* instr) {
+void Disassembler::VisitLogicalImmediate(const Instruction* instr) {
   bool rd_is_zr = RdIsZROrSP(instr);
   bool rn_is_zr = RnIsZROrSP(instr);
   const char *mnemonic = "";
@@ -294,7 +294,7 @@
 }
 
 
-void Disassembler::VisitLogicalShifted(Instruction* instr) {
+void Disassembler::VisitLogicalShifted(const Instruction* instr) {
   bool rd_is_zr = RdIsZROrSP(instr);
   bool rn_is_zr = RnIsZROrSP(instr);
   const char *mnemonic = "";
@@ -345,7 +345,7 @@
 }
 
 
-void Disassembler::VisitConditionalCompareRegister(Instruction* instr) {
+void Disassembler::VisitConditionalCompareRegister(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Rn, 'Rm, 'INzcv, 'Cond";
 
@@ -360,7 +360,7 @@
 }
 
 
-void Disassembler::VisitConditionalCompareImmediate(Instruction* instr) {
+void Disassembler::VisitConditionalCompareImmediate(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Rn, 'IP, 'INzcv, 'Cond";
 
@@ -375,7 +375,7 @@
 }
 
 
-void Disassembler::VisitConditionalSelect(Instruction* instr) {
+void Disassembler::VisitConditionalSelect(const Instruction* instr) {
   bool rnm_is_zr = (RnIsZROrSP(instr) && RmIsZROrSP(instr));
   bool rn_is_rm = (instr->Rn() == instr->Rm());
   const char *mnemonic = "";
@@ -428,7 +428,7 @@
 }
 
 
-void Disassembler::VisitBitfield(Instruction* instr) {
+void Disassembler::VisitBitfield(const Instruction* instr) {
   unsigned s = instr->ImmS();
   unsigned r = instr->ImmR();
   unsigned rd_size_minus_1 =
@@ -506,7 +506,7 @@
 }
 
 
-void Disassembler::VisitExtract(Instruction* instr) {
+void Disassembler::VisitExtract(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Rd, 'Rn, 'Rm, 'IExtract";
 
@@ -527,7 +527,7 @@
 }
 
 
-void Disassembler::VisitPCRelAddressing(Instruction* instr) {
+void Disassembler::VisitPCRelAddressing(const Instruction* instr) {
   switch (instr->Mask(PCRelAddressingMask)) {
     case ADR: Format(instr, "adr", "'Xd, 'AddrPCRelByte"); break;
     case ADRP: Format(instr, "adrp", "'Xd, 'AddrPCRelPage"); break;
@@ -536,7 +536,7 @@
 }
 
 
-void Disassembler::VisitConditionalBranch(Instruction* instr) {
+void Disassembler::VisitConditionalBranch(const Instruction* instr) {
   switch (instr->Mask(ConditionalBranchMask)) {
     case B_cond: Format(instr, "b.'CBrn", "'BImmCond"); break;
     default: VIXL_UNREACHABLE();
@@ -544,7 +544,8 @@
 }
 
 
-void Disassembler::VisitUnconditionalBranchToRegister(Instruction* instr) {
+void Disassembler::VisitUnconditionalBranchToRegister(
+    const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "'Xn";
 
@@ -564,7 +565,7 @@
 }
 
 
-void Disassembler::VisitUnconditionalBranch(Instruction* instr) {
+void Disassembler::VisitUnconditionalBranch(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'BImmUncn";
 
@@ -577,7 +578,7 @@
 }
 
 
-void Disassembler::VisitDataProcessing1Source(Instruction* instr) {
+void Disassembler::VisitDataProcessing1Source(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Rd, 'Rn";
 
@@ -598,7 +599,7 @@
 }
 
 
-void Disassembler::VisitDataProcessing2Source(Instruction* instr) {
+void Disassembler::VisitDataProcessing2Source(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "'Rd, 'Rn, 'Rm";
 
@@ -619,7 +620,7 @@
 }
 
 
-void Disassembler::VisitDataProcessing3Source(Instruction* instr) {
+void Disassembler::VisitDataProcessing3Source(const Instruction* instr) {
   bool ra_is_zr = RaIsZROrSP(instr);
   const char *mnemonic = "";
   const char *form = "'Xd, 'Wn, 'Wm, 'Xa";
@@ -697,7 +698,7 @@
 }
 
 
-void Disassembler::VisitCompareBranch(Instruction* instr) {
+void Disassembler::VisitCompareBranch(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Rt, 'BImmCmpa";
 
@@ -712,7 +713,7 @@
 }
 
 
-void Disassembler::VisitTestBranch(Instruction* instr) {
+void Disassembler::VisitTestBranch(const Instruction* instr) {
   const char *mnemonic = "";
   // If the top bit of the immediate is clear, the tested register is
   // disassembled as Wt, otherwise Xt. As the top bit of the immediate is
@@ -729,7 +730,7 @@
 }
 
 
-void Disassembler::VisitMoveWideImmediate(Instruction* instr) {
+void Disassembler::VisitMoveWideImmediate(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Rd, 'IMoveImm";
 
@@ -768,7 +769,7 @@
   V(LDR_s, "ldr", "'St")      \
   V(LDR_d, "ldr", "'Dt")
 
-void Disassembler::VisitLoadStorePreIndex(Instruction* instr) {
+void Disassembler::VisitLoadStorePreIndex(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStorePreIndex)";
 
@@ -782,7 +783,7 @@
 }
 
 
-void Disassembler::VisitLoadStorePostIndex(Instruction* instr) {
+void Disassembler::VisitLoadStorePostIndex(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStorePostIndex)";
 
@@ -796,7 +797,7 @@
 }
 
 
-void Disassembler::VisitLoadStoreUnsignedOffset(Instruction* instr) {
+void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStoreUnsignedOffset)";
 
@@ -811,7 +812,7 @@
 }
 
 
-void Disassembler::VisitLoadStoreRegisterOffset(Instruction* instr) {
+void Disassembler::VisitLoadStoreRegisterOffset(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStoreRegisterOffset)";
 
@@ -826,7 +827,7 @@
 }
 
 
-void Disassembler::VisitLoadStoreUnscaledOffset(Instruction* instr) {
+void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "'Wt, ['Xns'ILS]";
   const char *form_x = "'Xt, ['Xns'ILS]";
@@ -857,7 +858,7 @@
 }
 
 
-void Disassembler::VisitLoadLiteral(Instruction* instr) {
+void Disassembler::VisitLoadLiteral(const Instruction* instr) {
   const char *mnemonic = "ldr";
   const char *form = "(LoadLiteral)";
 
@@ -866,6 +867,11 @@
     case LDR_x_lit: form = "'Xt, 'ILLiteral 'LValue"; break;
     case LDR_s_lit: form = "'St, 'ILLiteral 'LValue"; break;
     case LDR_d_lit: form = "'Dt, 'ILLiteral 'LValue"; break;
+    case LDRSW_x_lit: {
+      mnemonic = "ldrsw";
+      form = "'Xt, 'ILLiteral 'LValue";
+      break;
+    }
     default: mnemonic = "unimplemented";
   }
   Format(instr, mnemonic, form);
@@ -883,7 +889,7 @@
   V(STP_d, "stp", "'Dt, 'Dt2", "8")     \
   V(LDP_d, "ldp", "'Dt, 'Dt2", "8")
 
-void Disassembler::VisitLoadStorePairPostIndex(Instruction* instr) {
+void Disassembler::VisitLoadStorePairPostIndex(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStorePairPostIndex)";
 
@@ -897,7 +903,7 @@
 }
 
 
-void Disassembler::VisitLoadStorePairPreIndex(Instruction* instr) {
+void Disassembler::VisitLoadStorePairPreIndex(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStorePairPreIndex)";
 
@@ -911,7 +917,7 @@
 }
 
 
-void Disassembler::VisitLoadStorePairOffset(Instruction* instr) {
+void Disassembler::VisitLoadStorePairOffset(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStorePairOffset)";
 
@@ -925,7 +931,7 @@
 }
 
 
-void Disassembler::VisitLoadStorePairNonTemporal(Instruction* instr) {
+void Disassembler::VisitLoadStorePairNonTemporal(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form;
 
@@ -944,7 +950,7 @@
 }
 
 
-void Disassembler::VisitLoadStoreExclusive(Instruction* instr) {
+void Disassembler::VisitLoadStoreExclusive(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form;
 
@@ -987,7 +993,7 @@
 }
 
 
-void Disassembler::VisitFPCompare(Instruction* instr) {
+void Disassembler::VisitFPCompare(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "'Fn, 'Fm";
   const char *form_zero = "'Fn, #0.0";
@@ -1003,7 +1009,7 @@
 }
 
 
-void Disassembler::VisitFPConditionalCompare(Instruction* instr) {
+void Disassembler::VisitFPConditionalCompare(const Instruction* instr) {
   const char *mnemonic = "unmplemented";
   const char *form = "'Fn, 'Fm, 'INzcv, 'Cond";
 
@@ -1018,7 +1024,7 @@
 }
 
 
-void Disassembler::VisitFPConditionalSelect(Instruction* instr) {
+void Disassembler::VisitFPConditionalSelect(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Fd, 'Fn, 'Fm, 'Cond";
 
@@ -1031,7 +1037,7 @@
 }
 
 
-void Disassembler::VisitFPDataProcessing1Source(Instruction* instr) {
+void Disassembler::VisitFPDataProcessing1Source(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "'Fd, 'Fn";
 
@@ -1059,7 +1065,7 @@
 }
 
 
-void Disassembler::VisitFPDataProcessing2Source(Instruction* instr) {
+void Disassembler::VisitFPDataProcessing2Source(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Fd, 'Fn, 'Fm";
 
@@ -1083,7 +1089,7 @@
 }
 
 
-void Disassembler::VisitFPDataProcessing3Source(Instruction* instr) {
+void Disassembler::VisitFPDataProcessing3Source(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Fd, 'Fn, 'Fm, 'Fa";
 
@@ -1102,7 +1108,7 @@
 }
 
 
-void Disassembler::VisitFPImmediate(Instruction* instr) {
+void Disassembler::VisitFPImmediate(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "(FPImmediate)";
 
@@ -1115,7 +1121,7 @@
 }
 
 
-void Disassembler::VisitFPIntegerConvert(Instruction* instr) {
+void Disassembler::VisitFPIntegerConvert(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(FPIntegerConvert)";
   const char *form_rf = "'Rd, 'Fn";
@@ -1171,7 +1177,7 @@
 }
 
 
-void Disassembler::VisitFPFixedPointConvert(Instruction* instr) {
+void Disassembler::VisitFPFixedPointConvert(const Instruction* instr) {
   const char *mnemonic = "";
   const char *form = "'Rd, 'Fn, 'IFPFBits";
   const char *form_fr = "'Fd, 'Rn, 'IFPFBits";
@@ -1199,7 +1205,7 @@
 }
 
 
-void Disassembler::VisitSystem(Instruction* instr) {
+void Disassembler::VisitSystem(const Instruction* instr) {
   // Some system instructions hijack their Op and Cp fields to represent a
   // range of immediates instead of indicating a different instruction. This
   // makes the decoding tricky.
@@ -1267,7 +1273,7 @@
 }
 
 
-void Disassembler::VisitException(Instruction* instr) {
+void Disassembler::VisitException(const Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "'IDebug";
 
@@ -1286,22 +1292,75 @@
 }
 
 
-void Disassembler::VisitUnimplemented(Instruction* instr) {
+void Disassembler::VisitUnimplemented(const Instruction* instr) {
   Format(instr, "unimplemented", "(Unimplemented)");
 }
 
 
-void Disassembler::VisitUnallocated(Instruction* instr) {
+void Disassembler::VisitUnallocated(const Instruction* instr) {
   Format(instr, "unallocated", "(Unallocated)");
 }
 
 
-void Disassembler::ProcessOutput(Instruction* /*instr*/) {
+void Disassembler::ProcessOutput(const Instruction* /*instr*/) {
   // The base disasm does nothing more than disassembling into a buffer.
 }
 
 
-void Disassembler::Format(Instruction* instr, const char* mnemonic,
+void Disassembler::AppendRegisterNameToOutput(const Instruction* instr,
+                                              const CPURegister& reg) {
+  USE(instr);
+  VIXL_ASSERT(reg.IsValid());
+  char reg_char;
+
+  if (reg.IsRegister()) {
+    reg_char = reg.Is64Bits() ? 'x' : 'w';
+  } else {
+    VIXL_ASSERT(reg.IsFPRegister());
+    reg_char = reg.Is64Bits() ? 'd' : 's';
+  }
+
+  if (reg.IsFPRegister() || !(reg.Aliases(sp) || reg.Aliases(xzr))) {
+    // A normal register: w0 - w30, x0 - x30, s0 - s31, d0 - d31.
+    AppendToOutput("%c%d", reg_char, reg.code());
+  } else if (reg.Aliases(sp)) {
+    // Disassemble w31/x31 as stack pointer wsp/sp.
+    AppendToOutput("%s", reg.Is64Bits() ? "sp" : "wsp");
+  } else {
+    // Disassemble w31/x31 as zero register wzr/xzr.
+    AppendToOutput("%czr", reg_char);
+  }
+}
+
+
+void Disassembler::AppendPCRelativeOffsetToOutput(const Instruction* instr,
+                                                  int64_t offset) {
+  USE(instr);
+  char sign = (offset < 0) ? '-' : '+';
+  AppendToOutput("#%c0x%" PRIx64, sign, std::abs(offset));
+}
+
+
+void Disassembler::AppendAddressToOutput(const Instruction* instr,
+                                         const void* addr) {
+  USE(instr);
+  AppendToOutput("(addr %p)", addr);
+}
+
+
+void Disassembler::AppendCodeAddressToOutput(const Instruction* instr,
+                                             const void* addr) {
+  AppendAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::AppendDataAddressToOutput(const Instruction* instr,
+                                             const void* addr) {
+  AppendAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::Format(const Instruction* instr, const char* mnemonic,
                           const char* format) {
   VIXL_ASSERT(mnemonic != NULL);
   ResetOutput();
@@ -1315,7 +1374,7 @@
 }
 
 
-void Disassembler::Substitute(Instruction* instr, const char* string) {
+void Disassembler::Substitute(const Instruction* instr, const char* string) {
   char chr = *string++;
   while (chr != '\0') {
     if (chr == '\'') {
@@ -1328,7 +1387,8 @@
 }
 
 
-int Disassembler::SubstituteField(Instruction* instr, const char* format) {
+int Disassembler::SubstituteField(const Instruction* instr,
+                                  const char* format) {
   switch (format[0]) {
     case 'R':  // Register. X or W, selected by sf bit.
     case 'F':  // FP Register. S or D, selected by type field.
@@ -1354,7 +1414,7 @@
 }
 
 
-int Disassembler::SubstituteRegisterField(Instruction* instr,
+int Disassembler::SubstituteRegisterField(const Instruction* instr,
                                           const char* format) {
   unsigned reg_num = 0;
   unsigned field_len = 2;
@@ -1381,34 +1441,47 @@
     field_len = 3;
   }
 
-  char reg_type;
+  CPURegister::RegisterType reg_type;
+  unsigned reg_size;
+
   if (format[0] == 'R') {
     // Register type is R: use sf bit to choose X and W.
-    reg_type = instr->SixtyFourBits() ? 'x' : 'w';
+    reg_type = CPURegister::kRegister;
+    reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   } else if (format[0] == 'F') {
     // Floating-point register: use type field to choose S or D.
-    reg_type = ((instr->FPType() & 1) == 0) ? 's' : 'd';
+    reg_type = CPURegister::kFPRegister;
+    reg_size = ((instr->FPType() & 1) == 0) ? kSRegSize : kDRegSize;
   } else {
-    // Register type is specified. Make it lower case.
-    reg_type = format[0] + 0x20;
+    // The register type is specified.
+    switch (format[0]) {
+      case 'W':
+        reg_type = CPURegister::kRegister; reg_size = kWRegSize; break;
+      case 'X':
+        reg_type = CPURegister::kRegister; reg_size = kXRegSize; break;
+      case 'S':
+        reg_type = CPURegister::kFPRegister; reg_size = kSRegSize; break;
+      case 'D':
+        reg_type = CPURegister::kFPRegister; reg_size = kDRegSize; break;
+      default:
+        VIXL_UNREACHABLE();
+        reg_type = CPURegister::kRegister;
+        reg_size = kXRegSize;
+    }
   }
 
-  if ((reg_num != kZeroRegCode) || (reg_type == 's') || (reg_type == 'd')) {
-    // A normal register: w0 - w30, x0 - x30, s0 - s31, d0 - d31.
-    AppendToOutput("%c%d", reg_type, reg_num);
-  } else if (format[2] == 's') {
-    // Disassemble w31/x31 as stack pointer wsp/sp.
-    AppendToOutput("%s", (reg_type == 'w') ? "wsp" : "sp");
-  } else {
-    // Disassemble w31/x31 as zero register wzr/xzr.
-    AppendToOutput("%czr", reg_type);
+  if ((reg_type == CPURegister::kRegister) &&
+      (reg_num == kZeroRegCode) && (format[2] == 's')) {
+    reg_num = kSPRegInternalCode;
   }
 
+  AppendRegisterNameToOutput(instr, CPURegister(reg_num, reg_size, reg_type));
+
   return field_len;
 }
 
 
-int Disassembler::SubstituteImmediateField(Instruction* instr,
+int Disassembler::SubstituteImmediateField(const Instruction* instr,
                                            const char* format) {
   VIXL_ASSERT(format[0] == 'I');
 
@@ -1458,8 +1531,7 @@
     }
     case 'C': {  // ICondB - Immediate Conditional Branch.
       int64_t offset = instr->ImmCondBranch() << 2;
-      char sign = (offset >= 0) ? '+' : '-';
-      AppendToOutput("#%c0x%" PRIx64, sign, offset);
+      AppendPCRelativeOffsetToOutput(instr, offset);
       return 6;
     }
     case 'A': {  // IAddSub.
@@ -1522,7 +1594,7 @@
 }
 
 
-int Disassembler::SubstituteBitfieldImmediateField(Instruction* instr,
+int Disassembler::SubstituteBitfieldImmediateField(const Instruction* instr,
                                                    const char* format) {
   VIXL_ASSERT((format[0] == 'I') && (format[1] == 'B'));
   unsigned r = instr->ImmR();
@@ -1557,7 +1629,7 @@
 }
 
 
-int Disassembler::SubstituteLiteralField(Instruction* instr,
+int Disassembler::SubstituteLiteralField(const Instruction* instr,
                                          const char* format) {
   VIXL_ASSERT(strncmp(format, "LValue", 6) == 0);
   USE(format);
@@ -1565,16 +1637,21 @@
   switch (instr->Mask(LoadLiteralMask)) {
     case LDR_w_lit:
     case LDR_x_lit:
+    case LDRSW_x_lit:
     case LDR_s_lit:
-    case LDR_d_lit: AppendToOutput("(addr %p)", instr->LiteralAddress()); break;
-    default: VIXL_UNREACHABLE();
+    case LDR_d_lit:
+      AppendDataAddressToOutput(instr, instr->LiteralAddress());
+      break;
+    default:
+      VIXL_UNREACHABLE();
   }
 
   return 6;
 }
 
 
-int Disassembler::SubstituteShiftField(Instruction* instr, const char* format) {
+int Disassembler::SubstituteShiftField(const Instruction* instr,
+                                       const char* format) {
   VIXL_ASSERT(format[0] == 'H');
   VIXL_ASSERT(instr->ShiftDP() <= 0x3);
 
@@ -1597,7 +1674,7 @@
 }
 
 
-int Disassembler::SubstituteConditionField(Instruction* instr,
+int Disassembler::SubstituteConditionField(const Instruction* instr,
                                            const char* format) {
   VIXL_ASSERT(format[0] == 'C');
   const char* condition_code[] = { "eq", "ne", "hs", "lo",
@@ -1618,27 +1695,28 @@
 }
 
 
-int Disassembler::SubstitutePCRelAddressField(Instruction* instr,
+int Disassembler::SubstitutePCRelAddressField(const Instruction* instr,
                                               const char* format) {
   VIXL_ASSERT((strcmp(format, "AddrPCRelByte") == 0) ||   // Used by `adr`.
               (strcmp(format, "AddrPCRelPage") == 0));    // Used by `adrp`.
 
   int64_t offset = instr->ImmPCRel();
-  Instruction * base = instr;
+  const Instruction * base = instr;
 
   if (format[9] == 'P') {
     offset *= kPageSize;
     base = AlignDown(base, kPageSize);
   }
 
-  char sign = (offset < 0) ? '-' : '+';
-  void * target = reinterpret_cast<void *>(base + offset);
-  AppendToOutput("#%c0x%" PRIx64 " (addr %p)", sign, std::abs(offset), target);
+  const void* target = reinterpret_cast<const void*>(base + offset);
+  AppendPCRelativeOffsetToOutput(instr, offset);
+  AppendToOutput(" ");
+  AppendAddressToOutput(instr, target);
   return 13;
 }
 
 
-int Disassembler::SubstituteBranchTargetField(Instruction* instr,
+int Disassembler::SubstituteBranchTargetField(const Instruction* instr,
                                               const char* format) {
   VIXL_ASSERT(strncmp(format, "BImm", 4) == 0);
 
@@ -1655,19 +1733,18 @@
     default: VIXL_UNIMPLEMENTED();
   }
   offset <<= kInstructionSizeLog2;
-  char sign = '+';
-  if (offset < 0) {
-    offset = -offset;
-    sign = '-';
-  }
+  const void* target_address = reinterpret_cast<const void*>(instr + offset);
   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
-  void * address = reinterpret_cast<void *>(instr + offset);
-  AppendToOutput("#%c0x%" PRIx64 " (addr %p)", sign, offset, address);
+
+  AppendPCRelativeOffsetToOutput(instr, offset);
+  AppendToOutput(" ");
+  AppendCodeAddressToOutput(instr, target_address);
+
   return 8;
 }
 
 
-int Disassembler::SubstituteExtendField(Instruction* instr,
+int Disassembler::SubstituteExtendField(const Instruction* instr,
                                         const char* format) {
   VIXL_ASSERT(strncmp(format, "Ext", 3) == 0);
   VIXL_ASSERT(instr->ExtendMode() <= 7);
@@ -1694,7 +1771,7 @@
 }
 
 
-int Disassembler::SubstituteLSRegOffsetField(Instruction* instr,
+int Disassembler::SubstituteLSRegOffsetField(const Instruction* instr,
                                              const char* format) {
   VIXL_ASSERT(strncmp(format, "Offsetreg", 9) == 0);
   const char* extend_mode[] = { "undefined", "undefined", "uxtw", "lsl",
@@ -1723,7 +1800,7 @@
 }
 
 
-int Disassembler::SubstitutePrefetchField(Instruction* instr,
+int Disassembler::SubstitutePrefetchField(const Instruction* instr,
                                           const char* format) {
   VIXL_ASSERT(format[0] == 'P');
   USE(format);
@@ -1738,7 +1815,7 @@
   return 6;
 }
 
-int Disassembler::SubstituteBarrierField(Instruction* instr,
+int Disassembler::SubstituteBarrierField(const Instruction* instr,
                                          const char* format) {
   VIXL_ASSERT(format[0] == 'M');
   USE(format);
@@ -1770,7 +1847,7 @@
 }
 
 
-void PrintDisassembler::ProcessOutput(Instruction* instr) {
+void PrintDisassembler::ProcessOutput(const Instruction* instr) {
   fprintf(stream_, "0x%016" PRIx64 "  %08" PRIx32 "\t\t%s\n",
           reinterpret_cast<uint64_t>(instr),
           instr->InstructionBits(),
diff --git a/src/a64/disasm-a64.h b/src/a64/disasm-a64.h
index 0dc9d28..11a142b 100644
--- a/src/a64/disasm-a64.h
+++ b/src/a64/disasm-a64.h
@@ -31,6 +31,7 @@
 #include "utils-vixl.h"
 #include "instructions-a64.h"
 #include "decoder-a64.h"
+#include "assembler-a64.h"
 
 namespace vixl {
 
@@ -42,48 +43,83 @@
   char* GetOutput();
 
   // Declare all Visitor functions.
-  #define DECLARE(A)  void Visit##A(Instruction* instr);
+  #define DECLARE(A)  void Visit##A(const Instruction* instr);
   VISITOR_LIST(DECLARE)
   #undef DECLARE
 
  protected:
-  virtual void ProcessOutput(Instruction* instr);
+  virtual void ProcessOutput(const Instruction* instr);
+
+  // Default output functions.  The functions below implement a default way of
+  // printing elements in the disassembly. A sub-class can override these to
+  // customize the disassembly output.
+
+  // Prints the name of a register.
+  virtual void AppendRegisterNameToOutput(const Instruction* instr,
+                                          const CPURegister& reg);
+
+  // Prints a PC-relative offset. This is used for example when disassembling
+  // branches to immediate offsets.
+  virtual void AppendPCRelativeOffsetToOutput(const Instruction* instr,
+                                              int64_t offset);
+
+  // Prints an address, in the general case. It can be code or data. This is
+  // used for example to print the target address of an ADR instruction.
+  virtual void AppendAddressToOutput(const Instruction* instr,
+                                     const void* addr);
+
+  // Prints the address of some code.
+  // This is used for example to print the target address of a branch to an
+  // immediate offset.
+  // A sub-class can for example override this method to look up the address and
+  // print an appropriate name.
+  virtual void AppendCodeAddressToOutput(const Instruction* instr,
+                                         const void* addr);
+
+  // Prints the address of some data.
+  // This is used for example to print the source address of a load literal
+  // instruction.
+  virtual void AppendDataAddressToOutput(const Instruction* instr,
+                                         const void* addr);
 
  private:
-  void Format(Instruction* instr, const char* mnemonic, const char* format);
-  void Substitute(Instruction* instr, const char* string);
-  int SubstituteField(Instruction* instr, const char* format);
-  int SubstituteRegisterField(Instruction* instr, const char* format);
-  int SubstituteImmediateField(Instruction* instr, const char* format);
-  int SubstituteLiteralField(Instruction* instr, const char* format);
-  int SubstituteBitfieldImmediateField(Instruction* instr, const char* format);
-  int SubstituteShiftField(Instruction* instr, const char* format);
-  int SubstituteExtendField(Instruction* instr, const char* format);
-  int SubstituteConditionField(Instruction* instr, const char* format);
-  int SubstitutePCRelAddressField(Instruction* instr, const char* format);
-  int SubstituteBranchTargetField(Instruction* instr, const char* format);
-  int SubstituteLSRegOffsetField(Instruction* instr, const char* format);
-  int SubstitutePrefetchField(Instruction* instr, const char* format);
-  int SubstituteBarrierField(Instruction* instr, const char* format);
+  void Format(
+      const Instruction* instr, const char* mnemonic, const char* format);
+  void Substitute(const Instruction* instr, const char* string);
+  int SubstituteField(const Instruction* instr, const char* format);
+  int SubstituteRegisterField(const Instruction* instr, const char* format);
+  int SubstituteImmediateField(const Instruction* instr, const char* format);
+  int SubstituteLiteralField(const Instruction* instr, const char* format);
+  int SubstituteBitfieldImmediateField(
+      const Instruction* instr, const char* format);
+  int SubstituteShiftField(const Instruction* instr, const char* format);
+  int SubstituteExtendField(const Instruction* instr, const char* format);
+  int SubstituteConditionField(const Instruction* instr, const char* format);
+  int SubstitutePCRelAddressField(const Instruction* instr, const char* format);
+  int SubstituteBranchTargetField(const Instruction* instr, const char* format);
+  int SubstituteLSRegOffsetField(const Instruction* instr, const char* format);
+  int SubstitutePrefetchField(const Instruction* instr, const char* format);
+  int SubstituteBarrierField(const Instruction* instr, const char* format);
 
-  inline bool RdIsZROrSP(Instruction* instr) const {
+  inline bool RdIsZROrSP(const Instruction* instr) const {
     return (instr->Rd() == kZeroRegCode);
   }
 
-  inline bool RnIsZROrSP(Instruction* instr) const {
+  inline bool RnIsZROrSP(const Instruction* instr) const {
     return (instr->Rn() == kZeroRegCode);
   }
 
-  inline bool RmIsZROrSP(Instruction* instr) const {
+  inline bool RmIsZROrSP(const Instruction* instr) const {
     return (instr->Rm() == kZeroRegCode);
   }
 
-  inline bool RaIsZROrSP(Instruction* instr) const {
+  inline bool RaIsZROrSP(const Instruction* instr) const {
     return (instr->Ra() == kZeroRegCode);
   }
 
   bool IsMovzMovnImm(unsigned reg_size, uint64_t value);
 
+ protected:
   void ResetOutput();
   void AppendToOutput(const char* string, ...) PRINTF_CHECK(2, 3);
 
@@ -97,10 +133,10 @@
 class PrintDisassembler: public Disassembler {
  public:
   explicit PrintDisassembler(FILE* stream) : stream_(stream) { }
-  ~PrintDisassembler() { }
+  virtual ~PrintDisassembler() { }
 
  protected:
-  virtual void ProcessOutput(Instruction* instr);
+  virtual void ProcessOutput(const Instruction* instr);
 
  private:
   FILE *stream_;
diff --git a/src/a64/instructions-a64.cc b/src/a64/instructions-a64.cc
index e9caceb..1f08c78 100644
--- a/src/a64/instructions-a64.cc
+++ b/src/a64/instructions-a64.cc
@@ -57,7 +57,7 @@
 // Logical immediates can't encode zero, so a return value of zero is used to
 // indicate a failure case. Specifically, where the constraints on imm_s are
 // not met.
-uint64_t Instruction::ImmLogical() {
+uint64_t Instruction::ImmLogical() const {
   unsigned reg_size = SixtyFourBits() ? kXRegSize : kWRegSize;
   int64_t n = BitN();
   int64_t imm_s = ImmSetBits();
@@ -108,7 +108,7 @@
 }
 
 
-float Instruction::ImmFP32() {
+float Instruction::ImmFP32() const {
   //  ImmFP: abcdefgh (8 bits)
   // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits)
   // where B is b ^ 1
@@ -122,7 +122,7 @@
 }
 
 
-double Instruction::ImmFP64() {
+double Instruction::ImmFP64() const {
   //  ImmFP: abcdefgh (8 bits)
   // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
   //         0000.0000.0000.0000.0000.0000.0000.0000 (64 bits)
@@ -148,8 +148,8 @@
 }
 
 
-Instruction* Instruction::ImmPCOffsetTarget() {
-  Instruction * base = this;
+const Instruction* Instruction::ImmPCOffsetTarget() const {
+  const Instruction * base = this;
   ptrdiff_t offset;
   if (IsPCRelAddressing()) {
     // ADR and ADRP.
@@ -182,7 +182,7 @@
 }
 
 
-void Instruction::SetImmPCOffsetTarget(Instruction* target) {
+void Instruction::SetImmPCOffsetTarget(const Instruction* target) {
   if (IsPCRelAddressing()) {
     SetPCRelImmTarget(target);
   } else {
@@ -191,7 +191,7 @@
 }
 
 
-void Instruction::SetPCRelImmTarget(Instruction* target) {
+void Instruction::SetPCRelImmTarget(const Instruction* target) {
   int32_t imm21;
   if ((Mask(PCRelAddressingMask) == ADR)) {
     imm21 = target - this;
@@ -207,7 +207,7 @@
 }
 
 
-void Instruction::SetBranchImmTarget(Instruction* target) {
+void Instruction::SetBranchImmTarget(const Instruction* target) {
   VIXL_ASSERT(((target - this) & 3) == 0);
   Instr branch_imm = 0;
   uint32_t imm_mask = 0;
@@ -239,9 +239,9 @@
 }
 
 
-void Instruction::SetImmLLiteral(Instruction* source) {
-  VIXL_ASSERT(((source - this) & 3) == 0);
-  int offset = (source - this) >> kLiteralEntrySizeLog2;
+void Instruction::SetImmLLiteral(const Instruction* source) {
+  VIXL_ASSERT(IsWordAligned(source));
+  ptrdiff_t offset = (source - this) >> kLiteralEntrySizeLog2;
   Instr imm = Assembler::ImmLLiteral(offset);
   Instr mask = ImmLLiteral_mask;
 
diff --git a/src/a64/instructions-a64.h b/src/a64/instructions-a64.h
index 2f0c6a3..249542b 100644
--- a/src/a64/instructions-a64.h
+++ b/src/a64/instructions-a64.h
@@ -44,6 +44,7 @@
 // This is the nominal page size (as used by the adrp instruction); the actual
 // size of the memory pages allocated by the kernel is likely to differ.
 const unsigned kPageSize = 4 * KBytes;
+const unsigned kPageSizeLog2 = 12;
 
 const unsigned kWRegSize = 32;
 const unsigned kWRegSizeLog2 = 5;
@@ -201,9 +202,9 @@
     return signed_bitextract_32(width-1, 0, offset);
   }
 
-  uint64_t ImmLogical();
-  float ImmFP32();
-  double ImmFP64();
+  uint64_t ImmLogical() const;
+  float ImmFP32() const;
+  double ImmFP64() const;
 
   inline LSDataSize SizeLSPair() const {
     return CalcLSPairDataSize(
@@ -311,46 +312,49 @@
 
   // Find the target of this instruction. 'this' may be a branch or a
   // PC-relative addressing instruction.
-  Instruction* ImmPCOffsetTarget();
+  const Instruction* ImmPCOffsetTarget() const;
 
   // Patch a PC-relative offset to refer to 'target'. 'this' may be a branch or
   // a PC-relative addressing instruction.
-  void SetImmPCOffsetTarget(Instruction* target);
+  void SetImmPCOffsetTarget(const Instruction* target);
   // Patch a literal load instruction to load from 'source'.
-  void SetImmLLiteral(Instruction* source);
+  void SetImmLLiteral(const Instruction* source);
 
-  inline uint8_t* LiteralAddress() {
+  inline uint8_t* LiteralAddress() const {
     int offset = ImmLLiteral() << kLiteralEntrySizeLog2;
-    return reinterpret_cast<uint8_t*>(this) + offset;
+    const uint8_t* address = reinterpret_cast<const uint8_t*>(this) + offset;
+    // Note that the result is safely mutable only if the backing buffer is
+    // safely mutable.
+    return const_cast<uint8_t*>(address);
   }
 
-  inline uint32_t Literal32() {
+  inline uint32_t Literal32() const {
     uint32_t literal;
     memcpy(&literal, LiteralAddress(), sizeof(literal));
 
     return literal;
   }
 
-  inline uint64_t Literal64() {
+  inline uint64_t Literal64() const {
     uint64_t literal;
     memcpy(&literal, LiteralAddress(), sizeof(literal));
 
     return literal;
   }
 
-  inline float LiteralFP32() {
+  inline float LiteralFP32() const {
     return rawbits_to_float(Literal32());
   }
 
-  inline double LiteralFP64() {
+  inline double LiteralFP64() const {
     return rawbits_to_double(Literal64());
   }
 
-  inline Instruction* NextInstruction() {
+  inline const Instruction* NextInstruction() const {
     return this + kInstructionSize;
   }
 
-  inline Instruction* InstructionAtOffset(int64_t offset) {
+  inline const Instruction* InstructionAtOffset(int64_t offset) const {
     VIXL_ASSERT(IsWordAligned(this + offset));
     return this + offset;
   }
@@ -359,11 +363,15 @@
     return reinterpret_cast<Instruction*>(src);
   }
 
+  template<typename T> static inline const Instruction* CastConst(T src) {
+    return reinterpret_cast<const Instruction*>(src);  // No checks performed.
+  }
+
  private:
   inline int ImmBranch() const;
 
-  void SetPCRelImmTarget(Instruction* target);
-  void SetBranchImmTarget(Instruction* target);
+  void SetPCRelImmTarget(const Instruction* target);
+  void SetBranchImmTarget(const Instruction* target);
 };
 }  // namespace vixl
 
diff --git a/src/a64/instrument-a64.cc b/src/a64/instrument-a64.cc
index 5bcf4b0..3e3c388 100644
--- a/src/a64/instrument-a64.cc
+++ b/src/a64/instrument-a64.cc
@@ -251,7 +251,7 @@
 }
 
 
-void Instrument::VisitPCRelAddressing(Instruction* instr) {
+void Instrument::VisitPCRelAddressing(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("PC Addressing");
@@ -259,7 +259,7 @@
 }
 
 
-void Instrument::VisitAddSubImmediate(Instruction* instr) {
+void Instrument::VisitAddSubImmediate(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Add/Sub DP");
@@ -267,7 +267,7 @@
 }
 
 
-void Instrument::VisitLogicalImmediate(Instruction* instr) {
+void Instrument::VisitLogicalImmediate(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Logical DP");
@@ -275,7 +275,7 @@
 }
 
 
-void Instrument::VisitMoveWideImmediate(Instruction* instr) {
+void Instrument::VisitMoveWideImmediate(const Instruction* instr) {
   Update();
   static Counter* counter = GetCounter("Move Immediate");
 
@@ -288,7 +288,7 @@
 }
 
 
-void Instrument::VisitBitfield(Instruction* instr) {
+void Instrument::VisitBitfield(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Other Int DP");
@@ -296,7 +296,7 @@
 }
 
 
-void Instrument::VisitExtract(Instruction* instr) {
+void Instrument::VisitExtract(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Other Int DP");
@@ -304,7 +304,7 @@
 }
 
 
-void Instrument::VisitUnconditionalBranch(Instruction* instr) {
+void Instrument::VisitUnconditionalBranch(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Unconditional Branch");
@@ -312,7 +312,7 @@
 }
 
 
-void Instrument::VisitUnconditionalBranchToRegister(Instruction* instr) {
+void Instrument::VisitUnconditionalBranchToRegister(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Unconditional Branch");
@@ -320,7 +320,7 @@
 }
 
 
-void Instrument::VisitCompareBranch(Instruction* instr) {
+void Instrument::VisitCompareBranch(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Compare and Branch");
@@ -328,7 +328,7 @@
 }
 
 
-void Instrument::VisitTestBranch(Instruction* instr) {
+void Instrument::VisitTestBranch(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Test and Branch");
@@ -336,7 +336,7 @@
 }
 
 
-void Instrument::VisitConditionalBranch(Instruction* instr) {
+void Instrument::VisitConditionalBranch(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Conditional Branch");
@@ -344,7 +344,7 @@
 }
 
 
-void Instrument::VisitSystem(Instruction* instr) {
+void Instrument::VisitSystem(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Other");
@@ -352,7 +352,7 @@
 }
 
 
-void Instrument::VisitException(Instruction* instr) {
+void Instrument::VisitException(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Other");
@@ -360,7 +360,7 @@
 }
 
 
-void Instrument::InstrumentLoadStorePair(Instruction* instr) {
+void Instrument::InstrumentLoadStorePair(const Instruction* instr) {
   static Counter* load_pair_counter = GetCounter("Load Pair");
   static Counter* store_pair_counter = GetCounter("Store Pair");
 
@@ -372,31 +372,31 @@
 }
 
 
-void Instrument::VisitLoadStorePairPostIndex(Instruction* instr) {
+void Instrument::VisitLoadStorePairPostIndex(const Instruction* instr) {
   Update();
   InstrumentLoadStorePair(instr);
 }
 
 
-void Instrument::VisitLoadStorePairOffset(Instruction* instr) {
+void Instrument::VisitLoadStorePairOffset(const Instruction* instr) {
   Update();
   InstrumentLoadStorePair(instr);
 }
 
 
-void Instrument::VisitLoadStorePairPreIndex(Instruction* instr) {
+void Instrument::VisitLoadStorePairPreIndex(const Instruction* instr) {
   Update();
   InstrumentLoadStorePair(instr);
 }
 
 
-void Instrument::VisitLoadStorePairNonTemporal(Instruction* instr) {
+void Instrument::VisitLoadStorePairNonTemporal(const Instruction* instr) {
   Update();
   InstrumentLoadStorePair(instr);
 }
 
 
-void Instrument::VisitLoadStoreExclusive(Instruction* instr) {
+void Instrument::VisitLoadStoreExclusive(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Other");
@@ -404,7 +404,7 @@
 }
 
 
-void Instrument::VisitLoadLiteral(Instruction* instr) {
+void Instrument::VisitLoadLiteral(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Load Literal");
@@ -412,7 +412,7 @@
 }
 
 
-void Instrument::InstrumentLoadStore(Instruction* instr) {
+void Instrument::InstrumentLoadStore(const Instruction* instr) {
   static Counter* load_int_counter = GetCounter("Load Integer");
   static Counter* store_int_counter = GetCounter("Store Integer");
   static Counter* load_fp_counter = GetCounter("Load FP");
@@ -440,38 +440,38 @@
 }
 
 
-void Instrument::VisitLoadStoreUnscaledOffset(Instruction* instr) {
+void Instrument::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
   Update();
   InstrumentLoadStore(instr);
 }
 
 
-void Instrument::VisitLoadStorePostIndex(Instruction* instr) {
+void Instrument::VisitLoadStorePostIndex(const Instruction* instr) {
   USE(instr);
   Update();
   InstrumentLoadStore(instr);
 }
 
 
-void Instrument::VisitLoadStorePreIndex(Instruction* instr) {
+void Instrument::VisitLoadStorePreIndex(const Instruction* instr) {
   Update();
   InstrumentLoadStore(instr);
 }
 
 
-void Instrument::VisitLoadStoreRegisterOffset(Instruction* instr) {
+void Instrument::VisitLoadStoreRegisterOffset(const Instruction* instr) {
   Update();
   InstrumentLoadStore(instr);
 }
 
 
-void Instrument::VisitLoadStoreUnsignedOffset(Instruction* instr) {
+void Instrument::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
   Update();
   InstrumentLoadStore(instr);
 }
 
 
-void Instrument::VisitLogicalShifted(Instruction* instr) {
+void Instrument::VisitLogicalShifted(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Logical DP");
@@ -479,7 +479,7 @@
 }
 
 
-void Instrument::VisitAddSubShifted(Instruction* instr) {
+void Instrument::VisitAddSubShifted(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Add/Sub DP");
@@ -487,7 +487,7 @@
 }
 
 
-void Instrument::VisitAddSubExtended(Instruction* instr) {
+void Instrument::VisitAddSubExtended(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Add/Sub DP");
@@ -495,7 +495,7 @@
 }
 
 
-void Instrument::VisitAddSubWithCarry(Instruction* instr) {
+void Instrument::VisitAddSubWithCarry(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Add/Sub DP");
@@ -503,7 +503,7 @@
 }
 
 
-void Instrument::VisitConditionalCompareRegister(Instruction* instr) {
+void Instrument::VisitConditionalCompareRegister(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Conditional Compare");
@@ -511,7 +511,7 @@
 }
 
 
-void Instrument::VisitConditionalCompareImmediate(Instruction* instr) {
+void Instrument::VisitConditionalCompareImmediate(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Conditional Compare");
@@ -519,7 +519,7 @@
 }
 
 
-void Instrument::VisitConditionalSelect(Instruction* instr) {
+void Instrument::VisitConditionalSelect(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Conditional Select");
@@ -527,7 +527,7 @@
 }
 
 
-void Instrument::VisitDataProcessing1Source(Instruction* instr) {
+void Instrument::VisitDataProcessing1Source(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Other Int DP");
@@ -535,7 +535,7 @@
 }
 
 
-void Instrument::VisitDataProcessing2Source(Instruction* instr) {
+void Instrument::VisitDataProcessing2Source(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Other Int DP");
@@ -543,7 +543,7 @@
 }
 
 
-void Instrument::VisitDataProcessing3Source(Instruction* instr) {
+void Instrument::VisitDataProcessing3Source(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Other Int DP");
@@ -551,7 +551,7 @@
 }
 
 
-void Instrument::VisitFPCompare(Instruction* instr) {
+void Instrument::VisitFPCompare(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("FP DP");
@@ -559,7 +559,7 @@
 }
 
 
-void Instrument::VisitFPConditionalCompare(Instruction* instr) {
+void Instrument::VisitFPConditionalCompare(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Conditional Compare");
@@ -567,7 +567,7 @@
 }
 
 
-void Instrument::VisitFPConditionalSelect(Instruction* instr) {
+void Instrument::VisitFPConditionalSelect(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Conditional Select");
@@ -575,7 +575,7 @@
 }
 
 
-void Instrument::VisitFPImmediate(Instruction* instr) {
+void Instrument::VisitFPImmediate(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("FP DP");
@@ -583,7 +583,7 @@
 }
 
 
-void Instrument::VisitFPDataProcessing1Source(Instruction* instr) {
+void Instrument::VisitFPDataProcessing1Source(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("FP DP");
@@ -591,7 +591,7 @@
 }
 
 
-void Instrument::VisitFPDataProcessing2Source(Instruction* instr) {
+void Instrument::VisitFPDataProcessing2Source(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("FP DP");
@@ -599,7 +599,7 @@
 }
 
 
-void Instrument::VisitFPDataProcessing3Source(Instruction* instr) {
+void Instrument::VisitFPDataProcessing3Source(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("FP DP");
@@ -607,7 +607,7 @@
 }
 
 
-void Instrument::VisitFPIntegerConvert(Instruction* instr) {
+void Instrument::VisitFPIntegerConvert(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("FP DP");
@@ -615,7 +615,7 @@
 }
 
 
-void Instrument::VisitFPFixedPointConvert(Instruction* instr) {
+void Instrument::VisitFPFixedPointConvert(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("FP DP");
@@ -623,7 +623,7 @@
 }
 
 
-void Instrument::VisitUnallocated(Instruction* instr) {
+void Instrument::VisitUnallocated(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Other");
@@ -631,7 +631,7 @@
 }
 
 
-void Instrument::VisitUnimplemented(Instruction* instr) {
+void Instrument::VisitUnimplemented(const Instruction* instr) {
   USE(instr);
   Update();
   static Counter* counter = GetCounter("Other");
diff --git a/src/a64/instrument-a64.h b/src/a64/instrument-a64.h
index ae975b6..6da6856 100644
--- a/src/a64/instrument-a64.h
+++ b/src/a64/instrument-a64.h
@@ -53,7 +53,7 @@
 
 class Counter {
  public:
-  Counter(const char* name, CounterType type = Gauge);
+  explicit Counter(const char* name, CounterType type = Gauge);
 
   void Increment();
   void Enable();
@@ -81,7 +81,7 @@
   void Disable();
 
   // Declare all Visitor functions.
-  #define DECLARE(A) void Visit##A(Instruction* instr);
+  #define DECLARE(A) void Visit##A(const Instruction* instr);
   VISITOR_LIST(DECLARE)
   #undef DECLARE
 
@@ -93,8 +93,8 @@
   void HandleInstrumentationEvent(unsigned event);
   Counter* GetCounter(const char* name);
 
-  void InstrumentLoadStore(Instruction* instr);
-  void InstrumentLoadStorePair(Instruction* instr);
+  void InstrumentLoadStore(const Instruction* instr);
+  void InstrumentLoadStorePair(const Instruction* instr);
 
   std::list<Counter*> counters_;
 
diff --git a/src/a64/macro-assembler-a64.cc b/src/a64/macro-assembler-a64.cc
index 6677529..dcf06c6 100644
--- a/src/a64/macro-assembler-a64.cc
+++ b/src/a64/macro-assembler-a64.cc
@@ -27,6 +27,176 @@
 #include "a64/macro-assembler-a64.h"
 namespace vixl {
 
+
+LiteralPool::LiteralPool(Assembler* assm)
+    : assm_(assm), first_use_(-1), monitor_(0) {
+}
+
+
+LiteralPool::~LiteralPool() {
+  VIXL_ASSERT(IsEmpty());
+  VIXL_ASSERT(!IsBlocked());
+}
+
+
+void LiteralPool::Reset() {
+  std::vector<RawLiteral*>::iterator it, end;
+  for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
+    delete *it;
+  }
+  entries_.clear();
+  first_use_ = -1;
+  monitor_ = 0;
+}
+
+
+size_t LiteralPool::Size() const {
+  size_t size = 0;
+  std::vector<RawLiteral*>::const_iterator it, end;
+  for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
+    size += (*it)->size();
+  }
+
+  // Account for the pool header.
+  return size + kInstructionSize;
+}
+
+
+void LiteralPool::Release() {
+  if (--monitor_ == 0) {
+    // Has the literal pool been blocked for too long?
+    VIXL_ASSERT(assm_->CursorOffset() < MaxCursorOffset());
+  }
+}
+
+
+void LiteralPool::CheckEmitFor(size_t amount, EmitOption option) {
+  if (IsEmpty() || IsBlocked()) return;
+
+  ptrdiff_t distance = assm_->CursorOffset() + amount - first_use_;
+  if (distance >= kRecommendedLiteralPoolRange) {
+    Emit(option);
+  }
+}
+
+
+void LiteralPool::Emit(EmitOption option) {
+  // There is an issue if we are asked to emit a blocked or empty pool.
+  VIXL_ASSERT(!IsBlocked());
+  VIXL_ASSERT(!IsEmpty());
+
+  size_t pool_size = Size();
+  size_t emit_size = pool_size;
+  if (option == kBranchRequired) emit_size += kInstructionSize;
+  Label end_of_pool;
+
+  CodeBufferCheckScope guard(assm_,
+                             emit_size,
+                             CodeBufferCheckScope::kCheck,
+                             CodeBufferCheckScope::kExactSize);
+  if (option == kBranchRequired) assm_->b(&end_of_pool);
+
+  // Marker indicating the size of the literal pool in 32-bit words.
+  VIXL_ASSERT((pool_size % kWRegSizeInBytes) == 0);
+  assm_->ldr(xzr, pool_size / kWRegSizeInBytes);
+
+  // Now populate the literal pool.
+  std::vector<RawLiteral*>::iterator it, end;
+  for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
+    VIXL_ASSERT((*it)->IsUsed());
+    assm_->place(*it);
+    delete *it;
+  }
+
+  if (option == kBranchRequired) assm_->bind(&end_of_pool);
+
+  entries_.clear();
+  first_use_ = -1;
+}
+
+
+ptrdiff_t LiteralPool::NextCheckOffset() {
+  if (IsEmpty()) {
+    return assm_->CursorOffset() + kRecommendedLiteralPoolRange;
+  }
+
+  VIXL_ASSERT(
+      ((assm_->CursorOffset() - first_use_) < kRecommendedLiteralPoolRange) ||
+       IsBlocked());
+
+  return first_use_ + kRecommendedLiteralPoolRange;
+}
+
+
+EmissionCheckScope::EmissionCheckScope(MacroAssembler* masm, size_t size) {
+  masm->EnsureEmitFor(size);
+#ifdef DEBUG
+  masm_ = masm;
+  masm->Bind(&start_);
+  size_ = size;
+  masm->AcquireBuffer();
+#endif
+}
+
+
+EmissionCheckScope::~EmissionCheckScope() {
+#ifdef DEBUG
+  masm_->ReleaseBuffer();
+  VIXL_ASSERT(masm_->SizeOfCodeGeneratedSince(&start_) <= size_);
+#endif
+}
+
+
+MacroAssembler::MacroAssembler(size_t capacity,
+                               PositionIndependentCodeOption pic)
+    : Assembler(capacity, pic),
+#ifdef DEBUG
+      allow_macro_instructions_(true),
+#endif
+      sp_(sp),
+      tmp_list_(ip0, ip1),
+      fptmp_list_(d31),
+      literal_pool_(this) {
+  checkpoint_ = NextCheckOffset();
+}
+
+
+MacroAssembler::MacroAssembler(byte * buffer,
+                               size_t capacity,
+                               PositionIndependentCodeOption pic)
+    : Assembler(buffer, capacity, pic),
+#ifdef DEBUG
+      allow_macro_instructions_(true),
+#endif
+      sp_(sp),
+      tmp_list_(ip0, ip1),
+      fptmp_list_(d31),
+      literal_pool_(this) {
+  checkpoint_ = NextCheckOffset();
+}
+
+
+MacroAssembler::~MacroAssembler() {
+}
+
+
+void MacroAssembler::Reset() {
+  Assembler::Reset();
+
+  VIXL_ASSERT(!literal_pool_.IsBlocked());
+  literal_pool_.Reset();
+
+  checkpoint_ = NextCheckOffset();
+}
+
+
+void MacroAssembler::FinalizeCode() {
+  if (!literal_pool_.IsEmpty()) literal_pool_.Emit();
+
+  Assembler::FinalizeCode();
+}
+
+
 void MacroAssembler::B(Label* label, BranchType type, Register reg, int bit) {
   VIXL_ASSERT((reg.Is(NoReg) || (type >= kBranchTypeFirstUsingReg)) &&
               ((bit == -1) || (type >= kBranchTypeFirstUsingBit)));
@@ -121,6 +291,11 @@
                                   const Register& rn,
                                   const Operand& operand,
                                   LogicalOp op) {
+  // The worst case for size is logical immediate to sp:
+  //  * up to 4 instructions to materialise the constant
+  //  * 1 instruction to do the operation
+  //  * 1 instruction to move to sp
+  MacroEmissionCheckScope guard(this);
   UseScratchRegisterScope temps(this);
 
   if (operand.IsImmediate()) {
@@ -222,6 +397,9 @@
                          const Operand& operand,
                          DiscardMoveMode discard_mode) {
   VIXL_ASSERT(allow_macro_instructions_);
+  // The worst case for size is mov immediate with up to 4 instructions.
+  MacroEmissionCheckScope guard(this);
+
   if (operand.IsImmediate()) {
     // Call the macro assembler for generic immediates.
     Mov(rd, operand.immediate());
@@ -255,6 +433,9 @@
 
 void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
+  // The worst case for size is mvn immediate with up to 4 instructions.
+  MacroEmissionCheckScope guard(this);
+
   if (operand.IsImmediate()) {
     // Call the macro assembler for generic immediates.
     Mvn(rd, operand.immediate());
@@ -279,6 +460,10 @@
 void MacroAssembler::Mov(const Register& rd, uint64_t imm) {
   VIXL_ASSERT(allow_macro_instructions_);
   VIXL_ASSERT(is_uint32(imm) || is_int32(imm) || rd.Is64Bits());
+  // The worst case for size is mov 64-bit immediate to sp:
+  //  * up to 4 instructions to materialise the constant
+  //  * 1 instruction to move to sp
+  MacroEmissionCheckScope guard(this);
 
   // Immediates on Aarch64 can be produced using an initial value, and zero to
   // three move keep operations.
@@ -412,6 +597,11 @@
                                              Condition cond,
                                              ConditionalCompareOp op) {
   VIXL_ASSERT((cond != al) && (cond != nv));
+  // The worst case for size is ccmp immediate:
+  //  * up to 4 instructions to materialise the constant
+  //  * 1 instruction for ccmp
+  MacroEmissionCheckScope guard(this);
+
   if ((operand.IsShiftedRegister() && (operand.shift_amount() == 0)) ||
       (operand.IsImmediate() && IsImmConditionalCompare(operand.immediate()))) {
     // The immediate can be encoded in the instruction, or the operand is an
@@ -436,6 +626,11 @@
   VIXL_ASSERT(!rd.IsZero());
   VIXL_ASSERT(!rn.IsZero());
   VIXL_ASSERT((cond != al) && (cond != nv));
+  // The worst case for size is csel immediate:
+  //  * up to 4 instructions to materialise the constant
+  //  * 1 instruction for csel
+  MacroEmissionCheckScope guard(this);
+
   if (operand.IsImmediate()) {
     // Immediate argument. Handle special cases of 0, 1 and -1 using zero
     // register.
@@ -532,6 +727,10 @@
 
 void MacroAssembler::Fcmp(const FPRegister& fn, double value) {
   VIXL_ASSERT(allow_macro_instructions_);
+  // The worst case for size is:
+  //  * 1 instruction to materialise the constant (via literal pool if needed)
+  //  * 1 instruction for fcmp
+  MacroEmissionCheckScope guard(this);
   if (value != 0.0) {
     UseScratchRegisterScope temps(this);
     FPRegister tmp = temps.AcquireSameSizeAs(fn);
@@ -545,6 +744,9 @@
 
 void MacroAssembler::Fmov(FPRegister fd, double imm) {
   VIXL_ASSERT(allow_macro_instructions_);
+  // Floating point immediates are loaded through the literal pool.
+  MacroEmissionCheckScope guard(this);
+
   if (fd.Is32Bits()) {
     Fmov(fd, static_cast<float>(imm));
     return;
@@ -556,13 +758,17 @@
   } else if ((imm == 0.0) && (copysign(1.0, imm) == 1.0)) {
     fmov(fd, xzr);
   } else {
-    ldr(fd, imm);
+    RawLiteral* literal = literal_pool_.Add(imm);
+    ldr(fd, literal);
   }
 }
 
 
 void MacroAssembler::Fmov(FPRegister fd, float imm) {
   VIXL_ASSERT(allow_macro_instructions_);
+  // Floating point immediates are loaded through the literal pool.
+  MacroEmissionCheckScope guard(this);
+
   if (fd.Is64Bits()) {
     Fmov(fd, static_cast<double>(imm));
     return;
@@ -574,7 +780,8 @@
   } else if ((imm == 0.0) && (copysign(1.0, imm) == 1.0)) {
     fmov(fd, wzr);
   } else {
-    ldr(fd, imm);
+    RawLiteral* literal = literal_pool_.Add(imm);
+    ldr(fd, literal);
   }
 }
 
@@ -661,6 +868,11 @@
                                  const Operand& operand,
                                  FlagsUpdate S,
                                  AddSubOp op) {
+  // Worst case is add/sub immediate:
+  //  * up to 4 instructions to materialise the constant
+  //  * 1 instruction for add/sub
+  MacroEmissionCheckScope guard(this);
+
   if (operand.IsZero() && rd.Is(rn) && rd.Is64Bits() && rn.Is64Bits() &&
       (S == LeaveFlags)) {
     // The instruction would be a nop. Avoid generating useless code.
@@ -740,6 +952,10 @@
                                           FlagsUpdate S,
                                           AddSubWithCarryOp op) {
   VIXL_ASSERT(rd.size() == rn.size());
+  // Worst case is adc/sbc immediate:
+  //  * up to 4 instructions to materialise the constant
+  //  * 1 instruction for adc/sbc
+  MacroEmissionCheckScope guard(this);
   UseScratchRegisterScope temps(this);
 
   if (operand.IsImmediate() ||
@@ -780,6 +996,7 @@
 
 #define DEFINE_FUNCTION(FN, REGTYPE, REG, OP)                         \
 void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) {  \
+  VIXL_ASSERT(allow_macro_instructions_);                             \
   LoadStoreMacro(REG, addr, OP);                                      \
 }
 LS_MACRO_LIST(DEFINE_FUNCTION)
@@ -788,6 +1005,12 @@
 void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
                                     const MemOperand& addr,
                                     LoadStoreOp op) {
+  // Worst case is ldr/str pre/post index:
+  //  * 1 instruction for ldr/str
+  //  * up to 4 instructions to materialise the constant
+  //  * 1 instruction to update the base
+  MacroEmissionCheckScope guard(this);
+
   int64_t offset = addr.offset();
   LSDataSize size = CalcLSDataSize(op);
 
@@ -817,6 +1040,54 @@
 }
 
 
+#define DEFINE_FUNCTION(FN, REGTYPE, REG, REG2, OP)  \
+void MacroAssembler::FN(const REGTYPE REG,           \
+                        const REGTYPE REG2,          \
+                        const MemOperand& addr) {    \
+  VIXL_ASSERT(allow_macro_instructions_);            \
+  LoadStorePairMacro(REG, REG2, addr, OP);           \
+}
+LSPAIR_MACRO_LIST(DEFINE_FUNCTION)
+#undef DEFINE_FUNCTION
+
+void MacroAssembler::LoadStorePairMacro(const CPURegister& rt,
+                                        const CPURegister& rt2,
+                                        const MemOperand& addr,
+                                        LoadStorePairOp op) {
+  // TODO(all): Should we support register offset for load-store-pair?
+  VIXL_ASSERT(!addr.IsRegisterOffset());
+  // Worst case is ldp/stp immediate:
+  //  * 1 instruction for ldp/stp
+  //  * up to 4 instructions to materialise the constant
+  //  * 1 instruction to update the base
+  MacroEmissionCheckScope guard(this);
+
+  int64_t offset = addr.offset();
+  LSDataSize size = CalcLSPairDataSize(op);
+
+  // Check if the offset fits in the immediate field of the appropriate
+  // instruction. If not, emit two instructions to perform the operation.
+  if (IsImmLSPair(offset, size)) {
+    // Encodable in one load/store pair instruction.
+    LoadStorePair(rt, rt2, addr, op);
+  } else {
+    Register base = addr.base();
+    if (addr.IsImmediateOffset()) {
+      UseScratchRegisterScope temps(this);
+      Register temp = temps.AcquireSameSizeAs(base);
+      Add(temp, base, offset);
+      LoadStorePair(rt, rt2, MemOperand(temp), op);
+    } else if (addr.IsPostIndex()) {
+      LoadStorePair(rt, rt2, MemOperand(base), op);
+      Add(base, base, offset);
+    } else {
+      VIXL_ASSERT(addr.IsPreIndex());
+      Add(base, base, offset);
+      LoadStorePair(rt, rt2, MemOperand(base), op);
+    }
+  }
+}
+
 void MacroAssembler::Push(const CPURegister& src0, const CPURegister& src1,
                           const CPURegister& src2, const CPURegister& src3) {
   VIXL_ASSERT(allow_macro_instructions_);
@@ -918,7 +1189,9 @@
                                 const CPURegister& src2,
                                 const CPURegister& src3) {
   // Ensure that we don't unintentionally modify scratch or debug registers.
-  InstructionAccurateScope scope(this);
+  // Worst case for size is 2 stp.
+  InstructionAccurateScope scope(this, 2,
+                                 InstructionAccurateScope::kMaximumSize);
 
   VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
   VIXL_ASSERT(size == src0.SizeInBytes());
@@ -958,7 +1231,9 @@
                                const CPURegister& dst2,
                                const CPURegister& dst3) {
   // Ensure that we don't unintentionally modify scratch or debug registers.
-  InstructionAccurateScope scope(this);
+  // Worst case for size is 2 ldp.
+  InstructionAccurateScope scope(this, 2,
+                                 InstructionAccurateScope::kMaximumSize);
 
   VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
   VIXL_ASSERT(size == dst0.SizeInBytes());
@@ -1039,6 +1314,42 @@
 }
 
 
+void MacroAssembler::PeekCPURegList(CPURegList registers, int offset) {
+  VIXL_ASSERT(!registers.IncludesAliasOf(StackPointer()));
+  VIXL_ASSERT(offset >= 0);
+  int size = registers.RegisterSizeInBytes();
+
+  while (registers.Count() >= 2) {
+    const CPURegister& dst0 = registers.PopLowestIndex();
+    const CPURegister& dst1 = registers.PopLowestIndex();
+    Ldp(dst0, dst1, MemOperand(StackPointer(), offset));
+    offset += 2 * size;
+  }
+  if (!registers.IsEmpty()) {
+    Ldr(registers.PopLowestIndex(),
+        MemOperand(StackPointer(), offset));
+  }
+}
+
+
+void MacroAssembler::PokeCPURegList(CPURegList registers, int offset) {
+  VIXL_ASSERT(!registers.IncludesAliasOf(StackPointer()));
+  VIXL_ASSERT(offset >= 0);
+  int size = registers.RegisterSizeInBytes();
+
+  while (registers.Count() >= 2) {
+    const CPURegister& dst0 = registers.PopLowestIndex();
+    const CPURegister& dst1 = registers.PopLowestIndex();
+    Stp(dst0, dst1, MemOperand(StackPointer(), offset));
+    offset += 2 * size;
+  }
+  if (!registers.IsEmpty()) {
+    Str(registers.PopLowestIndex(),
+        MemOperand(StackPointer(), offset));
+  }
+}
+
+
 void MacroAssembler::Claim(const Operand& size) {
   VIXL_ASSERT(allow_macro_instructions_);
 
@@ -1081,7 +1392,9 @@
 
 void MacroAssembler::PushCalleeSavedRegisters() {
   // Ensure that the macro-assembler doesn't use any scratch registers.
-  InstructionAccurateScope scope(this);
+  // 10 stp will be emitted.
+  // TODO(all): Should we use GetCalleeSaved and SavedFP.
+  InstructionAccurateScope scope(this, 10);
 
   // This method must not be called unless the current stack pointer is sp.
   VIXL_ASSERT(sp.Is(StackPointer()));
@@ -1104,7 +1417,9 @@
 
 void MacroAssembler::PopCalleeSavedRegisters() {
   // Ensure that the macro-assembler doesn't use any scratch registers.
-  InstructionAccurateScope scope(this);
+  // 10 ldp will be emitted.
+  // TODO(all): Should we use GetCalleeSaved and SavedFP.
+  InstructionAccurateScope scope(this, 10);
 
   // This method must not be called unless the current stack pointer is sp.
   VIXL_ASSERT(sp.Is(StackPointer()));
@@ -1129,7 +1444,7 @@
   // TODO: Several callers rely on this not using scratch registers, so we use
   // the assembler directly here. However, this means that large immediate
   // values of 'space' cannot be handled.
-  InstructionAccurateScope scope(this);
+  InstructionAccurateScope scope(this, 1);
   sub(sp, StackPointer(), space);
 }
 
@@ -1239,11 +1554,20 @@
   Adr(x0, &format_address);
 
   // Emit the format string directly in the instruction stream.
-  { BlockLiteralPoolScope scope(this);
+  {
+    BlockLiteralPoolScope scope(this);
+    // Data emitted:
+    //   branch
+    //   strlen(format) + 1 (includes null termination)
+    //   padding to next instruction
+    //   unreachable
+    EmissionCheckScope guard(
+        this,
+        AlignUp(strlen(format) + 1, kInstructionSize) + 2 * kInstructionSize);
     Label after_data;
     B(&after_data);
     Bind(&format_address);
-    EmitStringData(format);
+    EmitString(format);
     Unreachable();
     Bind(&after_data);
   }
@@ -1258,7 +1582,8 @@
   // since the system printf function will use a different instruction set and
   // the procedure-call standard will not be compatible.
 #ifdef USE_SIMULATOR
-  { InstructionAccurateScope scope(this, kPrintfLength / kInstructionSize);
+  {
+    InstructionAccurateScope scope(this, kPrintfLength / kInstructionSize);
     hlt(kPrintfOpcode);
     dc32(arg_count);          // kPrintfArgCountOffset
 
diff --git a/src/a64/macro-assembler-a64.h b/src/a64/macro-assembler-a64.h
index c54ae27..adce79d 100644
--- a/src/a64/macro-assembler-a64.h
+++ b/src/a64/macro-assembler-a64.h
@@ -27,6 +27,9 @@
 #ifndef VIXL_A64_MACRO_ASSEMBLER_A64_H_
 #define VIXL_A64_MACRO_ASSEMBLER_A64_H_
 
+#include <algorithm>
+#include <limits>
+
 #include "globals-vixl.h"
 #include "a64/assembler-a64.h"
 #include "a64/debugger-a64.h"
@@ -43,8 +46,107 @@
   V(Str, CPURegister&, rt, StoreOpFor(rt))                    \
   V(Ldrsw, Register&, rt, LDRSW_x)
 
+
+#define LSPAIR_MACRO_LIST(V)                              \
+  V(Ldp, CPURegister&, rt, rt2, LoadPairOpFor(rt, rt2))   \
+  V(Stp, CPURegister&, rt, rt2, StorePairOpFor(rt, rt2))  \
+  V(Ldpsw, CPURegister&, rt, rt2, LDPSW_x)
+
 namespace vixl {
 
+class LiteralPool {
+ public:
+  // Recommended, not exact, since the pool can be blocked for short periods.
+  static const ptrdiff_t kRecommendedLiteralPoolRange = 128 * KBytes;
+  enum EmitOption {
+    kBranchRequired,
+    kNoBranchRequired
+  };
+
+  explicit LiteralPool(Assembler *masm);
+  ~LiteralPool();
+  void Reset();
+
+  template <typename T>
+  RawLiteral* Add(T imm) {
+    if (IsEmpty()) {
+      first_use_ = assm_->CursorOffset();
+    } else {
+      VIXL_ASSERT(assm_->CursorOffset() > first_use_);
+    }
+
+    RawLiteral* literal = new Literal<T>(imm);
+    entries_.push_back(literal);
+
+    return literal;
+  }
+  bool IsEmpty() const { return entries_.empty(); }
+  size_t Size() const ;
+
+  void Block() { monitor_++; }
+  void Release();
+  bool IsBlocked() const { return monitor_ != 0; }
+
+  ptrdiff_t MaxCursorOffset() const {
+    if (IsEmpty()) return std::numeric_limits<ptrdiff_t>::max();
+    return first_use_ + kMaxLoadLiteralRange;
+  }
+
+  void CheckEmitFor(size_t amount, EmitOption option = kBranchRequired);
+  void Emit(EmitOption option = kNoBranchRequired);
+  ptrdiff_t NextCheckOffset();
+
+ private:
+  Assembler* assm_;
+  std::vector<RawLiteral*> entries_;
+  ptrdiff_t first_use_;
+  int monitor_;
+};
+
+
+// Forward declaration
+class MacroAssembler;
+
+// This scope has the following purposes:
+//  * Acquire/Release the underlying assembler's code buffer.
+//     * This is mandatory before emitting.
+//  * Emit the literal pool if necessary before emitting the macro-instruction.
+//  * Ensure there is enough space to emit the macro-instruction.
+class EmissionCheckScope {
+ public:
+  EmissionCheckScope(MacroAssembler* masm, size_t size);
+  ~EmissionCheckScope();
+
+ protected:
+#ifdef DEBUG
+  MacroAssembler* masm_;
+  Label start_;
+  size_t size_;
+#endif
+};
+
+
+// Helper for common Emission checks.
+// The macro-instruction maps to a single instruction.
+class SingleEmissionCheckScope : public EmissionCheckScope {
+ public:
+  explicit SingleEmissionCheckScope(MacroAssembler* masm)
+      : EmissionCheckScope(masm, kInstructionSize) {}
+};
+
+
+// The macro instruction is a "typical" macro-instruction. Typical macro-
+// instructions only emit a few instructions, a few being defined as 8 here.
+class MacroEmissionCheckScope : public EmissionCheckScope {
+ public:
+  explicit MacroEmissionCheckScope(MacroAssembler* masm)
+      : EmissionCheckScope(masm, kTypicalMacroInstructionMaxSize) {}
+
+ private:
+  static const size_t kTypicalMacroInstructionMaxSize = 8 * kInstructionSize;
+};
+
+
 enum BranchType {
   // Copies of architectural conditions.
   // The associated conditions can be used in place of those, the code will
@@ -86,15 +188,25 @@
 
 enum DiscardMoveMode { kDontDiscardForSameWReg, kDiscardForSameWReg };
 
+
 class MacroAssembler : public Assembler {
  public:
-  MacroAssembler(byte * buffer, unsigned buffer_size,
-                 PositionIndependentCodeOption pic = PositionIndependentCode)
-      : Assembler(buffer, buffer_size, pic),
-#ifdef DEBUG
-        allow_macro_instructions_(true),
-#endif
-        sp_(sp), tmp_list_(ip0, ip1), fptmp_list_(d31) {}
+  MacroAssembler(size_t capacity,
+                 PositionIndependentCodeOption pic = PositionIndependentCode);
+  MacroAssembler(byte * buffer, size_t capacity,
+                 PositionIndependentCodeOption pic = PositionIndependentCode);
+  ~MacroAssembler();
+
+  // Start generating code from the beginning of the buffer, discarding any code
+  // and data that has already been emitted into the buffer.
+  //
+  // In order to avoid any accidental transfer of state, Reset ASSERTs that the
+  // literal pool is not blocked.
+  void Reset();
+
+  // Finalize a code buffer of generated instructions. This function must be
+  // called before executing or copying code from the buffer.
+  void FinalizeCode();
 
   // Logical macros.
   void And(const Register& rd,
@@ -230,6 +342,16 @@
                       const MemOperand& addr,
                       LoadStoreOp op);
 
+#define DECLARE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \
+  void FN(const REGTYPE REG, const REGTYPE REG2, const MemOperand& addr);
+  LSPAIR_MACRO_LIST(DECLARE_FUNCTION)
+#undef DECLARE_FUNCTION
+
+  void LoadStorePairMacro(const CPURegister& rt,
+                          const CPURegister& rt2,
+                          const MemOperand& addr,
+                          LoadStorePairOp op);
+
   // Push or pop up to 4 registers of the same width to or from the stack,
   // using the current stack pointer as set by SetStackPointer.
   //
@@ -322,6 +444,52 @@
   // must be aligned to 16 bytes.
   void Peek(const Register& dst, const Operand& offset);
 
+  // Alternative forms of Peek and Poke, taking a RegList or CPURegList that
+  // specifies the registers that are to be loaded or stored. Higher-numbered
+  // registers are associated with higher memory addresses.
+  //
+  // (Peek|Poke)SizeRegList allow you to specify the register size as a
+  // parameter. Only kXRegSize, kWRegSize, kDRegSize and kSRegSize are
+  // supported.
+  //
+  // Otherwise, (Peek|Poke)(CPU|X|W|D|S)RegList is preferred.
+  void PeekCPURegList(CPURegList registers, int offset);
+  void PokeCPURegList(CPURegList registers, int offset);
+
+  void PeekSizeRegList(RegList registers, int offset, unsigned reg_size,
+      CPURegister::RegisterType type = CPURegister::kRegister) {
+    PeekCPURegList(CPURegList(type, reg_size, registers), offset);
+  }
+  void PokeSizeRegList(RegList registers, int offset, unsigned reg_size,
+      CPURegister::RegisterType type = CPURegister::kRegister) {
+    PokeCPURegList(CPURegList(type, reg_size, registers), offset);
+  }
+  void PeekXRegList(RegList regs, int offset) {
+    PeekSizeRegList(regs, offset, kXRegSize);
+  }
+  void PokeXRegList(RegList regs, int offset) {
+    PokeSizeRegList(regs, offset, kXRegSize);
+  }
+  void PeekWRegList(RegList regs, int offset) {
+    PeekSizeRegList(regs, offset, kWRegSize);
+  }
+  void PokeWRegList(RegList regs, int offset) {
+    PokeSizeRegList(regs, offset, kWRegSize);
+  }
+  inline void PeekDRegList(RegList regs, int offset) {
+    PeekSizeRegList(regs, offset, kDRegSize, CPURegister::kFPRegister);
+  }
+  inline void PokeDRegList(RegList regs, int offset) {
+    PokeSizeRegList(regs, offset, kDRegSize, CPURegister::kFPRegister);
+  }
+  inline void PeekSRegList(RegList regs, int offset) {
+    PeekSizeRegList(regs, offset, kSRegSize, CPURegister::kFPRegister);
+  }
+  inline void PokeSRegList(RegList regs, int offset) {
+    PokeSizeRegList(regs, offset, kSRegSize, CPURegister::kFPRegister);
+  }
+
+
   // Claim or drop stack space without actually accessing memory.
   //
   // If the current stack pointer (as set by SetStackPointer) is sp, then it
@@ -356,17 +524,20 @@
   void Adr(const Register& rd, Label* label) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
     adr(rd, label);
   }
   void Adrp(const Register& rd, Label* label) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
     adrp(rd, label);
   }
   void Asr(const Register& rd, const Register& rn, unsigned shift) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     asr(rd, rn, shift);
   }
   void Asr(const Register& rd, const Register& rn, const Register& rm) {
@@ -374,6 +545,7 @@
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
+    SingleEmissionCheckScope guard(this);
     asrv(rd, rn, rm);
   }
 
@@ -394,11 +566,13 @@
   void B(Label* label, BranchType type, Register reg = NoReg, int bit = -1);
 
   void B(Label* label) {
+    SingleEmissionCheckScope guard(this);
     b(label);
   }
   void B(Label* label, Condition cond) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT((cond != al) && (cond != nv));
+    SingleEmissionCheckScope guard(this);
     b(label, cond);
   }
   void B(Condition cond, Label* label) {
@@ -411,6 +585,7 @@
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     bfi(rd, rn, lsb, width);
   }
   void Bfxil(const Register& rd,
@@ -420,82 +595,102 @@
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     bfxil(rd, rn, lsb, width);
   }
   void Bind(Label* label) {
     VIXL_ASSERT(allow_macro_instructions_);
     bind(label);
   }
+  // Bind a label to a specified offset from the start of the buffer.
+  void BindToOffset(Label* label, ptrdiff_t offset) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Assembler::BindToOffset(label, offset);
+  }
   void Bl(Label* label) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     bl(label);
   }
   void Blr(const Register& xn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!xn.IsZero());
+    SingleEmissionCheckScope guard(this);
     blr(xn);
   }
   void Br(const Register& xn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!xn.IsZero());
+    SingleEmissionCheckScope guard(this);
     br(xn);
   }
   void Brk(int code = 0) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     brk(code);
   }
   void Cbnz(const Register& rt, Label* label) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rt.IsZero());
+    SingleEmissionCheckScope guard(this);
     cbnz(rt, label);
   }
   void Cbz(const Register& rt, Label* label) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rt.IsZero());
+    SingleEmissionCheckScope guard(this);
     cbz(rt, label);
   }
   void Cinc(const Register& rd, const Register& rn, Condition cond) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     cinc(rd, rn, cond);
   }
   void Cinv(const Register& rd, const Register& rn, Condition cond) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     cinv(rd, rn, cond);
   }
   void Clrex() {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     clrex();
   }
   void Cls(const Register& rd, const Register& rn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     cls(rd, rn);
   }
   void Clz(const Register& rd, const Register& rn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     clz(rd, rn);
   }
   void Cneg(const Register& rd, const Register& rn, Condition cond) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     cneg(rd, rn, cond);
   }
   void Cset(const Register& rd, Condition cond) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
     cset(rd, cond);
   }
   void Csetm(const Register& rd, Condition cond) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
     csetm(rd, cond);
   }
   void Csinc(const Register& rd,
@@ -507,6 +702,7 @@
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
     VIXL_ASSERT((cond != al) && (cond != nv));
+    SingleEmissionCheckScope guard(this);
     csinc(rd, rn, rm, cond);
   }
   void Csinv(const Register& rd,
@@ -518,6 +714,7 @@
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
     VIXL_ASSERT((cond != al) && (cond != nv));
+    SingleEmissionCheckScope guard(this);
     csinv(rd, rn, rm, cond);
   }
   void Csneg(const Register& rd,
@@ -529,14 +726,17 @@
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
     VIXL_ASSERT((cond != al) && (cond != nv));
+    SingleEmissionCheckScope guard(this);
     csneg(rd, rn, rm, cond);
   }
   void Dmb(BarrierDomain domain, BarrierType type) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     dmb(domain, type);
   }
   void Dsb(BarrierDomain domain, BarrierType type) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     dsb(domain, type);
   }
   void Extr(const Register& rd,
@@ -547,14 +747,17 @@
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
+    SingleEmissionCheckScope guard(this);
     extr(rd, rn, rm, lsb);
   }
   void Fabs(const FPRegister& fd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fabs(fd, fn);
   }
   void Fadd(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fadd(fd, fn, fm);
   }
   void Fccmp(const FPRegister& fn,
@@ -563,10 +766,12 @@
              Condition cond) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT((cond != al) && (cond != nv));
+    SingleEmissionCheckScope guard(this);
     fccmp(fn, fm, nzcv, cond);
   }
   void Fcmp(const FPRegister& fn, const FPRegister& fm) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fcmp(fn, fm);
   }
   void Fcmp(const FPRegister& fn, double value);
@@ -576,78 +781,94 @@
              Condition cond) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT((cond != al) && (cond != nv));
+    SingleEmissionCheckScope guard(this);
     fcsel(fd, fn, fm, cond);
   }
   void Fcvt(const FPRegister& fd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fcvt(fd, fn);
   }
   void Fcvtas(const Register& rd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
     fcvtas(rd, fn);
   }
   void Fcvtau(const Register& rd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
     fcvtau(rd, fn);
   }
   void Fcvtms(const Register& rd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
     fcvtms(rd, fn);
   }
   void Fcvtmu(const Register& rd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
     fcvtmu(rd, fn);
   }
   void Fcvtns(const Register& rd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
     fcvtns(rd, fn);
   }
   void Fcvtnu(const Register& rd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
     fcvtnu(rd, fn);
   }
   void Fcvtzs(const Register& rd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
     fcvtzs(rd, fn);
   }
   void Fcvtzu(const Register& rd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
     fcvtzu(rd, fn);
   }
   void Fdiv(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fdiv(fd, fn, fm);
   }
   void Fmax(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fmax(fd, fn, fm);
   }
   void Fmaxnm(const FPRegister& fd,
               const FPRegister& fn,
               const FPRegister& fm) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fmaxnm(fd, fn, fm);
   }
   void Fmin(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fmin(fd, fn, fm);
   }
   void Fminnm(const FPRegister& fd,
               const FPRegister& fn,
               const FPRegister& fm) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fminnm(fd, fn, fm);
   }
   void Fmov(FPRegister fd, FPRegister fn) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     // Only emit an instruction if fd and fn are different, and they are both D
     // registers. fmov(s0, s0) is not a no-op because it clears the top word of
     // d0. Technically, fmov(d0, d0) is not a no-op either because it clears
@@ -659,6 +880,7 @@
   void Fmov(FPRegister fd, Register rn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     fmov(fd, rn);
   }
   // Provide explicit double and float interfaces for FP immediate moves, rather
@@ -676,10 +898,12 @@
   void Fmov(Register rd, FPRegister fn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
     fmov(rd, fn);
   }
   void Fmul(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fmul(fd, fn, fm);
   }
   void Fmadd(const FPRegister& fd,
@@ -687,6 +911,7 @@
              const FPRegister& fm,
              const FPRegister& fa) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fmadd(fd, fn, fm, fa);
   }
   void Fmsub(const FPRegister& fd,
@@ -694,6 +919,7 @@
              const FPRegister& fm,
              const FPRegister& fa) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fmsub(fd, fn, fm, fa);
   }
   void Fnmadd(const FPRegister& fd,
@@ -701,6 +927,7 @@
               const FPRegister& fm,
               const FPRegister& fa) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fnmadd(fd, fn, fm, fa);
   }
   void Fnmsub(const FPRegister& fd,
@@ -708,139 +935,175 @@
               const FPRegister& fm,
               const FPRegister& fa) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fnmsub(fd, fn, fm, fa);
   }
   void Fneg(const FPRegister& fd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fneg(fd, fn);
   }
   void Frinta(const FPRegister& fd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     frinta(fd, fn);
   }
   void Frintm(const FPRegister& fd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     frintm(fd, fn);
   }
   void Frintn(const FPRegister& fd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     frintn(fd, fn);
   }
   void Frintz(const FPRegister& fd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     frintz(fd, fn);
   }
   void Fsqrt(const FPRegister& fd, const FPRegister& fn) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fsqrt(fd, fn);
   }
   void Fsub(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     fsub(fd, fn, fm);
   }
   void Hint(SystemHint code) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     hint(code);
   }
   void Hlt(int code) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     hlt(code);
   }
   void Isb() {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     isb();
   }
   void Ldar(const Register& rt, const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     ldar(rt, src);
   }
   void Ldarb(const Register& rt, const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     ldarb(rt, src);
   }
   void Ldarh(const Register& rt, const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     ldarh(rt, src);
   }
   void Ldaxp(const Register& rt, const Register& rt2, const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rt.Aliases(rt2));
+    SingleEmissionCheckScope guard(this);
     ldaxp(rt, rt2, src);
   }
   void Ldaxr(const Register& rt, const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     ldaxr(rt, src);
   }
   void Ldaxrb(const Register& rt, const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     ldaxrb(rt, src);
   }
   void Ldaxrh(const Register& rt, const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     ldaxrh(rt, src);
   }
   void Ldnp(const CPURegister& rt,
             const CPURegister& rt2,
             const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     ldnp(rt, rt2, src);
   }
-  void Ldp(const CPURegister& rt,
-           const CPURegister& rt2,
-           const MemOperand& src) {
-    VIXL_ASSERT(allow_macro_instructions_);
-    ldp(rt, rt2, src);
-  }
-  void Ldpsw(const Register& rt, const Register& rt2, const MemOperand& src) {
-    VIXL_ASSERT(allow_macro_instructions_);
-    ldpsw(rt, rt2, src);
-  }
   // Provide both double and float interfaces for FP immediate loads, rather
   // than relying on implicit C++ casts. This allows signalling NaNs to be
   // preserved when the immediate matches the format of fd. Most systems convert
   // signalling NaNs to quiet NaNs when converting between float and double.
   void Ldr(const FPRegister& ft, double imm) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    RawLiteral* literal;
     if (ft.Is64Bits()) {
-      ldr(ft, imm);
+      literal = literal_pool_.Add(imm);
     } else {
-      ldr(ft, static_cast<float>(imm));
+      literal = literal_pool_.Add(static_cast<float>(imm));
     }
+    ldr(ft, literal);
   }
   void Ldr(const FPRegister& ft, float imm) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    RawLiteral* literal;
     if (ft.Is32Bits()) {
-      ldr(ft, imm);
+      literal = literal_pool_.Add(imm);
     } else {
-      ldr(ft, static_cast<double>(imm));
+      literal = literal_pool_.Add(static_cast<double>(imm));
     }
+    ldr(ft, literal);
   }
   void Ldr(const Register& rt, uint64_t imm) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rt.IsZero());
-    ldr(rt, imm);
+    SingleEmissionCheckScope guard(this);
+    RawLiteral* literal;
+    if (rt.Is64Bits()) {
+      literal = literal_pool_.Add(imm);
+    } else {
+      VIXL_ASSERT(rt.Is32Bits());
+      VIXL_ASSERT(is_uint32(imm) || is_int32(imm));
+      literal = literal_pool_.Add(static_cast<uint32_t>(imm));
+    }
+    ldr(rt, literal);
+  }
+  void Ldrsw(const Register& rt, uint32_t imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rt.IsZero());
+    SingleEmissionCheckScope guard(this);
+    RawLiteral* literal = literal_pool_.Add(imm);
+    ldrsw(rt, literal);
   }
   void Ldxp(const Register& rt, const Register& rt2, const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rt.Aliases(rt2));
+    SingleEmissionCheckScope guard(this);
     ldxp(rt, rt2, src);
   }
   void Ldxr(const Register& rt, const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     ldxr(rt, src);
   }
   void Ldxrb(const Register& rt, const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     ldxrb(rt, src);
   }
   void Ldxrh(const Register& rt, const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     ldxrh(rt, src);
   }
   void Lsl(const Register& rd, const Register& rn, unsigned shift) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     lsl(rd, rn, shift);
   }
   void Lsl(const Register& rd, const Register& rn, const Register& rm) {
@@ -848,12 +1111,14 @@
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
+    SingleEmissionCheckScope guard(this);
     lslv(rd, rn, rm);
   }
   void Lsr(const Register& rd, const Register& rn, unsigned shift) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     lsr(rd, rn, shift);
   }
   void Lsr(const Register& rd, const Register& rn, const Register& rm) {
@@ -861,6 +1126,7 @@
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
+    SingleEmissionCheckScope guard(this);
     lsrv(rd, rn, rm);
   }
   void Madd(const Register& rd,
@@ -872,6 +1138,7 @@
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
     VIXL_ASSERT(!ra.IsZero());
+    SingleEmissionCheckScope guard(this);
     madd(rd, rn, rm, ra);
   }
   void Mneg(const Register& rd, const Register& rn, const Register& rm) {
@@ -879,25 +1146,30 @@
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
+    SingleEmissionCheckScope guard(this);
     mneg(rd, rn, rm);
   }
   void Mov(const Register& rd, const Register& rn) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     mov(rd, rn);
   }
   void Movk(const Register& rd, uint64_t imm, int shift = -1) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
     movk(rd, imm, shift);
   }
   void Mrs(const Register& rt, SystemRegister sysreg) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rt.IsZero());
+    SingleEmissionCheckScope guard(this);
     mrs(rt, sysreg);
   }
   void Msr(SystemRegister sysreg, const Register& rt) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rt.IsZero());
+    SingleEmissionCheckScope guard(this);
     msr(sysreg, rt);
   }
   void Msub(const Register& rd,
@@ -909,6 +1181,7 @@
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
     VIXL_ASSERT(!ra.IsZero());
+    SingleEmissionCheckScope guard(this);
     msub(rd, rn, rm, ra);
   }
   void Mul(const Register& rd, const Register& rn, const Register& rm) {
@@ -916,45 +1189,53 @@
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
+    SingleEmissionCheckScope guard(this);
     mul(rd, rn, rm);
   }
   void Nop() {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     nop();
   }
   void Rbit(const Register& rd, const Register& rn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     rbit(rd, rn);
   }
   void Ret(const Register& xn = lr) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!xn.IsZero());
+    SingleEmissionCheckScope guard(this);
     ret(xn);
   }
   void Rev(const Register& rd, const Register& rn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     rev(rd, rn);
   }
   void Rev16(const Register& rd, const Register& rn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     rev16(rd, rn);
   }
   void Rev32(const Register& rd, const Register& rn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     rev32(rd, rn);
   }
   void Ror(const Register& rd, const Register& rs, unsigned shift) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rs.IsZero());
+    SingleEmissionCheckScope guard(this);
     ror(rd, rs, shift);
   }
   void Ror(const Register& rd, const Register& rn, const Register& rm) {
@@ -962,6 +1243,7 @@
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
+    SingleEmissionCheckScope guard(this);
     rorv(rd, rn, rm);
   }
   void Sbfiz(const Register& rd,
@@ -971,6 +1253,7 @@
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     sbfiz(rd, rn, lsb, width);
   }
   void Sbfx(const Register& rd,
@@ -980,11 +1263,13 @@
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     sbfx(rd, rn, lsb, width);
   }
   void Scvtf(const FPRegister& fd, const Register& rn, unsigned fbits = 0) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     scvtf(fd, rn, fbits);
   }
   void Sdiv(const Register& rd, const Register& rn, const Register& rm) {
@@ -992,6 +1277,7 @@
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
+    SingleEmissionCheckScope guard(this);
     sdiv(rd, rn, rm);
   }
   void Smaddl(const Register& rd,
@@ -1003,6 +1289,7 @@
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
     VIXL_ASSERT(!ra.IsZero());
+    SingleEmissionCheckScope guard(this);
     smaddl(rd, rn, rm, ra);
   }
   void Smsubl(const Register& rd,
@@ -1014,6 +1301,7 @@
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
     VIXL_ASSERT(!ra.IsZero());
+    SingleEmissionCheckScope guard(this);
     smsubl(rd, rn, rm, ra);
   }
   void Smull(const Register& rd, const Register& rn, const Register& rm) {
@@ -1021,6 +1309,7 @@
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
+    SingleEmissionCheckScope guard(this);
     smull(rd, rn, rm);
   }
   void Smulh(const Register& xd, const Register& xn, const Register& xm) {
@@ -1028,18 +1317,22 @@
     VIXL_ASSERT(!xd.IsZero());
     VIXL_ASSERT(!xn.IsZero());
     VIXL_ASSERT(!xm.IsZero());
+    SingleEmissionCheckScope guard(this);
     smulh(xd, xn, xm);
   }
   void Stlr(const Register& rt, const MemOperand& dst) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     stlr(rt, dst);
   }
   void Stlrb(const Register& rt, const MemOperand& dst) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     stlrb(rt, dst);
   }
   void Stlrh(const Register& rt, const MemOperand& dst) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     stlrh(rt, dst);
   }
   void Stlxp(const Register& rs,
@@ -1050,38 +1343,37 @@
     VIXL_ASSERT(!rs.Aliases(dst.base()));
     VIXL_ASSERT(!rs.Aliases(rt));
     VIXL_ASSERT(!rs.Aliases(rt2));
+    SingleEmissionCheckScope guard(this);
     stlxp(rs, rt, rt2, dst);
   }
   void Stlxr(const Register& rs, const Register& rt, const MemOperand& dst) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rs.Aliases(dst.base()));
     VIXL_ASSERT(!rs.Aliases(rt));
+    SingleEmissionCheckScope guard(this);
     stlxr(rs, rt, dst);
   }
   void Stlxrb(const Register& rs, const Register& rt, const MemOperand& dst) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rs.Aliases(dst.base()));
     VIXL_ASSERT(!rs.Aliases(rt));
+    SingleEmissionCheckScope guard(this);
     stlxrb(rs, rt, dst);
   }
   void Stlxrh(const Register& rs, const Register& rt, const MemOperand& dst) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rs.Aliases(dst.base()));
     VIXL_ASSERT(!rs.Aliases(rt));
+    SingleEmissionCheckScope guard(this);
     stlxrh(rs, rt, dst);
   }
   void Stnp(const CPURegister& rt,
             const CPURegister& rt2,
             const MemOperand& dst) {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
     stnp(rt, rt2, dst);
   }
-  void Stp(const CPURegister& rt,
-           const CPURegister& rt2,
-           const MemOperand& dst) {
-    VIXL_ASSERT(allow_macro_instructions_);
-    stp(rt, rt2, dst);
-  }
   void Stxp(const Register& rs,
             const Register& rt,
             const Register& rt2,
@@ -1090,52 +1382,61 @@
     VIXL_ASSERT(!rs.Aliases(dst.base()));
     VIXL_ASSERT(!rs.Aliases(rt));
     VIXL_ASSERT(!rs.Aliases(rt2));
+    SingleEmissionCheckScope guard(this);
     stxp(rs, rt, rt2, dst);
   }
   void Stxr(const Register& rs, const Register& rt, const MemOperand& dst) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rs.Aliases(dst.base()));
     VIXL_ASSERT(!rs.Aliases(rt));
+    SingleEmissionCheckScope guard(this);
     stxr(rs, rt, dst);
   }
   void Stxrb(const Register& rs, const Register& rt, const MemOperand& dst) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rs.Aliases(dst.base()));
     VIXL_ASSERT(!rs.Aliases(rt));
+    SingleEmissionCheckScope guard(this);
     stxrb(rs, rt, dst);
   }
   void Stxrh(const Register& rs, const Register& rt, const MemOperand& dst) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rs.Aliases(dst.base()));
     VIXL_ASSERT(!rs.Aliases(rt));
+    SingleEmissionCheckScope guard(this);
     stxrh(rs, rt, dst);
   }
   void Sxtb(const Register& rd, const Register& rn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     sxtb(rd, rn);
   }
   void Sxth(const Register& rd, const Register& rn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     sxth(rd, rn);
   }
   void Sxtw(const Register& rd, const Register& rn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     sxtw(rd, rn);
   }
   void Tbnz(const Register& rt, unsigned bit_pos, Label* label) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rt.IsZero());
+    SingleEmissionCheckScope guard(this);
     tbnz(rt, bit_pos, label);
   }
   void Tbz(const Register& rt, unsigned bit_pos, Label* label) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rt.IsZero());
+    SingleEmissionCheckScope guard(this);
     tbz(rt, bit_pos, label);
   }
   void Ubfiz(const Register& rd,
@@ -1145,6 +1446,7 @@
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     ubfiz(rd, rn, lsb, width);
   }
   void Ubfx(const Register& rd,
@@ -1154,11 +1456,13 @@
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     ubfx(rd, rn, lsb, width);
   }
   void Ucvtf(const FPRegister& fd, const Register& rn, unsigned fbits = 0) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     ucvtf(fd, rn, fbits);
   }
   void Udiv(const Register& rd, const Register& rn, const Register& rm) {
@@ -1166,6 +1470,7 @@
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
+    SingleEmissionCheckScope guard(this);
     udiv(rd, rn, rm);
   }
   void Umaddl(const Register& rd,
@@ -1177,6 +1482,7 @@
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
     VIXL_ASSERT(!ra.IsZero());
+    SingleEmissionCheckScope guard(this);
     umaddl(rd, rn, rm, ra);
   }
   void Umsubl(const Register& rd,
@@ -1188,10 +1494,12 @@
     VIXL_ASSERT(!rn.IsZero());
     VIXL_ASSERT(!rm.IsZero());
     VIXL_ASSERT(!ra.IsZero());
+    SingleEmissionCheckScope guard(this);
     umsubl(rd, rn, rm, ra);
   }
   void Unreachable() {
     VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
 #ifdef USE_SIMULATOR
     hlt(kUnreachableOpcode);
 #else
@@ -1204,18 +1512,21 @@
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     uxtb(rd, rn);
   }
   void Uxth(const Register& rd, const Register& rn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     uxth(rd, rn);
   }
   void Uxtw(const Register& rd, const Register& rn) {
     VIXL_ASSERT(allow_macro_instructions_);
     VIXL_ASSERT(!rd.IsZero());
     VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
     uxtw(rd, rn);
   }
 
@@ -1244,6 +1555,40 @@
   }
 #endif
 
+  void BlockLiteralPool() { literal_pool_.Block(); }
+  void ReleaseLiteralPool() { literal_pool_.Release(); }
+  bool IsLiteralPoolBlocked() const { return literal_pool_.IsBlocked(); }
+
+  size_t LiteralPoolSize() const {
+    if (literal_pool_.IsEmpty()) return 0;
+    return literal_pool_.Size() - kInstructionSize;
+  }
+
+  void EmitLiteralPool(LiteralPool::EmitOption option) {
+    if (!literal_pool_.IsEmpty()) literal_pool_.Emit(option);
+
+    checkpoint_ = NextCheckOffset();
+  }
+
+  ptrdiff_t NextCheckOffset() {
+    return std::min(literal_pool_.NextCheckOffset(), BufferEndOffset());
+  }
+
+  void EnsureEmitFor(size_t amount) {
+    ptrdiff_t offset = amount;
+    if ((CursorOffset() + offset) > checkpoint_) {
+      // Check if a pool needs to be emitted.
+      literal_pool_.CheckEmitFor(amount);
+      // Ensure there's enough space for the emit; keep in mind the cursor will
+      // have moved if a pool was emitted.
+      if ((CursorOffset() + offset) > BufferEndOffset()) {
+        EnsureSpaceFor(amount);
+      }
+
+      checkpoint_ = NextCheckOffset();
+    }
+  }
+
   // Set the current stack pointer, but don't generate any code.
   void SetStackPointer(const Register& stack_pointer) {
     VIXL_ASSERT(!TmpList()->IncludesAliasOf(stack_pointer));
@@ -1356,6 +1701,9 @@
   // Scratch registers available for use by the MacroAssembler.
   CPURegList tmp_list_;
   CPURegList fptmp_list_;
+
+  LiteralPool literal_pool_;
+  ptrdiff_t checkpoint_;
 };
 
 
@@ -1363,47 +1711,51 @@
 // instructions. This scope prevents the MacroAssembler from being called and
 // literal pools from being emitted. It also asserts the number of instructions
 // emitted is what you specified when creating the scope.
-class InstructionAccurateScope {
+class InstructionAccurateScope : public CodeBufferCheckScope {
  public:
+  InstructionAccurateScope(MacroAssembler* masm,
+                           int count,
+                           AssertPolicy policy = kExactSize)
+      : CodeBufferCheckScope(masm,
+                             (count * kInstructionSize),
+                             kCheck,
+                             policy) {
+    VIXL_ASSERT(policy != kNoAssert);
 #ifdef DEBUG
-  explicit InstructionAccurateScope(MacroAssembler* masm, int count = 0)
-      : masm_(masm), size_(count * kInstructionSize) {
-    masm_->BlockLiteralPool();
-    if (size_ != 0) {
-      masm_->bind(&start_);
-    }
-    old_allow_macro_instructions_ = masm_->AllowMacroInstructions();
-    masm_->SetAllowMacroInstructions(false);
-  }
-#else
-  explicit InstructionAccurateScope(MacroAssembler* masm,
-                                    int count = 0)
-      : masm_(masm) {
-    USE(count);
-    masm_->BlockLiteralPool();
-  }
+    old_allow_macro_instructions_ = masm->AllowMacroInstructions();
+    masm->SetAllowMacroInstructions(false);
 #endif
+  }
 
   ~InstructionAccurateScope() {
-    masm_->ReleaseLiteralPool();
 #ifdef DEBUG
-    if (start_.IsBound()) {
-      VIXL_ASSERT(masm_->SizeOfCodeGeneratedSince(&start_) == size_);
-    }
-    masm_->SetAllowMacroInstructions(old_allow_macro_instructions_);
+    MacroAssembler* masm = reinterpret_cast<MacroAssembler*>(assm_);
+    masm->SetAllowMacroInstructions(old_allow_macro_instructions_);
 #endif
   }
 
  private:
-  MacroAssembler* masm_;
 #ifdef DEBUG
-  uint64_t size_;
-  Label start_;
   bool old_allow_macro_instructions_;
 #endif
 };
 
 
+class BlockLiteralPoolScope {
+ public:
+  explicit BlockLiteralPoolScope(MacroAssembler* masm) : masm_(masm) {
+    masm_->BlockLiteralPool();
+  }
+
+  ~BlockLiteralPoolScope() {
+    masm_->ReleaseLiteralPool();
+  }
+
+ private:
+  MacroAssembler* masm_;
+};
+
+
 // This scope utility allows scratch registers to be managed safely. The
 // MacroAssembler's TmpList() (and FPTmpList()) is used as a pool of scratch
 // registers. These registers can be allocated on demand, and will be returned
diff --git a/src/a64/simulator-a64.cc b/src/a64/simulator-a64.cc
index fb3286d..9b3211b 100644
--- a/src/a64/simulator-a64.cc
+++ b/src/a64/simulator-a64.cc
@@ -136,7 +136,7 @@
 }
 
 
-void Simulator::RunFrom(Instruction* first) {
+void Simulator::RunFrom(const Instruction* first) {
   set_pc(first);
   Run();
 }
@@ -419,7 +419,7 @@
       "0b10 (Round towards Minus Infinity)",
       "0b11 (Round towards Zero)"
     };
-    VIXL_ASSERT(fpcr().RMode() <= (sizeof(rmode) / sizeof(rmode[0])));
+    VIXL_ASSERT(fpcr().RMode() < (sizeof(rmode) / sizeof(rmode[0])));
     fprintf(stream_, "# %sFPCR: %sAHP:%d DN:%d FZ:%d RMode:%s%s\n",
             clr_flag_name,
             clr_flag_value,
@@ -496,21 +496,21 @@
 
 // Visitors---------------------------------------------------------------------
 
-void Simulator::VisitUnimplemented(Instruction* instr) {
+void Simulator::VisitUnimplemented(const Instruction* instr) {
   printf("Unimplemented instruction at %p: 0x%08" PRIx32 "\n",
-         reinterpret_cast<void*>(instr), instr->InstructionBits());
+         reinterpret_cast<const void*>(instr), instr->InstructionBits());
   VIXL_UNIMPLEMENTED();
 }
 
 
-void Simulator::VisitUnallocated(Instruction* instr) {
+void Simulator::VisitUnallocated(const Instruction* instr) {
   printf("Unallocated instruction at %p: 0x%08" PRIx32 "\n",
-         reinterpret_cast<void*>(instr), instr->InstructionBits());
+         reinterpret_cast<const void*>(instr), instr->InstructionBits());
   VIXL_UNIMPLEMENTED();
 }
 
 
-void Simulator::VisitPCRelAddressing(Instruction* instr) {
+void Simulator::VisitPCRelAddressing(const Instruction* instr) {
   VIXL_ASSERT((instr->Mask(PCRelAddressingMask) == ADR) ||
               (instr->Mask(PCRelAddressingMask) == ADRP));
 
@@ -518,7 +518,7 @@
 }
 
 
-void Simulator::VisitUnconditionalBranch(Instruction* instr) {
+void Simulator::VisitUnconditionalBranch(const Instruction* instr) {
   switch (instr->Mask(UnconditionalBranchMask)) {
     case BL:
       set_lr(instr->NextInstruction());
@@ -531,7 +531,7 @@
 }
 
 
-void Simulator::VisitConditionalBranch(Instruction* instr) {
+void Simulator::VisitConditionalBranch(const Instruction* instr) {
   VIXL_ASSERT(instr->Mask(ConditionalBranchMask) == B_cond);
   if (ConditionPassed(instr->ConditionBranch())) {
     set_pc(instr->ImmPCOffsetTarget());
@@ -539,8 +539,8 @@
 }
 
 
-void Simulator::VisitUnconditionalBranchToRegister(Instruction* instr) {
-  Instruction* target = Instruction::Cast(xreg(instr->Rn()));
+void Simulator::VisitUnconditionalBranchToRegister(const Instruction* instr) {
+  const Instruction* target = Instruction::Cast(xreg(instr->Rn()));
 
   switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
     case BLR:
@@ -553,7 +553,7 @@
 }
 
 
-void Simulator::VisitTestBranch(Instruction* instr) {
+void Simulator::VisitTestBranch(const Instruction* instr) {
   unsigned bit_pos = (instr->ImmTestBranchBit5() << 5) |
                      instr->ImmTestBranchBit40();
   bool bit_zero = ((xreg(instr->Rt()) >> bit_pos) & 1) == 0;
@@ -569,7 +569,7 @@
 }
 
 
-void Simulator::VisitCompareBranch(Instruction* instr) {
+void Simulator::VisitCompareBranch(const Instruction* instr) {
   unsigned rt = instr->Rt();
   bool take_branch = false;
   switch (instr->Mask(CompareBranchMask)) {
@@ -585,7 +585,7 @@
 }
 
 
-void Simulator::AddSubHelper(Instruction* instr, int64_t op2) {
+void Simulator::AddSubHelper(const Instruction* instr, int64_t op2) {
   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   bool set_flags = instr->FlagsUpdate();
   int64_t new_val = 0;
@@ -616,7 +616,7 @@
 }
 
 
-void Simulator::VisitAddSubShifted(Instruction* instr) {
+void Simulator::VisitAddSubShifted(const Instruction* instr) {
   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   int64_t op2 = ShiftOperand(reg_size,
                              reg(reg_size, instr->Rm()),
@@ -626,13 +626,13 @@
 }
 
 
-void Simulator::VisitAddSubImmediate(Instruction* instr) {
+void Simulator::VisitAddSubImmediate(const Instruction* instr) {
   int64_t op2 = instr->ImmAddSub() << ((instr->ShiftAddSub() == 1) ? 12 : 0);
   AddSubHelper(instr, op2);
 }
 
 
-void Simulator::VisitAddSubExtended(Instruction* instr) {
+void Simulator::VisitAddSubExtended(const Instruction* instr) {
   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   int64_t op2 = ExtendValue(reg_size,
                             reg(reg_size, instr->Rm()),
@@ -642,7 +642,7 @@
 }
 
 
-void Simulator::VisitAddSubWithCarry(Instruction* instr) {
+void Simulator::VisitAddSubWithCarry(const Instruction* instr) {
   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   int64_t op2 = reg(reg_size, instr->Rm());
   int64_t new_val;
@@ -661,7 +661,7 @@
 }
 
 
-void Simulator::VisitLogicalShifted(Instruction* instr) {
+void Simulator::VisitLogicalShifted(const Instruction* instr) {
   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   Shift shift_type = static_cast<Shift>(instr->ShiftDP());
   unsigned shift_amount = instr->ImmDPShift();
@@ -674,12 +674,12 @@
 }
 
 
-void Simulator::VisitLogicalImmediate(Instruction* instr) {
+void Simulator::VisitLogicalImmediate(const Instruction* instr) {
   LogicalHelper(instr, instr->ImmLogical());
 }
 
 
-void Simulator::LogicalHelper(Instruction* instr, int64_t op2) {
+void Simulator::LogicalHelper(const Instruction* instr, int64_t op2) {
   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   int64_t op1 = reg(reg_size, instr->Rn());
   int64_t result = 0;
@@ -707,18 +707,19 @@
 }
 
 
-void Simulator::VisitConditionalCompareRegister(Instruction* instr) {
+void Simulator::VisitConditionalCompareRegister(const Instruction* instr) {
   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   ConditionalCompareHelper(instr, reg(reg_size, instr->Rm()));
 }
 
 
-void Simulator::VisitConditionalCompareImmediate(Instruction* instr) {
+void Simulator::VisitConditionalCompareImmediate(const Instruction* instr) {
   ConditionalCompareHelper(instr, instr->ImmCondCmp());
 }
 
 
-void Simulator::ConditionalCompareHelper(Instruction* instr, int64_t op2) {
+void Simulator::ConditionalCompareHelper(const Instruction* instr,
+                                         int64_t op2) {
   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   int64_t op1 = reg(reg_size, instr->Rn());
 
@@ -738,28 +739,28 @@
 }
 
 
-void Simulator::VisitLoadStoreUnsignedOffset(Instruction* instr) {
+void Simulator::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
   int offset = instr->ImmLSUnsigned() << instr->SizeLS();
   LoadStoreHelper(instr, offset, Offset);
 }
 
 
-void Simulator::VisitLoadStoreUnscaledOffset(Instruction* instr) {
+void Simulator::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
   LoadStoreHelper(instr, instr->ImmLS(), Offset);
 }
 
 
-void Simulator::VisitLoadStorePreIndex(Instruction* instr) {
+void Simulator::VisitLoadStorePreIndex(const Instruction* instr) {
   LoadStoreHelper(instr, instr->ImmLS(), PreIndex);
 }
 
 
-void Simulator::VisitLoadStorePostIndex(Instruction* instr) {
+void Simulator::VisitLoadStorePostIndex(const Instruction* instr) {
   LoadStoreHelper(instr, instr->ImmLS(), PostIndex);
 }
 
 
-void Simulator::VisitLoadStoreRegisterOffset(Instruction* instr) {
+void Simulator::VisitLoadStoreRegisterOffset(const Instruction* instr) {
   Extend ext = static_cast<Extend>(instr->ExtendMode());
   VIXL_ASSERT((ext == UXTW) || (ext == UXTX) || (ext == SXTW) || (ext == SXTX));
   unsigned shift_amount = instr->ImmShiftLS() * instr->SizeLS();
@@ -770,7 +771,7 @@
 }
 
 
-void Simulator::LoadStoreHelper(Instruction* instr,
+void Simulator::LoadStoreHelper(const Instruction* instr,
                                 int64_t offset,
                                 AddrMode addrmode) {
   unsigned srcdst = instr->Rt();
@@ -804,27 +805,27 @@
 }
 
 
-void Simulator::VisitLoadStorePairOffset(Instruction* instr) {
+void Simulator::VisitLoadStorePairOffset(const Instruction* instr) {
   LoadStorePairHelper(instr, Offset);
 }
 
 
-void Simulator::VisitLoadStorePairPreIndex(Instruction* instr) {
+void Simulator::VisitLoadStorePairPreIndex(const Instruction* instr) {
   LoadStorePairHelper(instr, PreIndex);
 }
 
 
-void Simulator::VisitLoadStorePairPostIndex(Instruction* instr) {
+void Simulator::VisitLoadStorePairPostIndex(const Instruction* instr) {
   LoadStorePairHelper(instr, PostIndex);
 }
 
 
-void Simulator::VisitLoadStorePairNonTemporal(Instruction* instr) {
+void Simulator::VisitLoadStorePairNonTemporal(const Instruction* instr) {
   LoadStorePairHelper(instr, Offset);
 }
 
 
-void Simulator::LoadStorePairHelper(Instruction* instr,
+void Simulator::LoadStorePairHelper(const Instruction* instr,
                                     AddrMode addrmode) {
   unsigned rt = instr->Rt();
   unsigned rt2 = instr->Rt2();
@@ -902,7 +903,7 @@
 }
 
 
-void Simulator::VisitLoadStoreExclusive(Instruction* instr) {
+void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
   PrintExclusiveAccessWarning();
 
   unsigned rs = instr->Rs();
@@ -1039,7 +1040,7 @@
 }
 
 
-void Simulator::VisitLoadLiteral(Instruction* instr) {
+void Simulator::VisitLoadLiteral(const Instruction* instr) {
   uint8_t* address = instr->LiteralAddress();
   unsigned rt = instr->Rt();
 
@@ -1048,6 +1049,7 @@
     case LDR_x_lit: set_xreg(rt, MemoryRead<uint64_t>(address)); break;
     case LDR_s_lit: set_sreg(rt, MemoryRead<float>(address)); break;
     case LDR_d_lit: set_dreg(rt, MemoryRead<double>(address)); break;
+    case LDRSW_x_lit: set_xreg(rt, MemoryRead<int32_t>(address)); break;
     default: VIXL_UNREACHABLE();
   }
 
@@ -1083,7 +1085,7 @@
 }
 
 
-void Simulator::VisitMoveWideImmediate(Instruction* instr) {
+void Simulator::VisitMoveWideImmediate(const Instruction* instr) {
   MoveWideImmediateOp mov_op =
     static_cast<MoveWideImmediateOp>(instr->Mask(MoveWideImmediateMask));
   int64_t new_xn_val = 0;
@@ -1127,7 +1129,7 @@
 }
 
 
-void Simulator::VisitConditionalSelect(Instruction* instr) {
+void Simulator::VisitConditionalSelect(const Instruction* instr) {
   uint64_t new_val = xreg(instr->Rn());
 
   if (ConditionFailed(static_cast<Condition>(instr->Condition()))) {
@@ -1149,7 +1151,7 @@
 }
 
 
-void Simulator::VisitDataProcessing1Source(Instruction* instr) {
+void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
   unsigned dst = instr->Rd();
   unsigned src = instr->Rn();
 
@@ -1214,7 +1216,7 @@
 }
 
 
-void Simulator::VisitDataProcessing2Source(Instruction* instr) {
+void Simulator::VisitDataProcessing2Source(const Instruction* instr) {
   Shift shift_op = NO_SHIFT;
   int64_t result = 0;
   switch (instr->Mask(DataProcessing2SourceMask)) {
@@ -1312,7 +1314,7 @@
 }
 
 
-void Simulator::VisitDataProcessing3Source(Instruction* instr) {
+void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
 
   int64_t result = 0;
@@ -1343,7 +1345,7 @@
 }
 
 
-void Simulator::VisitBitfield(Instruction* instr) {
+void Simulator::VisitBitfield(const Instruction* instr) {
   unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   int64_t reg_mask = instr->SixtyFourBits() ? kXRegMask : kWRegMask;
   int64_t R = instr->ImmR();
@@ -1396,7 +1398,7 @@
 }
 
 
-void Simulator::VisitExtract(Instruction* instr) {
+void Simulator::VisitExtract(const Instruction* instr) {
   unsigned lsb = instr->ImmS();
   unsigned reg_size = (instr->SixtyFourBits() != 0) ? kXRegSize
                                                     : kWRegSize;
@@ -1407,7 +1409,7 @@
 }
 
 
-void Simulator::VisitFPImmediate(Instruction* instr) {
+void Simulator::VisitFPImmediate(const Instruction* instr) {
   AssertSupportedFPCR();
 
   unsigned dest = instr->Rd();
@@ -1419,7 +1421,7 @@
 }
 
 
-void Simulator::VisitFPIntegerConvert(Instruction* instr) {
+void Simulator::VisitFPIntegerConvert(const Instruction* instr) {
   AssertSupportedFPCR();
 
   unsigned dst = instr->Rd();
@@ -1503,7 +1505,7 @@
 }
 
 
-void Simulator::VisitFPFixedPointConvert(Instruction* instr) {
+void Simulator::VisitFPFixedPointConvert(const Instruction* instr) {
   AssertSupportedFPCR();
 
   unsigned dst = instr->Rd();
@@ -1592,7 +1594,7 @@
 }
 
 
-void Simulator::VisitFPCompare(Instruction* instr) {
+void Simulator::VisitFPCompare(const Instruction* instr) {
   AssertSupportedFPCR();
 
   switch (instr->Mask(FPCompareMask)) {
@@ -1605,7 +1607,7 @@
 }
 
 
-void Simulator::VisitFPConditionalCompare(Instruction* instr) {
+void Simulator::VisitFPConditionalCompare(const Instruction* instr) {
   AssertSupportedFPCR();
 
   switch (instr->Mask(FPConditionalCompareMask)) {
@@ -1628,7 +1630,7 @@
 }
 
 
-void Simulator::VisitFPConditionalSelect(Instruction* instr) {
+void Simulator::VisitFPConditionalSelect(const Instruction* instr) {
   AssertSupportedFPCR();
 
   Instr selected;
@@ -1646,7 +1648,7 @@
 }
 
 
-void Simulator::VisitFPDataProcessing1Source(Instruction* instr) {
+void Simulator::VisitFPDataProcessing1Source(const Instruction* instr) {
   AssertSupportedFPCR();
 
   unsigned fd = instr->Rd();
@@ -2067,7 +2069,7 @@
 }
 
 
-void Simulator::VisitFPDataProcessing2Source(Instruction* instr) {
+void Simulator::VisitFPDataProcessing2Source(const Instruction* instr) {
   AssertSupportedFPCR();
 
   unsigned fd = instr->Rd();
@@ -2110,7 +2112,7 @@
 }
 
 
-void Simulator::VisitFPDataProcessing3Source(Instruction* instr) {
+void Simulator::VisitFPDataProcessing3Source(const Instruction* instr) {
   AssertSupportedFPCR();
 
   unsigned fd = instr->Rd();
@@ -2375,7 +2377,7 @@
 }
 
 
-bool Simulator::FPProcessNaNs(Instruction* instr) {
+bool Simulator::FPProcessNaNs(const Instruction* instr) {
   unsigned fd = instr->Rd();
   unsigned fn = instr->Rn();
   unsigned fm = instr->Rm();
@@ -2399,7 +2401,7 @@
 }
 
 
-void Simulator::VisitSystem(Instruction* instr) {
+void Simulator::VisitSystem(const Instruction* instr) {
   // Some system instructions hijack their Op and Cp fields to represent a
   // range of immediates instead of indicating a different instruction. This
   // makes the decoding tricky.
@@ -2445,7 +2447,7 @@
 }
 
 
-void Simulator::VisitException(Instruction* instr) {
+void Simulator::VisitException(const Instruction* instr) {
   switch (instr->Mask(ExceptionMask)) {
     case BRK: HostBreakpoint(); break;
     case HLT:
@@ -2463,7 +2465,7 @@
 }
 
 
-void Simulator::DoPrintf(Instruction* instr) {
+void Simulator::DoPrintf(const Instruction* instr) {
   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
               (instr->ImmException() == kPrintfOpcode));
 
diff --git a/src/a64/simulator-a64.h b/src/a64/simulator-a64.h
index b7e6752..1f48ed4 100644
--- a/src/a64/simulator-a64.h
+++ b/src/a64/simulator-a64.h
@@ -245,12 +245,12 @@
 
   // Run the simulator.
   virtual void Run();
-  void RunFrom(Instruction* first);
+  void RunFrom(const Instruction* first);
 
   // Simulation helpers.
-  inline Instruction* pc() { return pc_; }
-  inline void set_pc(Instruction* new_pc) {
-    pc_ = new_pc;
+  inline const Instruction* pc() const { return pc_; }
+  inline void set_pc(const Instruction* new_pc) {
+    pc_ = AddressUntag(new_pc);
     pc_modified_ = true;
   }
 
@@ -270,7 +270,7 @@
   }
 
   // Declare all Visitor functions.
-  #define DECLARE(A)  void Visit##A(Instruction* instr);
+  #define DECLARE(A)  void Visit##A(const Instruction* instr);
   VISITOR_LIST(DECLARE)
   #undef DECLARE
 
@@ -592,18 +592,18 @@
     return !ConditionPassed(cond);
   }
 
-  void AddSubHelper(Instruction* instr, int64_t op2);
+  void AddSubHelper(const Instruction* instr, int64_t op2);
   int64_t AddWithCarry(unsigned reg_size,
                        bool set_flags,
                        int64_t src1,
                        int64_t src2,
                        int64_t carry_in = 0);
-  void LogicalHelper(Instruction* instr, int64_t op2);
-  void ConditionalCompareHelper(Instruction* instr, int64_t op2);
-  void LoadStoreHelper(Instruction* instr,
+  void LogicalHelper(const Instruction* instr, int64_t op2);
+  void ConditionalCompareHelper(const Instruction* instr, int64_t op2);
+  void LoadStoreHelper(const Instruction* instr,
                        int64_t offset,
                        AddrMode addrmode);
-  void LoadStorePairHelper(Instruction* instr, AddrMode addrmode);
+  void LoadStorePairHelper(const Instruction* instr, AddrMode addrmode);
   uint8_t* AddressModeHelper(unsigned addr_reg,
                              int64_t offset,
                              AddrMode addrmode);
@@ -702,7 +702,7 @@
   template <typename T>
   T FPProcessNaN(T op);
 
-  bool FPProcessNaNs(Instruction* instr);
+  bool FPProcessNaNs(const Instruction* instr);
 
   template <typename T>
   T FPProcessNaNs(T op1, T op2);
@@ -711,7 +711,7 @@
   T FPProcessNaNs3(T op1, T op2, T op3);
 
   // Pseudo Printf instruction
-  void DoPrintf(Instruction* instr);
+  void DoPrintf(const Instruction* instr);
 
   // Processor state ---------------------------------------
 
@@ -776,7 +776,7 @@
   // Indicates if the pc has been modified by the instruction and should not be
   // automatically incremented.
   bool pc_modified_;
-  Instruction* pc_;
+  const Instruction* pc_;
 
   static const char* xreg_names[];
   static const char* wreg_names[];
diff --git a/src/code-buffer.cc b/src/code-buffer.cc
new file mode 100644
index 0000000..acf7c45
--- /dev/null
+++ b/src/code-buffer.cc
@@ -0,0 +1,110 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "code-buffer.h"
+#include "utils-vixl.h"
+
+namespace vixl {
+
+
+CodeBuffer::CodeBuffer(size_t capacity)
+    : buffer_(NULL), managed_(true), cursor_(NULL), dirty_(false),
+      capacity_(capacity) {
+  VIXL_CHECK(capacity_ != 0);
+  buffer_ = static_cast<byte*>(malloc(capacity_));
+  VIXL_CHECK(buffer_ != NULL);
+  // A64 instructions must be word aligned; assert that the default allocator
+  // returned word-aligned memory.
+  VIXL_ASSERT(IsWordAligned(buffer_));
+  cursor_ = buffer_;
+}
+
+
+// Wraps a caller-provided backing store; the buffer does not own it.
+CodeBuffer::CodeBuffer(void* buffer, size_t capacity)
+    : buffer_(static_cast<byte*>(buffer)), managed_(false),
+      cursor_(static_cast<byte*>(buffer)), dirty_(false),
+      capacity_(capacity) {
+  // Nothing is allocated here, so the caller must pass valid storage.
+  VIXL_ASSERT(buffer_ != NULL);
+}
+
+
+// Releases the backing store, but only if this buffer allocated it.
+CodeBuffer::~CodeBuffer() {
+  // A buffer must be clean (see SetClean()) by the time it is destroyed.
+  VIXL_ASSERT(!IsDirty());
+  if (managed_) free(buffer_);
+}
+
+
+void CodeBuffer::EmitString(const char* string) {
+  const size_t size = strlen(string) + 1;  // Includes the NUL terminator.
+  VIXL_ASSERT(RemainingBytes() >= size);
+  dirty_ = true;
+  memcpy(cursor_, string, size);  // Portable; stpcpy() is POSIX-only.
+  cursor_ += size;
+}
+
+
+// Pads the buffer with zero bytes up to the next 4-byte boundary.
+void CodeBuffer::Align() {
+  byte* aligned = AlignUp(cursor_, 4);
+  VIXL_ASSERT(aligned >= cursor_);
+  const size_t pad = aligned - cursor_;
+  VIXL_ASSERT(RemainingBytes() >= pad);
+  VIXL_ASSERT(pad <= 4);
+  dirty_ = true;  // Set unconditionally, even when no padding is needed.
+  memset(cursor_, 0, pad);
+  cursor_ = aligned;
+}
+
+
+// Rewinds the cursor to the start of the buffer and marks it clean.
+void CodeBuffer::Reset() {
+#ifdef DEBUG
+  // TODO(all): Consider allowing for custom default values, e.g. HLT.
+  // Only managed storage is zeroed; user-provided memory is left untouched.
+  if (managed_) memset(buffer_, 0, capacity_);
+#endif
+  cursor_ = buffer_;
+  dirty_ = false;
+}
+
+
+void CodeBuffer::Grow(size_t new_capacity) {
+  VIXL_ASSERT(managed_);
+  VIXL_ASSERT(new_capacity > capacity_);
+  const size_t used = CursorOffset();  // Cursor offset before reallocating.
+  byte* grown = static_cast<byte*>(realloc(buffer_, new_capacity));
+  VIXL_CHECK(grown != NULL);
+  buffer_ = grown;  // realloc() may have moved the backing store.
+  cursor_ = grown + used;
+  capacity_ = new_capacity;
+}
+
+
+}  // namespace vixl
diff --git a/src/code-buffer.h b/src/code-buffer.h
new file mode 100644
index 0000000..6b96415
--- /dev/null
+++ b/src/code-buffer.h
@@ -0,0 +1,113 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_CODE_BUFFER_H
+#define VIXL_CODE_BUFFER_H
+
+#include <string.h>
+#include "globals-vixl.h"
+
+namespace vixl {
+
+class CodeBuffer {
+ public:
+  explicit CodeBuffer(size_t capacity = 4 * KBytes);
+  CodeBuffer(void* buffer, size_t capacity);
+  ~CodeBuffer();
+  // Rewind the cursor to the start of the buffer and mark the buffer clean.
+  void Reset();
+  // Number of bytes between the buffer offset `offset` and the cursor.
+  ptrdiff_t OffsetFrom(ptrdiff_t offset) const {
+    ptrdiff_t cursor_offset = cursor_ - buffer_;
+    VIXL_ASSERT((offset >= 0) && (offset <= cursor_offset));
+    return cursor_offset - offset;
+  }
+  // Offset of the cursor from the start of the buffer.
+  ptrdiff_t CursorOffset() const {
+    return OffsetFrom(0);
+  }
+  // Address of the byte at `offset`, which must not be past the cursor.
+  template <typename T>
+  T GetOffsetAddress(ptrdiff_t offset) const {
+    VIXL_ASSERT((offset >= 0) && (offset <= (cursor_ - buffer_)));
+    return reinterpret_cast<T>(buffer_ + offset);
+  }
+  // Number of bytes left between the cursor and the end of the buffer.
+  size_t RemainingBytes() const {
+    VIXL_ASSERT((cursor_ >= buffer_) && (cursor_ <= (buffer_ + capacity_)));
+    return (buffer_ + capacity_) - cursor_;
+  }
+
+  // A code buffer can emit:
+  //  * 32-bit data: instructions and constants.
+  //  * 64-bit data: constants.
+  //  * strings: debug info.
+  void Emit32(uint32_t data) { Emit(data); }
+
+  void Emit64(uint64_t data) { Emit(data); }
+  // Emits the characters of `string` followed by a NUL terminator.
+  void EmitString(const char* string);
+
+  // Zero-pad up to the next 4-byte boundary (presumably kInstructionSize).
+  void Align();
+
+  size_t capacity() const { return capacity_; }
+
+  bool IsManaged() const { return managed_; }
+  // Reallocate a managed backing store so that it holds `new_capacity` bytes.
+  void Grow(size_t new_capacity);
+  // True if something was emitted since creation or the last SetClean().
+  bool IsDirty() const { return dirty_; }
+
+  void SetClean() { dirty_ = false; }
+
+ private:
+  template <typename T>
+  void Emit(T value) {
+    VIXL_ASSERT(RemainingBytes() >= sizeof(value));
+    dirty_ = true;
+    memcpy(cursor_, &value, sizeof(value));
+    cursor_ += sizeof(value);
+  }
+
+  // Backing store of the buffer.
+  byte* buffer_;
+  // If true, the backing store is allocated and deallocated by the buffer and
+  // can then grow on demand. If false, the backing store is provided by the
+  // user and cannot be resized internally.
+  bool managed_;
+  // Pointer to the next location to be written.
+  byte* cursor_;
+  // True if there has been any write since the buffer was created or cleaned.
+  bool dirty_;
+  // Capacity in bytes of the backing store.
+  size_t capacity_;
+};
+
+}  // namespace vixl
+
+#endif  // VIXL_CODE_BUFFER_H
+
diff --git a/test/examples/test-examples.cc b/test/examples/test-examples.cc
index efedf9f..a4a1feb 100644
--- a/test/examples/test-examples.cc
+++ b/test/examples/test-examples.cc
@@ -24,24 +24,33 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-// The examples only work with the simulator.
-#ifdef USE_SIMULATOR
-
 #include "a64/macro-assembler-a64.h"
 #include "a64/debugger-a64.h"
 #include "a64/simulator-a64.h"
 #include "examples.h"
+#include "non-const-visitor.h"
+#include "custom-disassembler.h"
 #include "../test-utils-a64.h"
 
 #include "../cctest.h"
 
-#define ARRAY_SIZE(Array) (sizeof(Array) / sizeof((Array)[0]))
-#define BUF_SIZE (4096)
-#define __ masm->
+#define TEST(name) TEST_(EXAMPLE_##name)
 
 using namespace vixl;
 
 
+TEST(custom_disassembler) {  // Expands to TEST_(EXAMPLE_custom_disassembler).
+  TestCustomDisassembler();
+}
+
+
+// The tests below only work with the simulator.
+#ifdef USE_SIMULATOR
+
+#define ARRAY_SIZE(Array) (sizeof(Array) / sizeof((Array)[0]))
+#define BUF_SIZE (4096)
+#define __ masm->
+
 uint64_t FactorialC(uint64_t n) {
   uint64_t result = 1;
 
@@ -137,8 +146,7 @@
   } while (0)
 
 #define START()                                             \
-  byte assm_buf[BUF_SIZE];                                  \
-  MacroAssembler masm(assm_buf, BUF_SIZE);                  \
+  MacroAssembler masm(BUF_SIZE);                            \
   Decoder decoder;                                          \
   Debugger simulator(&decoder);                             \
   simulator.set_coloured_trace(Cctest::coloured_trace());   \
@@ -161,8 +169,6 @@
   masm.FinalizeCode()
 
 
-#define TEST(name) TEST_(EXAMPLE_##name)
-
 
 #define FACTORIAL_DOTEST(N)                                             \
   do {                                                                  \
@@ -416,4 +422,25 @@
   GETTING_STARTED_DOTEST(0x5a5a5a5a5a5a5a5a);
 }
 
+
+TEST(non_const_visitor) {
+  byte assm_buf[BUF_SIZE];
+  MacroAssembler masm(assm_buf, BUF_SIZE);
+
+  Label code_start, code_end;
+  masm.Bind(&code_start);
+  GenerateNonConstVisitorTestCode(&masm);
+  masm.Bind(&code_end);
+  masm.FinalizeCode();
+  Instruction* instr_start = masm.GetLabelAddress<Instruction*>(&code_start);
+  Instruction* instr_end = masm.GetLabelAddress<Instruction*>(&code_end);
+  // Run the generated code as emitted.
+  int64_t res_orig = RunNonConstVisitorTestGeneratedCode(instr_start);
+  // Let the example modify the code delimited by the two labels.
+  ModifyNonConstVisitorTestGeneratedCode(instr_start, instr_end);
+  // The modified code is expected to produce the negated result.
+  int64_t res_mod = RunNonConstVisitorTestGeneratedCode(instr_start);
+  VIXL_CHECK(res_orig == -res_mod);
+}
+
 #endif  // USE_SIMULATOR
diff --git a/test/test-assembler-a64.cc b/test/test-assembler-a64.cc
index bc59e99..e2b02b8 100644
--- a/test/test-assembler-a64.cc
+++ b/test/test-assembler-a64.cc
@@ -94,14 +94,19 @@
 
 #define BUF_SIZE (4096)
 
-#define SETUP() SETUP_CUSTOM(BUF_SIZE, PositionIndependentCode)
-
 #ifdef USE_SIMULATOR
-
 // Run tests with the simulator.
-#define SETUP_CUSTOM(buf_size, pic)                                            \
-  byte* buf = new byte[buf_size];                                              \
-  MacroAssembler masm(buf, buf_size, pic);                                     \
+
+#define SETUP()                                                                \
+  MacroAssembler masm(BUF_SIZE);                                               \
+  SETUP_COMMON()
+
+#define SETUP_CUSTOM(size, pic)                                                \
+  byte* buf = new byte[(size) + BUF_SIZE];                                     \
+  MacroAssembler masm(buf, (size) + BUF_SIZE, pic);                            \
+  SETUP_COMMON()
+
+#define SETUP_COMMON()                                                         \
   Decoder decoder;                                                             \
   Simulator* simulator = NULL;                                                 \
   if (Cctest::run_debugger()) {                                                \
@@ -114,6 +119,11 @@
   simulator->set_instruction_stats(Cctest::instruction_stats());               \
   RegisterDump core
 
+// This is a convenience macro to avoid creating a scope for every assembler
+// function called. It will still assert the buffer hasn't been exceeded.
+#define ALLOW_ASM()                                                            \
+  CodeBufferCheckScope guard(&masm, masm.BufferCapacity())
+
 #define START()                                                                \
   masm.Reset();                                                                \
   simulator->ResetState();                                                     \
@@ -143,20 +153,37 @@
   masm.FinalizeCode()
 
 #define RUN()                                                                  \
-  simulator->RunFrom(reinterpret_cast<Instruction*>(buf))
+  simulator->RunFrom(masm.GetStartAddress<Instruction*>())
 
-#define TEARDOWN()                                                             \
-  delete simulator;                                                            \
-  delete[] buf;
+#define TEARDOWN() TEARDOWN_COMMON()
+
+#define TEARDOWN_CUSTOM()                                                      \
+  delete[] buf;                                                                \
+  TEARDOWN_COMMON()
+
+#define TEARDOWN_COMMON()                                                      \
+  delete simulator;
 
 #else  // ifdef USE_SIMULATOR.
 // Run the test on real hardware or models.
-#define SETUP_CUSTOM(buf_size, pic)                                            \
-  byte* buf = new byte[buf_size];                                              \
-  MacroAssembler masm(buf, buf_size, pic);                                     \
+#define SETUP()                                                                \
+  MacroAssembler masm(BUF_SIZE);                                               \
+  SETUP_COMMON()
+
+#define SETUP_CUSTOM(size, pic)                                                \
+  byte* buf = new byte[(size) + BUF_SIZE];                                     \
+  MacroAssembler masm(buf, (size) + BUF_SIZE, pic);                            \
+  SETUP_COMMON()
+
+#define SETUP_COMMON()                                                         \
   RegisterDump core;                                                           \
   CPU::SetUp()
 
+// This is a convenience macro to avoid creating a scope for every assembler
+// function called. It will still assert the buffer hasn't been exceeded.
+#define ALLOW_ASM()                                                            \
+  CodeBufferCheckScope guard(&masm, masm.BufferCapacity())
+
 #define START()                                                                \
   masm.Reset();                                                                \
   __ PushCalleeSavedRegisters()
@@ -168,16 +195,21 @@
   masm.FinalizeCode()
 
 #define RUN()                                                                  \
-  CPU::EnsureIAndDCacheCoherency(buf, masm.SizeOfCodeGenerated());             \
   {                                                                            \
+    byte* buffer_start = masm.GetStartAddress<byte*>();                        \
+    size_t buffer_length = masm.CursorOffset();                                \
     void (*test_function)(void);                                               \
-    VIXL_ASSERT(sizeof(buf) == sizeof(test_function));                         \
-    memcpy(&test_function, &buf, sizeof(buf));                                 \
+                                                                               \
+    CPU::EnsureIAndDCacheCoherency(buffer_start, buffer_length);               \
+    VIXL_STATIC_ASSERT(sizeof(buffer_start) == sizeof(test_function));         \
+    memcpy(&test_function, &buffer_start, sizeof(buffer_start));               \
     test_function();                                                           \
   }
 
-#define TEARDOWN()                                                             \
-  delete[] buf;
+#define TEARDOWN()
+
+#define TEARDOWN_CUSTOM()                                                      \
+  delete[] buf;
 
 #endif  // ifdef USE_SIMULATOR.
 
@@ -399,6 +431,7 @@
 
 TEST(mov) {
   SETUP();
+  ALLOW_ASM();
 
   START();
   __ Mov(x0, 0xffffffffffffffff);
@@ -1561,6 +1594,46 @@
 }
 
 
+TEST(label_2) {
+  SETUP();
+  // Binds labels with BindToOffset() to offsets located behind the cursor.
+  Label label_1, label_2, label_3;
+  Label first_jump_to_3;
+
+  START();
+  __ Mov(x0, 0x0);
+
+  __ B(&label_1);
+  ptrdiff_t offset_2 = masm.CursorOffset();  // Later bound to label_2.
+  __ Orr(x0, x0, 1 << 1);
+  __ B(&label_3);
+  ptrdiff_t offset_1 = masm.CursorOffset();  // Later bound to label_1.
+  __ Orr(x0, x0, 1 << 0);
+  __ B(&label_2);
+  ptrdiff_t offset_3 = masm.CursorOffset();  // Later bound to label_3.
+  __ Tbz(x0, 2, &first_jump_to_3);  // Taken on the first pass only.
+  __ Orr(x0, x0, 1 << 3);
+  __ Bind(&first_jump_to_3);
+  __ Orr(x0, x0, 1 << 2);
+  __ Tbz(x0, 3, &label_3);  // Taken once, back to label_3.
+
+  // Labels 1, 2, and 3 are bound before the current buffer offset. Branches to
+  // label_1 and label_2 branch respectively forward and backward. Branches to
+  // label_3 include both forward and backward branches.
+  masm.BindToOffset(&label_1, offset_1);
+  masm.BindToOffset(&label_2, offset_2);
+  masm.BindToOffset(&label_3, offset_3);
+
+  END();
+
+  RUN();
+  // Each of the four Orr instructions must have run, setting bits 0 to 3.
+  ASSERT_EQUAL_64(0xf, x0);
+
+  TEARDOWN();
+}
+
+
 TEST(adr) {
   SETUP();
 
@@ -1617,9 +1690,12 @@
   START();
 
   // Waste space until the start of a page.
-  { InstructionAccurateScope scope(&masm);
+  {
+    InstructionAccurateScope scope(&masm,
+                                   kPageSize / kInstructionSize,
+                                   InstructionAccurateScope::kMaximumSize);
     const uintptr_t kPageOffsetMask = kPageSize - 1;
-    while ((GetPCAddress<uintptr_t>(&masm, buf) & kPageOffsetMask) != 0) {
+    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
       __ b(&start);
     }
     __ bind(&start);
@@ -1666,7 +1742,7 @@
   ASSERT_EQUAL_64(expected, x7);
   ASSERT_EQUAL_64(expected, x8);
 
-  TEARDOWN();
+  TEARDOWN_CUSTOM();
 }
 
 
@@ -1680,26 +1756,29 @@
   // on pages from kStartPage to kEndPage (inclusive).
   const int kStartPage = -16;
   const int kEndPage = 16;
+  const int kMaxCodeSize = (kEndPage - kStartPage + 2) * kPageSize;
 
-  SETUP_CUSTOM((kEndPage - kStartPage + 3) * kPageSize,
-               PageOffsetDependentCode);
+  SETUP_CUSTOM(kMaxCodeSize, PageOffsetDependentCode);
   START();
 
-  // Initialize NZCV with `eq` flags.
-  __ Cmp(wzr, wzr);
-
   Label test;
-  { InstructionAccurateScope scope(&masm);
-    Label start;
+  Label start;
 
+  {
+    InstructionAccurateScope scope(&masm,
+                                   kMaxCodeSize / kInstructionSize,
+                                   InstructionAccurateScope::kMaximumSize);
+    // Initialize NZCV with `eq` flags.
+    __ cmp(wzr, wzr);
     // Waste space until the start of a page.
-    while ((GetPCAddress<uintptr_t>(&masm, buf) & kPageOffsetMask) != 0) {
+    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
       __ b(&start);
     }
 
     // The first page.
     VIXL_STATIC_ASSERT(kStartPage < 0);
-    { InstructionAccurateScope scope_page(&masm, kPageSize / kInstructionSize);
+    {
+      InstructionAccurateScope scope_page(&masm, kPageSize / kInstructionSize);
       __ bind(&start);
       __ adrp(x0, &test);
       __ adrp(x1, &test);
@@ -1727,11 +1806,11 @@
         }
       }
     }
-
-    // Every adrp instruction pointed to the same label (`test`), so they should
-    // all have produced the same result.
   }
 
+  // Every adrp instruction pointed to the same label (`test`), so they should
+  // all have produced the same result.
+
   END();
   RUN();
 
@@ -1741,7 +1820,7 @@
   ASSERT_EQUAL_64(expected, x1);
   ASSERT_EQUAL_NZCV(ZCFlag);
 
-  TEARDOWN();
+  TEARDOWN_CUSTOM();
 }
 
 
@@ -1758,22 +1837,27 @@
 
 static void AdrpOffsetHelper(int64_t imm21) {
   const size_t kPageOffsetMask = kPageSize - 1;
+  const int kMaxCodeSize = 2 * kPageSize;
 
-  SETUP_CUSTOM(kPageSize * 4, PageOffsetDependentCode);
+  SETUP_CUSTOM(kMaxCodeSize, PageOffsetDependentCode);
   START();
 
-  // Initialize NZCV with `eq` flags.
-  __ Cmp(wzr, wzr);
-
   Label page;
-  { InstructionAccurateScope scope(&masm);
+
+  {
+    InstructionAccurateScope scope(&masm,
+                                   kMaxCodeSize / kInstructionSize,
+                                   InstructionAccurateScope::kMaximumSize);
+    // Initialize NZCV with `eq` flags.
+    __ cmp(wzr, wzr);
     // Waste space until the start of a page.
-    while ((GetPCAddress<uintptr_t>(&masm, buf) & kPageOffsetMask) != 0) {
+    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
       __ b(&page);
     }
     __ bind(&page);
 
-    { InstructionAccurateScope scope_page(&masm, kPageSize / kInstructionSize);
+    {
+      InstructionAccurateScope scope_page(&masm, kPageSize / kInstructionSize);
       // Every adrp instruction on this page should return the same value.
       __ adrp(x0, imm21);
       __ adrp(x1, imm21);
@@ -1793,7 +1877,7 @@
   ASSERT_EQUAL_64(expected, x1);
   ASSERT_EQUAL_NZCV(ZCFlag);
 
-  TEARDOWN();
+  TEARDOWN_CUSTOM();
 }
 
 
@@ -1812,6 +1896,7 @@
 
 TEST(branch_cond) {
   SETUP();
+  ALLOW_ASM();
 
   Label wrong;
 
@@ -1875,11 +1960,13 @@
   __ Mov(x0, 0x0);
   __ Bind(&ok_4);
 
+  // The MacroAssembler does not allow al as a branch condition.
   Label ok_5;
   __ b(&ok_5, al);
   __ Mov(x0, 0x0);
   __ Bind(&ok_5);
 
+  // The MacroAssembler does not allow nv as a branch condition.
   Label ok_6;
   __ b(&ok_6, nv);
   __ Mov(x0, 0x0);
@@ -2655,6 +2742,63 @@
 }
 
 
+TEST(ldp_stp_offset_wide) {
+  SETUP();
+
+  uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff,
+                     0xffeeddccbbaa9988};
+  uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0};
+  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
+  // Move base too far from the array to force multiple instructions
+  // to be emitted.
+  const int64_t base_offset = 1024;
+
+  START();
+  __ Mov(x20, src_base - base_offset);
+  __ Mov(x21, dst_base - base_offset);
+  __ Mov(x18, src_base + base_offset + 24);
+  __ Mov(x19, dst_base + base_offset + 56);
+  __ Ldp(w0, w1, MemOperand(x20, base_offset));
+  __ Ldp(w2, w3, MemOperand(x20, base_offset + 4));
+  __ Ldp(x4, x5, MemOperand(x20, base_offset + 8));
+  __ Ldp(w6, w7, MemOperand(x18, -12 - base_offset));
+  __ Ldp(x8, x9, MemOperand(x18, -16 - base_offset));
+  __ Stp(w0, w1, MemOperand(x21, base_offset));
+  __ Stp(w2, w3, MemOperand(x21, base_offset + 8));
+  __ Stp(x4, x5, MemOperand(x21, base_offset + 16));
+  __ Stp(w6, w7, MemOperand(x19, -24 - base_offset));
+  __ Stp(x8, x9, MemOperand(x19, -16 - base_offset));
+  END();
+
+  RUN();
+
+  ASSERT_EQUAL_64(0x44556677, x0);
+  ASSERT_EQUAL_64(0x00112233, x1);
+  ASSERT_EQUAL_64(0x0011223344556677, dst[0]);
+  ASSERT_EQUAL_64(0x00112233, x2);
+  ASSERT_EQUAL_64(0xccddeeff, x3);
+  ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]);
+  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
+  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]);
+  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
+  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]);
+  ASSERT_EQUAL_64(0x8899aabb, x6);
+  ASSERT_EQUAL_64(0xbbaa9988, x7);
+  ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]);
+  ASSERT_EQUAL_64(0x8899aabbccddeeff, x8);
+  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]);
+  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9);
+  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]);
+  ASSERT_EQUAL_64(src_base - base_offset, x20);
+  ASSERT_EQUAL_64(dst_base - base_offset, x21);
+  ASSERT_EQUAL_64(src_base + base_offset + 24, x18);
+  ASSERT_EQUAL_64(dst_base + base_offset + 56, x19);
+
+  TEARDOWN();
+}
+
+
 TEST(ldnp_stnp_offset) {
   SETUP();
 
@@ -2763,6 +2907,68 @@
 }
 
 
+TEST(ldp_stp_preindex_wide) {
+  SETUP();
+
+  uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff,
+                     0xffeeddccbbaa9988};
+  uint64_t dst[5] = {0, 0, 0, 0, 0};
+  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
+  // Move base too far from the array to force multiple instructions
+  // to be emitted.
+  const int64_t base_offset = 1024;
+
+  START();
+  __ Mov(x24, src_base - base_offset);
+  __ Mov(x25, dst_base + base_offset);
+  __ Mov(x18, dst_base + base_offset + 16);
+  __ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PreIndex));
+  __ Mov(x19, x24);
+  __ Mov(x24, src_base - base_offset + 4);
+  __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PreIndex));
+  __ Stp(w2, w3, MemOperand(x25, 4 - base_offset , PreIndex));
+  __ Mov(x20, x25);
+  __ Mov(x25, dst_base + base_offset + 4);
+  __ Mov(x24, src_base - base_offset);
+  __ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PreIndex));
+  __ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PreIndex));
+  __ Mov(x21, x24);
+  __ Mov(x24, src_base - base_offset + 8);
+  __ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PreIndex));
+  __ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PreIndex));
+  __ Mov(x22, x18);
+  __ Mov(x18, dst_base + base_offset + 16 + 8);
+  __ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PreIndex));
+  END();
+
+  RUN();
+
+  ASSERT_EQUAL_64(0x00112233, x0);
+  ASSERT_EQUAL_64(0xccddeeff, x1);
+  ASSERT_EQUAL_64(0x44556677, x2);
+  ASSERT_EQUAL_64(0x00112233, x3);
+  ASSERT_EQUAL_64(0xccddeeff00112233, dst[0]);
+  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
+  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
+  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
+  ASSERT_EQUAL_64(0x0011223344556677, x6);
+  ASSERT_EQUAL_64(0x8899aabbccddeeff, x7);
+  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
+  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
+  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
+  ASSERT_EQUAL_64(src_base, x24);
+  ASSERT_EQUAL_64(dst_base, x25);
+  ASSERT_EQUAL_64(dst_base + 16, x18);
+  ASSERT_EQUAL_64(src_base + 4, x19);
+  ASSERT_EQUAL_64(dst_base + 4, x20);
+  ASSERT_EQUAL_64(src_base + 8, x21);
+  ASSERT_EQUAL_64(dst_base + 24, x22);
+
+  TEARDOWN();
+}
+
+
 TEST(ldp_stp_postindex) {
   SETUP();
 
@@ -2817,6 +3023,68 @@
 }
 
 
+TEST(ldp_stp_postindex_wide) {
+  SETUP();
+
+  uint64_t src[4] = {0x0011223344556677, 0x8899aabbccddeeff,
+                     0xffeeddccbbaa9988, 0x7766554433221100};
+  uint64_t dst[5] = {0, 0, 0, 0, 0};
+  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
+  // Move base too far from the array to force multiple instructions
+  // to be emitted.
+  const int64_t base_offset = 1024;
+
+  START();
+  __ Mov(x24, src_base);
+  __ Mov(x25, dst_base);
+  __ Mov(x18, dst_base + 16);
+  __ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PostIndex));
+  __ Mov(x19, x24);
+  __ Sub(x24, x24, base_offset);
+  __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PostIndex));
+  __ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PostIndex));
+  __ Mov(x20, x25);
+  __ Sub(x24, x24, base_offset);
+  __ Add(x25, x25, base_offset);
+  __ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PostIndex));
+  __ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PostIndex));
+  __ Mov(x21, x24);
+  __ Sub(x24, x24, base_offset);
+  __ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PostIndex));
+  __ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PostIndex));
+  __ Mov(x22, x18);
+  __ Add(x18, x18, base_offset);
+  __ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PostIndex));
+  END();
+
+  RUN();
+
+  ASSERT_EQUAL_64(0x44556677, x0);
+  ASSERT_EQUAL_64(0x00112233, x1);
+  ASSERT_EQUAL_64(0x00112233, x2);
+  ASSERT_EQUAL_64(0xccddeeff, x3);
+  ASSERT_EQUAL_64(0x4455667700112233, dst[0]);
+  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
+  ASSERT_EQUAL_64(0x0011223344556677, x4);
+  ASSERT_EQUAL_64(0x8899aabbccddeeff, x5);
+  ASSERT_EQUAL_64(0x8899aabbccddeeff, x6);
+  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x7);
+  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
+  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
+  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
+  ASSERT_EQUAL_64(src_base + base_offset, x24);
+  ASSERT_EQUAL_64(dst_base - base_offset, x25);
+  ASSERT_EQUAL_64(dst_base - base_offset + 16, x18);
+  ASSERT_EQUAL_64(src_base + base_offset + 4, x19);
+  ASSERT_EQUAL_64(dst_base - base_offset + 4, x20);
+  ASSERT_EQUAL_64(src_base + base_offset + 8, x21);
+  ASSERT_EQUAL_64(dst_base - base_offset + 24, x22);
+
+  TEARDOWN();
+}
+
+
 TEST(ldp_sign_extend) {
   SETUP();
 
@@ -2887,6 +3155,8 @@
   START();
   __ Ldr(x2, 0x1234567890abcdef);
   __ Ldr(w3, 0xfedcba09);
+  __ Ldrsw(x4, 0x7fffffff);
+  __ Ldrsw(x5, 0x80000000);
   __ Ldr(d13, 1.234);
   __ Ldr(s25, 2.5);
   END();
@@ -2895,6 +3165,8 @@
 
   ASSERT_EQUAL_64(0x1234567890abcdef, x2);
   ASSERT_EQUAL_64(0xfedcba09, x3);
+  ASSERT_EQUAL_64(0x7fffffff, x4);
+  ASSERT_EQUAL_64(0xffffffff80000000, x5);
   ASSERT_EQUAL_FP64(1.234, d13);
   ASSERT_EQUAL_FP32(2.5, s25);
 
@@ -2902,76 +3174,41 @@
 }
 
 
-static void LdrLiteralRangeHelper(ptrdiff_t range_,
-                                  LiteralPoolEmitOption option,
-                                  bool expect_dump) {
-  VIXL_ASSERT(range_ > 0);
-  SETUP_CUSTOM(range_ + 1024, PositionIndependentCode);
-
-  Label label_1, label_2;
-
-  size_t range = static_cast<size_t>(range_);
-  size_t code_size = 0;
-  size_t pool_guard_size;
-
-  if (option == NoJumpRequired) {
-    // Space for an explicit branch.
-    pool_guard_size = sizeof(Instr);
-  } else {
-    pool_guard_size = 0;
-  }
+TEST(ldr_literal_range) {
+  SETUP();
 
   START();
-  // Force a pool dump so the pool starts off empty.
-  __ EmitLiteralPool(JumpRequired);
+  // Make sure the pool is empty.
+  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
   ASSERT_LITERAL_POOL_SIZE(0);
 
+  // Create some literal pool entries.
   __ Ldr(x0, 0x1234567890abcdef);
   __ Ldr(w1, 0xfedcba09);
+  __ Ldrsw(x2, 0x7fffffff);
+  __ Ldrsw(x3, 0x80000000);
   __ Ldr(d0, 1.234);
   __ Ldr(s1, 2.5);
-  ASSERT_LITERAL_POOL_SIZE(24);
+  ASSERT_LITERAL_POOL_SIZE(32);
 
-  code_size += 4 * sizeof(Instr);
-
-  // Check that the requested range (allowing space for a branch over the pool)
-  // can be handled by this test.
-  VIXL_ASSERT((code_size + pool_guard_size) <= range);
-
-  // Emit NOPs up to 'range', leaving space for the pool guard.
-  while ((code_size + pool_guard_size) < range) {
+  // Emit more code than the maximum literal load range to ensure the pool
+  // is emitted.
+  const ptrdiff_t offset = masm.CursorOffset();
+  while ((masm.CursorOffset() - offset) < (2 * kMaxLoadLiteralRange)) {
     __ Nop();
-    code_size += sizeof(Instr);
   }
 
-  // Emit the guard sequence before the literal pool.
-  if (option == NoJumpRequired) {
-    __ B(&label_1);
-    code_size += sizeof(Instr);
-  }
-
-  VIXL_ASSERT(code_size == range);
-  ASSERT_LITERAL_POOL_SIZE(24);
-
-  // Possibly generate a literal pool.
-  __ CheckLiteralPool(option);
-  __ Bind(&label_1);
-  if (expect_dump) {
-    ASSERT_LITERAL_POOL_SIZE(0);
-  } else {
-    ASSERT_LITERAL_POOL_SIZE(24);
-  }
-
-  // Force a pool flush to check that a second pool functions correctly.
-  __ EmitLiteralPool(JumpRequired);
+  // The pool should have been emitted.
   ASSERT_LITERAL_POOL_SIZE(0);
 
   // These loads should be after the pool (and will require a new one).
   __ Ldr(x4, 0x34567890abcdef12);
   __ Ldr(w5, 0xdcba09fe);
+  __ Ldrsw(x6, 0x7fffffff);
+  __ Ldrsw(x7, 0x80000000);
   __ Ldr(d4, 123.4);
   __ Ldr(s5, 250.0);
-  ASSERT_LITERAL_POOL_SIZE(24);
+  ASSERT_LITERAL_POOL_SIZE(32);
   END();
 
   RUN();
@@ -2979,10 +3216,14 @@
   // Check that the literals loaded correctly.
   ASSERT_EQUAL_64(0x1234567890abcdef, x0);
   ASSERT_EQUAL_64(0xfedcba09, x1);
+  ASSERT_EQUAL_64(0x7fffffff, x2);
+  ASSERT_EQUAL_64(0xffffffff80000000, x3);
   ASSERT_EQUAL_FP64(1.234, d0);
   ASSERT_EQUAL_FP32(2.5, s1);
   ASSERT_EQUAL_64(0x34567890abcdef12, x4);
   ASSERT_EQUAL_64(0xdcba09fe, x5);
+  ASSERT_EQUAL_64(0x7fffffff, x6);
+  ASSERT_EQUAL_64(0xffffffff80000000, x7);
   ASSERT_EQUAL_FP64(123.4, d4);
   ASSERT_EQUAL_FP32(250.0, s5);
 
@@ -2990,45 +3231,144 @@
 }
 
 
-TEST(ldr_literal_range_1) {
-  LdrLiteralRangeHelper(kRecommendedLiteralPoolRange,
-                        NoJumpRequired,
-                        true);
+template <typename T>
+void LoadIntValueHelper(T values[], int card) {
+  SETUP();
+
+  const bool is_32bits = (sizeof(T) == 4);
+  const Register& tgt1 = is_32bits ? w1 : x1;
+  const Register& tgt2 = is_32bits ? w2 : x2;
+
+  START();
+  __ Mov(x0, 0);
+  // Load each value both with Mov and from the literal pool (Ldr).
+  // x0 counts the mismatches, so it stays zero only if every value matches.
+  for (int i = 0; i < card; ++i) {
+    __ Mov(tgt1, values[i]);
+    __ Ldr(tgt2, values[i]);
+    __ Cmp(tgt1, tgt2);
+    __ Cinc(x0, x0, ne);
+  }
+  END();
+
+  RUN();
+
+  // If one of the values differs, the trace can be used to identify which one.
+  ASSERT_EQUAL_64(0, x0);
+
+  TEARDOWN();
 }
 
 
-TEST(ldr_literal_range_2) {
-  LdrLiteralRangeHelper(kRecommendedLiteralPoolRange-sizeof(Instr),
-                        NoJumpRequired,
-                        false);
+TEST(ldr_literal_values_x) {
+  static const uint64_t kValues[] = {
+    0x8000000000000000, 0x7fffffffffffffff, 0x0000000000000000,
+    0xffffffffffffffff, 0x00ff00ff00ff00ff, 0x1234567890abcdef
+  };
+
+  LoadIntValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
 }
 
 
-TEST(ldr_literal_range_3) {
-  LdrLiteralRangeHelper(2 * kRecommendedLiteralPoolRange,
-                        JumpRequired,
-                        true);
+TEST(ldr_literal_values_w) {
+  static const uint32_t kValues[] = {
+    0x80000000, 0x7fffffff, 0x00000000, 0xffffffff, 0x00ff00ff, 0x12345678,
+    0x90abcdef
+  };
+
+  LoadIntValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
 }
 
 
-TEST(ldr_literal_range_4) {
-  LdrLiteralRangeHelper(2 * kRecommendedLiteralPoolRange-sizeof(Instr),
-                        JumpRequired,
-                        false);
+template <typename T>
+void LoadFPValueHelper(T values[], int card) {
+  SETUP();
+
+  const bool is_32bits = (sizeof(T) == 4);
+  const FPRegister& fp_tgt = is_32bits ? s2 : d2;
+  const Register& tgt1 = is_32bits ? w1 : x1;
+  const Register& tgt2 = is_32bits ? w2 : x2;
+
+  START();
+  __ Mov(x0, 0);
+  // Compare the raw bits of each value with the bits loaded from the pool.
+  // x0 counts the mismatches, so it stays zero only if every value matches.
+  for (int i = 0; i < card; ++i) {
+    __ Mov(tgt1, is_32bits ? float_to_rawbits(values[i])
+                           : double_to_rawbits(values[i]));
+    __ Ldr(fp_tgt, values[i]);
+    __ Fmov(tgt2, fp_tgt);
+    __ Cmp(tgt1, tgt2);
+    __ Cinc(x0, x0, ne);
+  }
+  END();
+
+  RUN();
+
+  // If one of the values differs, the trace can be used to identify which one.
+  ASSERT_EQUAL_64(0, x0);
+
+  TEARDOWN();
+}
+
+TEST(ldr_literal_values_d) {
+  static const double kValues[] = {
+    -0.0, 0.0, -1.0, 1.0, -1e10, 1e10
+  };
+
+  LoadFPValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
 }
 
 
-TEST(ldr_literal_range_5) {
-  LdrLiteralRangeHelper(kLiteralPoolCheckInterval,
-                        JumpRequired,
-                        false);
+TEST(ldr_literal_values_s) {
+  static const float kValues[] = {
+    -0.0, 0.0, -1.0, 1.0, -1e10, 1e10
+  };
+
+  LoadFPValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
 }
 
 
-TEST(ldr_literal_range_6) {
-  LdrLiteralRangeHelper(kLiteralPoolCheckInterval-sizeof(Instr),
-                        JumpRequired,
-                        false);
+TEST(ldr_literal_custom) {
+  // The macro assembler always emits pools after the instructions using them;
+  // this test emits a pool manually and then uses it.
+  SETUP();
+  ALLOW_ASM();
+
+  Label end_of_pool;
+  Literal<uint64_t> literal_x(0x1234567890abcdef);
+  Literal<uint32_t> literal_w(0xfedcba09);
+  Literal<uint32_t> literal_sx(0x80000000);
+  Literal<double> literal_d(1.234);
+  Literal<float> literal_s(2.5);
+
+  START();
+  // Manually generate a pool.
+  __ B(&end_of_pool);
+  __ place(&literal_x);
+  __ place(&literal_w);
+  __ place(&literal_sx);
+  __ place(&literal_d);
+  __ place(&literal_s);
+  __ Bind(&end_of_pool);
+
+  // Now load the entries.
+  __ ldr(x2, &literal_x);
+  __ ldr(w3, &literal_w);
+  __ ldrsw(x5, &literal_sx);
+  __ ldr(d13, &literal_d);
+  __ ldr(s25, &literal_s);
+  END();
+
+  RUN();
+
+  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
+  ASSERT_EQUAL_64(0xfedcba09, x3);
+  ASSERT_EQUAL_64(0xffffffff80000000, x5);
+  ASSERT_EQUAL_FP64(1.234, d13);
+  ASSERT_EQUAL_FP32(2.5, s25);
+
+  TEARDOWN();
 }
 
 
@@ -3994,6 +4334,7 @@
 
 TEST(ccmp) {
   SETUP();
+  ALLOW_ASM();
 
   START();
   __ Mov(w16, 0);
@@ -4014,9 +4355,11 @@
   __ Ccmn(x16, 2, NZCVFlag, ne);
   __ Mrs(x3, NZCV);
 
+  // The MacroAssembler does not allow al as a condition.
   __ ccmp(x16, x16, NZCVFlag, al);
   __ Mrs(x4, NZCV);
 
+  // The MacroAssembler does not allow nv as a condition.
   __ ccmp(x16, x16, NZCVFlag, nv);
   __ Mrs(x5, NZCV);
 
@@ -4104,6 +4447,7 @@
 
 TEST(csel) {
   SETUP();
+  ALLOW_ASM();
 
   START();
   __ Mov(x16, 0);
@@ -4116,6 +4460,7 @@
   __ Csinc(w2, w24, w25, mi);
   __ Csinc(w3, w24, w25, pl);
 
+  // The MacroAssembler does not allow al or nv as a condition.
   __ csel(w13, w24, w25, al);
   __ csel(x14, x24, x25, nv);
 
@@ -4131,6 +4476,7 @@
   __ Cinv(x11, x24, ne);
   __ Cneg(x12, x24, ne);
 
+  // The MacroAssembler does not allow al or nv as a condition.
   __ csel(w15, w24, w25, al);
   __ csel(x17, x24, x25, nv);
 
@@ -4215,6 +4561,7 @@
 
 TEST(lslv) {
   SETUP();
+  ALLOW_ASM();
 
   uint64_t value = 0x0123456789abcdef;
   int shift[] = {1, 3, 5, 9, 17, 33};
@@ -4228,6 +4575,7 @@
   __ Mov(w5, shift[4]);
   __ Mov(w6, shift[5]);
 
+  // The MacroAssembler does not allow zr as an argument.
   __ lslv(x0, x0, xzr);
 
   __ Lsl(x16, x0, x1);
@@ -4267,6 +4615,7 @@
 
 TEST(lsrv) {
   SETUP();
+  ALLOW_ASM();
 
   uint64_t value = 0x0123456789abcdef;
   int shift[] = {1, 3, 5, 9, 17, 33};
@@ -4280,6 +4629,7 @@
   __ Mov(w5, shift[4]);
   __ Mov(w6, shift[5]);
 
+  // The MacroAssembler does not allow zr as an argument.
   __ lsrv(x0, x0, xzr);
 
   __ Lsr(x16, x0, x1);
@@ -4321,6 +4671,7 @@
 
 TEST(asrv) {
   SETUP();
+  ALLOW_ASM();
 
   int64_t value = 0xfedcba98fedcba98;
   int shift[] = {1, 3, 5, 9, 17, 33};
@@ -4334,6 +4685,7 @@
   __ Mov(w5, shift[4]);
   __ Mov(w6, shift[5]);
 
+  // The MacroAssembler does not allow zr as an argument.
   __ asrv(x0, x0, xzr);
 
   __ Asr(x16, x0, x1);
@@ -4375,6 +4727,7 @@
 
 TEST(rorv) {
   SETUP();
+  ALLOW_ASM();
 
   uint64_t value = 0x0123456789abcdef;
   int shift[] = {4, 8, 12, 16, 24, 36};
@@ -4388,6 +4741,7 @@
   __ Mov(w5, shift[4]);
   __ Mov(w6, shift[5]);
 
+  // The MacroAssembler does not allow zr as an argument.
   __ rorv(x0, x0, xzr);
 
   __ Ror(x16, x0, x1);
@@ -4427,6 +4781,7 @@
 
 TEST(bfm) {
   SETUP();
+  ALLOW_ASM();
 
   START();
   __ Mov(x1, 0x0123456789abcdef);
@@ -4438,6 +4793,7 @@
   __ Mov(w20, 0x88888888);
   __ Mov(w21, 0x88888888);
 
+  // There is no macro instruction for bfm.
   __ bfm(x10, x1, 16, 31);
   __ bfm(x11, x1, 32, 15);
 
@@ -4467,11 +4823,13 @@
 
 TEST(sbfm) {
   SETUP();
+  ALLOW_ASM();
 
   START();
   __ Mov(x1, 0x0123456789abcdef);
   __ Mov(x2, 0xfedcba9876543210);
 
+  // There is no macro instruction for sbfm.
   __ sbfm(x10, x1, 16, 31);
   __ sbfm(x11, x1, 32, 15);
   __ sbfm(x12, x1, 32, 47);
@@ -4529,6 +4887,7 @@
 
 TEST(ubfm) {
   SETUP();
+  ALLOW_ASM();
 
   START();
   __ Mov(x1, 0x0123456789abcdef);
@@ -4537,6 +4896,7 @@
   __ Mov(x10, 0x8888888888888888);
   __ Mov(x11, 0x8888888888888888);
 
+  // There is no macro instruction for ubfm.
   __ ubfm(x10, x1, 16, 31);
   __ ubfm(x11, x1, 32, 15);
   __ ubfm(x12, x1, 32, 47);
@@ -5453,6 +5813,7 @@
 
 TEST(fccmp) {
   SETUP();
+  ALLOW_ASM();
 
   START();
   __ Fmov(s16, 0.0);
@@ -5493,6 +5854,7 @@
   __ Fccmp(d18, d19, NFlag, hi);
   __ Mrs(x7, NZCV);
 
+  // The MacroAssembler does not allow al or nv as a condition.
   __ fccmp(s16, s16, NFlag, al);
   __ Mrs(x8, NZCV);
 
@@ -5599,6 +5961,7 @@
 
 TEST(fcsel) {
   SETUP();
+  ALLOW_ASM();
 
   START();
   __ Mov(x16, 0);
@@ -5612,6 +5975,7 @@
   __ Fcsel(s1, s16, s17, ne);
   __ Fcsel(d2, d18, d19, eq);
   __ Fcsel(d3, d18, d19, ne);
+  // The MacroAssembler does not allow al or nv as a condition.
   __ fcsel(s4, s16, s17, al);
   __ fcsel(d5, d18, d19, nv);
   END();
@@ -7513,6 +7877,7 @@
 
 TEST(zero_dest) {
   SETUP();
+  ALLOW_ASM();
   RegisterDump before;
 
   START();
@@ -7579,6 +7944,7 @@
 
 TEST(zero_dest_setflags) {
   SETUP();
+  ALLOW_ASM();
   RegisterDump before;
 
   START();
@@ -7953,6 +8319,92 @@
 }
 
 
+TEST(peek_poke_reglist) {
+  SETUP();
+  START();
+
+  // The literal base is chosen to have two useful properties:
+  //  * When multiplied by small values (such as a register index), this value
+  //    is clearly readable in the result.
+  //  * The value is not formed from repeating fixed-size smaller values, so it
+  //    can be used to detect endianness-related errors.
+  uint64_t base = 0x0100001000100101;
+
+  // Initialize the registers.
+  __ Mov(x1, base);
+  __ Add(x2, x1, x1);
+  __ Add(x3, x2, x1);
+  __ Add(x4, x3, x1);
+
+  CPURegList list_1(x1, x2, x3, x4);
+  CPURegList list_2(x11, x12, x13, x14);
+  int list_1_size = list_1.TotalSizeInBytes();
+
+  __ Claim(2 * list_1_size);
+
+  __ PokeCPURegList(list_1, 0);
+  __ PokeXRegList(list_1.list(), list_1_size);
+  __ PeekCPURegList(list_2, 2 * kXRegSizeInBytes);
+  __ PeekXRegList(x15.Bit(), kWRegSizeInBytes);
+  __ PeekWRegList(w16.Bit() | w17.Bit(), 3 * kXRegSizeInBytes);
+
+  __ Drop(2 * list_1_size);
+
+
+  uint64_t base_d = 0x1010010001000010;
+
+  // Initialize the registers.
+  __ Mov(x1, base_d);
+  __ Add(x2, x1, x1);
+  __ Add(x3, x2, x1);
+  __ Add(x4, x3, x1);
+  __ Fmov(d1, x1);
+  __ Fmov(d2, x2);
+  __ Fmov(d3, x3);
+  __ Fmov(d4, x4);
+
+  CPURegList list_d_1(d1, d2, d3, d4);
+  CPURegList list_d_2(d11, d12, d13, d14);
+  int list_d_1_size = list_d_1.TotalSizeInBytes();
+
+  __ Claim(2 * list_d_1_size);
+
+  __ PokeCPURegList(list_d_1, 0);
+  __ PokeDRegList(list_d_1.list(), list_d_1_size);
+  __ PeekCPURegList(list_d_2, 2 * kDRegSizeInBytes);
+  __ PeekDRegList(d15.Bit(), kSRegSizeInBytes);
+  __ PeekSRegList(s16.Bit() | s17.Bit(), 3 * kDRegSizeInBytes);
+
+  __ Drop(2 * list_d_1_size);
+
+
+  END();
+  RUN();
+
+  ASSERT_EQUAL_64(3 * base, x11);
+  ASSERT_EQUAL_64(4 * base, x12);
+  ASSERT_EQUAL_64(1 * base, x13);
+  ASSERT_EQUAL_64(2 * base, x14);
+  ASSERT_EQUAL_64(((1 * base) >> kWRegSize) | ((2 * base) << kWRegSize), x15);
+  ASSERT_EQUAL_64(2 * base, x14);
+  ASSERT_EQUAL_32((4 * base) & kWRegMask, w16);
+  ASSERT_EQUAL_32((4 * base) >> kWRegSize, w17);
+
+  ASSERT_EQUAL_FP64(rawbits_to_double(3 * base_d), d11);
+  ASSERT_EQUAL_FP64(rawbits_to_double(4 * base_d), d12);
+  ASSERT_EQUAL_FP64(rawbits_to_double(1 * base_d), d13);
+  ASSERT_EQUAL_FP64(rawbits_to_double(2 * base_d), d14);
+  ASSERT_EQUAL_FP64(
+      rawbits_to_double((base_d >> kSRegSize) | ((2 * base_d) << kSRegSize)),
+      d15);
+  ASSERT_EQUAL_FP64(rawbits_to_double(2 * base_d), d14);
+  ASSERT_EQUAL_FP32(rawbits_to_float((4 * base_d) & kSRegMask), s16);
+  ASSERT_EQUAL_FP32(rawbits_to_float((4 * base_d) >> kSRegSize), s17);
+
+  TEARDOWN();
+}
+
+
 // This enum is used only as an argument to the push-pop test helpers.
 enum PushPopMethod {
   // Push or Pop using the Push and Pop methods, with blocks of up to four
@@ -8804,7 +9256,7 @@
 
 
 TEST(printf) {
-  SETUP_CUSTOM(BUF_SIZE * 2, PositionIndependentCode);
+  SETUP();
   START();
 
   char const * test_plain_string = "Printf with no arguments.\n";
@@ -9065,11 +9517,13 @@
   // By default macro instructions are allowed.
   VIXL_ASSERT(masm.AllowMacroInstructions());
   {
-    InstructionAccurateScope scope1(&masm);
+    InstructionAccurateScope scope1(&masm, 2);
     VIXL_ASSERT(!masm.AllowMacroInstructions());
+    __ nop();
     {
-      InstructionAccurateScope scope2(&masm);
+      InstructionAccurateScope scope2(&masm, 1);
       VIXL_ASSERT(!masm.AllowMacroInstructions());
+      __ nop();
     }
     VIXL_ASSERT(!masm.AllowMacroInstructions());
   }
@@ -10266,6 +10720,7 @@
       memset(dst, 0, kMaxDataLength);
 
       SETUP();
+      ALLOW_ASM();
       START();
 
       __ Mov(x0, src_tagged);
@@ -10410,6 +10865,7 @@
       }
 
       SETUP();
+      ALLOW_ASM();
       START();
 
       // Each MemOperand must apply a pre-index equal to the size of the
@@ -10417,6 +10873,7 @@
 
       // Start with a non-zero preindex.
       int preindex = 63 * kXRegSizeInBytes;
+      int data_length = 0;
 
       __ Mov(x0, src_tagged - preindex);
       __ Mov(x1, dst_tagged - preindex);
@@ -10424,7 +10881,7 @@
       __ ldp(x2, x3, MemOperand(x0, preindex, PreIndex));
       __ stp(x2, x3, MemOperand(x1, preindex, PreIndex));
       preindex = 2 * kXRegSizeInBytes;
-      int data_length = preindex;
+      data_length = preindex;
 
       __ ldpsw(x2, x3, MemOperand(x0, preindex, PreIndex));
       __ stp(w2, w3, MemOperand(x1, preindex, PreIndex));
@@ -10533,15 +10990,18 @@
       }
 
       SETUP();
+      ALLOW_ASM();
       START();
 
+      int postindex = 2 * kXRegSizeInBytes;
+      int data_length = 0;
+
       __ Mov(x0, src_tagged);
       __ Mov(x1, dst_tagged);
 
-      int postindex = 2 * kXRegSizeInBytes;
       __ ldp(x2, x3, MemOperand(x0, postindex, PostIndex));
       __ stp(x2, x3, MemOperand(x1, postindex, PostIndex));
-      int data_length = postindex;
+      data_length = postindex;
 
       postindex = 2 * kWRegSizeInBytes;
       __ ldpsw(x2, x3, MemOperand(x0, postindex, PostIndex));
@@ -10655,6 +11115,7 @@
         }
 
         SETUP();
+        ALLOW_ASM();
         START();
 
         __ Mov(x0, src_tagged);
@@ -10722,4 +11183,113 @@
 }
 
 
+TEST(branch_tagged) {
+  SETUP();
+  START();
+
+  Label loop, loop_entry, done;
+  __ Adr(x0, &loop);
+  __ Mov(x1, 0);
+  __ B(&loop_entry);
+
+  __ Bind(&loop);
+  __ Add(x1, x1, 1);  // Count successful jumps.
+
+  // Advance to the next tag, then bail out if we've come back around to tag 0.
+  __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset);
+  __ Tst(x0, kAddressTagMask);
+  __ B(eq, &done);
+
+  __ Bind(&loop_entry);
+  __ Br(x0);
+
+  __ Bind(&done);
+
+  END();
+  RUN();
+
+  ASSERT_EQUAL_64(1 << kAddressTagWidth, x1);
+
+  TEARDOWN();
+}
+
+
+TEST(branch_and_link_tagged) {
+  SETUP();
+  START();
+
+  Label loop, loop_entry, done;
+  __ Adr(x0, &loop);
+  __ Mov(x1, 0);
+  __ B(&loop_entry);
+
+  __ Bind(&loop);
+
+  // Bail out (before counting a successful jump) if lr appears to be tagged.
+  __ Tst(lr, kAddressTagMask);
+  __ B(ne, &done);
+
+  __ Add(x1, x1, 1);  // Count successful jumps.
+
+  // Advance to the next tag, then bail out if we've come back around to tag 0.
+  __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset);
+  __ Tst(x0, kAddressTagMask);
+  __ B(eq, &done);
+
+  __ Bind(&loop_entry);
+  __ Blr(x0);
+
+  __ Bind(&done);
+
+  END();
+  RUN();
+
+  ASSERT_EQUAL_64(1 << kAddressTagWidth, x1);
+
+  TEARDOWN();
+}
+
+
+TEST(branch_tagged_and_adr_adrp) {
+  SETUP_CUSTOM(BUF_SIZE, PageOffsetDependentCode);
+  START();
+
+  Label loop, loop_entry, done;
+  __ Adr(x0, &loop);
+  __ Mov(x1, 0);
+  __ B(&loop_entry);
+
+  __ Bind(&loop);
+
+  // Bail out (before counting a successful jump) if `adr x10, ...` is tagged.
+  __ Adr(x10, &done);
+  __ Tst(x10, kAddressTagMask);
+  __ B(ne, &done);
+
+  // Bail out (before counting a successful jump) if `adrp x11, ...` is tagged.
+  __ Adrp(x11, &done);
+  __ Tst(x11, kAddressTagMask);
+  __ B(ne, &done);
+
+  __ Add(x1, x1, 1);  // Count successful iterations.
+
+  // Advance to the next tag, then bail out if we've come back around to tag 0.
+  __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset);
+  __ Tst(x0, kAddressTagMask);
+  __ B(eq, &done);
+
+  __ Bind(&loop_entry);
+  __ Br(x0);
+
+  __ Bind(&done);
+
+  END();
+  RUN();
+
+  ASSERT_EQUAL_64(1 << kAddressTagWidth, x1);
+
+  TEARDOWN();
+}
+
+
 }  // namespace vixl
diff --git a/test/test-disasm-a64.cc b/test/test-disasm-a64.cc
index 7f85cba..60cc630 100644
--- a/test/test-disasm-a64.cc
+++ b/test/test-disasm-a64.cc
@@ -36,7 +36,7 @@
 #define EXP_SIZE   (256)
 #define INSTR_SIZE (1024)
 #define SETUP_CLASS(ASMCLASS)                                                  \
-  byte* buf = static_cast<byte*>(malloc(INSTR_SIZE));                          \
+  byte* buf = new byte[INSTR_SIZE];                                            \
   uint32_t encoding = 0;                                                       \
   ASMCLASS* masm = new ASMCLASS(buf, INSTR_SIZE);                              \
   Decoder* decoder = new Decoder();                                            \
@@ -45,9 +45,14 @@
 
 #define SETUP() SETUP_CLASS(Assembler)
 
+#define SETUP_MACRO() SETUP_CLASS(MacroAssembler)
+
 #define COMPARE(ASM, EXP)                                                      \
   masm->Reset();                                                               \
-  masm->ASM;                                                                   \
+  {                                                                            \
+    CodeBufferCheckScope blind(masm);                                          \
+    masm->ASM;                                                                 \
+  }                                                                            \
   masm->FinalizeCode();                                                        \
   decoder->Decode(reinterpret_cast<Instruction*>(buf));                        \
   encoding = *reinterpret_cast<uint32_t*>(buf);                                \
@@ -59,6 +64,21 @@
 
 #define COMPARE_PREFIX(ASM, EXP)                                               \
   masm->Reset();                                                               \
+  {                                                                            \
+    CodeBufferCheckScope blind(masm);                                          \
+    masm->ASM;                                                                 \
+  }                                                                            \
+  masm->FinalizeCode();                                                        \
+  decoder->Decode(reinterpret_cast<Instruction*>(buf));                        \
+  encoding = *reinterpret_cast<uint32_t*>(buf);                                \
+  if (strncmp(disasm->GetOutput(), EXP, strlen(EXP)) != 0) {                   \
+    printf("Encoding: %08" PRIx32 "\nExpected: %s\nFound:    %s\n",            \
+           encoding, EXP, disasm->GetOutput());                                \
+    abort();                                                                   \
+  }
+
+#define COMPARE_MACRO(ASM, EXP)                                                \
+  masm->Reset();                                                               \
   masm->ASM;                                                                   \
   masm->FinalizeCode();                                                        \
   decoder->Decode(reinterpret_cast<Instruction*>(buf));                        \
@@ -72,7 +92,8 @@
 #define CLEANUP()                                                              \
   delete disasm;                                                               \
   delete decoder;                                                              \
-  delete masm
+  delete masm;                                                                 \
+  delete[] buf
 
 namespace vixl {
 
@@ -1478,12 +1499,13 @@
 
 
 TEST(load_literal) {
-  SETUP();
+  SETUP_MACRO();
 
-  COMPARE_PREFIX(ldr(x10, 0x1234567890abcdef),  "ldr x10, pc+8");
-  COMPARE_PREFIX(ldr(w20, 0xfedcba09),  "ldr w20, pc+8");
-  COMPARE_PREFIX(ldr(d11, 1.234),  "ldr d11, pc+8");
-  COMPARE_PREFIX(ldr(s22, 2.5f),  "ldr s22, pc+8");
+  COMPARE_PREFIX(Ldr(x10, 0x1234567890abcdef),  "ldr x10, pc+0");
+  COMPARE_PREFIX(Ldr(w20, 0xfedcba09),  "ldr w20, pc+0");
+  COMPARE_PREFIX(Ldr(d11, 1.234),  "ldr d11, pc+0");
+  COMPARE_PREFIX(Ldr(s22, 2.5f),  "ldr s22, pc+0");
+  COMPARE_PREFIX(Ldrsw(x21, 0x80000000), "ldrsw x21, pc+0");
 
   CLEANUP();
 }
@@ -1841,8 +1863,8 @@
   VIXL_ASSERT(kTraceOpcode == 0xdeb2);
 
   // All Trace calls should produce the same instruction.
-  COMPARE(Trace(LOG_ALL, TRACE_ENABLE), "hlt #0xdeb2");
-  COMPARE(Trace(LOG_REGS, TRACE_DISABLE), "hlt #0xdeb2");
+  COMPARE_MACRO(Trace(LOG_ALL, TRACE_ENABLE), "hlt #0xdeb2");
+  COMPARE_MACRO(Trace(LOG_REGS, TRACE_DISABLE), "hlt #0xdeb2");
 
   CLEANUP();
 }
@@ -1856,8 +1878,8 @@
   VIXL_ASSERT(kLogOpcode == 0xdeb3);
 
   // All Log calls should produce the same instruction.
-  COMPARE(Log(LOG_ALL), "hlt #0xdeb3");
-  COMPARE(Log(LOG_SYS_REGS), "hlt #0xdeb3");
+  COMPARE_MACRO(Log(LOG_ALL), "hlt #0xdeb3");
+  COMPARE_MACRO(Log(LOG_SYS_REGS), "hlt #0xdeb3");
 
   CLEANUP();
 }
diff --git a/test/test-simulator-a64.cc b/test/test-simulator-a64.cc
index 95cd7eb..9a21824 100644
--- a/test/test-simulator-a64.cc
+++ b/test/test-simulator-a64.cc
@@ -54,8 +54,7 @@
 #ifdef USE_SIMULATOR
 
 #define SETUP()                                                               \
-  byte* buf = new byte[BUF_SIZE];                                             \
-  MacroAssembler masm(buf, BUF_SIZE);                                         \
+  MacroAssembler masm(BUF_SIZE);                                              \
   Decoder decoder;                                                            \
   Simulator* simulator = NULL;                                                \
   if (Cctest::run_debugger()) {                                               \
@@ -95,17 +94,15 @@
   masm.FinalizeCode()
 
 #define RUN()                                                                 \
-  simulator->RunFrom(reinterpret_cast<Instruction*>(buf))
+  simulator->RunFrom(masm.GetStartAddress<Instruction*>())
 
 #define TEARDOWN()                                                            \
-  delete simulator;                                                           \
-  delete[] buf;
+  delete simulator;
 
 #else     // USE_SIMULATOR
 
 #define SETUP()                                                               \
-  byte* buf = new byte[BUF_SIZE];                                             \
-  MacroAssembler masm(buf, BUF_SIZE);                                         \
+  MacroAssembler masm(BUF_SIZE);                                              \
   CPU::SetUp()
 
 #define START()                                                               \
@@ -117,17 +114,19 @@
   __ Ret();                                                                   \
   masm.FinalizeCode()
 
-#define RUN()                                                                 \
-  CPU::EnsureIAndDCacheCoherency(buf, BUF_SIZE);                              \
-  {                                                                           \
-    void (*test_function)(void);                                              \
-    VIXL_ASSERT(sizeof(buf) == sizeof(test_function));                        \
-    memcpy(&test_function, &buf, sizeof(buf));                                \
-    test_function();                                                          \
+#define RUN()                                                                  \
+  {                                                                            \
+    byte* buffer_start = masm.GetStartAddress<byte*>();                        \
+    size_t buffer_length = masm.CursorOffset();                                \
+    void (*test_function)(void);                                               \
+                                                                               \
+    CPU::EnsureIAndDCacheCoherency(buffer_start, buffer_length);               \
+    VIXL_STATIC_ASSERT(sizeof(buffer_start) == sizeof(test_function));         \
+    memcpy(&test_function, &buffer_start, sizeof(buffer_start));               \
+    test_function();                                                           \
   }
 
-#define TEARDOWN()                                                            \
-  delete[] buf;
+#define TEARDOWN()
 
 #endif    // USE_SIMULATOR
 
@@ -201,7 +200,10 @@
   __ Bind(&loop_n);
   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
 
-  (masm.*helper)(fd, fn);
+  {
+    CodeBufferCheckScope guard(&masm, kInstructionSize);
+    (masm.*helper)(fd, fn);
+  }
   __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
 
   __ Add(index_n, index_n, 1);
@@ -309,8 +311,11 @@
   __ Bind(&loop_m);
   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
 
-  (masm.*helper)(fd, fn, fm);
-  __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
+  {
+    CodeBufferCheckScope guard(&masm, kInstructionSize);
+    (masm.*helper)(fd, fn, fm);
+  }
+  __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
 
   __ Add(index_m, index_m, 1);
   __ Cmp(index_m, inputs_length);
@@ -432,7 +437,10 @@
   __ Bind(&loop_a);
   __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
 
-  (masm.*helper)(fd, fn, fm, fa);
+  {
+    CodeBufferCheckScope guard(&masm, kInstructionSize);
+    (masm.*helper)(fd, fn, fm, fa);
+  }
   __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
 
   __ Add(index_a, index_a, 1);
@@ -558,7 +566,10 @@
   __ Bind(&loop_m);
   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
 
-  (masm.*helper)(fn, fm);
+  {
+    CodeBufferCheckScope guard(&masm, kInstructionSize);
+    (masm.*helper)(fn, fm);
+  }
   __ Mrs(flags, NZCV);
   __ Ubfx(flags, flags, 28, 4);
   __ Strb(flags, MemOperand(out, 1, PostIndex));
@@ -678,7 +689,10 @@
   __ Bind(&loop_n);
   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
 
-  (masm.*helper)(fn, 0.0);
+  {
+    CodeBufferCheckScope guard(&masm, kInstructionSize);
+    (masm.*helper)(fn, 0.0);
+  }
   __ Mrs(flags, NZCV);
   __ Ubfx(flags, flags, 28, 4);
   __ Strb(flags, MemOperand(out, 1, PostIndex));
@@ -791,7 +805,10 @@
   __ Bind(&loop_n);
   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
 
-  (masm.*helper)(rd, fn);
+  {
+    CodeBufferCheckScope guard(&masm, kInstructionSize);
+    (masm.*helper)(rd, fn);
+  }
   __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex));
 
   __ Add(index_n, index_n, 1);
diff --git a/test/test-utils-a64.h b/test/test-utils-a64.h
index 5ecc72a..8884ffe 100644
--- a/test/test-utils-a64.h
+++ b/test/test-utils-a64.h
@@ -225,20 +225,6 @@
 // Clobber or ClobberFP functions.
 void Clobber(MacroAssembler* masm, CPURegList reg_list);
 
-
-template <typename T>
-T GetPCAddress(MacroAssembler* masm, byte * buffer) {
-  VIXL_STATIC_ASSERT(sizeof(T) == sizeof(uintptr_t));
-
-  uintptr_t address =
-      reinterpret_cast<uintptr_t>(buffer) + masm->SizeOfCodeGenerated();
-
-  // Use a C-style cast to get static_cast behaviour for integral types (T), and
-  // reinterpret_cast behaviour for other types.
-  return (T)address;
-}
-
-
 }  // namespace vixl
 
 #endif  // VIXL_A64_TEST_UTILS_A64_H_
diff --git a/tools/make_instruction_doc.pl b/tools/make_instruction_doc.pl
index 5457c38..e1fa37e 100755
--- a/tools/make_instruction_doc.pl
+++ b/tools/make_instruction_doc.pl
@@ -30,7 +30,7 @@
 my $hfile = "src/a64/assembler-a64.h";
 
 # Extra pseudo instructions added to AArch64.
-my @extras = qw/bind debug dci dc32 dc64/;
+my @extras = qw/bind debug dci dc32 dc64 place/;
 
 my %inst = ();  # Global hash of instructions.
 
diff --git a/tools/presubmit.py b/tools/presubmit.py
index cb84ac2..f0cc2ba 100755
--- a/tools/presubmit.py
+++ b/tools/presubmit.py
@@ -61,8 +61,7 @@
   sim_default = 'off' if platform.machine() == 'aarch64' else 'on'
   result.add_argument('--simulator', action='store', choices=['on', 'off'],
                       default=sim_default,
-                      help='''Explicitly enable or disable the simulator. On
-                      this system, the default is "''' + sim_default + '".')
+                      help='Explicitly enable or disable the simulator.')
   return result.parse_args()
 
 
@@ -135,7 +134,9 @@
       name += ' (%s)' % ('debugger' if debugger else 'simulator')
     Test.__init__(self, name)
 
-    self.cctest = './cctest_sim'
+    self.cctest = './cctest'
+    if simulator:
+      self.cctest += '_sim'
     if mode == 'debug':
       self.cctest += '_g'
 
diff --git a/tools/test.py b/tools/test.py
index e79cedd..4344a9d 100755
--- a/tools/test.py
+++ b/tools/test.py
@@ -30,6 +30,7 @@
 import sys
 import argparse
 import re
+import platform
 import subprocess
 import multiprocessing
 import time
@@ -66,6 +67,10 @@
                       help='''Runs the tests using N jobs. If the option is set
                       but no value is provided, the script will use as many jobs
                       as it thinks useful.''')
+  sim_default = 'off' if platform.machine() == 'aarch64' else 'on'
+  result.add_argument('--simulator', action='store', choices=['on', 'off'],
+                      default=sim_default,
+                      help='Explicitly enable or disable the simulator.')
   return result.parse_args()