blob: b049c420cb66648f6f2785be8b152d1869a95791 [file] [log] [blame]
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_AARCH64_SIMULATOR_AARCH64_H_
#define VIXL_AARCH64_SIMULATOR_AARCH64_H_
#include <vector>
#include "../globals-vixl.h"
#include "../utils-vixl.h"
#include "cpu-features.h"
#include "abi-aarch64.h"
#include "cpu-features-auditor-aarch64.h"
#include "disasm-aarch64.h"
#include "instructions-aarch64.h"
#include "simulator-constants-aarch64.h"
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// These are only used for the ABI feature, and depend on checks performed for
// it.
#ifdef VIXL_HAS_ABI_SUPPORT
#include <tuple>
#if __cplusplus >= 201402L
// Required for `std::index_sequence`
#include <utility>
#endif
#endif
namespace vixl {
namespace aarch64 {
// Representation of memory, with typed getters and setters for access.
// Representation of memory, with typed getters and setters for access.
class Memory {
 public:
  // Strip the address tag bits from `address`. `T` may be either an integral
  // type or a pointer type.
  template <typename T>
  static T AddressUntag(T address) {
    // A C-style cast is used deliberately: reinterpret_cast is unable to
    // convert one integral type to another.
    uint64_t raw_bits = (uint64_t)address;
    return (T)(raw_bits & ~kAddressTagMask);
  }

  // Load a value of type T from (untagged) `address`.
  template <typename T, typename A>
  static T Read(A address) {
    address = AddressUntag(address);
    T result;
    VIXL_ASSERT((sizeof(result) == 1) || (sizeof(result) == 2) ||
                (sizeof(result) == 4) || (sizeof(result) == 8) ||
                (sizeof(result) == 16));
    memcpy(&result, reinterpret_cast<const char*>(address), sizeof(result));
    return result;
  }

  // Load an unsigned integer of `size_in_bytes` bytes from `address`,
  // zero-extended to 64 bits.
  template <typename A>
  static uint64_t Read(int size_in_bytes, A address) {
    switch (size_in_bytes) {
      case 1:
        return Read<uint8_t>(address);
      case 2:
        return Read<uint16_t>(address);
      case 4:
        return Read<uint32_t>(address);
      case 8:
        return Read<uint64_t>(address);
    }
    VIXL_UNREACHABLE();
    return 0;
  }

  // Store `value` at (untagged) `address`.
  template <typename T, typename A>
  static void Write(A address, T value) {
    VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
                (sizeof(value) == 4) || (sizeof(value) == 8) ||
                (sizeof(value) == 16));
    memcpy(reinterpret_cast<char*>(AddressUntag(address)), &value,
           sizeof(value));
  }
};
// Represent a register (r0-r31, v0-v31, z0-z31, p0-p15).
// Represent a register (r0-r31, v0-v31, z0-z31, p0-p15).
//
// The value lives in a fixed-size byte array (`value_`); the logical size may
// be smaller and can be changed at runtime via SetSizeInBytes/SetSizeInBits.
template <unsigned kMaxSizeInBits>
class SimRegisterBase {
 public:
  static const unsigned kMaxSizeInBytes = kMaxSizeInBits / kBitsPerByte;
  VIXL_STATIC_ASSERT((kMaxSizeInBytes * kBitsPerByte) == kMaxSizeInBits);

  SimRegisterBase() : size_in_bytes_(kMaxSizeInBytes) { Clear(); }

  unsigned GetSizeInBits() const { return size_in_bytes_ * kBitsPerByte; }
  unsigned GetSizeInBytes() const { return size_in_bytes_; }

  // Set the current (logical) size of the register. It must not exceed the
  // backing storage, and must be a whole number of bytes.
  void SetSizeInBytes(unsigned size_in_bytes) {
    VIXL_ASSERT(size_in_bytes <= kMaxSizeInBytes);
    size_in_bytes_ = size_in_bytes;
  }
  void SetSizeInBits(unsigned size_in_bits) {
    VIXL_ASSERT(size_in_bits <= kMaxSizeInBits);
    VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0);
    SetSizeInBytes(size_in_bits / kBitsPerByte);
  }

  // Write the specified value. The value is zero-extended if necessary.
  template <typename T>
  void Write(T new_value) {
    // All AArch64 registers are zero-extending.
    if (sizeof(new_value) < GetSizeInBytes()) Clear();
    WriteLane(new_value, 0);
    NotifyRegisterWrite();
  }
  template <typename T>
  VIXL_DEPRECATED("Write", void Set(T new_value)) {
    Write(new_value);
  }

  // Zero the whole backing store (not just the current logical size).
  void Clear() {
    memset(value_, 0, kMaxSizeInBytes);
    NotifyRegisterWrite();
  }

  // Insert a typed value into a register, leaving the rest of the register
  // unchanged. The lane parameter indicates where in the register the value
  // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where
  // 0 represents the least significant bits.
  template <typename T>
  void Insert(int lane, T new_value) {
    WriteLane(new_value, lane);
    NotifyRegisterWrite();
  }

  // Get the value as the specified type. The value is truncated if necessary.
  template <typename T>
  T Get() const {
    return GetLane<T>(0);
  }

  // Get the lane value as the specified type. The value is truncated if
  // necessary.
  template <typename T>
  T GetLane(int lane) const {
    T result;
    ReadLane(&result, lane);
    return result;
  }
  template <typename T>
  VIXL_DEPRECATED("GetLane", T Get(int lane) const) {
    return GetLane(lane);
  }

  // Get the value of a specific bit, indexed from the least-significant bit of
  // lane 0.
  bool GetBit(int bit) const {
    int bit_in_byte = bit % (sizeof(value_[0]) * kBitsPerByte);
    int byte = bit / (sizeof(value_[0]) * kBitsPerByte);
    return ((value_[byte] >> bit_in_byte) & 1) != 0;
  }

  // Return a pointer to the raw, underlying byte array.
  const uint8_t* GetBytes() const { return value_; }

  // TODO: Make this return a map of updated bytes, so that we can highlight
  // updated lanes for load-and-insert. (That never happens for scalar code, but
  // NEON has some instructions that can update individual lanes.)
  bool WrittenSinceLastLog() const { return written_since_last_log_; }

  void NotifyRegisterLogged() { written_since_last_log_ = false; }

 protected:
  uint8_t value_[kMaxSizeInBytes];

  // Current logical size, in bytes; always <= kMaxSizeInBytes.
  unsigned size_in_bytes_;

  // Helpers to aid with register tracing.
  bool written_since_last_log_;

  void NotifyRegisterWrite() { written_since_last_log_ = true; }

 private:
  // Copy one lane out of the backing store. Bounds are checked against the
  // current logical size, not the maximum size.
  template <typename T>
  void ReadLane(T* dst, int lane) const {
    VIXL_ASSERT(lane >= 0);
    VIXL_ASSERT((sizeof(*dst) + (lane * sizeof(*dst))) <= GetSizeInBytes());
    memcpy(dst, &value_[lane * sizeof(*dst)], sizeof(*dst));
  }
  // Copy one lane into the backing store, leaving other lanes untouched.
  template <typename T>
  void WriteLane(T src, int lane) {
    VIXL_ASSERT(lane >= 0);
    VIXL_ASSERT((sizeof(src) + (lane * sizeof(src))) <= GetSizeInBytes());
    memcpy(&value_[lane * sizeof(src)], &src, sizeof(src));
  }

  // The default ReadLane and WriteLane methods assume what we are copying is
  // "trivially copyable" by using memcpy. We have to provide alternative
  // implementations for SimFloat16 which cannot be copied this way.
  void ReadLane(vixl::internal::SimFloat16* dst, int lane) const {
    uint16_t rawbits;
    ReadLane(&rawbits, lane);
    *dst = RawbitsToFloat16(rawbits);
  }
  void WriteLane(vixl::internal::SimFloat16 src, int lane) {
    WriteLane(Float16ToRawbits(src), lane);
  }
};
// Concrete register-file entry types.
typedef SimRegisterBase<kXRegSize> SimRegister;      // r0-r31
typedef SimRegisterBase<kPRegMaxSize> SimPRegister;  // p0-p15
// FFR has the same format as a predicate register.
typedef SimPRegister SimFFRRegister;
// v0-v31 and z0-z31
// v0-v31 and z0-z31
class SimVRegister : public SimRegisterBase<kZRegMaxSize> {
 public:
  SimVRegister() : SimRegisterBase<kZRegMaxSize>(), z_access_pending_(false) {}

  // Record that the register was accessed as a Z (SVE) register, for tracing.
  void NotifyAccessAsZ() { z_access_pending_ = true; }

  // Reset tracing state once this register has been logged.
  void NotifyRegisterLogged() {
    SimRegisterBase<kZRegMaxSize>::NotifyRegisterLogged();
    z_access_pending_ = false;
  }

  bool AccessedAsZSinceLastLog() const { return z_access_pending_; }

 private:
  // True if a Z-form access has occurred since the last log.
  bool z_access_pending_;
};
// Representation of a SVE predicate register.
// Representation of a SVE predicate register.
class LogicPRegister {
 public:
  // Wrap `other` by reference; accessors read and write the underlying
  // SimPRegister directly.
  inline LogicPRegister(
      SimPRegister& other)  // NOLINT(runtime/references)(runtime/explicit)
      : register_(other) {}

  // Set a conveniently-sized block to 16 bits as the minimum predicate length
  // is 16 bits and allow to be increased to multiples of 16 bits.
  typedef uint16_t ChunkType;

  // Assign a bit into the end position of the specified lane.
  // The bit is zero-extended if necessary.
  void SetActive(VectorFormat vform, int lane_index, bool value) {
    int psize = LaneSizeInBytesFromFormat(vform);
    // One predicate bit per byte of the corresponding vector lane.
    int bit_index = lane_index * psize;
    int byte_index = bit_index / kBitsPerByte;
    int bit_offset = bit_index % kBitsPerByte;
    uint8_t byte = register_.GetLane<uint8_t>(byte_index);
    register_.Insert(byte_index, ZeroExtend(byte, bit_offset, psize, value));
  }

  // Return the active bit (the lowest bit of the predicate element) for the
  // specified lane.
  bool IsActive(VectorFormat vform, int lane_index) const {
    int psize = LaneSizeInBytesFromFormat(vform);
    int bit_index = lane_index * psize;
    int byte_index = bit_index / kBitsPerByte;
    int bit_offset = bit_index % kBitsPerByte;
    uint8_t byte = register_.GetLane<uint8_t>(byte_index);
    return ExtractBit(byte, bit_offset);
  }

  // The accessors for bulk processing.
  int GetChunkCount() const {
    VIXL_ASSERT((register_.GetSizeInBytes() % sizeof(ChunkType)) == 0);
    return register_.GetSizeInBytes() / sizeof(ChunkType);
  }

  ChunkType GetChunk(int lane) const { return GetActiveMask<ChunkType>(lane); }
  void SetChunk(int lane, ChunkType new_value) {
    SetActiveMask(lane, new_value);
  }

  // Set every bit of the predicate register, one chunk at a time.
  void SetAllBits() {
    int chunk_size = sizeof(ChunkType) * kBitsPerByte;
    ChunkType bits = GetUintMask(chunk_size);
    for (int lane = 0;
         lane < (static_cast<int>(register_.GetSizeInBits() / chunk_size));
         lane++) {
      SetChunk(lane, bits);
    }
  }

  // Read a raw mask of type T starting at chunk `lane`.
  template <typename T>
  T GetActiveMask(int lane) const {
    return register_.GetLane<T>(lane);
  }

  // Write a raw mask of type T at chunk `lane`.
  template <typename T>
  void SetActiveMask(int lane, T new_value) {
    register_.Insert<T>(lane, new_value);
  }

  void Clear() { register_.Clear(); }

  // Return true if both wrappers refer to the same underlying register.
  bool Aliases(const LogicPRegister& other) const {
    return &register_ == &other.register_;
  }

 private:
  // The bit assignment is zero-extended to fill the size of predicate element.
  // Returns `byte` with the psize-bit field starting at `index` replaced by
  // `value` (in the lowest bit) zero-extended to the field width.
  uint8_t ZeroExtend(uint8_t byte, int index, int psize, bool value) {
    VIXL_ASSERT(index >= 0);
    VIXL_ASSERT(index + psize <= kBitsPerByte);
    int bits = value ? 1 : 0;
    switch (psize) {
      case 1:
        AssignBit(byte, index, bits);
        break;
      case 2:
        AssignBits(byte, index, 0x03, bits);
        break;
      case 4:
        AssignBits(byte, index, 0x0f, bits);
        break;
      case 8:
        AssignBits(byte, index, 0xff, bits);
        break;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
    return byte;
  }

  SimPRegister& register_;
};
// Representation of a vector register, with typed getters and setters for lanes
// and additional information to represent lane state.
class LogicVRegister {
 public:
  // Wrap `other` by reference; lane accessors read and write the underlying
  // SimVRegister directly. Per-lane saturation and rounding state starts
  // cleared.
  inline LogicVRegister(
      SimVRegister& other)  // NOLINT(runtime/references)(runtime/explicit)
      : register_(other) {
    for (size_t i = 0; i < ArrayLength(saturated_); i++) {
      saturated_[i] = kNotSaturated;
    }
    for (size_t i = 0; i < ArrayLength(round_); i++) {
      round_[i] = 0;
    }
  }

  // Read lane `index` as a sign-extended 64-bit value.
  int64_t Int(VectorFormat vform, int index) const {
    if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
    int64_t element;
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        element = register_.GetLane<int8_t>(index);
        break;
      case 16:
        element = register_.GetLane<int16_t>(index);
        break;
      case 32:
        element = register_.GetLane<int32_t>(index);
        break;
      case 64:
        element = register_.GetLane<int64_t>(index);
        break;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
    return element;
  }

  // Read lane `index` as a zero-extended 64-bit value.
  uint64_t Uint(VectorFormat vform, int index) const {
    if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
    uint64_t element;
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        element = register_.GetLane<uint8_t>(index);
        break;
      case 16:
        element = register_.GetLane<uint16_t>(index);
        break;
      case 32:
        element = register_.GetLane<uint32_t>(index);
        break;
      case 64:
        element = register_.GetLane<uint64_t>(index);
        break;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
    return element;
  }

  // Read lane `index`, shifted left so its most-significant bit becomes
  // bit 63 of the result.
  uint64_t UintLeftJustified(VectorFormat vform, int index) const {
    return Uint(vform, index) << (64 - LaneSizeInBitsFromFormat(vform));
  }

  // As UintLeftJustified, but with the result reinterpreted as signed
  // (bit-pattern preserving, via memcpy).
  int64_t IntLeftJustified(VectorFormat vform, int index) const {
    uint64_t value = UintLeftJustified(vform, index);
    int64_t result;
    memcpy(&result, &value, sizeof(result));
    return result;
  }

  // Write the low lane-size bits of `value` into lane `index`. These setters
  // are `const` because they mutate the wrapped register, not this wrapper.
  void SetInt(VectorFormat vform, int index, int64_t value) const {
    if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        register_.Insert(index, static_cast<int8_t>(value));
        break;
      case 16:
        register_.Insert(index, static_cast<int16_t>(value));
        break;
      case 32:
        register_.Insert(index, static_cast<int32_t>(value));
        break;
      case 64:
        register_.Insert(index, static_cast<int64_t>(value));
        break;
      default:
        VIXL_UNREACHABLE();
        return;
    }
  }

  // Write every lane from `src`, clearing any bytes above the format's
  // register size first.
  void SetIntArray(VectorFormat vform, const int64_t* src) const {
    ClearForWrite(vform);
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      SetInt(vform, i, src[i]);
    }
  }

  // Unsigned counterpart of SetInt.
  void SetUint(VectorFormat vform, int index, uint64_t value) const {
    if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        register_.Insert(index, static_cast<uint8_t>(value));
        break;
      case 16:
        register_.Insert(index, static_cast<uint16_t>(value));
        break;
      case 32:
        register_.Insert(index, static_cast<uint32_t>(value));
        break;
      case 64:
        register_.Insert(index, static_cast<uint64_t>(value));
        break;
      default:
        VIXL_UNREACHABLE();
        return;
    }
  }

  // Unsigned counterpart of SetIntArray.
  void SetUintArray(VectorFormat vform, const uint64_t* src) const {
    ClearForWrite(vform);
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      SetUint(vform, i, src[i]);
    }
  }

  // Load an msize_in_bits-wide signed value from `addr` and store it in lane
  // `index`, sign-extended to the format's lane size (which must be at least
  // as wide).
  void ReadIntFromMem(VectorFormat vform,
                      unsigned msize_in_bits,
                      int index,
                      uint64_t addr) const {
    if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
    int64_t value;
    switch (msize_in_bits) {
      case 8:
        value = Memory::Read<int8_t>(addr);
        break;
      case 16:
        value = Memory::Read<int16_t>(addr);
        break;
      case 32:
        value = Memory::Read<int32_t>(addr);
        break;
      case 64:
        value = Memory::Read<int64_t>(addr);
        break;
      default:
        VIXL_UNREACHABLE();
        return;
    }

    unsigned esize_in_bits = LaneSizeInBitsFromFormat(vform);
    VIXL_ASSERT(esize_in_bits >= msize_in_bits);
    switch (esize_in_bits) {
      case 8:
        register_.Insert(index, static_cast<int8_t>(value));
        break;
      case 16:
        register_.Insert(index, static_cast<int16_t>(value));
        break;
      case 32:
        register_.Insert(index, static_cast<int32_t>(value));
        break;
      case 64:
        register_.Insert(index, static_cast<int64_t>(value));
        break;
      default:
        VIXL_UNREACHABLE();
        return;
    }
  }

  // As ReadIntFromMem, but zero-extending the loaded value.
  void ReadUintFromMem(VectorFormat vform,
                       unsigned msize_in_bits,
                       int index,
                       uint64_t addr) const {
    if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
    uint64_t value;
    switch (msize_in_bits) {
      case 8:
        value = Memory::Read<uint8_t>(addr);
        break;
      case 16:
        value = Memory::Read<uint16_t>(addr);
        break;
      case 32:
        value = Memory::Read<uint32_t>(addr);
        break;
      case 64:
        value = Memory::Read<uint64_t>(addr);
        break;
      default:
        VIXL_UNREACHABLE();
        return;
    }

    unsigned esize_in_bits = LaneSizeInBitsFromFormat(vform);
    VIXL_ASSERT(esize_in_bits >= msize_in_bits);
    switch (esize_in_bits) {
      case 8:
        register_.Insert(index, static_cast<uint8_t>(value));
        break;
      case 16:
        register_.Insert(index, static_cast<uint16_t>(value));
        break;
      case 32:
        register_.Insert(index, static_cast<uint32_t>(value));
        break;
      case 64:
        register_.Insert(index, static_cast<uint64_t>(value));
        break;
      default:
        VIXL_UNREACHABLE();
        return;
    }
  }

  // Load a lane-sized value from `addr` into lane `index` (memory and element
  // sizes are equal here).
  void ReadUintFromMem(VectorFormat vform, int index, uint64_t addr) const {
    if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        register_.Insert(index, Memory::Read<uint8_t>(addr));
        break;
      case 16:
        register_.Insert(index, Memory::Read<uint16_t>(addr));
        break;
      case 32:
        register_.Insert(index, Memory::Read<uint32_t>(addr));
        break;
      case 64:
        register_.Insert(index, Memory::Read<uint64_t>(addr));
        break;
      default:
        VIXL_UNREACHABLE();
        return;
    }
  }

  // Store lane `index`, truncated to the lane size, at `addr`.
  // NOTE(review): unlike the accessors above, this switch has no default
  // case, so unexpected lane sizes silently store nothing — confirm this is
  // intentional.
  void WriteUintToMem(VectorFormat vform, int index, uint64_t addr) const {
    if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
    uint64_t value = Uint(vform, index);
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        Memory::Write(addr, static_cast<uint8_t>(value));
        break;
      case 16:
        Memory::Write(addr, static_cast<uint16_t>(value));
        break;
      case 32:
        Memory::Write(addr, static_cast<uint32_t>(value));
        break;
      case 64:
        Memory::Write(addr, value);
        break;
    }
  }

  // Read lane `index` as a floating-point type (T selects the width).
  template <typename T>
  T Float(int index) const {
    return register_.GetLane<T>(index);
  }

  // Write a floating-point value into lane `index`.
  template <typename T>
  void SetFloat(int index, T value) const {
    register_.Insert(index, value);
  }

  // As above, but also recording Z-register access for SVE formats.
  template <typename T>
  void SetFloat(VectorFormat vform, int index, T value) const {
    if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
    register_.Insert(index, value);
  }

  // When setting a result in a register larger than the result itself, the top
  // bits of the register must be cleared.
  void ClearForWrite(VectorFormat vform) const {
    // SVE destinations write whole registers, so we have nothing to clear.
    if (IsSVEFormat(vform)) return;

    // Zero the bytes above the format's register size, one byte lane at a
    // time (kFormat16B gives byte-granular indexing).
    unsigned size = RegisterSizeInBytesFromFormat(vform);
    for (unsigned i = size; i < register_.GetSizeInBytes(); i++) {
      SetUint(kFormat16B, i, 0);
    }
  }

  // Saturation state for each lane of a vector.
  enum Saturation {
    kNotSaturated = 0,
    kSignedSatPositive = 1 << 0,
    kSignedSatNegative = 1 << 1,
    kSignedSatMask = kSignedSatPositive | kSignedSatNegative,
    kSignedSatUndefined = kSignedSatMask,
    kUnsignedSatPositive = 1 << 2,
    kUnsignedSatNegative = 1 << 3,
    kUnsignedSatMask = kUnsignedSatPositive | kUnsignedSatNegative,
    kUnsignedSatUndefined = kUnsignedSatMask
  };

  // Getters for saturation state.
  Saturation GetSignedSaturation(int index) {
    return static_cast<Saturation>(saturated_[index] & kSignedSatMask);
  }

  Saturation GetUnsignedSaturation(int index) {
    return static_cast<Saturation>(saturated_[index] & kUnsignedSatMask);
  }

  // Setters for saturation state.
  void ClearSat(int index) { saturated_[index] = kNotSaturated; }

  void SetSignedSat(int index, bool positive) {
    SetSatFlag(index, positive ? kSignedSatPositive : kSignedSatNegative);
  }

  void SetUnsignedSat(int index, bool positive) {
    SetSatFlag(index, positive ? kUnsignedSatPositive : kUnsignedSatNegative);
  }

  // OR `sat` into the lane's saturation state. A lane must not be flagged as
  // saturated both positively and negatively in the same direction class.
  void SetSatFlag(int index, Saturation sat) {
    saturated_[index] = static_cast<Saturation>(saturated_[index] | sat);
    VIXL_ASSERT((sat & kUnsignedSatMask) != kUnsignedSatUndefined);
    VIXL_ASSERT((sat & kSignedSatMask) != kSignedSatUndefined);
  }

  // Saturate lanes of a vector based on saturation state.
  LogicVRegister& SignedSaturate(VectorFormat vform) {
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      Saturation sat = GetSignedSaturation(i);
      if (sat == kSignedSatPositive) {
        SetInt(vform, i, MaxIntFromFormat(vform));
      } else if (sat == kSignedSatNegative) {
        SetInt(vform, i, MinIntFromFormat(vform));
      }
    }
    return *this;
  }

  LogicVRegister& UnsignedSaturate(VectorFormat vform) {
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      Saturation sat = GetUnsignedSaturation(i);
      if (sat == kUnsignedSatPositive) {
        SetUint(vform, i, MaxUintFromFormat(vform));
      } else if (sat == kUnsignedSatNegative) {
        SetUint(vform, i, 0);
      }
    }
    return *this;
  }

  // Getter for rounding state.
  bool GetRounding(int index) { return round_[index]; }

  // Setter for rounding state.
  void SetRounding(int index, bool round) { round_[index] = round; }

  // Round lanes of a vector based on rounding state.
  LogicVRegister& Round(VectorFormat vform) {
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      SetUint(vform, i, Uint(vform, i) + (GetRounding(i) ? 1 : 0));
    }
    return *this;
  }

  // Unsigned halve lanes of a vector, and use the saturation state to set the
  // top bit. The discarded low bit is recorded as rounding state.
  LogicVRegister& Uhalve(VectorFormat vform) {
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      uint64_t val = Uint(vform, i);
      SetRounding(i, (val & 1) == 1);
      val >>= 1;
      if (GetUnsignedSaturation(i) != kNotSaturated) {
        // If the operation causes unsigned saturation, the bit shifted into the
        // most significant bit must be set.
        val |= (MaxUintFromFormat(vform) >> 1) + 1;
      }
      SetInt(vform, i, val);
    }
    return *this;
  }

  // Signed halve lanes of a vector, and use the carry state to set the top bit.
  LogicVRegister& Halve(VectorFormat vform) {
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {
      int64_t val = Int(vform, i);
      SetRounding(i, (val & 1) == 1);
      val >>= 1;
      if (GetSignedSaturation(i) != kNotSaturated) {
        // If the operation causes signed saturation, the sign bit must be
        // inverted.
        val ^= (MaxUintFromFormat(vform) >> 1) + 1;
      }
      SetInt(vform, i, val);
    }
    return *this;
  }

  // For SVE formats the lane count depends on the wrapped register's current
  // size; otherwise defer to the architectural lane count for the format.
  int LaneCountFromFormat(VectorFormat vform) const {
    if (IsSVEFormat(vform)) {
      return register_.GetSizeInBits() / LaneSizeInBitsFromFormat(vform);
    } else {
      return vixl::aarch64::LaneCountFromFormat(vform);
    }
  }

 private:
  SimVRegister& register_;

  // Allocate one saturation state entry per lane; largest register is type Q,
  // and lanes can be a minimum of one byte wide.
  Saturation saturated_[kZRegMaxSizeInBytes];

  // Allocate one rounding state entry per lane.
  bool round_[kZRegMaxSizeInBytes];
};
// Represent an SVE addressing mode and abstract per-lane address generation to
// make iteration easy.
//
// Contiguous accesses are described with a simple base address, the memory
// occupied by each lane (`SetMsizeInBytesLog2()`) and the number of elements in
// each struct (`SetRegCount()`).
//
// Scatter-gather accesses also require a SimVRegister and information about how
// to extract lanes from it.
class LogicSVEAddressVector {
 public:
  // scalar-plus-scalar
  // scalar-plus-immediate
  //
  // `base` is the fully-computed scalar base address; no offset vector is
  // involved, so the access is contiguous.
  explicit LogicSVEAddressVector(uint64_t base)
      : base_(base),
        msize_in_bytes_log2_(kUnknownMsizeInBytesLog2),
        reg_count_(1),
        vector_(NULL),
        vector_form_(kFormatUndefined),
        vector_mod_(NO_SVE_OFFSET_MODIFIER),
        vector_shift_(0) {}

  // scalar-plus-vector
  // vector-plus-immediate
  // `base` should be the constant used for each element. That is, the value
  // of `xn`, or `#<imm>`.
  // `vector` should be the SimVRegister with offsets for each element. The
  // vector format must be specified; SVE scatter/gather accesses typically
  // support both 32-bit and 64-bit addressing.
  //
  // `mod` and `shift` correspond to the modifiers applied to each element in
  // scalar-plus-vector forms, such as those used for unpacking and
  // sign-extension. They are not used for vector-plus-immediate.
  LogicSVEAddressVector(uint64_t base,
                        const SimVRegister* vector,
                        VectorFormat vform,
                        SVEOffsetModifier mod = NO_SVE_OFFSET_MODIFIER,
                        int shift = 0)
      : base_(base),
        msize_in_bytes_log2_(kUnknownMsizeInBytesLog2),
        reg_count_(1),
        vector_(vector),
        vector_form_(vform),
        vector_mod_(mod),
        vector_shift_(shift) {}

  // Set `msize` -- the memory occupied by each lane -- for address
  // calculations.
  void SetMsizeInBytesLog2(int msize_in_bytes_log2) {
    VIXL_ASSERT(msize_in_bytes_log2 >= static_cast<int>(kBRegSizeInBytesLog2));
    VIXL_ASSERT(msize_in_bytes_log2 <= static_cast<int>(kDRegSizeInBytesLog2));
    msize_in_bytes_log2_ = msize_in_bytes_log2;
  }

  // Return true once SetMsizeInBytesLog2() has been called.
  bool HasMsize() const {
    return msize_in_bytes_log2_ != kUnknownMsizeInBytesLog2;
  }

  int GetMsizeInBytesLog2() const {
    VIXL_ASSERT(HasMsize());
    return msize_in_bytes_log2_;
  }
  int GetMsizeInBitsLog2() const {
    return GetMsizeInBytesLog2() + kBitsPerByteLog2;
  }
  int GetMsizeInBytes() const { return 1 << GetMsizeInBytesLog2(); }
  int GetMsizeInBits() const { return 1 << GetMsizeInBitsLog2(); }

  // Set the number of elements per struct (e.g. 3 for ld3/st3).
  void SetRegCount(int reg_count) {
    VIXL_ASSERT(reg_count >= 1);  // E.g. ld1/st1
    VIXL_ASSERT(reg_count <= 4);  // E.g. ld4/st4
    reg_count_ = reg_count;
  }

  int GetRegCount() const { return reg_count_; }

  // Full per-element address calculation for structured accesses.
  //
  // Note that the register number argument (`reg`) is zero-based.
  uint64_t GetElementAddress(int lane, int reg) const {
    VIXL_ASSERT(reg < GetRegCount());
    // Individual structures are always contiguous in memory, so this
    // implementation works for both contiguous and scatter-gather addressing.
    return GetStructAddress(lane) + (reg * GetMsizeInBytes());
  }

  // Full per-struct address calculation for structured accesses.
  uint64_t GetStructAddress(int lane) const;

  bool IsContiguous() const { return vector_ == NULL; }
  bool IsScatterGather() const { return !IsContiguous(); }

 private:
  uint64_t base_;
  int msize_in_bytes_log2_;
  int reg_count_;

  // Non-NULL only for scatter-gather forms.
  const SimVRegister* vector_;
  VectorFormat vector_form_;
  SVEOffsetModifier vector_mod_;
  int vector_shift_;

  // Sentinel value held by msize_in_bytes_log2_ until SetMsizeInBytesLog2()
  // is called.
  static const int kUnknownMsizeInBytesLog2 = -1;
};
// The proper way to initialize a simulated system register (such as NZCV) is as
// follows:
// SimSystemRegister nzcv = SimSystemRegister::DefaultValueFor(NZCV);
class SimSystemRegister {
 public:
  // The default constructor represents a register which has no writable bits.
  // It is not possible to set its value to anything other than 0.
  SimSystemRegister() : value_(0), write_ignore_mask_(0xffffffff) {}

  uint32_t GetRawValue() const { return value_; }
  VIXL_DEPRECATED("GetRawValue", uint32_t RawValue() const) {
    return GetRawValue();
  }

  // Write `new_value`, preserving any bits covered by the write-ignore mask.
  void SetRawValue(uint32_t new_value) {
    value_ = (value_ & write_ignore_mask_) | (new_value & ~write_ignore_mask_);
  }

  uint32_t ExtractBits(int msb, int lsb) const {
    return ExtractUnsignedBitfield32(msb, lsb, value_);
  }
  VIXL_DEPRECATED("ExtractBits", uint32_t Bits(int msb, int lsb) const) {
    return ExtractBits(msb, lsb);
  }

  int32_t ExtractSignedBits(int msb, int lsb) const {
    return ExtractSignedBitfield32(msb, lsb, value_);
  }
  VIXL_DEPRECATED("ExtractSignedBits",
                  int32_t SignedBits(int msb, int lsb) const) {
    return ExtractSignedBits(msb, lsb);
  }

  void SetBits(int msb, int lsb, uint32_t bits);

  // Default system register values.
  static SimSystemRegister DefaultValueFor(SystemRegister id);

#define DEFINE_GETTER(Name, HighBit, LowBit, Func)                             \
  uint32_t Get##Name() const { return this->Func(HighBit, LowBit); }          \
  VIXL_DEPRECATED("Get" #Name, uint32_t Name() const) { return Get##Name(); } \
  void Set##Name(uint32_t bits) { SetBits(HighBit, LowBit, bits); }
#define DEFINE_WRITE_IGNORE_MASK(Name, Mask) \
  static const uint32_t Name##WriteIgnoreMask = ~static_cast<uint32_t>(Mask);

  SYSTEM_REGISTER_FIELDS_LIST(DEFINE_GETTER, DEFINE_WRITE_IGNORE_MASK)

// Undefine the helper macros that were actually defined above. The previous
// code undefined the non-existent DEFINE_ZERO_BITS instead, so
// DEFINE_WRITE_IGNORE_MASK leaked into every translation unit that includes
// this header.
#undef DEFINE_WRITE_IGNORE_MASK
#undef DEFINE_GETTER

 protected:
  // Most system registers only implement a few of the bits in the word. Other
  // bits are "read-as-zero, write-ignored". The write_ignore_mask argument
  // describes the bits which are not modifiable.
  SimSystemRegister(uint32_t value, uint32_t write_ignore_mask)
      : value_(value), write_ignore_mask_(write_ignore_mask) {}

  uint32_t value_;
  uint32_t write_ignore_mask_;
};
class SimExclusiveLocalMonitor {
 public:
  SimExclusiveLocalMonitor() : kSkipClearProbability(8), seed_(0x87654321) {
    Clear();
  }

  // Clear the exclusive monitor (like clrex).
  void Clear() {
    address_ = 0;
    size_ = 0;
  }

  // Clear the exclusive monitor most of the time, modelling spurious monitor
  // loss; the marked range survives roughly one call in
  // kSkipClearProbability.
  void MaybeClear() {
    bool keep = ((seed_ % kSkipClearProbability) == 0);
    if (!keep) Clear();
    // Step seed_ with a Lehmer ("minstd") linear congruential generator.
    seed_ = (seed_ * 48271) % 2147483647;
  }

  // Mark the address range for exclusive access (like load-exclusive).
  void MarkExclusive(uint64_t address, size_t size) {
    address_ = address;
    size_ = size;
  }

  // Return true if the address range is marked (like store-exclusive).
  // This helper doesn't implicitly clear the monitor.
  bool IsExclusive(uint64_t address, size_t size) {
    VIXL_ASSERT(size > 0);
    // Be pedantic: the size as well as the address must match exactly.
    return (address == address_) && (size == size_);
  }

 private:
  uint64_t address_;
  size_t size_;
  const int kSkipClearProbability;
  uint32_t seed_;
};
// We can't accurately simulate the global monitor since it depends on external
// influences. Instead, this implementation occasionally causes accesses to
// fail, according to kPassProbability.
class SimExclusiveGlobalMonitor {
 public:
  SimExclusiveGlobalMonitor() : kPassProbability(8), seed_(0x87654321) {}

  // Pseudo-randomly report whether the (ignored) address range is still held
  // exclusively; succeeds with probability (kPassProbability - 1) out of
  // kPassProbability.
  bool IsExclusive(uint64_t address, size_t size) {
    USE(address, size);
    bool result = (seed_ % kPassProbability) != 0;
    // Step seed_ with a Lehmer ("minstd") linear congruential generator.
    seed_ = (seed_ * 48271) % 2147483647;
    return result;
  }

 private:
  const int kPassProbability;
  uint32_t seed_;
};
class Simulator : public DecoderVisitor {
public:
explicit Simulator(Decoder* decoder, FILE* stream = stdout);
~Simulator();
void ResetState();
// Run the simulator.
virtual void Run();
void RunFrom(const Instruction* first);
#if defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
(defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
// Templated `RunFrom` version taking care of passing arguments and returning
// the result value.
// This allows code like:
// int32_t res = simulator.RunFrom<int32_t, int32_t>(GenerateCode(),
// 0x123);
// It requires VIXL's ABI features, and C++11 or greater.
// Also, the initialisation of tuples is incorrect in GCC before 4.9.1:
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51253
template <typename R, typename... P>
R RunFrom(const Instruction* code, P... arguments) {
return RunFromStructHelper<R, P...>::Wrapper(this, code, arguments...);
}
template <typename R, typename... P>
struct RunFromStructHelper {
static R Wrapper(Simulator* simulator,
const Instruction* code,
P... arguments) {
ABI abi;
std::tuple<P...> unused_tuple{
// TODO: We currently do not support arguments passed on the stack. We
// could do so by using `WriteGenericOperand()` here, but may need to
// add features to handle situations where the stack is or is not set
// up.
(simulator->WriteCPURegister(abi.GetNextParameterGenericOperand<P>()
.GetCPURegister(),
arguments),
arguments)...};
simulator->RunFrom(code);
return simulator->ReadGenericOperand<R>(abi.GetReturnGenericOperand<R>());
}
};
// Partial specialization when the return type is `void`.
template <typename... P>
struct RunFromStructHelper<void, P...> {
static void Wrapper(Simulator* simulator,
const Instruction* code,
P... arguments) {
ABI abi;
std::tuple<P...> unused_tuple{
// TODO: We currently do not support arguments passed on the stack. We
// could do so by using `WriteGenericOperand()` here, but may need to
// add features to handle situations where the stack is or is not set
// up.
(simulator->WriteCPURegister(abi.GetNextParameterGenericOperand<P>()
.GetCPURegister(),
arguments),
arguments)...};
simulator->RunFrom(code);
}
};
#endif
// Execution ends when the PC hits this address.
static const Instruction* kEndOfSimAddress;
// Simulation helpers.
const Instruction* ReadPc() const { return pc_; }
VIXL_DEPRECATED("ReadPc", const Instruction* pc() const) { return ReadPc(); }
enum BranchLogMode { LogBranches, NoBranchLog };
void WritePc(const Instruction* new_pc,
BranchLogMode log_mode = LogBranches) {
if (log_mode == LogBranches) LogTakenBranch(new_pc);
pc_ = Memory::AddressUntag(new_pc);
pc_modified_ = true;
}
VIXL_DEPRECATED("WritePc", void set_pc(const Instruction* new_pc)) {
return WritePc(new_pc);
}
void IncrementPc() {
if (!pc_modified_) {
pc_ = pc_->GetNextInstruction();
}
}
VIXL_DEPRECATED("IncrementPc", void increment_pc()) { IncrementPc(); }
BType ReadBType() const { return btype_; }
void WriteNextBType(BType btype) { next_btype_ = btype; }
void UpdateBType() {
btype_ = next_btype_;
next_btype_ = DefaultBType;
}
// Helper function to determine BType for branches.
BType GetBTypeFromInstruction(const Instruction* instr) const;
bool PcIsInGuardedPage() const { return guard_pages_; }
void SetGuardedPages(bool guard_pages) { guard_pages_ = guard_pages; }
// Decode and execute the single instruction at pc_, then advance the PC
// (unless the instruction wrote it), emit register traces, and update the
// pending BType state.
void ExecuteInstruction() {
// The program counter should always be aligned.
VIXL_ASSERT(IsWordAligned(pc_));
pc_modified_ = false;
// If the previous instruction was a movprfx, the current instruction must
// be one that can legally combine with it.
if (movprfx_ != NULL) {
VIXL_CHECK(pc_->CanTakeSVEMovprfx(movprfx_));
movprfx_ = NULL;
}
// On guarded pages, if BType is not zero, take an exception on any
// instruction other than BTI, PACI[AB]SP, HLT or BRK.
if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) {
if (pc_->IsPAuth()) {
// Of the PAuth instructions, only PACIASP/PACIBSP are implicit BTIs.
Instr i = pc_->Mask(SystemPAuthMask);
if ((i != PACIASP) && (i != PACIBSP)) {
VIXL_ABORT_WITH_MSG(
"Executing non-BTI instruction with wrong BType.");
}
} else if (!pc_->IsBti() && !pc_->IsException()) {
VIXL_ABORT_WITH_MSG("Executing non-BTI instruction with wrong BType.");
}
}
// decoder_->Decode(...) triggers at least the following visitors:
// 1. The CPUFeaturesAuditor (`cpu_features_auditor_`).
// 2. The PrintDisassembler (`print_disasm_`), if enabled.
// 3. The Simulator (`this`).
// User can add additional visitors at any point, but the Simulator requires
// that the ordering above is preserved.
decoder_->Decode(pc_);
IncrementPc();
LogAllWrittenRegisters();
UpdateBType();
// Checked after execution so the auditor has already seen the instruction.
VIXL_CHECK(cpu_features_auditor_.InstructionIsAvailable());
}
// Declare all Visitor functions.
#define DECLARE(A) \
virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE;
VISITOR_LIST_THAT_RETURN(DECLARE)
#undef DECLARE
virtual void Visit(Metadata* metadata,
const Instruction* instr) VIXL_OVERRIDE;
// Visitors for instruction classes that never return to the caller.
#define DECLARE(A) \
VIXL_NO_RETURN virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE;
VISITOR_LIST_THAT_DONT_RETURN(DECLARE)
#undef DECLARE
// Simulation handlers shared by groups of (mostly SVE/SVE2) instructions.
// The suffixes appear to encode the operand pattern of the forms handled
// (Zd/Zn/Zm vector operands, Pg governing predicates, imm/const fields) --
// see the out-of-line definitions for the exact instruction lists.
void Simulate_PdT_PgZ_ZnT_ZmT(const Instruction* instr);
void Simulate_PdT_Rn_Rm(const Instruction* instr);
void Simulate_PdT_Xn_Xm(const Instruction* instr);
void Simulate_ZdB_Zn1B_Zn2B_imm(const Instruction* instr);
void Simulate_ZdB_ZnB_ZmB(const Instruction* instr);
void Simulate_ZdD_PgM_ZnS(const Instruction* instr);
void Simulate_ZdD_ZnD_ZmD_imm(const Instruction* instr);
void Simulate_ZdH_PgM_ZnS(const Instruction* instr);
void Simulate_ZdH_ZnH_ZmH_imm(const Instruction* instr);
void Simulate_ZdS_PgM_ZnD(const Instruction* instr);
void Simulate_ZdS_PgM_ZnH(const Instruction* instr);
void Simulate_ZdS_PgM_ZnS(const Instruction* instr);
void Simulate_ZdS_ZnH_ZmH_imm(const Instruction* instr);
void Simulate_ZdS_ZnS_ZmS_imm(const Instruction* instr);
void Simulate_ZdT_PgM_ZnT(const Instruction* instr);
void Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr);
void Simulate_ZdT_Pg_Zn1T_Zn2T(const Instruction* instr);
void Simulate_ZdT_Zn1T_Zn2T_ZmT(const Instruction* instr);
void Simulate_ZdT_ZnT_ZmT(const Instruction* instr);
void Simulate_ZdT_ZnT_ZmTb(const Instruction* instr);
void Simulate_ZdT_ZnT_const(const Instruction* instr);
void Simulate_ZdaD_ZnD_ZmD_imm(const Instruction* instr);
void Simulate_ZdaD_ZnH_ZmH_imm_const(const Instruction* instr);
void Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr);
void Simulate_ZdaH_ZnH_ZmH_imm(const Instruction* instr);
void Simulate_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr);
void Simulate_ZdaS_ZnB_ZmB_imm_const(const Instruction* instr);
void Simulate_ZdaS_ZnH_ZmH(const Instruction* instr);
void Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr);
void Simulate_ZdaS_ZnS_ZmS_imm(const Instruction* instr);
void Simulate_ZdaS_ZnS_ZmS_imm_const(const Instruction* instr);
void Simulate_ZdaT_PgM_ZnTb(const Instruction* instr);
void Simulate_ZdaT_ZnT_ZmT(const Instruction* instr);
void Simulate_ZdaT_ZnT_ZmT_const(const Instruction* instr);
void Simulate_ZdaT_ZnT_const(const Instruction* instr);
void Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr);
void Simulate_ZdaT_ZnTb_ZmTb_const(const Instruction* instr);
void Simulate_ZdnD_ZdnD_ZmD_ZkD(const Instruction* instr);
void Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr);
void Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr);
void Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr);
void Simulate_ZtD_PgZ_ZnD_Xm(const Instruction* instr);
void Simulate_ZtD_Pg_ZnD_Xm(const Instruction* instr);
void Simulate_ZtS_PgZ_ZnS_Xm(const Instruction* instr);
void Simulate_ZtS_Pg_ZnS_Xm(const Instruction* instr);
void SimulateSVEHalvingAddSub(const Instruction* instr);
void SimulateSVESaturatingArithmetic(const Instruction* instr);
void SimulateSVEIntArithPair(const Instruction* instr);
void SimulateSVENarrow(const Instruction* instr);
void SimulateSVEInterleavedArithLong(const Instruction* instr);
void SimulateSVEShiftLeftImm(const Instruction* instr);
void SimulateSVEAddSubCarry(const Instruction* instr);
void SimulateSVEAddSubHigh(const Instruction* instr);
void SimulateSVEIntMulLongVec(const Instruction* instr);
void SimulateSVESaturatingIntMulLongIdx(const Instruction* instr);
void SimulateSVEExclusiveOrRotate(const Instruction* instr);
// Integer register accessors.
// Basic accessor: Read the register as the specified type.
template <typename T>
T ReadRegister(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const {
VIXL_ASSERT(
code < kNumberOfRegisters ||
((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)));
// Reads of register 31 as the zero register yield an all-zero value of T.
if ((code == 31) && (r31mode == Reg31IsZeroRegister)) {
T result;
memset(&result, 0, sizeof(result));
return result;
}
// kSPRegInternalCode shares the storage slot of register 31 (see the
// static assert in WriteRegister).
if ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)) {
code = 31;
}
return registers_[code].Get<T>();
}
template <typename T>
VIXL_DEPRECATED("ReadRegister",
T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister)
const) {
return ReadRegister<T>(code, r31mode);
}
// Common specialized accessors for the ReadRegister() template.
int32_t ReadWRegister(unsigned code,
Reg31Mode r31mode = Reg31IsZeroRegister) const {
return ReadRegister<int32_t>(code, r31mode);
}
VIXL_DEPRECATED("ReadWRegister",
int32_t wreg(unsigned code,
Reg31Mode r31mode = Reg31IsZeroRegister) const) {
return ReadWRegister(code, r31mode);
}
int64_t ReadXRegister(unsigned code,
Reg31Mode r31mode = Reg31IsZeroRegister) const {
return ReadRegister<int64_t>(code, r31mode);
}
VIXL_DEPRECATED("ReadXRegister",
int64_t xreg(unsigned code,
Reg31Mode r31mode = Reg31IsZeroRegister) const) {
return ReadXRegister(code, r31mode);
}
// Predicate (P) register accessor.
SimPRegister& ReadPRegister(unsigned code) {
VIXL_ASSERT(code < kNumberOfPRegisters);
return pregisters_[code];
}
// First-fault register (FFR) accessor.
SimFFRRegister& ReadFFR() { return ffr_register_; }
// As above, with parameterized size and return type. The value is
// either zero-extended or truncated to fit, as required.
template <typename T>
T ReadRegister(unsigned size,
unsigned code,
Reg31Mode r31mode = Reg31IsZeroRegister) const {
uint64_t raw;
switch (size) {
case kWRegSize:
raw = ReadRegister<uint32_t>(code, r31mode);
break;
case kXRegSize:
raw = ReadRegister<uint64_t>(code, r31mode);
break;
default:
VIXL_UNREACHABLE();
return 0;
}
T result;
VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw));
// Copy the result and truncate to fit. This assumes a little-endian host.
memcpy(&result, &raw, sizeof(result));
return result;
}
template <typename T>
VIXL_DEPRECATED("ReadRegister",
T reg(unsigned size,
unsigned code,
Reg31Mode r31mode = Reg31IsZeroRegister) const) {
return ReadRegister<T>(size, code, r31mode);
}
// Use int64_t by default if T is not specified.
int64_t ReadRegister(unsigned size,
unsigned code,
Reg31Mode r31mode = Reg31IsZeroRegister) const {
return ReadRegister<int64_t>(size, code, r31mode);
}
VIXL_DEPRECATED("ReadRegister",
int64_t reg(unsigned size,
unsigned code,
Reg31Mode r31mode = Reg31IsZeroRegister) const) {
return ReadRegister(size, code, r31mode);
}
enum RegLogMode { LogRegWrites, NoRegLog };
// Write 'value' into an integer register. The value is zero-extended. This
// behaviour matches AArch64 register writes.
//
// SP may be specified in one of two ways:
// - (code == kSPRegInternalCode) && (r31mode == Reg31IsZeroRegister)
// - (code == 31) && (r31mode == Reg31IsStackPointer)
template <typename T>
void WriteRegister(unsigned code,
T value,
RegLogMode log_mode = LogRegWrites,
Reg31Mode r31mode = Reg31IsZeroRegister) {
// Values narrower than a W register are re-dispatched as a 32-bit write.
if (sizeof(T) < kWRegSizeInBytes) {
// We use a C-style cast on purpose here.
// Since we do not have access to 'constexpr if', the casts in this `if`
// must be valid even if we know the code will never be executed, in
// particular when `T` is a pointer type.
int64_t tmp_64bit = (int64_t)value;
int32_t tmp_32bit = static_cast<int32_t>(tmp_64bit);
WriteRegister<int32_t>(code, tmp_32bit, log_mode, r31mode);
return;
}
VIXL_ASSERT((sizeof(T) == kWRegSizeInBytes) ||
(sizeof(T) == kXRegSizeInBytes));
VIXL_ASSERT(
(code < kNumberOfRegisters) ||
((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)));
if (code == 31) {
if (r31mode == Reg31IsZeroRegister) {
// Discard writes to the zero register.
return;
} else {
code = kSPRegInternalCode;
}
}
// registers_[31] is the stack pointer.
VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31);
registers_[code % kNumberOfRegisters].Write(value);
if (log_mode == LogRegWrites) {
LogRegister(code, GetPrintRegisterFormatForSize(sizeof(T)));
}
}
template <typename T>
VIXL_DEPRECATED("WriteRegister",
void set_reg(unsigned code,
T value,
RegLogMode log_mode = LogRegWrites,
Reg31Mode r31mode = Reg31IsZeroRegister)) {
WriteRegister<T>(code, value, log_mode, r31mode);
}
// Common specialized accessors for the set_reg() template.
void WriteWRegister(unsigned code,
int32_t value,
RegLogMode log_mode = LogRegWrites,
Reg31Mode r31mode = Reg31IsZeroRegister) {
WriteRegister(code, value, log_mode, r31mode);
}
VIXL_DEPRECATED("WriteWRegister",
void set_wreg(unsigned code,
int32_t value,
RegLogMode log_mode = LogRegWrites,
Reg31Mode r31mode = Reg31IsZeroRegister)) {
WriteWRegister(code, value, log_mode, r31mode);
}
void WriteXRegister(unsigned code,
int64_t value,
RegLogMode log_mode = LogRegWrites,
Reg31Mode r31mode = Reg31IsZeroRegister) {
WriteRegister(code, value, log_mode, r31mode);
}
VIXL_DEPRECATED("WriteXRegister",
void set_xreg(unsigned code,
int64_t value,
RegLogMode log_mode = LogRegWrites,
Reg31Mode r31mode = Reg31IsZeroRegister)) {
WriteXRegister(code, value, log_mode, r31mode);
}
// As above, with parameterized size and type. The value is either
// zero-extended or truncated to fit, as required.
template <typename T>
void WriteRegister(unsigned size,
unsigned code,
T value,
RegLogMode log_mode = LogRegWrites,
Reg31Mode r31mode = Reg31IsZeroRegister) {
// Zero-extend the input.
uint64_t raw = 0;
VIXL_STATIC_ASSERT(sizeof(value) <= sizeof(raw));
memcpy(&raw, &value, sizeof(value));
// Write (and possibly truncate) the value.
switch (size) {
case kWRegSize:
WriteRegister(code, static_cast<uint32_t>(raw), log_mode, r31mode);
break;
case kXRegSize:
WriteRegister(code, raw, log_mode, r31mode);
break;
default:
VIXL_UNREACHABLE();
return;
}
}
template <typename T>
VIXL_DEPRECATED("WriteRegister",
void set_reg(unsigned size,
unsigned code,
T value,
RegLogMode log_mode = LogRegWrites,
Reg31Mode r31mode = Reg31IsZeroRegister)) {
WriteRegister(size, code, value, log_mode, r31mode);
}
// Common specialized accessors for the set_reg() template.
// Commonly-used special cases.
// Write to the link register (LR).
template <typename T>
void WriteLr(T value) {
WriteRegister(kLinkRegCode, value);
}
template <typename T>
VIXL_DEPRECATED("WriteLr", void set_lr(T value)) {
WriteLr(value);
}
// Write to the stack pointer (register 31 in stack-pointer mode).
template <typename T>
void WriteSp(T value) {
WriteRegister(31, value, LogRegWrites, Reg31IsStackPointer);
}
template <typename T>
VIXL_DEPRECATED("WriteSp", void set_sp(T value)) {
WriteSp(value);
}
// Vector register accessors.
// These are equivalent to the integer register accessors, but for vector
// registers.
// A structure for representing a 128-bit Q register.
struct qreg_t {
uint8_t val[kQRegSizeInBytes];
};
// A structure for representing a SVE Z register.
struct zreg_t {
uint8_t val[kZRegMaxSizeInBytes];
};
// Basic accessor: read the register as the specified type.
template <typename T>
T ReadVRegister(unsigned code) const {
// Only B/H/S/D scalar sizes and full Q reads are supported.
VIXL_STATIC_ASSERT(
(sizeof(T) == kBRegSizeInBytes) || (sizeof(T) == kHRegSizeInBytes) ||
(sizeof(T) == kSRegSizeInBytes) || (sizeof(T) == kDRegSizeInBytes) ||
(sizeof(T) == kQRegSizeInBytes));
VIXL_ASSERT(code < kNumberOfVRegisters);
return vregisters_[code].Get<T>();
}
template <typename T>
VIXL_DEPRECATED("ReadVRegister", T vreg(unsigned code) const) {
return ReadVRegister<T>(code);
}
// Common specialized accessors for the vreg() template.
int8_t ReadBRegister(unsigned code) const {
return ReadVRegister<int8_t>(code);
}
VIXL_DEPRECATED("ReadBRegister", int8_t breg(unsigned code) const) {
return ReadBRegister(code);
}
// H registers are read as raw bits and converted to the FP16 wrapper type.
vixl::internal::SimFloat16 ReadHRegister(unsigned code) const {
return RawbitsToFloat16(ReadHRegisterBits(code));
}
VIXL_DEPRECATED("ReadHRegister", int16_t hreg(unsigned code) const) {
return Float16ToRawbits(ReadHRegister(code));
}
uint16_t ReadHRegisterBits(unsigned code) const {
return ReadVRegister<uint16_t>(code);
}
float ReadSRegister(unsigned code) const {
return ReadVRegister<float>(code);
}
VIXL_DEPRECATED("ReadSRegister", float sreg(unsigned code) const) {
return ReadSRegister(code);
}
uint32_t ReadSRegisterBits(unsigned code) const {
return ReadVRegister<uint32_t>(code);
}
VIXL_DEPRECATED("ReadSRegisterBits",
uint32_t sreg_bits(unsigned code) const) {
return ReadSRegisterBits(code);
}
double ReadDRegister(unsigned code) const {
return ReadVRegister<double>(code);
}
VIXL_DEPRECATED("ReadDRegister", double dreg(unsigned code) const) {
return ReadDRegister(code);
}
uint64_t ReadDRegisterBits(unsigned code) const {
return ReadVRegister<uint64_t>(code);
}
VIXL_DEPRECATED("ReadDRegisterBits",
uint64_t dreg_bits(unsigned code) const) {
return ReadDRegisterBits(code);
}
qreg_t ReadQRegister(unsigned code) const {
return ReadVRegister<qreg_t>(code);
}
VIXL_DEPRECATED("ReadQRegister", qreg_t qreg(unsigned code) const) {
return ReadQRegister(code);
}
// As above, with parameterized size and return type. The value is
// either zero-extended or truncated to fit, as required.
template <typename T>
T ReadVRegister(unsigned size, unsigned code) const {
uint64_t raw = 0;
T result;
switch (size) {
case kSRegSize:
raw = ReadVRegister<uint32_t>(code);
break;
case kDRegSize:
raw = ReadVRegister<uint64_t>(code);
break;
default:
VIXL_UNREACHABLE();
break;
}
VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw));
// Copy the result and truncate to fit. This assumes a little-endian host.
memcpy(&result, &raw, sizeof(result));
return result;
}
template <typename T>
VIXL_DEPRECATED("ReadVRegister", T vreg(unsigned size, unsigned code) const) {
return ReadVRegister<T>(size, code);
}
// Mutable access to the whole vector register.
SimVRegister& ReadVRegister(unsigned code) { return vregisters_[code]; }
VIXL_DEPRECATED("ReadVRegister", SimVRegister& vreg(unsigned code)) {
return ReadVRegister(code);
}
// Basic accessor: Write the specified value.
template <typename T>
void WriteVRegister(unsigned code,
T value,
RegLogMode log_mode = LogRegWrites) {
// Only whole-register or standard lane-sized writes are supported.
VIXL_STATIC_ASSERT((sizeof(value) == kBRegSizeInBytes) ||
(sizeof(value) == kHRegSizeInBytes) ||
(sizeof(value) == kSRegSizeInBytes) ||
(sizeof(value) == kDRegSizeInBytes) ||
(sizeof(value) == kQRegSizeInBytes) ||
(sizeof(value) == kZRegMaxSizeInBytes));
VIXL_ASSERT(code < kNumberOfVRegisters);
vregisters_[code].Write(value);
if (log_mode == LogRegWrites) {
LogVRegister(code, GetPrintRegisterFormat(value));
}
}
template <typename T>
VIXL_DEPRECATED("WriteVRegister",
void set_vreg(unsigned code,
T value,
RegLogMode log_mode = LogRegWrites)) {
WriteVRegister(code, value, log_mode);
}
// Common specialized accessors for the WriteVRegister() template.
void WriteBRegister(unsigned code,
int8_t value,
RegLogMode log_mode = LogRegWrites) {
WriteVRegister(code, value, log_mode);
}
VIXL_DEPRECATED("WriteBRegister",
void set_breg(unsigned code,
int8_t value,
RegLogMode log_mode = LogRegWrites)) {
return WriteBRegister(code, value, log_mode);
}
// FP16 values are stored as their raw bit pattern.
void WriteHRegister(unsigned code,
vixl::internal::SimFloat16 value,
RegLogMode log_mode = LogRegWrites) {
WriteVRegister(code, Float16ToRawbits(value), log_mode);
}
void WriteHRegister(unsigned code,
int16_t value,
RegLogMode log_mode = LogRegWrites) {
WriteVRegister(code, value, log_mode);
}
VIXL_DEPRECATED("WriteHRegister",
void set_hreg(unsigned code,
int16_t value,
RegLogMode log_mode = LogRegWrites)) {
return WriteHRegister(code, value, log_mode);
}
void WriteSRegister(unsigned code,
float value,
RegLogMode log_mode = LogRegWrites) {
WriteVRegister(code, value, log_mode);
}
VIXL_DEPRECATED("WriteSRegister",
void set_sreg(unsigned code,
float value,
RegLogMode log_mode = LogRegWrites)) {
WriteSRegister(code, value, log_mode);
}
void WriteSRegisterBits(unsigned code,
uint32_t value,
RegLogMode log_mode = LogRegWrites) {
WriteVRegister(code, value, log_mode);
}
VIXL_DEPRECATED("WriteSRegisterBits",
void set_sreg_bits(unsigned code,
uint32_t value,
RegLogMode log_mode = LogRegWrites)) {
WriteSRegisterBits(code, value, log_mode);
}
void WriteDRegister(unsigned code,
double value,
RegLogMode log_mode = LogRegWrites) {
WriteVRegister(code, value, log_mode);
}
VIXL_DEPRECATED("WriteDRegister",
void set_dreg(unsigned code,
double value,
RegLogMode log_mode = LogRegWrites)) {
WriteDRegister(code, value, log_mode);
}
void WriteDRegisterBits(unsigned code,
uint64_t value,
RegLogMode log_mode = LogRegWrites) {
WriteVRegister(code, value, log_mode);
}
VIXL_DEPRECATED("WriteDRegisterBits",
void set_dreg_bits(unsigned code,
uint64_t value,
RegLogMode log_mode = LogRegWrites)) {
WriteDRegisterBits(code, value, log_mode);
}
void WriteQRegister(unsigned code,
qreg_t value,
RegLogMode log_mode = LogRegWrites) {
WriteVRegister(code, value, log_mode);
}
VIXL_DEPRECATED("WriteQRegister",
void set_qreg(unsigned code,
qreg_t value,
RegLogMode log_mode = LogRegWrites)) {
WriteQRegister(code, value, log_mode);
}
void WriteZRegister(unsigned code,
zreg_t value,
RegLogMode log_mode = LogRegWrites) {
WriteVRegister(code, value, log_mode);
}
// Accessors keyed on assembler register objects rather than raw codes.
template <typename T>
T ReadRegister(Register reg) const {
return ReadRegister<T>(reg.GetCode(), Reg31IsZeroRegister);
}
template <typename T>
void WriteRegister(Register reg,
T value,
RegLogMode log_mode = LogRegWrites) {
WriteRegister<T>(reg.GetCode(), value, log_mode, Reg31IsZeroRegister);
}
template <typename T>
T ReadVRegister(VRegister vreg) const {
return ReadVRegister<T>(vreg.GetCode());
}
template <typename T>
void WriteVRegister(VRegister vreg,
T value,
RegLogMode log_mode = LogRegWrites) {
WriteVRegister<T>(vreg.GetCode(), value, log_mode);
}
// Dispatch to the integer or vector accessor based on the register kind.
template <typename T>
T ReadCPURegister(CPURegister reg) const {
if (reg.IsVRegister()) {
return ReadVRegister<T>(VRegister(reg));
} else {
return ReadRegister<T>(Register(reg));
}
}
template <typename T>
void WriteCPURegister(CPURegister reg,
T value,
RegLogMode log_mode = LogRegWrites) {
if (reg.IsVRegister()) {
WriteVRegister<T>(VRegister(reg), value, log_mode);
} else {
WriteRegister<T>(Register(reg), value, log_mode);
}
}
// Resolve a MemOperand to an absolute address; defined out of line.
uint64_t ComputeMemOperandAddress(const MemOperand& mem_op) const;
// Read a GenericOperand, which may name either a CPU register or a memory
// location.
template <typename T>
T ReadGenericOperand(GenericOperand operand) const {
if (operand.IsCPURegister()) {
return ReadCPURegister<T>(operand.GetCPURegister());
} else {
VIXL_ASSERT(operand.IsMemOperand());
return Memory::Read<T>(ComputeMemOperandAddress(operand.GetMemOperand()));
}
}
// Write a GenericOperand, to either a CPU register or a memory location.
template <typename T>
void WriteGenericOperand(GenericOperand operand,
T value,
RegLogMode log_mode = LogRegWrites) {
if (operand.IsCPURegister()) {
// Outside SIMD, registers are 64-bit or a subset of a 64-bit register. If
// the width of the value to write is smaller than 64 bits, the unused
// bits may contain unrelated values that the code following this write
// needs to handle gracefully.
// Here we fill the unused bits with a predefined pattern to catch issues
// early.
VIXL_ASSERT(operand.GetCPURegister().GetSizeInBits() <= 64);
uint64_t raw = 0xdeadda1adeadda1a;
memcpy(&raw, &value, sizeof(value));
WriteCPURegister(operand.GetCPURegister(), raw, log_mode);
} else {
VIXL_ASSERT(operand.IsMemOperand());
Memory::Write(ComputeMemOperandAddress(operand.GetMemOperand()), value);
}
}
// NZCV condition flag accessors.
bool ReadN() const { return nzcv_.GetN() != 0; }
VIXL_DEPRECATED("ReadN", bool N() const) { return ReadN(); }
bool ReadZ() const { return nzcv_.GetZ() != 0; }
VIXL_DEPRECATED("ReadZ", bool Z() const) { return ReadZ(); }
bool ReadC() const { return nzcv_.GetC() != 0; }
VIXL_DEPRECATED("ReadC", bool C() const) { return ReadC(); }
bool ReadV() const { return nzcv_.GetV() != 0; }
VIXL_DEPRECATED("ReadV", bool V() const) { return ReadV(); }
SimSystemRegister& ReadNzcv() { return nzcv_; }
VIXL_DEPRECATED("ReadNzcv", SimSystemRegister& nzcv()) { return ReadNzcv(); }
// TODO: Find a way to make the fpcr_ members return the proper types, so
// these accessors are not necessary.
FPRounding ReadRMode() const {
return static_cast<FPRounding>(fpcr_.GetRMode());
}
VIXL_DEPRECATED("ReadRMode", FPRounding RMode()) { return ReadRMode(); }
// Map FPCR.DN onto the enum used by the FP helpers.
UseDefaultNaN ReadDN() const {
return fpcr_.GetDN() != 0 ? kUseDefaultNaN : kIgnoreDefaultNaN;
}
// Deprecated boolean view of ReadDN(): true when default-NaN mode is
// enabled. The equality comparison already yields a bool, so the previous
// `? true : false` ternary was redundant.
VIXL_DEPRECATED("ReadDN", bool DN()) { return ReadDN() == kUseDefaultNaN; }
// FPCR system register accessor.
SimSystemRegister& ReadFpcr() { return fpcr_; }
VIXL_DEPRECATED("ReadFpcr", SimSystemRegister& fpcr()) { return ReadFpcr(); }
// Specify relevant register formats for Print(V)Register and related helpers.
enum PrintRegisterFormat {
// The lane size.
kPrintRegLaneSizeB = 0 << 0,
kPrintRegLaneSizeH = 1 << 0,
kPrintRegLaneSizeS = 2 << 0,
kPrintRegLaneSizeW = kPrintRegLaneSizeS,
kPrintRegLaneSizeD = 3 << 0,
kPrintRegLaneSizeX = kPrintRegLaneSizeD,
kPrintRegLaneSizeQ = 4 << 0,
kPrintRegLaneSizeUnknown = 5 << 0,
// The lane-size field stores log2 of the lane size in bytes.
kPrintRegLaneSizeOffset = 0,
kPrintRegLaneSizeMask = 7 << 0,
// The overall register size.
kPrintRegAsScalar = 0,
kPrintRegAsDVector = 1 << 3,
kPrintRegAsQVector = 2 << 3,
kPrintRegAsSVEVector = 3 << 3,
kPrintRegAsVectorMask = 3 << 3,
// Indicate floating-point format lanes. (This flag is only supported for
// S-, H-, and D-sized lanes.)
kPrintRegAsFP = 1 << 5,
// With this flag, print helpers won't check that the upper bits are zero.
// This also forces the register name to be printed with the `reg<msb:0>`
// format.
//
// The flag is supported with any PrintRegisterFormat other than those with
// kPrintRegAsSVEVector.
kPrintRegPartial = 1 << 6,
// Supported combinations.
// These exist so that they can be referred to by name, but also because C++
// does not allow enum types to hold values that aren't explicitly
// enumerated, and we want to be able to combine the above flags.
// Scalar formats.
#define VIXL_DECL_PRINT_REG_SCALAR(size) \
kPrint##size##Reg = kPrintRegLaneSize##size | kPrintRegAsScalar, \
kPrint##size##RegPartial = kPrintRegLaneSize##size | kPrintRegPartial
#define VIXL_DECL_PRINT_REG_SCALAR_FP(size) \
VIXL_DECL_PRINT_REG_SCALAR(size) \
, kPrint##size##RegFP = kPrint##size##Reg | kPrintRegAsFP, \
kPrint##size##RegPartialFP = kPrint##size##RegPartial | kPrintRegAsFP
VIXL_DECL_PRINT_REG_SCALAR(W),
VIXL_DECL_PRINT_REG_SCALAR(X),
VIXL_DECL_PRINT_REG_SCALAR_FP(H),
VIXL_DECL_PRINT_REG_SCALAR_FP(S),
VIXL_DECL_PRINT_REG_SCALAR_FP(D),
VIXL_DECL_PRINT_REG_SCALAR(Q),
#undef VIXL_DECL_PRINT_REG_SCALAR
#undef VIXL_DECL_PRINT_REG_SCALAR_FP
// NEON vector formats: lane count x lane type x overall size.
#define VIXL_DECL_PRINT_REG_NEON(count, type, size) \
kPrintReg##count##type = kPrintRegLaneSize##type | kPrintRegAs##size, \
kPrintReg##count##type##Partial = kPrintReg##count##type | kPrintRegPartial
#define VIXL_DECL_PRINT_REG_NEON_FP(count, type, size) \
VIXL_DECL_PRINT_REG_NEON(count, type, size) \
, kPrintReg##count##type##FP = kPrintReg##count##type | kPrintRegAsFP, \
kPrintReg##count##type##PartialFP = \
kPrintReg##count##type##Partial | kPrintRegAsFP
VIXL_DECL_PRINT_REG_NEON(1, B, Scalar),
VIXL_DECL_PRINT_REG_NEON(8, B, DVector),
VIXL_DECL_PRINT_REG_NEON(16, B, QVector),
VIXL_DECL_PRINT_REG_NEON_FP(1, H, Scalar),
VIXL_DECL_PRINT_REG_NEON_FP(4, H, DVector),
VIXL_DECL_PRINT_REG_NEON_FP(8, H, QVector),
VIXL_DECL_PRINT_REG_NEON_FP(1, S, Scalar),
VIXL_DECL_PRINT_REG_NEON_FP(2, S, DVector),
VIXL_DECL_PRINT_REG_NEON_FP(4, S, QVector),
VIXL_DECL_PRINT_REG_NEON_FP(1, D, Scalar),
VIXL_DECL_PRINT_REG_NEON_FP(2, D, QVector),
VIXL_DECL_PRINT_REG_NEON(1, Q, Scalar),
#undef VIXL_DECL_PRINT_REG_NEON
#undef VIXL_DECL_PRINT_REG_NEON_FP
// SVE vector formats.
#define VIXL_DECL_PRINT_REG_SVE(type) \
kPrintRegVn##type = kPrintRegLaneSize##type | kPrintRegAsSVEVector, \
kPrintRegVn##type##Partial = kPrintRegVn##type | kPrintRegPartial
#define VIXL_DECL_PRINT_REG_SVE_FP(type) \
VIXL_DECL_PRINT_REG_SVE(type) \
, kPrintRegVn##type##FP = kPrintRegVn##type | kPrintRegAsFP, \
kPrintRegVn##type##PartialFP = kPrintRegVn##type##Partial | kPrintRegAsFP
VIXL_DECL_PRINT_REG_SVE(B),
VIXL_DECL_PRINT_REG_SVE_FP(H),
VIXL_DECL_PRINT_REG_SVE_FP(S),
VIXL_DECL_PRINT_REG_SVE_FP(D),
VIXL_DECL_PRINT_REG_SVE(Q)
#undef VIXL_DECL_PRINT_REG_SVE
#undef VIXL_DECL_PRINT_REG_SVE_FP
};
// Return `format` with the kPrintRegPartial flag set.
PrintRegisterFormat GetPrintRegPartial(PrintRegisterFormat format) {
  // Every PrintRegisterFormat has a kPrintRegPartial counterpart, so this
  // cast is always well-defined.
  int partial = format | kPrintRegPartial;
  return static_cast<PrintRegisterFormat>(partial);
}
// For SVE formats, return the format describing one Q-sized part of the
// vector.
PrintRegisterFormat GetPrintRegAsQChunkOfSVE(PrintRegisterFormat format) {
  VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
  // Preserve only the lane-size and FP fields.
  int q_format = format & (kPrintRegLaneSizeMask | kPrintRegAsFP);
  // Mark the result as a partial Q vector: we are never formatting the
  // whole Z register here, so the format must always be partial.
  q_format |= kPrintRegAsQVector;
  q_format |= kPrintRegPartial;
  // This cast is always safe because NEON QVector formats support every
  // combination of FP and lane size that SVE formats do.
  return static_cast<PrintRegisterFormat>(q_format);
}
// Extract log2 of the lane size (in bytes) encoded in `format`.
unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) {
  unsigned lane_field = format & kPrintRegLaneSizeMask;
  VIXL_ASSERT(lane_field != kPrintRegLaneSizeUnknown);
  return lane_field >> kPrintRegLaneSizeOffset;
}
// The lane size in bytes.
unsigned GetPrintRegLaneSizeInBytes(PrintRegisterFormat format) {
  return 1U << GetPrintRegLaneSizeInBytesLog2(format);
}
// Log2 of the overall printed register size in bytes.
unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) {
switch (format & kPrintRegAsVectorMask) {
case kPrintRegAsScalar:
return GetPrintRegLaneSizeInBytesLog2(format);
case kPrintRegAsDVector:
return kDRegSizeInBytesLog2;
case kPrintRegAsQVector:
return kQRegSizeInBytesLog2;
// Note: `default` deliberately falls into the SVE case so both hit the
// same VIXL_UNREACHABLE.
default:
case kPrintRegAsSVEVector:
// We print SVE vectors in Q-sized chunks. These need special handling,
// and it's probably an error to call this function in that case.
VIXL_UNREACHABLE();
return kQRegSizeInBytesLog2;
}
}
unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) {
return 1 << GetPrintRegSizeInBytesLog2(format);
}
unsigned GetPrintRegSizeInBitsLog2(PrintRegisterFormat format) {
return GetPrintRegSizeInBytesLog2(format) + kBitsPerByteLog2;
}
unsigned GetPrintRegSizeInBits(PrintRegisterFormat format) {
return 1 << GetPrintRegSizeInBitsLog2(format);
}
// Return the "<msb:0>" suffix used when printing a partial register.
const char* GetPartialRegSuffix(PrintRegisterFormat format) {
  unsigned size_in_bits_log2 = GetPrintRegSizeInBitsLog2(format);
  if (size_in_bits_log2 == kBRegSizeLog2) return "<7:0>";
  if (size_in_bits_log2 == kHRegSizeLog2) return "<15:0>";
  if (size_in_bits_log2 == kSRegSizeLog2) return "<31:0>";
  if (size_in_bits_log2 == kDRegSizeLog2) return "<63:0>";
  if (size_in_bits_log2 == kQRegSizeLog2) return "<127:0>";
  VIXL_UNREACHABLE();
  return "<UNKNOWN>";
}
// Number of lanes printed for this format.
unsigned GetPrintRegLaneCount(PrintRegisterFormat format) {
unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format);
unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format);
VIXL_ASSERT(reg_size_log2 >= lane_size_log2);
return 1 << (reg_size_log2 - lane_size_log2);
}
// Bitmask with one bit set per printed lane (at the lane's low byte).
uint16_t GetPrintRegLaneMask(PrintRegisterFormat format) {
int print_as = format & kPrintRegAsVectorMask;
if (print_as == kPrintRegAsScalar) return 1;
// Vector formats, including SVE formats printed in Q-sized chunks.
// Indexed by lane-size log2: B, H, S, D, Q.
static const uint16_t masks[] = {0xffff, 0x5555, 0x1111, 0x0101, 0x0001};
unsigned size_in_bytes_log2 = GetPrintRegLaneSizeInBytesLog2(format);
VIXL_ASSERT(size_in_bytes_log2 < ArrayLength(masks));
uint16_t mask = masks[size_in_bytes_log2];
// Exclude lanes that aren't visible in D vectors.
if (print_as == kPrintRegAsDVector) mask &= 0x00ff;
return mask;
}
// Choose a print format from register and lane sizes (bytes); defined out
// of line.
PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned reg_size,
unsigned lane_size);
// Scalar convenience overload: the lane is the whole register.
PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned size) {
return GetPrintRegisterFormatForSize(size, size);
}
// Choose an FP scalar print format from a size in bytes (H, S or D).
PrintRegisterFormat GetPrintRegisterFormatForSizeFP(unsigned size) {
  switch (size) {
    case kHRegSizeInBytes:
      return kPrintHReg;
    case kSRegSizeInBytes:
      return kPrintSReg;
    case kDRegSizeInBytes:
      return kPrintDReg;
    default:
      VIXL_UNREACHABLE();
      return kPrintDReg;
  }
}
// Add the FP flag to `format` when its lane size supports an FP
// interpretation (H, S or D); otherwise return `format` unchanged.
PrintRegisterFormat GetPrintRegisterFormatTryFP(PrintRegisterFormat format) {
  unsigned lane_size = GetPrintRegLaneSizeInBytes(format);
  bool fp_capable = (lane_size == kHRegSizeInBytes) ||
                    (lane_size == kSRegSizeInBytes) ||
                    (lane_size == kDRegSizeInBytes);
  if (!fp_capable) return format;
  return static_cast<PrintRegisterFormat>(format | kPrintRegAsFP);
}
// As GetPrintRegisterFormatForSize, but preferring FP formats.
PrintRegisterFormat GetPrintRegisterFormatForSizeTryFP(unsigned size) {
  return GetPrintRegisterFormatTryFP(GetPrintRegisterFormatForSize(size));
}
// Infer a print format from a value's static type; FP overloads below take
// precedence for float/double/Float16.
template <typename T>
PrintRegisterFormat GetPrintRegisterFormat(T value) {
return GetPrintRegisterFormatForSize(sizeof(value));
}
PrintRegisterFormat GetPrintRegisterFormat(double value) {
VIXL_STATIC_ASSERT(sizeof(value) == kDRegSizeInBytes);
return GetPrintRegisterFormatForSizeFP(sizeof(value));
}
PrintRegisterFormat GetPrintRegisterFormat(float value) {
VIXL_STATIC_ASSERT(sizeof(value) == kSRegSizeInBytes);
return GetPrintRegisterFormatForSizeFP(sizeof(value));
}
PrintRegisterFormat GetPrintRegisterFormat(Float16 value) {
VIXL_STATIC_ASSERT(sizeof(Float16ToRawbits(value)) == kHRegSizeInBytes);
return GetPrintRegisterFormatForSizeFP(sizeof(Float16ToRawbits(value)));
}
PrintRegisterFormat GetPrintRegisterFormat(VectorFormat vform);
PrintRegisterFormat GetPrintRegisterFormatFP(VectorFormat vform);
// Print all registers of the specified types.
void PrintRegisters();
void PrintVRegisters();
void PrintZRegisters();
void PrintSystemRegisters();
// As above, but only print the registers that have been updated.
void PrintWrittenRegisters();
void PrintWrittenVRegisters();
void PrintWrittenPRegisters();
// As above, but respect LOG_REG and LOG_VREG.
void LogWrittenRegisters() {
if (ShouldTraceRegs()) PrintWrittenRegisters();
}
void LogWrittenVRegisters() {
if (ShouldTraceVRegs()) PrintWrittenVRegisters();
}
void LogWrittenPRegisters() {
if (ShouldTraceVRegs()) PrintWrittenPRegisters();
}
// Emit all pending register traces for the current instruction.
void LogAllWrittenRegisters() {
LogWrittenRegisters();
LogWrittenVRegisters();
LogWrittenPRegisters();
}
// The amount of space to leave for a register name. This is used to keep the
// values vertically aligned. The longest register name has the form
// "z31<2047:1920>". The total overall value indentation must also take into
// account the fixed formatting: "# {name}: 0x{value}".
static const int kPrintRegisterNameFieldWidth = 14;
// Print whole, individual register values.
// - The format can be used to restrict how much of the register is printed,
// but such formats indicate that the unprinted high-order bits are zero and
// these helpers will assert that.
// - If the format includes the kPrintRegAsFP flag then human-friendly FP
// value annotations will be printed.
// - The suffix can be used to add annotations (such as memory access
// details), or to suppress the newline.
void PrintRegister(int code,
PrintRegisterFormat format = kPrintXReg,
const char* suffix = "\n");
void PrintVRegister(int code,
PrintRegisterFormat format = kPrintReg1Q,
const char* suffix = "\n");
// PrintZRegister and PrintPRegister print over several lines, so they cannot
// allow the suffix to be overridden.
void PrintZRegister(int code, PrintRegisterFormat format = kPrintRegVnQ);
void PrintPRegister(int code, PrintRegisterFormat format = kPrintRegVnQ);
void PrintFFR(PrintRegisterFormat format = kPrintRegVnQ);
// Print a single Q-sized part of a Z register, or the corresponding two-byte
// part of a P register. These print single lines, and therefore allow the
// suffix to be overridden. The format must include the kPrintRegPartial flag.
void PrintPartialZRegister(int code,
int q_index,
PrintRegisterFormat format = kPrintRegVnQ,
const char* suffix = "\n");
void PrintPartialPRegister(int code,
int q_index,
PrintRegisterFormat format = kPrintRegVnQ,
const char* suffix = "\n");
void PrintPartialPRegister(const char* name,
const SimPRegister& reg,
int q_index,
PrintRegisterFormat format = kPrintRegVnQ,
const char* suffix = "\n");
// Like Print*Register (above), but respect trace parameters.
void LogRegister(unsigned code, PrintRegisterFormat format) {
  if (!ShouldTraceRegs()) return;
  PrintRegister(code, format);
}
void LogVRegister(unsigned code, PrintRegisterFormat format) {
  if (!ShouldTraceVRegs()) return;
  PrintVRegister(code, format);
}
void LogZRegister(unsigned code, PrintRegisterFormat format) {
  if (!ShouldTraceVRegs()) return;
  PrintZRegister(code, format);
}
void LogPRegister(unsigned code, PrintRegisterFormat format) {
  if (!ShouldTraceVRegs()) return;
  PrintPRegister(code, format);
}
void LogFFR(PrintRegisterFormat format) {
  if (!ShouldTraceVRegs()) return;
  PrintFFR(format);
}
// Other state updates, including system registers.
void PrintSystemRegister(SystemRegister id);
void PrintTakenBranch(const Instruction* target);
void LogSystemRegister(SystemRegister id) {
  if (!ShouldTraceSysRegs()) return;
  PrintSystemRegister(id);
}
void LogTakenBranch(const Instruction* target) {
  if (!ShouldTraceBranches()) return;
  PrintTakenBranch(target);
}
// Trace memory accesses.
// Common, contiguous register accesses (such as for scalars).
// The *Write variants automatically set kPrintRegPartial on the format.
void PrintRead(int rt_code, PrintRegisterFormat format, uintptr_t address);
void PrintExtendingRead(int rt_code,
PrintRegisterFormat format,
int access_size_in_bytes,
uintptr_t address);
void PrintWrite(int rt_code, PrintRegisterFormat format, uintptr_t address);
void PrintVRead(int rt_code, PrintRegisterFormat format, uintptr_t address);
void PrintVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address);
// Simple, unpredicated SVE accesses always access the whole vector, and never
// know the lane type, so there's no need to accept a `format`.
// Loads ("<-") mark the destination register as logged before tracing the
// access; presumably this suppresses a redundant register-value trace line
// for the same instruction — TODO confirm against the Print* implementations.
void PrintZRead(int rt_code, uintptr_t address) {
  vregisters_[rt_code].NotifyRegisterLogged();
  PrintZAccess(rt_code, "<-", address);
}
// Stores ("->") only trace the access; the source register is unchanged, so
// there is nothing to mark as logged.
void PrintZWrite(int rt_code, uintptr_t address) {
  PrintZAccess(rt_code, "->", address);
}
void PrintPRead(int rt_code, uintptr_t address) {
  pregisters_[rt_code].NotifyRegisterLogged();
  PrintPAccess(rt_code, "<-", address);
}
void PrintPWrite(int rt_code, uintptr_t address) {
  PrintPAccess(rt_code, "->", address);
}
// Like Print* (above), but respect GetTraceParameters().
// Memory-access tracing wrappers. Each checks the relevant trace parameter
// and forwards to the corresponding Print* helper. Note that reads are gated
// on the register trace flags, while writes are gated on the write flag.
void LogRead(int rt_code, PrintRegisterFormat format, uintptr_t address) {
  if (!ShouldTraceRegs()) return;
  PrintRead(rt_code, format, address);
}
void LogExtendingRead(int rt_code,
                      PrintRegisterFormat format,
                      int access_size_in_bytes,
                      uintptr_t address) {
  if (!ShouldTraceRegs()) return;
  PrintExtendingRead(rt_code, format, access_size_in_bytes, address);
}
void LogWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) {
  if (!ShouldTraceWrites()) return;
  PrintWrite(rt_code, format, address);
}
void LogVRead(int rt_code, PrintRegisterFormat format, uintptr_t address) {
  if (!ShouldTraceVRegs()) return;
  PrintVRead(rt_code, format, address);
}
void LogVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) {
  if (!ShouldTraceWrites()) return;
  PrintVWrite(rt_code, format, address);
}
void LogZRead(int rt_code, uintptr_t address) {
  if (!ShouldTraceVRegs()) return;
  PrintZRead(rt_code, address);
}
void LogZWrite(int rt_code, uintptr_t address) {
  if (!ShouldTraceWrites()) return;
  PrintZWrite(rt_code, address);
}
void LogPRead(int rt_code, uintptr_t address) {
  if (!ShouldTraceVRegs()) return;
  PrintPRead(rt_code, address);
}
void LogPWrite(int rt_code, uintptr_t address) {
  if (!ShouldTraceWrites()) return;
  PrintPWrite(rt_code, address);
}
// Helpers for the above, where the access operation is parameterised.
// - For loads, set op = "<-".
// - For stores, set op = "->".
void PrintAccess(int rt_code,
PrintRegisterFormat format,
const char* op,
uintptr_t address);
void PrintVAccess(int rt_code,
PrintRegisterFormat format,
const char* op,
uintptr_t address);
// Simple, unpredicated SVE accesses always access the whole vector, and never
// know the lane type, so these don't accept a `format`.
void PrintZAccess(int rt_code, const char* op, uintptr_t address);
void PrintPAccess(int rt_code, const char* op, uintptr_t address);
// Multiple-structure accesses.
void PrintVStructAccess(int rt_code,
int reg_count,
PrintRegisterFormat format,
const char* op,
uintptr_t address);
// Single-structure (single-lane) accesses.
void PrintVSingleStructAccess(int rt_code,
int reg_count,
int lane,
PrintRegisterFormat format,
const char* op,
uintptr_t address);
// Replicating accesses.
void PrintVReplicatingStructAccess(int rt_code,
int reg_count,
PrintRegisterFormat format,
const char* op,
uintptr_t address);
// Multiple-structure accesses.
void PrintZStructAccess(int rt_code,
int reg_count,
const LogicPRegister& pg,
PrintRegisterFormat format,
int msize_in_bytes,
const char* op,
const LogicSVEAddressVector& addr);
// Register-printing helper for all structured accessors.
//
// All lanes (according to `format`) are printed, but lanes indicated by
// `focus_mask` are of particular interest. Each bit corresponds to a byte in
// the printed register, in a manner similar to SVE's predicates. Currently,
// this is used to determine when to print human-readable FP annotations.
void PrintVRegistersForStructuredAccess(int rt_code,
int reg_count,
uint16_t focus_mask,
PrintRegisterFormat format);
// As for the VRegister variant, but print partial Z register names.
void PrintZRegistersForStructuredAccess(int rt_code,
int q_index,
int reg_count,
uint16_t focus_mask,
PrintRegisterFormat format);
// Print part of a memory access. This should be used for annotating
// non-trivial accesses, such as structured or sign-extending loads. Call
// Print*Register (or Print*RegistersForStructuredAccess), then
// PrintPartialAccess for each contiguous access that makes up the
// instruction.
//
// access_mask:
// The lanes to be printed. Each bit corresponds to a byte in the printed
// register, in a manner similar to SVE's predicates, except that the
// lane size is not respected when interpreting lane_mask: unaligned bits
// must be zeroed.
//
// This function asserts that this mask is non-zero.
//
// future_access_mask:
// The lanes to be printed by a future invocation. This must be specified
// because vertical lines are drawn for partial accesses that haven't yet
// been printed. The format is the same as for accessed_mask.
//
// If a lane is active in both `access_mask` and `future_access_mask`,
// `access_mask` takes precedence.
//
// struct_element_count:
// The number of elements in each structure. For non-structured accesses,
// set this to one. Along with lane_size_in_bytes, this is used determine
// the size of each access, and to format the accessed value.
//
// op:
// For stores, use "->". For loads, use "<-".
//
// address:
// The address of this partial access. (Not the base address of the whole
// instruction.) The traced value is read from this address (according to
// part_count and lane_size_in_bytes) so it must be accessible, and when
// tracing stores, the store must have been executed before this function
// is called.
//
// reg_size_in_bytes:
// The size of the register being accessed. This helper is usually used
// for V registers or Q-sized chunks of Z registers, so that is the
// default, but it is possible to use this to annotate X register
// accesses by specifying kXRegSizeInBytes.
//
// The return value is a future_access_mask suitable for the next iteration,
// so that it is possible to execute this in a loop, until the mask is zero.
// Note that accessed_mask must still be updated by the caller for each call.
uint16_t PrintPartialAccess(uint16_t access_mask,
uint16_t future_access_mask,
int struct_element_count,
int lane_size_in_bytes,
const char* op,
uintptr_t address,
int reg_size_in_bytes = kQRegSizeInBytes);
// Print an abstract register value. This works for all register types, and
// can print parts of registers. This exists to ensure consistent formatting
// of values.
void PrintRegisterValue(const uint8_t* value,
int value_size,
PrintRegisterFormat format);
// Convenience overload: print the value of any simulated register type,
// truncating to at most one Q-sized (16-byte) chunk.
template <typename T>
void PrintRegisterValue(const T& sim_register, PrintRegisterFormat format) {
  const auto printed_size_in_bytes =
      std::min(sim_register.GetSizeInBytes(), kQRegSizeInBytes);
  PrintRegisterValue(sim_register.GetBytes(), printed_size_in_bytes, format);
}
// As above, but format as an SVE predicate value, using binary notation with
// spaces between each bit so that they align with the Z register bytes that
// they predicate.
void PrintPRegisterValue(uint16_t value);
void PrintRegisterValueFPAnnotations(const uint8_t* value,
uint16_t lane_mask,
PrintRegisterFormat format);
// Convenience overload: annotate any simulated register type, for the lanes
// selected by `lane_mask`.
template <typename T>
void PrintRegisterValueFPAnnotations(const T& sim_register,
                                     uint16_t lane_mask,
                                     PrintRegisterFormat format) {
  PrintRegisterValueFPAnnotations(sim_register.GetBytes(), lane_mask, format);
}
// As above, deriving the lane mask from `format` itself.
template <typename T>
void PrintRegisterValueFPAnnotations(const T& sim_register,
                                     PrintRegisterFormat format) {
  PrintRegisterValueFPAnnotations(sim_register,
                                  GetPrintRegLaneMask(format),
                                  format);
}
VIXL_NO_RETURN void DoUnreachable(const Instruction* instr);
void DoTrace(const Instruction* instr);
void DoLog(const Instruction* instr);
static const char* WRegNameForCode(unsigned code,
Reg31Mode mode = Reg31IsZeroRegister);
static const char* XRegNameForCode(unsigned code,
Reg31Mode mode = Reg31IsZeroRegister);
static const char* BRegNameForCode(unsigned code);
static const char* HRegNameForCode(unsigned code);
static const char* SRegNameForCode(unsigned code);
static const char* DRegNameForCode(unsigned code);
static const char* VRegNameForCode(unsigned code);
static const char* ZRegNameForCode(unsigned code);
static const char* PRegNameForCode(unsigned code);
// Whether trace output uses ANSI colour escape sequences.
bool IsColouredTrace() const { return coloured_trace_; }
// Deprecated name, retained for backwards compatibility.
VIXL_DEPRECATED("IsColouredTrace", bool coloured_trace() const) {
  return IsColouredTrace();
}
void SetColouredTrace(bool value);
// Deprecated name, retained for backwards compatibility.
VIXL_DEPRECATED("SetColouredTrace", void set_coloured_trace(bool value)) {
  SetColouredTrace(value);
}
// Values for traces parameters defined in simulator-constants-aarch64.h in
// enum TraceParameters. The result is a bitfield of LOG_* flags.
int GetTraceParameters() const { return trace_parameters_; }
// Deprecated name, retained for backwards compatibility.
VIXL_DEPRECATED("GetTraceParameters", int trace_parameters() const) {
  return GetTraceParameters();
}
// Each ShouldTrace* predicate tests one LOG_* bit of the trace parameters.
bool ShouldTraceWrites() const {
  const int params = GetTraceParameters();
  return (params & LOG_WRITE) != 0;
}
bool ShouldTraceRegs() const {
  const int params = GetTraceParameters();
  return (params & LOG_REGS) != 0;
}
bool ShouldTraceVRegs() const {
  const int params = GetTraceParameters();
  return (params & LOG_VREGS) != 0;
}
bool ShouldTraceSysRegs() const {
  const int params = GetTraceParameters();
  return (params & LOG_SYSREGS) != 0;
}
bool ShouldTraceBranches() const {
  const int params = GetTraceParameters();
  return (params & LOG_BRANCH) != 0;
}
// Set the trace parameters (a bitfield of LOG_* flags).
void SetTraceParameters(int parameters);
// Deprecated name, retained for backwards compatibility.
VIXL_DEPRECATED("SetTraceParameters",
                void set_trace_parameters(int parameters)) {
  SetTraceParameters(parameters);
}
// Clear the simulated local monitor to force the next store-exclusive
// instruction to fail.
void ClearLocalMonitor() { local_monitor_.Clear(); }
// Suppress the warning that is otherwise printed on exclusive accesses.
void SilenceExclusiveAccessWarning() {
  print_exclusive_access_warning_ = false;
}
void CheckIsValidUnalignedAtomicAccess(int rn,
uint64_t address,
unsigned access_size) {
// Verify that the address is available to the host.
VIXL_ASSERT(address == static_cast<uintptr_t>(address));
if (GetCPUFeatures()->Has(CPUFeatures::kUSCAT)) {
// Check that the access falls entirely within one atomic access granule.
if (AlignDown(address, kAtomicAccessGranule) !=
AlignDown(address + access_size - 1, kAtomicAccessGranule)) {
VIXL_ALIGNMENT_EXCEPTION();
}
} else {
// Check that the access is aligned.
if (AlignDown(address, access_size) != address) {
VIXL_ALIGNMENT_EXCEPTION();
}
}
// The sp must be aligned to 16 bytes when it is accessed.
if ((rn == kSpRegCode) && (AlignDown(address, 16) != address)) {
VIXL_ALIGNMENT_EXCEPTION();
}
}
// Pointer authentication (PAuth) support types and queries.
enum PointerType { kDataPointer, kInstructionPointer };
// A 128-bit PAC key (high/low halves) plus its key number.
struct PACKey {
  uint64_t high;
  uint64_t low;
  int number;
};
// Current implementation is that all pointers are tagged.
bool HasTBI(uint64_t ptr, PointerType type) {
  USE(ptr, type);
  return true;
}
// Current implementation uses 48-bit virtual addresses.
// `ttbr` selects the translation table base register (0 or 1); both halves
// use the same (fixed) address size here.
int GetBottomPACBit(uint64_t ptr, int ttbr) {
  USE(ptr, ttbr);
  VIXL_ASSERT((ttbr == 0) || (ttbr == 1));
  return 48;
}
// The top PAC bit is 55 for the purposes of relative bit fields with TBI,
// however bit 55 is the TTBR bit regardless of TBI so isn't part of the PAC
// codes in pointers.
int GetTopPACBit(uint64_t ptr, PointerType type) {
  return HasTBI(ptr, type) ? 55 : 63;
}
// Armv8.3 Pointer authentication helpers.
uint64_t CalculatePACMask(uint64_t ptr, PointerType type, int ext_bit);
uint64_t ComputePAC(uint64_t data, uint64_t context, PACKey key);
uint64_t AuthPAC(uint64_t ptr,
uint64_t context,
PACKey key,
PointerType type);
uint64_t AddPAC(uint64_t ptr, uint64_t context, PACKey key, PointerType type);
uint64_t StripPAC(uint64_t ptr, PointerType type);
// The common CPUFeatures interface with the set of available features.
// Features are stored on the auditor, which records which ones the simulated
// code actually exercises.
CPUFeatures* GetCPUFeatures() {
  return cpu_features_auditor_.GetCPUFeatures();
}
void SetCPUFeatures(const CPUFeatures& cpu_features) {
  cpu_features_auditor_.SetCPUFeatures(cpu_features);
}
// The set of features that the simulator has encountered.
const CPUFeatures& GetSeenFeatures() {
  return cpu_features_auditor_.GetSeenFeatures();
}
// Forget the features encountered so far.
void ResetSeenFeatures() { cpu_features_auditor_.ResetSeenFeatures(); }
// Runtime call emulation support.
// It requires VIXL's ABI features, and C++11 or greater.
// Also, the initialisation of the tuples in RuntimeCall(Non)Void is incorrect
// in GCC before 4.9.1: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51253
#if defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
(defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
#define VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT
// The implementation of the runtime call helpers require the functionality
// provided by `std::index_sequence`. It is only available from C++14, but
// we want runtime call simulation to work from C++11, so we emulate if
// necessary.
#if __cplusplus >= 201402L
template <std::size_t... I>
using local_index_sequence = std::index_sequence<I...>;
template <typename... P>
using __local_index_sequence_for = std::index_sequence_for<P...>;
#else
// Emulate the behaviour of `std::index_sequence` and
// `std::index_sequence_for`.
// Naming follows the `std` names, prefixed with `emulated_`.
template <size_t... I>
struct emulated_index_sequence {};
// A recursive template to create a sequence of indexes.
// The base case (for `N == 0`) is declared outside of the class scope, as
// required by C++.
template <std::size_t N, size_t... I>
struct emulated_make_index_sequence_helper
    : emulated_make_index_sequence_helper<N - 1, N - 1, I...> {};
template <std::size_t N>
struct emulated_make_index_sequence : emulated_make_index_sequence_helper<N> {
};
template <typename... P>
struct emulated_index_sequence_for
    : emulated_make_index_sequence<sizeof...(P)> {};
template <std::size_t... I>
using local_index_sequence = emulated_index_sequence<I...>;
// NOTE(review): identifiers starting with a double underscore are reserved to
// the implementation; consider renaming `__local_index_sequence_for` (this
// requires a coordinated change to the alias in the C++14 branch and to the
// RuntimeCall* call sites).
template <typename... P>
using __local_index_sequence_for = emulated_index_sequence_for<P...>;
#endif
// Expand the argument tuple and perform the call.
// `I...` indexes the tuple elements; the pack expansion forwards each one as
// the corresponding parameter of `function`.
template <typename R, typename... P, std::size_t... I>
R DoRuntimeCall(R (*function)(P...),
                std::tuple<P...> arguments,
                local_index_sequence<I...>) {
  return function(std::get<I>(arguments)...);
}
// Simulate a call to a host function with a non-void return type: read each
// parameter from the simulated registers/stack per the AAPCS64 ABI, call the
// function natively, and write the result back to the simulated return
// location.
// The braced-init-list guarantees left-to-right evaluation, so the
// ReadGenericOperand calls consume ABI parameter slots in declaration order.
template <typename R, typename... P>
void RuntimeCallNonVoid(R (*function)(P...)) {
  ABI abi;
  std::tuple<P...> argument_operands{
      ReadGenericOperand<P>(abi.GetNextParameterGenericOperand<P>())...};
  R return_value = DoRuntimeCall(function,
                                 argument_operands,
                                 __local_index_sequence_for<P...>{});
  WriteGenericOperand(abi.GetReturnGenericOperand<R>(), return_value);
}
// As above, for functions returning void (no result to write back).
template <typename R, typename... P>
void RuntimeCallVoid(R (*function)(P...)) {
  ABI abi;
  std::tuple<P...> argument_operands{
      ReadGenericOperand<P>(abi.GetNextParameterGenericOperand<P>())...};
  DoRuntimeCall(function,
                argument_operands,
                __local_index_sequence_for<P...>{});
}
// We use `struct` for `void` return type specialisation.
// The Wrapper reconstitutes the typed function pointer from the raw
// `function_pointer` and dispatches to the appropriate RuntimeCall* helper.
template <typename R, typename... P>
struct RuntimeCallStructHelper {
  static void Wrapper(Simulator* simulator, uintptr_t function_pointer) {
    R (*function)(P...) = reinterpret_cast<R (*)(P...)>(function_pointer);
    simulator->RuntimeCallNonVoid(function);
  }
};
// Partial specialization when the return type is `void`.
template <typename... P>
struct RuntimeCallStructHelper<void, P...> {
  static void Wrapper(Simulator* simulator, uintptr_t function_pointer) {
    void (*function)(P...) =
        reinterpret_cast<void (*)(P...)>(function_pointer);
    simulator->RuntimeCallVoid(function);
  }
};
#endif
// Configure the simulated value of 'VL', which is the size of a Z register.
// Because this cannot occur during a program's lifetime, this function also
// resets the SVE registers.
void SetVectorLengthInBits(unsigned vector_length);
// The simulated SVE vector length ('VL'), in bits.
unsigned GetVectorLengthInBits() const { return vector_length_; }
// VL in bytes; VL is always a whole number of bytes.
unsigned GetVectorLengthInBytes() const {
  VIXL_ASSERT((vector_length_ % kBitsPerByte) == 0);
  return vector_length_ / kBitsPerByte;
}
// The predicate length ('PL') in bits: one P-register bit per Z-register
// byte.
unsigned GetPredicateLengthInBits() const {
  unsigned vl_in_bits = GetVectorLengthInBits();
  VIXL_ASSERT((vl_in_bits % kZRegBitsPerPRegBit) == 0);
  return vl_in_bits / kZRegBitsPerPRegBit;
}
// PL in bytes.
unsigned GetPredicateLengthInBytes() const {
  unsigned vl_in_bytes = GetVectorLengthInBytes();
  VIXL_ASSERT((vl_in_bytes % kZRegBitsPerPRegBit) == 0);
  return vl_in_bytes / kZRegBitsPerPRegBit;
}
// As the free functions of the same names (instructions-aarch64.h), except
// that SVE formats take their size from the configured vector length.
unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) const {
  if (!IsSVEFormat(vform)) {
    return vixl::aarch64::RegisterSizeInBitsFromFormat(vform);
  }
  return GetVectorLengthInBits();
}
unsigned RegisterSizeInBytesFromFormat(VectorFormat vform) const {
  unsigned bits = RegisterSizeInBitsFromFormat(vform);
  VIXL_ASSERT((bits % kBitsPerByte) == 0);
  return bits / kBitsPerByte;
}
int LaneCountFromFormat(VectorFormat vform) const {
  if (!IsSVEFormat(vform)) {
    return vixl::aarch64::LaneCountFromFormat(vform);
  }
  return GetVectorLengthInBits() / LaneSizeInBitsFromFormat(vform);
}
// True if `bits` is active at the first lane where `mask` is active (false if
// `mask` has no active lanes).
bool IsFirstActive(VectorFormat vform,
                   const LogicPRegister& mask,
                   const LogicPRegister& bits) {
  int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    if (!mask.IsActive(vform, lane)) continue;
    return bits.IsActive(vform, lane);
  }
  return false;
}
// True if no lane is active in both `mask` and `bits`.
bool AreNoneActive(VectorFormat vform,
                   const LogicPRegister& mask,
                   const LogicPRegister& bits) {
  int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; lane++) {
    if (!mask.IsActive(vform, lane)) continue;
    if (bits.IsActive(vform, lane)) return false;
  }
  return true;
}
// True if `bits` is active at the last lane where `mask` is active (false if
// `mask` has no active lanes).
bool IsLastActive(VectorFormat vform,
                  const LogicPRegister& mask,
                  const LogicPRegister& bits) {
  for (int lane = LaneCountFromFormat(vform) - 1; lane >= 0; lane--) {
    if (!mask.IsActive(vform, lane)) continue;
    return bits.IsActive(vform, lane);
  }
  return false;
}
// Set NZCV as for an SVE predicate test of `bits` governed by `mask`:
//   N: the first active lane is set.
//   Z: no active lane is set.
//   C: the last active lane is NOT set.
//   V: always cleared.
void PredTest(VectorFormat vform,
              const LogicPRegister& mask,
              const LogicPRegister& bits) {
  ReadNzcv().SetN(IsFirstActive(vform, mask, bits));
  ReadNzcv().SetZ(AreNoneActive(vform, mask, bits));
  ReadNzcv().SetC(!IsLastActive(vform, mask, bits));
  ReadNzcv().SetV(0);
  LogSystemRegister(NZCV);
}
// An all-true predicate, for use as a default governing predicate.
SimPRegister& GetPTrue() { return pregister_all_true_; }
protected:
const char* clr_normal;
const char* clr_flag_name;
const char* clr_flag_value;
const char* clr_reg_name;
const char* clr_reg_value;
const char* clr_vreg_name;
const char* clr_vreg_value;
const char* clr_preg_name;
const char* clr_preg_value;
const char* clr_memory_address;
const char* clr_warning;
const char* clr_warning_message;
const char* clr_printf;
const char* clr_branch_marker;
// Simulation helpers ------------------------------------
void ResetSystemRegisters();
void ResetRegisters();
void ResetVRegisters();
void ResetPRegisters();
void ResetFFR();
// Evaluate an AArch64 condition code against the current NZCV flags.
bool ConditionPassed(Condition cond) {
  switch (cond) {
    // eq: equal (Z set).
    case eq:
      return ReadZ();
    // ne: not equal (Z clear).
    case ne:
      return !ReadZ();
    // hs/cs: unsigned higher or same (C set).
    case hs:
      return ReadC();
    // lo/cc: unsigned lower (C clear).
    case lo:
      return !ReadC();
    // mi: negative (N set).
    case mi:
      return ReadN();
    // pl: positive or zero (N clear).
    case pl:
      return !ReadN();
    // vs: overflow (V set).
    case vs:
      return ReadV();
    // vc: no overflow (V clear).
    case vc:
      return !ReadV();
    // hi: unsigned higher (C set and Z clear).
    case hi:
      return ReadC() && !ReadZ();
    // ls: unsigned lower or same (the inverse of hi).
    case ls:
      return !(ReadC() && !ReadZ());
    // ge: signed greater than or equal (N == V).
    case ge:
      return ReadN() == ReadV();
    // lt: signed less than (N != V).
    case lt:
      return ReadN() != ReadV();
    // gt: signed greater than (Z clear and N == V).
    case gt:
      return !ReadZ() && (ReadN() == ReadV());
    // le: signed less than or equal (the inverse of gt).
    case le:
      return !(!ReadZ() && (ReadN() == ReadV()));
    // nv behaves as al ("always") on AArch64.
    case nv:
      VIXL_FALLTHROUGH();
    case al:
      return true;
    default:
      VIXL_UNREACHABLE();
      return false;
  }
}
// As above, for a raw condition field extracted from an instruction.
bool ConditionPassed(Instr cond) {
  return ConditionPassed(static_cast<Condition>(cond));
}
bool ConditionFailed(Condition cond) { return !ConditionPassed(cond); }
void AddSubHelper(const Instruction* instr, int64_t op2);
uint64_t AddWithCarry(unsigned reg_size,
bool set_flags,
uint64_t left,
uint64_t right,
int carry_in = 0);
std::pair<uint64_t, uint8_t> AddWithCarry(unsigned reg_size,
uint64_t left,
uint64_t right,
int carry_in);
void LogicalHelper(const Instruction* instr, int64_t op2);
void ConditionalCompareHelper(const Instruction* instr, int64_t op2);
void LoadStoreHelper(const Instruction* instr,
int64_t offset,
AddrMode addrmode);
void LoadStorePairHelper(const Instruction* instr, AddrMode addrmode);
template <typename T>
void CompareAndSwapHelper(const Instruction* instr);
template <typename T>
void CompareAndSwapPairHelper(const Instruction* instr);
template <typename T>
void AtomicMemorySimpleHelper(const Instruction* instr);
template <typename T>
void AtomicMemorySwapHelper(const Instruction* instr);
template <typename T>
void LoadAcquireRCpcHelper(const Instruction* instr);
template <typename T1, typename T2>
void LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr);
template <typename T>
void StoreReleaseUnscaledOffsetHelper(const Instruction* instr);
uintptr_t AddressModeHelper(unsigned addr_reg,
int64_t offset,
AddrMode addrmode);
void NEONLoadStoreMultiStructHelper(const Instruction* instr,
AddrMode addr_mode);
void NEONLoadStoreSingleStructHelper(const Instruction* instr,
AddrMode addr_mode);
// Strip the top-byte tag from an address so it can be dereferenced on the
// host.
uint64_t AddressUntag(uint64_t address) { return address & ~kAddressTagMask; }
// As above, for typed pointers.
template <typename T>
T* AddressUntag(T* address) {
  uintptr_t address_raw = reinterpret_cast<uintptr_t>(address);
  return reinterpret_cast<T*>(AddressUntag(address_raw));
}
int64_t ShiftOperand(unsigned reg_size,
uint64_t value,
Shift shift_type,
unsigned amount) const;
int64_t ExtendValue(unsigned reg_width,
int64_t value,
Extend extend_type,
unsigned left_shift = 0) const;
uint64_t PolynomialMult(uint64_t op1,
uint64_t op2,
int lane_size_in_bits) const;
void ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr);
void ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr);
void ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr);
void ld1r(VectorFormat vform,
VectorFormat unpack_vform,
LogicVRegister dst,
uint64_t addr,
bool is_signed = false);
void ld2(VectorFormat vform,
LogicVRegister dst1,
LogicVRegister dst2,
uint64_t addr);
void ld2(VectorFormat vform,
LogicVRegister dst1,
LogicVRegister dst2,
int index,
uint64_t addr);
void ld2r(VectorFormat vform,
LogicVRegister dst1,
LogicVRegister dst2,
uint64_t addr);
void ld3(VectorFormat vform,
LogicVRegister dst1,
LogicVRegister dst2,
LogicVRegister dst3,
uint64_t addr);
void ld3(VectorFormat vform,
LogicVRegister dst1,
LogicVRegister dst2,
LogicVRegister dst3,
int index,
uint64_t addr);
void ld3r(VectorFormat vform,
LogicVRegister dst1,
LogicVRegister dst2,
LogicVRegister dst3,
uint64_t addr);
void ld4(VectorFormat vform,
LogicVRegister dst1,
LogicVRegister dst2,
LogicVRegister dst3,
LogicVRegister dst4,
uint64_t addr);
void ld4(VectorFormat vform,
LogicVRegister dst1,
LogicVRegister dst2,
LogicVRegister dst3,
LogicVRegister dst4,
int index,
uint64_t addr);
void ld4r(VectorFormat vform,
LogicVRegister dst1,
LogicVRegister dst2,
LogicVRegister dst3,
LogicVRegister dst4,
uint64_t addr);
void st1(VectorFormat vform, LogicVRegister src, uint64_t addr);
void st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr);
void st2(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
uint64_t addr);
void st2(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
int index,
uint64_t addr);
void st3(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
LogicVRegister src3,
uint64_t addr);
void st3(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
LogicVRegister src3,
int index,
uint64_t addr);
void st4(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
LogicVRegister src3,
LogicVRegister src4,
uint64_t addr);
void st4(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
LogicVRegister src3,
LogicVRegister src4,
int index,
uint64_t addr);
LogicVRegister cmp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
Condition cond);
LogicVRegister cmp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
int imm,
Condition cond);
LogicVRegister cmptst(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister add(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
// Add `value` to each lane of `src1`, treating `value` as unsigned for the
// purposes of setting the saturation flags.
LogicVRegister add_uint(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
uint64_t value);
LogicVRegister addp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicPRegister brka(LogicPRegister pd,
const LogicPRegister& pg,
const LogicPRegister& pn);
LogicPRegister brkb(LogicPRegister pd,
const LogicPRegister& pg,
const LogicPRegister& pn);
LogicPRegister brkn(LogicPRegister pdm,
const LogicPRegister& pg,
const LogicPRegister& pn);
LogicPRegister brkpa(LogicPRegister pd,
const LogicPRegister& pg,
const LogicPRegister& pn,
const LogicPRegister& pm);
LogicPRegister brkpb(LogicPRegister pd,
const LogicPRegister& pg,
const LogicPRegister& pn,
const LogicPRegister& pm);
// dst = srca + src1 * src2
LogicVRegister mla(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2);
// dst = srca - src1 * src2
LogicVRegister mls(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister mul(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister mul(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister mla(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister mls(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister pmul(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister sdiv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister udiv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister fmul(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister fmla(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister fmlal(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister fmlal2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister fmls(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister fmlsl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister fmlsl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister fmulx(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister smulh(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister smull(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister smull2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister umull(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister umull2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister smlal(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister smlal2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister umlal(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister umlal2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister smlsl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister smlsl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister umlsl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister umlsl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister umulh(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister sqdmull(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister sqdmull2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister sqdmlal(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister sqdmlal2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister sqdmlsl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister sqdmlsl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister sqdmulh(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister sqrdmulh(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister sdot(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister sqrdmlah(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister udot(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister sqrdmlsh(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister sub(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
// Subtract `value` from each lane of `src1`, treating `value` as unsigned for
// the purposes of setting the saturation flags.
LogicVRegister sub_uint(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
uint64_t value);
LogicVRegister and_(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister orr(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister orn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister eor(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister bic(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister bic(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
uint64_t imm);
LogicVRegister bif(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister bit(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister bsl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister cls(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister clz(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister cnot(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister cnt(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister not_(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister rbit(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister rev(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister rev_byte(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int rev_size);
LogicVRegister rev16(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister rev32(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister rev64(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister addlp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
bool is_signed,
bool do_accumulate);
LogicVRegister saddlp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister uaddlp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister sadalp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister uadalp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister ror(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int rotation);
LogicVRegister ext(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
LogicVRegister rotate_elements_right(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int index);
template <typename T>
LogicVRegister fcadd(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int rot);
LogicVRegister fcadd(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int rot);
template <typename T>
LogicVRegister fcmla(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
const LogicVRegister& acc,
int index,
int rot);
LogicVRegister fcmla(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index,
int rot);
LogicVRegister fcmla(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
const LogicVRegister& acc,
int rot);
template <typename T>
LogicVRegister fadda(VectorFormat vform,
LogicVRegister acc,
const LogicPRegister& pg,
const LogicVRegister& src);
LogicVRegister fadda(VectorFormat vform,
LogicVRegister acc,
const LogicPRegister& pg,
const LogicVRegister& src);
LogicVRegister cadd(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int rot,
bool saturate = false);
LogicVRegister bgrp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool do_bext = false);
LogicVRegister bdep(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister histcnt(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister index(VectorFormat vform,
LogicVRegister dst,
uint64_t start,
uint64_t step);
LogicVRegister ins_element(VectorFormat vform,
LogicVRegister dst,
int dst_index,
const LogicVRegister& src,
int src_index);
LogicVRegister ins_immediate(VectorFormat vform,
LogicVRegister dst,
int dst_index,
uint64_t imm);
LogicVRegister insr(VectorFormat vform, LogicVRegister dst, uint64_t imm);
LogicVRegister dup_element(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int src_index);
LogicVRegister dup_elements_to_segments(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int src_index);
LogicVRegister dup_immediate(VectorFormat vform,
LogicVRegister dst,
uint64_t imm);
LogicVRegister mov(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicPRegister mov(LogicPRegister dst, const LogicPRegister& src);
LogicVRegister mov_merging(VectorFormat vform,
LogicVRegister dst,
const SimPRegister& pg,
const LogicVRegister& src);
LogicVRegister mov_zeroing(VectorFormat vform,
LogicVRegister dst,
const SimPRegister& pg,
const LogicVRegister& src);
LogicVRegister mov_alternating(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int start_at);
LogicPRegister mov_merging(LogicPRegister dst,
const LogicPRegister& pg,
const LogicPRegister& src);
LogicPRegister mov_zeroing(LogicPRegister dst,
const LogicPRegister& pg,
const LogicPRegister& src);
LogicVRegister movi(VectorFormat vform, LogicVRegister dst, uint64_t imm);
LogicVRegister mvni(VectorFormat vform, LogicVRegister dst, uint64_t imm);
LogicVRegister orr(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
uint64_t imm);
LogicVRegister sshl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool shift_is_8bit = true);
LogicVRegister ushl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool shift_is_8bit = true);
LogicVRegister sshr(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister ushr(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
// Perform a "conditional last" operation. The first part of the pair is true
// if any predicate lane is active, false otherwise. The second part takes the
// value of the last active (plus offset) lane, or last (plus offset) lane if
// none active.
std::pair<bool, uint64_t> clast(VectorFormat vform,
const LogicPRegister& pg,
const LogicVRegister& src2,
int offset_from_last_active);
LogicPRegister match(VectorFormat vform,
LogicPRegister dst,
const LogicVRegister& haystack,
const LogicVRegister& needles,
bool negate_match);
LogicVRegister compact(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src);
LogicVRegister splice(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister sel(VectorFormat vform,
LogicVRegister dst,
const SimPRegister& pg,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicPRegister sel(LogicPRegister dst,
const LogicPRegister& pg,
const LogicPRegister& src1,
const LogicPRegister& src2);
LogicVRegister sminmax(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool max);
LogicVRegister smax(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister smin(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister sminmaxp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool max);
LogicVRegister smaxp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister sminp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister addp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister addv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister uaddlv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister saddlv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister sminmaxv(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src,
bool max);
LogicVRegister smaxv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister sminv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister uxtl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister uxtl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister sxtl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister sxtl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister uxt(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
unsigned from_size_in_bits);
LogicVRegister sxt(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
unsigned from_size_in_bits);
LogicVRegister tbl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& tab,
const LogicVRegister& ind);
LogicVRegister tbl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& tab,
const LogicVRegister& tab2,
const LogicVRegister& ind);
LogicVRegister tbl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& tab,
const LogicVRegister& tab2,
const LogicVRegister& tab3,
const LogicVRegister& ind);
LogicVRegister tbl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& tab,
const LogicVRegister& tab2,
const LogicVRegister& tab3,
const LogicVRegister& tab4,
const LogicVRegister& ind);
LogicVRegister Table(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
const LogicVRegister& tab);
LogicVRegister Table(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& ind,
bool zero_out_of_bounds,
const LogicVRegister* tab1,
const LogicVRegister* tab2 = NULL,
const LogicVRegister* tab3 = NULL,
const LogicVRegister* tab4 = NULL);
LogicVRegister tbx(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& tab,
const LogicVRegister& ind);
LogicVRegister tbx(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& tab,
const LogicVRegister& tab2,
const LogicVRegister& ind);
LogicVRegister tbx(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& tab,
const LogicVRegister& tab2,
const LogicVRegister& tab3,
const LogicVRegister& ind);
LogicVRegister tbx(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& tab,
const LogicVRegister& tab2,
const LogicVRegister& tab3,
const LogicVRegister& tab4,
const LogicVRegister& ind);
LogicVRegister uaddl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister uaddl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister uaddw(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister uaddw2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister saddl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister saddl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister saddw(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister saddw2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister usubl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister usubl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister usubw(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister usubw2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister ssubl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister ssubl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister ssubw(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister ssubw2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister uminmax(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool max);
LogicVRegister umax(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister umin(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister uminmaxp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool max);
LogicVRegister umaxp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister uminp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister uminmaxv(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src,
bool max);
LogicVRegister umaxv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister uminv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister trn1(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister trn2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister zip1(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister zip2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister uzp1(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister uzp2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister shl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister scvtf(VectorFormat vform,
unsigned dst_data_size_in_bits,
unsigned src_data_size_in_bits,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src,
FPRounding round,
int fbits = 0);
LogicVRegister scvtf(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int fbits,
FPRounding rounding_mode);
LogicVRegister ucvtf(VectorFormat vform,
unsigned dst_data_size,
unsigned src_data_size,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src,
FPRounding round,
int fbits = 0);
LogicVRegister ucvtf(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int fbits,
FPRounding rounding_mode);
LogicVRegister sshll(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister sshll2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister shll(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister shll2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister ushll(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister ushll2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister sli(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister sri(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister sshr(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister ushr(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister ssra(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister usra(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister srsra(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister ursra(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister suqadd(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister usqadd(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister sqshl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister uqshl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister sqshlu(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister abs(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister neg(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister extractnarrow(VectorFormat vform,
LogicVRegister dst,
bool dst_is_signed,
const LogicVRegister& src,
bool src_is_signed);
LogicVRegister xtn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister sqxtn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister uqxtn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister sqxtun(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister absdiff(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool is_signed);
LogicVRegister saba(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister uaba(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister shrn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister shrn2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister rshrn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister rshrn2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister uqshrn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister uqshrn2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister uqrshrn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister uqrshrn2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister sqshrn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister sqshrn2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister sqrshrn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister sqrshrn2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister sqshrun(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister sqshrun2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister sqrshrun(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister sqrshrun2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift);
LogicVRegister sqrdmulh(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool round = true);
LogicVRegister dot(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool is_signed);
LogicVRegister sdot(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister udot(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister sqrdmlash(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool round = true,
bool sub_op = false);
LogicVRegister sqrdmlah(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool round = true);
LogicVRegister sqrdmlsh(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool round = true);
LogicVRegister sqdmulh(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
// X-macro list of NEON instructions that take a destination plus two source
// vector registers and share an identical helper signature: the narrowing
// "high half" add/subtract family, polynomial multiplies, and the widening
// absolute-difference / multiply / multiply-accumulate families.
#define NEON_3VREG_LOGIC_LIST(V) \
V(addhn) \
V(addhn2) \
V(raddhn) \
V(raddhn2) \
V(subhn) \
V(subhn2) \
V(rsubhn) \
V(rsubhn2) \
V(pmull) \
V(pmull2) \
V(sabal) \
V(sabal2) \
V(uabal) \
V(uabal2) \
V(sabdl) \
V(sabdl2) \
V(uabdl) \
V(uabdl2) \
V(smull) \
V(smull2) \
V(umull) \
V(umull2) \
V(smlal) \
V(smlal2) \
V(umlal) \
V(umlal2) \
V(smlsl) \
V(smlsl2) \
V(umlsl) \
V(umlsl2) \
V(sqdmlal) \
V(sqdmlal2) \
V(sqdmlsl) \
V(sqdmlsl2) \
V(sqdmull) \
V(sqdmull2)
// Declares one `LogicVRegister fxn(vform, dst, src1, src2)` helper for each
// entry in the list above; the macro is undefined again immediately after use.
#define DEFINE_LOGIC_FUNC(FXN) \
LogicVRegister FXN(VectorFormat vform, \
LogicVRegister dst, \
const LogicVRegister& src1, \
const LogicVRegister& src2);
NEON_3VREG_LOGIC_LIST(DEFINE_LOGIC_FUNC)
#undef DEFINE_LOGIC_FUNC
// X-macro list of three-operand, same-width floating-point NEON instructions.
// Each entry is (mnemonic, per-lane scalar helper, flag). The second and
// third columns are not consumed by the declarations below; the flag is
// presumably a NaN-processing control used by the implementation — confirm
// against the corresponding .cc file.
#define NEON_FP3SAME_LIST(V) \
V(fadd, FPAdd, false) \
V(fsub, FPSub, true) \
V(fmul, FPMul, true) \
V(fmulx, FPMulx, true) \
V(fdiv, FPDiv, true) \
V(fmax, FPMax, false) \
V(fmin, FPMin, false) \
V(fmaxnm, FPMaxNM, false) \
V(fminnm, FPMinNM, false)
// For each instruction, declare a lane-element-typed helper (template on T)
// and an untyped overload with the same argument list.
#define DECLARE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
template <typename T> \
LogicVRegister FN(VectorFormat vform, \
LogicVRegister dst, \
const LogicVRegister& src1, \
const LogicVRegister& src2); \
LogicVRegister FN(VectorFormat vform, \
LogicVRegister dst, \
const LogicVRegister& src1, \
const LogicVRegister& src2);
NEON_FP3SAME_LIST(DECLARE_NEON_FP_VECTOR_OP)
#undef DECLARE_NEON_FP_VECTOR_OP
// X-macro list of pairwise floating-point instructions. Each entry is
// (pairwise mnemonic FNP, element-wise counterpart FN, scalar routine OP);
// only FNP is referenced by the declarations below.
#define NEON_FPPAIRWISE_LIST(V) \
V(faddp, fadd, FPAdd) \
V(fmaxp, fmax, FPMax) \
V(fmaxnmp, fmaxnm, FPMaxNM) \
V(fminp, fmin, FPMin) \
V(fminnmp, fminnm, FPMinNM)
// Two overloads per instruction: a two-source vector form, and a
// single-source form (the scalar "reduce a pair of lanes" variant).
#define DECLARE_NEON_FP_PAIR_OP(FNP, FN, OP) \
LogicVRegister FNP(VectorFormat vform, \
LogicVRegister dst, \
const LogicVRegister& src1, \
const LogicVRegister& src2); \
LogicVRegister FNP(VectorFormat vform, \
LogicVRegister dst, \
const LogicVRegister& src);
NEON_FPPAIRWISE_LIST(DECLARE_NEON_FP_PAIR_OP)
#undef DECLARE_NEON_FP_PAIR_OP
// Selects the destination form for frint(): kFrintToInteger is the ordinary
// round-to-integral-floating-point behaviour; the 32/64 variants (values
// chosen to match the target width in bits) correspond to the ARMv8.5-A
// FRINT32* / FRINT64* instructions, which bound the rounded result to the
// range of a 32- or 64-bit signed integer.
enum FrintMode {
kFrintToInteger = 0,
kFrintToInt32 = 32,
kFrintToInt64 = 64
};
template <typename T>
LogicVRegister frecps(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister frecps(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
template <typename T>
LogicVRegister frsqrts(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister frsqrts(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
template <typename T>
LogicVRegister fmla(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister fmla(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2);
template <typename T>
LogicVRegister fmls(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister fmls(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister fnmul(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister fmlal(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister fmlal2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister fmlsl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister fmlsl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
template <typename T>
LogicVRegister fcmp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
Condition cond);
LogicVRegister fcmp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
Condition cond);
LogicVRegister fabscmp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
Condition cond);
LogicVRegister fcmp_zero(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
Condition cond);
template <typename T>
LogicVRegister fneg(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister fneg(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
template <typename T>
LogicVRegister frecpx(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister frecpx(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister ftsmul(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister ftssel(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister ftmad(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
unsigned index);
LogicVRegister fexpa(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
template <typename T>
LogicVRegister fscale(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister fscale(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
template <typename T>
LogicVRegister fabs_(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister fabs_(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister fabd(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister frint(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
FPRounding rounding_mode,
bool inexact_exception = false,
FrintMode frint_mode = kFrintToInteger);
LogicVRegister fcvt(VectorFormat vform,
unsigned dst_data_size_in_bits,
unsigned src_data_size_in_bits,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src);
LogicVRegister fcvts(VectorFormat vform,
unsigned dst_data_size_in_bits,
unsigned src_data_size_in_bits,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src,
FPRounding round,
int fbits = 0);
LogicVRegister fcvts(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
FPRounding rounding_mode,
int fbits = 0);
LogicVRegister fcvtu(VectorFormat vform,
unsigned dst_data_size_in_bits,
unsigned src_data_size_in_bits,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src,
FPRounding round,
int fbits = 0);
LogicVRegister fcvtu(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
FPRounding rounding_mode,
int fbits = 0);
LogicVRegister fcvtl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister fcvtl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister fcvtn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister fcvtn2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister fcvtxn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister fcvtxn2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister fsqrt(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister frsqrte(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister frecpe(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
FPRounding rounding);
LogicVRegister ursqrte(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister urecpe(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicPRegister pfalse(LogicPRegister dst);
LogicPRegister pfirst(LogicPRegister dst,
const LogicPRegister& pg,
const LogicPRegister& src);
LogicPRegister ptrue(VectorFormat vform, LogicPRegister dst, int pattern);
LogicPRegister pnext(VectorFormat vform,
LogicPRegister dst,
const LogicPRegister& pg,
const LogicPRegister& src);
LogicVRegister asrd(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
int shift);
LogicVRegister andv(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src);
LogicVRegister eorv(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src);
LogicVRegister orv(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src);
LogicVRegister saddv(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src);
LogicVRegister sminv(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src);
LogicVRegister smaxv(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src);
LogicVRegister uaddv(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src);
LogicVRegister uminv(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src);
LogicVRegister umaxv(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src);
LogicVRegister interleave_top_bottom(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
// Pointer-to-member type for a binary floating-point helper of the form
// `T Simulator::op(T a, T b)` (e.g. FPAdd, FPMax), used to parameterise the
// paired-lane reduction helpers below.
template <typename T>
struct TFPPairOp {
typedef T (Simulator::*type)(T a, T b);
};
// Reduce the lanes of `src` by repeatedly applying `fn` to pairs of lanes,
// writing the result to `dst`. `inactive_value` is the raw bit pattern used
// for lanes that do not participate — presumably the identity element for
// `fn`; confirm against the implementation.
template <typename T>
LogicVRegister FPPairedAcrossHelper(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
typename TFPPairOp<T>::type fn,
uint64_t inactive_value);
// Format-dispatching overload: takes one helper per supported lane size
// (half, single and double precision).
LogicVRegister FPPairedAcrossHelper(
VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
typename TFPPairOp<vixl::internal::SimFloat16>::type fn16,
typename TFPPairOp<float>::type fn32,
typename TFPPairOp<double>::type fn64,
uint64_t inactive_value);
LogicVRegister fminv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister fmaxv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister fminnmv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister fmaxnmv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister faddv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
// Generator polynomials for the AArch64 CRC32* and CRC32C* instructions:
// 0x04C11DB7 is the standard CRC-32 (IEEE 802.3) polynomial and 0x1EDC6F41
// is the CRC-32C (Castagnoli) polynomial.
static const uint32_t CRC32_POLY = 0x04C11DB7;
static const uint32_t CRC32C_POLY = 0x1EDC6F41;
// Polynomial (carry-less, modulo-2) reduction of the low `n` bits of `data`
// by `poly`.
uint32_t Poly32Mod2(unsigned n, uint64_t data, uint32_t poly);
// Fold `val` into the running CRC accumulator `acc` using polynomial `poly`.
template <typename T>
uint32_t Crc32Checksum(uint32_t acc, T val, uint32_t poly);
uint32_t Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly);
void SysOp_W(int op, int64_t val);
template <typename T>
T FPRecipSqrtEstimate(T op);
template <typename T>
T FPRecipEstimate(T op, FPRounding rounding);
template <typename T, typename R>
R FPToFixed(T op, int fbits, bool is_signed, FPRounding rounding);
void FPCompare(double val0, double val1, FPTrapFlags trap);
double FPRoundInt(double value, FPRounding round_mode);
double FPRoundInt(double value, FPRounding round_mode, FrintMode frint_mode);
double FPRoundIntCommon(double value, FPRounding round_mode);
double recip_sqrt_estimate(double a);
double recip_estimate(double a);
double FPRecipSqrtEstimate(double a);
double FPRecipEstimate(double a);
double FixedToDouble(int64_t src, int fbits, FPRounding round_mode);
double UFixedToDouble(uint64_t src, int fbits, FPRounding round_mode);
float FixedToFloat(int64_t src, int fbits, FPRounding round_mode);
float UFixedToFloat(uint64_t src, int fbits, FPRounding round_mode);
::vixl::internal::SimFloat16 FixedToFloat16(int64_t src,
int fbits,
FPRounding round_mode);
::vixl::internal::SimFloat16 UFixedToFloat16(uint64_t src,
int fbits,
FPRounding round_mode);
int16_t FPToInt16(double value, FPRounding rmode);
int32_t FPToInt32(double value, FPRounding rmode);
int64_t FPToInt64(double value, FPRounding rmode);
uint16_t FPToUInt16(double value, FPRounding rmode);
uint32_t FPToUInt32(double value, FPRounding rmode);
uint64_t FPToUInt64(double value, FPRounding rmode);
int32_t FPToFixedJS(double value);
template <typename T>
T FPAdd(T op1, T op2);
template <typename T>
T FPNeg(T op);
template <typename T>
T FPDiv(T op1, T op2);
template <typename T>
T FPMax(T a, T b);
template <typename T>
T FPMaxNM(T a, T b);
template <typename T>
T FPMin(T a, T b);
template <typename T>
T FPMinNM(T a, T b);
template <typename T>
T FPMul(T op1, T op2);
template <typename T>
T FPMulx(T op1, T op2);
template <typename T>
T FPMulAdd(T a, T op1, T op2);
template <typename T>
T FPSqrt(T op);
template <typename T>
T FPSub(T op1, T op2);
template <typename T>
T FPRecipStepFused(T op1, T op2);
template <typename T>
T FPRSqrtStepFused(T op1, T op2);
// Handle a floating-point exception condition (e.g. FPProcessNaN calls this
// when it encounters a signalling NaN).
// This doesn't do anything at the moment. We'll need it if we want support
// for cumulative exception bits or floating-point exceptions.
void FPProcessException() {}
bool FPProcessNaNs(const Instruction* instr);
// Pseudo Printf instruction
void DoPrintf(const Instruction* instr);
// Pseudo-instructions to configure CPU features dynamically.
void DoConfigureCPUFeatures(const Instruction* instr);
void DoSaveCPUFeatures(const Instruction* instr);
void DoRestoreCPUFeatures(const Instruction* instr);
// General arithmetic helpers ----------------------------
// Add `delta` to the accumulator (`acc`), optionally saturate, then zero- or
// sign-extend. Initial `acc` bits outside `n` are ignored, but the delta must
// be a valid int<n>_t.
uint64_t IncDecN(uint64_t acc,
int64_t delta,
unsigned n,
bool is_saturating = false,
bool is_signed = false);
// SVE helpers -------------------------------------------
LogicVRegister SVEBitwiseLogicalUnpredicatedHelper(LogicalOp op,
VectorFormat vform,
LogicVRegister zd,
const LogicVRegister& zn,
const LogicVRegister& zm);
LogicPRegister SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
LogicPRegister Pd,
const LogicPRegister& pn,
const LogicPRegister& pm);
LogicVRegister SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op,
VectorFormat vform,
LogicVRegister zd,
uint64_t imm);
// Which half of the source vector unpk() expands: the high or the low half.
enum UnpackType { kHiHalf, kLoHalf };
// Whether unpk() sign- or zero-extends each unpacked element.
enum ExtendType { kSignedExtend, kUnsignedExtend };
LogicVRegister unpk(VectorFormat vform,
LogicVRegister zd,
const LogicVRegister& zn,
UnpackType unpack_type,
ExtendType extend_type);
LogicPRegister SVEIntCompareVectorsHelper(Condition cc,
VectorFormat vform,
LogicPRegister dst,
const LogicPRegister& mask,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool is_wide_elements = false,
FlagsUpdate flags = SetFlags);
void SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
VectorFormat vform,
SVEOffsetModifier mod);
// Store each active zt<i>[lane] to `addr.GetElementAddress(lane, ...)`.
//
// `zt_code` specifies the code of the first register (zt). Each additional
// register (up to `reg_count`) is `(zt_code + i) % 32`.
//
// This helper calls LogZWrite in the proper way, according to `addr`.
void SVEStructuredStoreHelper(VectorFormat vform,
const LogicPRegister& pg,
unsigned zt_code,
const LogicSVEAddressVector& addr);
// Load each active zt<i>[lane] from `addr.GetElementAddress(lane, ...)`.
void SVEStructuredLoadHelper(VectorFormat vform,
const LogicPRegister& pg,
unsigned zt_code,
const LogicSVEAddressVector& addr,
bool is_signed = false);
// Fault-handling semantics used by SVEFaultTolerantLoadHelper.
enum SVEFaultTolerantLoadType {
// - Elements active in both FFR and pg are accessed as usual. If the access
// fails, the corresponding lane and all subsequent lanes are filled with
// an unpredictable value, and made inactive in FFR.
//
// - Elements active in FFR but not pg are set to zero.
//
// - Elements that are not active in FFR are filled with an unpredictable
// value, regardless of pg.
kSVENonFaultLoad,
// If type == kSVEFirstFaultLoad, the behaviour is the same, except that the
// first active element is always accessed, regardless of FFR, and will
// generate a real fault if it is inaccessible. If the lane is not active in
// FFR, the actual value loaded into the result is still unpredictable.
kSVEFirstFaultLoad
};
// Load with first-faulting or non-faulting load semantics, respecting and
// updating FFR.
void SVEFaultTolerantLoadHelper(VectorFormat vform,
const LogicPRegister& pg,
unsigned zt_code,
const LogicSVEAddressVector& addr,
SVEFaultTolerantLoadType type,
bool is_signed);
LogicVRegister SVEBitwiseShiftHelper(Shift shift_op,
VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool is_wide_elements);
// Pack all even- or odd-numbered elements of source vector side by side and
// place in elements of lower half the destination vector, and leave the upper
// half all zero.
// [...| H | G | F | E | D | C | B | A ]
// => [...................| G | E | C | A ]
LogicVRegister pack_even_elements(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
// [...| H | G | F | E | D | C | B | A ]
// => [...................| H | F | D | B ]
LogicVRegister pack_odd_elements(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister adcl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool top);
template <typename T>
LogicVRegister FTMaddHelper(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
uint64_t coeff_pos,
uint64_t coeff_neg);
// Return the first or last active lane, or -1 if none are active.
int GetFirstActive(VectorFormat vform, const LogicPRegister& pg) const;
int GetLastActive(VectorFormat vform, const LogicPRegister& pg) const;
int CountActiveLanes(VectorFormat vform, const LogicPRegister& pg) const;
// Count active and true lanes in `pn`.
int CountActiveAndTrueLanes(VectorFormat vform,
const LogicPRegister& pg,
const LogicPRegister& pn) const;
// Count the number of lanes referred to by `pattern`, given the vector
// length. If `pattern` is not a recognised SVEPredicateConstraint, this
// returns zero.
int GetPredicateConstraintLaneCount(VectorFormat vform, int pattern) const;
// Simulate a runtime call.
void DoRuntimeCall(const Instruction* instr);
// Processor state ---------------------------------------
// Simulated monitors for exclusive access instructions.
SimExclusiveLocalMonitor local_monitor_;
SimExclusiveGlobalMonitor global_monitor_;
// Output stream.
FILE* stream_;
PrintDisassembler* print_disasm_;
// General purpose registers. Register 31 is the stack pointer.
SimRegister registers_[kNumberOfRegisters];
// Vector registers
SimVRegister vregisters_[kNumberOfVRegisters];
// SVE predicate registers.
SimPRegister pregisters_[kNumberOfPRegisters];
// SVE first-fault register.
SimFFRRegister ffr_register_;
// A pseudo SVE predicate register with all bits set to true.
SimPRegister pregister_all_true_;
// Program Status Register.
// bits[31, 27]: Condition flags N, Z, C, and V.
// (Negative, Zero, Carry, Overflow)
SimSystemRegister nzcv_;
// Floating-Point Control Register
SimSystemRegister fpcr_;
// Only a subset of FPCR features are supported by the simulator. This helper
// checks that the FPCR settings are supported.
//
// This is checked when floating-point instructions are executed, not when
// FPCR is set. This allows generated code to modify FPCR for external
// functions, or to save and restore it when entering and leaving generated
// code.
// Check (via VIXL_ASSERT) that FPCR currently holds the only configuration
// the simulator implements; each assertion below names the restriction.
void AssertSupportedFPCR() {
// No flush-to-zero support.
VIXL_ASSERT(ReadFpcr().GetFZ() == 0);
// Ties-to-even rounding only.
VIXL_ASSERT(ReadFpcr().GetRMode() == FPTieEven);
// No alternative half-precision support.
VIXL_ASSERT(ReadFpcr().GetAHP() == 0);
}
// Compute the N (negative) condition flag: the most significant bit of
// `result` when interpreted at a width of `reg_size` bits.
static int CalcNFlag(uint64_t result, unsigned reg_size) {
  const uint64_t sign_bit = result >> (reg_size - 1);
  return static_cast<int>(sign_bit & 1);
}
// Compute the Z (zero) condition flag: set exactly when `result` is zero.
static int CalcZFlag(uint64_t result) { return static_cast<int>(result == 0); }
static const uint32_t kConditionFlagsMask = 0xf0000000;
// Stack
byte* stack_;
static const int stack_protection_size_ = 256;
// 8 KB stack.
// TODO: Make this configurable, or automatically allocate space as it runs
// out (like the OS would try to do).
static const int stack_size_ = 8 * 1024 + 2 * stack_protection_size_;
byte* stack_limit_;
Decoder* decoder_;
// Indicates if the pc has been modified by the instruction and should not be
// automatically incremented.
bool pc_modified_;
const Instruction* pc_;
// If non-NULL, the last instruction was a movprfx, and validity needs to be
// checked.
Instruction const* movprfx_;
// Branch type register, used for branch target identification.
BType btype_;
// Next value of branch type register after the current instruction has been
// decoded.
BType next_btype_;
// Global flag for enabling guarded pages.
// TODO: implement guarding at page granularity, rather than globally.
bool guard_pages_;
static const char* xreg_names[];
static const char* wreg_names[];
static const char* breg_names[];
static const char* hreg_names[];
static const char* sreg_names[];
static const char* dreg_names[];
static const char* vreg_names[];
static const char* zreg_names[];
static const char* preg_names[];
private:
using FormToVisitorFnMap =
std::map<const std::string,
const std::function<void(Simulator*, const Instruction*)>>;
static FormToVisitorFnMap form_to_visitor_;
uint32_t form_hash_;
static const PACKey kPACKeyIA;
static const PACKey kPACKeyIB;
static const PACKey kPACKeyDA;
static const PACKey kPACKeyDB;
static const PACKey kPACKeyGA;
bool CanReadMemory(uintptr_t address, size_t size);
// CanReadMemory needs dummy file descriptors, so we use a pipe. We can save
// some system call overhead by opening them on construction, rather than on
// every call to CanReadMemory.
int dummy_pipe_fd_[2];
template <typename T>
static T FPDefaultNaN();
// Standard NaN processing for a single NaN operand: process the exception
// for a signalling NaN, then either substitute the default NaN (when the
// DN bit selects default-NaN mode) or propagate the operand, quietened.
template <typename T>
T FPProcessNaN(T op) {
  VIXL_ASSERT(IsNaN(op));
  if (IsSignallingNaN(op)) {
    // FPProcessException is currently a no-op; see its definition.
    FPProcessException();
  }
  if (ReadDN() == kUseDefaultNaN) {
    return FPDefaultNaN<T>();
  }
  return ToQuietNaN(op);
}
// NaN propagation for two operands. Signalling NaNs take precedence over
// quiet NaNs, and op1 over op2. Returns zero when neither operand is a NaN.
template <typename T>
T FPProcessNaNs(T op1, T op2) {
  if (IsSignallingNaN(op1)) {
    return FPProcessNaN(op1);
  }
  if (IsSignallingNaN(op2)) {
    return FPProcessNaN(op2);
  }
  if (IsNaN(op1)) {
    // Any NaN remaining at this point must be quiet.
    VIXL_ASSERT(IsQuietNaN(op1));
    return FPProcessNaN(op1);
  }
  if (IsNaN(op2)) {
    VIXL_ASSERT(IsQuietNaN(op2));
    return FPProcessNaN(op2);
  }
  // Neither operand is a NaN.
  return 0.0;
}
// NaN propagation for three operands. Signalling NaNs take precedence over
// quiet NaNs, and lower-numbered operands over higher-numbered ones.
// Returns zero when no operand is a NaN.
template <typename T>
T FPProcessNaNs3(T op1, T op2, T op3) {
  if (IsSignallingNaN(op1)) {
    return FPProcessNaN(op1);
  }
  if (IsSignallingNaN(op2)) {
    return FPProcessNaN(op2);
  }
  if (IsSignallingNaN(op3)) {
    return FPProcessNaN(op3);
  }
  if (IsNaN(op1)) {
    // Any NaN remaining at this point must be quiet.
    VIXL_ASSERT(IsQuietNaN(op1));
    return FPProcessNaN(op1);
  }
  if (IsNaN(op2)) {
    VIXL_ASSERT(IsQuietNaN(op2));
    return FPProcessNaN(op2);
  }
  if (IsNaN(op3)) {
    VIXL_ASSERT(IsQuietNaN(op3));
    return FPProcessNaN(op3);
  }
  // No operand is a NaN.
  return 0.0;
}
// Construct a SimVRegister from a SimPRegister, where each byte-sized lane of
// the destination is set to all true (0xff) when the corresponding
// predicate flag is set, and false (0x00) otherwise.
SimVRegister ExpandToSimVRegister(const SimPRegister& preg);
// Set each predicate flag in pd where the corresponding assigned-sized lane
// in vreg is non-zero. Clear the flag, otherwise. This is almost the opposite
// operation to ExpandToSimVRegister(), except that any non-zero lane is
// interpreted as true.
void ExtractFromSimVRegister(VectorFormat vform,
SimPRegister& pd, // NOLINT(runtime/references)
SimVRegister vreg);
bool coloured_trace_;
// A set of TraceParameters flags.
int trace_parameters_;
// Indicates whether the exclusive-access warning has been printed.
bool print_exclusive_access_warning_;
void PrintExclusiveAccessWarning();
CPUFeaturesAuditor cpu_features_auditor_;
std::vector<CPUFeatures> saved_cpu_features_;
// State for *rand48 functions, used to simulate randomness with repeatable
// behaviour (so that tests are deterministic). This is used to simulate RNDR
// and RNDRRS, as well as to simulate a source of entropy for architecturally
// undefined behaviour.
uint16_t rand_state_[3];
// A configurable size of SVE vector registers.
unsigned vector_length_;
};
#if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) && __cplusplus < 201402L
// Base case of the recursive template used to emulate C++14
// `std::index_sequence`.
// NOTE(review): the primary template is declared inside Simulator (not
// visible in this chunk); this specialisation terminates the recursion at
// a remaining count of zero.
template <size_t... I>
struct Simulator::emulated_make_index_sequence_helper<0, I...>
: Simulator::emulated_index_sequence<I...> {};
#endif
} // namespace aarch64
} // namespace vixl
#endif // VIXL_INCLUDE_SIMULATOR_AARCH64
#endif // VIXL_AARCH64_SIMULATOR_AARCH64_H_