// src/a64/macro-assembler-a64.cc - platform/external/vixl - Git at Google

 // Copyright 2013, ARM Limited
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
 //
 //   * Redistributions of source code must retain the above copyright notice,
 //     this list of conditions and the following disclaimer.
 //   * Redistributions in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
 //     and/or other materials provided with the distribution.
 //   * Neither the name of ARM Limited nor the names of its contributors may be
 //     used to endorse or promote products derived from this software without
 //     specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "a64/macro-assembler-a64.h"
 namespace vixl {


 // Creates an empty literal pool attached to `assm`. The bookkeeping fields
 // start at the same values that Reset() restores: first_use_ is -1 and the
 // monitor_ counter is zero.
 LiteralPool::LiteralPool(Assembler* assm)
     : assm_(assm),
       first_use_(-1),
       monitor_(0) {}


 // A pool must have been emptied (emitted or reset) and must not be blocked by
 // the time it is destroyed; debug builds check both conditions.
 LiteralPool::~LiteralPool() {
   VIXL_ASSERT(IsEmpty());
   VIXL_ASSERT(!IsBlocked());
 }


 void LiteralPool::Reset() {
   std::vector<RawLiteral*>::iterator it, end;
   for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
     delete *it;
   }
   entries_.clear();
   first_use_ = -1;
   monitor_ = 0;
 }


 // Returns the size of the pool as it would be emitted: the sum of the sizes of
 // all entries plus kInstructionSize for the pool header.
 size_t LiteralPool::Size() const {
   size_t total = 0;
   for (size_t i = 0; i < entries_.size(); ++i) {
     total += entries_[i]->size();
   }

   // The header precedes the literals and takes one instruction slot.
   total += kInstructionSize;
   return total;
 }


 // Decrements the monitor_ counter. When it reaches zero, debug builds check
 // that the cursor has not already gone past MaxCursorOffset().
 void LiteralPool::Release() {
   --monitor_;
   if (monitor_ != 0) return;

   // Has the literal pool been blocked for too long?
   VIXL_ASSERT(assm_->CursorOffset() < MaxCursorOffset());
 }


 // Emits the pool if generating `amount` more of code would put the cursor at
 // or beyond kRecommendedLiteralPoolRange from the first literal use. Nothing
 // happens while the pool is empty or blocked.
 void LiteralPool::CheckEmitFor(size_t amount, EmitOption option) {
   if (!IsEmpty() && !IsBlocked()) {
     ptrdiff_t distance = assm_->CursorOffset() + amount - first_use_;
     if (distance >= kRecommendedLiteralPoolRange) {
       Emit(option);
     }
   }
 }


 // Writes the pending literals into the code stream and empties the pool.
 //
 // The emitted layout is: an optional branch over the pool (only when `option`
 // is kBranchRequired), a marker instruction encoding the pool size, then each
 // literal in insertion order. Entries are deleted once they have been placed.
 void LiteralPool::Emit(EmitOption option) {
   // There is an issue if we are asked to emit a blocked or empty pool.
   VIXL_ASSERT(!IsBlocked());
   VIXL_ASSERT(!IsEmpty());

   // Size() already includes the marker; the optional branch adds one more
   // instruction to what must fit in the buffer.
   size_t pool_size = Size();
   size_t emit_size = pool_size;
   if (option == kBranchRequired) emit_size += kInstructionSize;
   Label end_of_pool;

   // The scope is constructed with kCheck and kExactSize for `emit_size`.
   CodeBufferCheckScope guard(assm_,
                              emit_size,
                              CodeBufferCheckScope::kCheck,
                              CodeBufferCheckScope::kExactSize);
   if (option == kBranchRequired) assm_->b(&end_of_pool);

   // Marker indicating the size of the literal pool in 32-bit words.
   VIXL_ASSERT((pool_size % kWRegSizeInBytes) == 0);
   assm_->ldr(xzr, pool_size / kWRegSizeInBytes);

   // Now populate the literal pool.
   std::vector<RawLiteral*>::iterator it, end;
   for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
     VIXL_ASSERT((*it)->IsUsed());
     assm_->place(*it);
     delete *it;
   }

   if (option == kBranchRequired) assm_->bind(&end_of_pool);

   // The pointers in entries_ are now dangling; drop them and mark the pool as
   // having no recorded first use.
   entries_.clear();
   first_use_ = -1;
 }


 // Returns the offset at which the pool should next be checked for emission:
 // kRecommendedLiteralPoolRange past the first literal use, or past the
 // current cursor when the pool holds no literals.
 ptrdiff_t LiteralPool::NextCheckOffset() {
   if (!IsEmpty()) {
     // Unless the pool is blocked, it must not already be overdue.
     VIXL_ASSERT(
         ((assm_->CursorOffset() - first_use_) < kRecommendedLiteralPoolRange) ||
          IsBlocked());
     return first_use_ + kRecommendedLiteralPoolRange;
   }

   return assm_->CursorOffset() + kRecommendedLiteralPoolRange;
 }


 // Calls masm->EnsureEmitFor(size) before the scope's code is generated. Debug
 // builds also record the assembler, the start position and the budget so the
 // destructor can check how much was actually emitted, and acquire the buffer.
 EmissionCheckScope::EmissionCheckScope(MacroAssembler* masm, size_t size) {
   masm->EnsureEmitFor(size);
 #ifdef DEBUG
   masm_ = masm;
   size_ = size;
   masm->Bind(&start_);
   masm->AcquireBuffer();
 #endif
 }


 // Debug builds release the buffer acquired by the constructor and assert that
 // the code generated since the scope opened does not exceed the size that was
 // requested. Release builds do nothing.
 EmissionCheckScope::~EmissionCheckScope() {
 #ifdef DEBUG
   masm_->ReleaseBuffer();
   VIXL_ASSERT(masm_->SizeOfCodeGeneratedSince(&start_) <= size_);
 #endif
 }


 // Builds a macro assembler that passes `capacity` and `pic` to the Assembler
 // base. The default stack pointer is sp, the scratch lists start as
 // {ip0, ip1} and {d31}, and the first literal-pool checkpoint is computed.
 MacroAssembler::MacroAssembler(size_t capacity,
                                PositionIndependentCodeOption pic)
     : Assembler(capacity, pic),
 #ifdef DEBUG
       allow_macro_instructions_(true),
 #endif
       sp_(sp),
       tmp_list_(ip0, ip1),
       fptmp_list_(d31),
       literal_pool_(this) {
   // literal_pool_ is constructed at this point, so the checkpoint can be set.
   checkpoint_ = NextCheckOffset();
 }


 // Builds a macro assembler that passes `buffer`, `capacity` and `pic` to the
 // Assembler base. Apart from the buffer argument, initialisation matches the
 // capacity-only constructor.
 MacroAssembler::MacroAssembler(byte* buffer,
                                size_t capacity,
                                PositionIndependentCodeOption pic)
     : Assembler(buffer, capacity, pic),
 #ifdef DEBUG
       allow_macro_instructions_(true),
 #endif
       sp_(sp),
       tmp_list_(ip0, ip1),
       fptmp_list_(d31),
       literal_pool_(this) {
   // literal_pool_ is constructed at this point, so the checkpoint can be set.
   checkpoint_ = NextCheckOffset();
 }


 // Nothing to do explicitly; members and the base class clean up themselves.
 MacroAssembler::~MacroAssembler() {}


 // Resets the underlying assembler, discards any pending literals and
 // recomputes the next literal-pool checkpoint.
 void MacroAssembler::Reset() {
   Assembler::Reset();

   // Resetting a blocked pool would clear its monitor behind the blocker's back.
   VIXL_ASSERT(!literal_pool_.IsBlocked());
   literal_pool_.Reset();

   checkpoint_ = NextCheckOffset();
 }


 // Flushes any literals still pending in the pool, then lets the base
 // assembler finalise the generated code.
 void MacroAssembler::FinalizeCode() {
   if (!literal_pool_.IsEmpty()) {
     literal_pool_.Emit();
   }

   Assembler::FinalizeCode();
 }


 // Emits the branch to `label` described by `type`. Types in the condition
 // range become a conditional branch; the remaining types select an
 // unconditional branch, no branch at all, or a compare/test-and-branch on
 // `reg` (and `bit`).
 void MacroAssembler::B(Label* label, BranchType type, Register reg, int bit) {
   // A register may only accompany register-based types, and a bit position
   // only bit-test types.
   VIXL_ASSERT((reg.Is(NoReg) || (type >= kBranchTypeFirstUsingReg)) &&
               ((bit == -1) || (type >= kBranchTypeFirstUsingBit)));

   const bool is_condition = (kBranchTypeFirstCondition <= type) &&
                             (type <= kBranchTypeLastCondition);
   if (is_condition) {
     B(static_cast<Condition>(type), label);
     return;
   }

   switch (type) {
     case always:
       B(label);
       break;
     case never:
       // Deliberately emits nothing.
       break;
     case reg_zero:
       Cbz(reg, label);
       break;
     case reg_not_zero:
       Cbnz(reg, label);
       break;
     case reg_bit_clear:
       Tbz(reg, bit, label);
       break;
     case reg_bit_set:
       Tbnz(reg, bit, label);
       break;
     default:
       VIXL_UNREACHABLE();
   }
 }

 // Bitwise AND macro: forwards to LogicalMacro with the AND operation, which
 // also handles operands the instruction cannot encode directly.
 void MacroAssembler::And(const Register& rd, const Register& rn,
                          const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   LogicalMacro(rd, rn, operand, AND);
 }


 // ANDS macro: forwards to LogicalMacro with the ANDS operation.
 void MacroAssembler::Ands(const Register& rd, const Register& rn,
                           const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   LogicalMacro(rd, rn, operand, ANDS);
 }


 void MacroAssembler::Tst(const Register& rn,
                          const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   Ands(AppropriateZeroRegFor(rn), rn, operand);
 }


 // Bit-clear macro: forwards to LogicalMacro with the BIC operation. For an
 // immediate operand, LogicalMacro rewrites this as AND with the inverted
 // immediate.
 void MacroAssembler::Bic(const Register& rd, const Register& rn,
                          const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   LogicalMacro(rd, rn, operand, BIC);
 }


 // BICS macro: forwards to LogicalMacro with the BICS operation.
 void MacroAssembler::Bics(const Register& rd, const Register& rn,
                           const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   LogicalMacro(rd, rn, operand, BICS);
 }


 // Bitwise OR macro: forwards to LogicalMacro with the ORR operation.
 void MacroAssembler::Orr(const Register& rd, const Register& rn,
                          const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   LogicalMacro(rd, rn, operand, ORR);
 }


 // OR-NOT macro: forwards to LogicalMacro with the ORN operation. For an
 // immediate operand, LogicalMacro rewrites this as ORR with the inverted
 // immediate.
 void MacroAssembler::Orn(const Register& rd, const Register& rn,
                          const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   LogicalMacro(rd, rn, operand, ORN);
 }


 // Exclusive-OR macro: forwards to LogicalMacro with the EOR operation.
 void MacroAssembler::Eor(const Register& rd, const Register& rn,
                          const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   LogicalMacro(rd, rn, operand, EOR);
 }


 // Exclusive-OR-NOT macro: forwards to LogicalMacro with the EON operation. For
 // an immediate operand, LogicalMacro rewrites this as EOR with the inverted
 // immediate.
 void MacroAssembler::Eon(const Register& rd, const Register& rn,
                          const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   LogicalMacro(rd, rn, operand, EON);
 }


 // Shared implementation behind And, Ands, Bic, Bics, Orr, Orn, Eor and Eon.
 //
 // Emits the logical operation `op` on `rn` and `operand` into `rd`, choosing
 // the instruction form from the operand kind:
 //  * immediate: trivial all-clear/all-set cases become moves; otherwise the
 //    logical-immediate encoding is used when possible, falling back to
 //    materialising the constant in a scratch register;
 //  * extended register: the extend/shift is applied into a scratch register
 //    first, then the plain register form is used;
 //  * shifted register: passed straight to the assembler.
 void MacroAssembler::LogicalMacro(const Register& rd,
                                   const Register& rn,
                                   const Operand& operand,
                                   LogicalOp op) {
   // The worst case for size is logical immediate to sp:
   //  * up to 4 instructions to materialise the constant
   //  * 1 instruction to do the operation
   //  * 1 instruction to move to sp
   MacroEmissionCheckScope guard(this);
   UseScratchRegisterScope temps(this);

   if (operand.IsImmediate()) {
     int64_t immediate = operand.immediate();
     unsigned reg_size = rd.size();

     // If the operation is NOT, invert the operation and immediate.
     if ((op & NOT) == NOT) {
       op = static_cast<LogicalOp>(op & ~NOT);
       immediate = ~immediate;
     }

     // Ignore the top 32 bits of an immediate if we're moving to a W register.
     if (rd.Is32Bits()) {
       // Check that the top 32 bits are consistent.
       VIXL_ASSERT(((immediate >> kWRegSize) == 0) ||
                   ((immediate >> kWRegSize) == -1));
       immediate &= kWRegMask;
     }

     VIXL_ASSERT(rd.Is64Bits() || is_uint32(immediate));

     // Special cases for all set or all clear immediates.
     if (immediate == 0) {
       switch (op) {
         case AND:
           Mov(rd, 0);
           return;
         case ORR:  // Fall through.
         case EOR:
           Mov(rd, rn);
           return;
         case ANDS:  // Fall through.
         case BICS:
           // Flag-setting forms are not replaced by a move; continue to the
           // general path below.
           break;
         default:
           VIXL_UNREACHABLE();
       }
     } else if ((rd.Is64Bits() && (immediate == -1)) ||
                (rd.Is32Bits() && (immediate == 0xffffffff))) {
       switch (op) {
         case AND:
           Mov(rd, rn);
           return;
         case ORR:
           Mov(rd, immediate);
           return;
         case EOR:
           Mvn(rd, rn);
           return;
         case ANDS:  // Fall through.
         case BICS:
           // Flag-setting forms are not replaced by a move; continue to the
           // general path below.
           break;
         default:
           VIXL_UNREACHABLE();
       }
     }

     unsigned n, imm_s, imm_r;
     if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
       // Immediate can be encoded in the instruction.
       LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
     } else {
       // Immediate can't be encoded: synthesize using move immediate.
       Register temp = temps.AcquireSameSizeAs(rn);
       Operand imm_operand = MoveImmediateForShiftedOp(temp, immediate);

       if (rd.Is(sp)) {
         // If rd is the stack pointer we cannot use it as the destination
         // register so we use the temp register as an intermediate again.
         Logical(temp, rn, imm_operand, op);
         Mov(sp, temp);
       } else {
         Logical(rd, rn, imm_operand, op);
       }
     }
   } else if (operand.IsExtendedRegister()) {
     VIXL_ASSERT(operand.reg().size() <= rd.size());
     // Add/sub extended supports shift <= 4. We want to support exactly the
     // same modes here.
     VIXL_ASSERT(operand.shift_amount() <= 4);
     VIXL_ASSERT(operand.reg().Is64Bits() ||
            ((operand.extend() != UXTX) && (operand.extend() != SXTX)));

     // Exclude the source register from the scratch set before acquiring the
     // temporary that receives the extended value.
     temps.Exclude(operand.reg());
     Register temp = temps.AcquireSameSizeAs(rn);
     EmitExtendShift(temp, operand.reg(), operand.extend(),
                     operand.shift_amount());
     Logical(rd, rn, Operand(temp), op);
   } else {
     // The operand can be encoded in the instruction.
     VIXL_ASSERT(operand.IsShiftedRegister());
     Logical(rd, rn, operand, op);
   }
 }


 // Moves `operand` into `rd`, picking the instruction form from the operand
 // kind: immediate, shifted register, extended register or plain register.
 // `discard_mode` controls whether a move between identical W registers is
 // still emitted.
 void MacroAssembler::Mov(const Register& rd,
                          const Operand& operand,
                          DiscardMoveMode discard_mode) {
   VIXL_ASSERT(allow_macro_instructions_);
   // The largest expansion is a move immediate, at up to 4 instructions.
   MacroEmissionCheckScope guard(this);

   if (operand.IsImmediate()) {
     // Generic immediates go through the immediate overload.
     Mov(rd, operand.immediate());
     return;
   }

   if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) {
     // A shifted source is emitted as a shift instruction. An orr would also
     // work (as orn does for Mvn), but a shift disassembles more clearly.
     EmitShift(rd, operand.reg(), operand.shift(), operand.shift_amount());
     return;
   }

   if (operand.IsExtendedRegister()) {
     // An extended source is emitted as an extend instruction; this covers
     // extend with post-shift as well.
     EmitExtendShift(rd, operand.reg(), operand.extend(),
                     operand.shift_amount());
     return;
   }

   // Plain register source. The move is emitted when the registers differ, or
   // when they are the same W register and the caller has not allowed the move
   // to be discarded: mov(w0, w0) is not a no-op because it clears the top word
   // of x0. kDiscardForSameWReg lets the caller drop it when that clearing is
   // not needed.
   //
   // If the sp is an operand, add #0 is emitted, otherwise, orr #0.
   const bool same_register = rd.Is(operand.reg());
   const bool keep_w_move =
       rd.Is32Bits() && (discard_mode == kDontDiscardForSameWReg);
   if (!same_register || keep_w_move) {
     mov(rd, operand.reg());
   }
 }


 // Moves the bitwise inverse of `operand` into `rd`.
 void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   // The largest expansion is mvn immediate, at up to 4 instructions.
   MacroEmissionCheckScope guard(this);

   if (operand.IsImmediate()) {
     // Generic immediates go through the immediate overload.
     Mvn(rd, operand.immediate());
     return;
   }

   if (!operand.IsExtendedRegister()) {
     // Plain and shifted registers are handled directly by the assembler,
     // using orn.
     mvn(rd, operand);
     return;
   }

   // Extended register: unlike Mov, extend and invert cannot be combined into
   // one instruction, so extend into a scratch register first and invert that.
   UseScratchRegisterScope temps(this);
   temps.Exclude(operand.reg());

   Register temp = temps.AcquireSameSizeAs(rd);
   EmitExtendShift(temp, operand.reg(), operand.extend(),
                   operand.shift_amount());
   mvn(rd, Operand(temp));
 }


 // Loads the immediate `imm` into `rd`.
 //
 // A single-instruction encoding is tried first (TryOneInstrMoveImmediate).
 // Otherwise the value is assembled 16 bits at a time: one movz or movn for the
 // first halfword that needs writing, then a movk for each further halfword
 // that differs from the background value. When `rd` is the stack pointer the
 // value is built in a scratch register and moved across at the end.
 void MacroAssembler::Mov(const Register& rd, uint64_t imm) {
   VIXL_ASSERT(allow_macro_instructions_);
   // A W destination only accepts values that fit in 32 bits (as either a
   // signed or an unsigned quantity).
   VIXL_ASSERT(is_uint32(imm) || is_int32(imm) || rd.Is64Bits());
   // The worst case for size is mov 64-bit immediate to sp:
   //  * up to 4 instructions to materialise the constant
   //  * 1 instruction to move to sp
   MacroEmissionCheckScope guard(this);

   // Immediates on Aarch64 can be produced using an initial value, and zero to
   // three move keep operations.
   //
   // Initial values can be generated with:
   //  1. 64-bit move zero (movz).
   //  2. 32-bit move inverted (movn).
   //  3. 64-bit move inverted.
   //  4. 32-bit orr immediate.
   //  5. 64-bit orr immediate.
   // Move-keep may then be used to modify each of the 16-bit half words.
   //
   // The code below supports all five initial value generators, and
   // applying move-keep operations to move-zero and move-inverted initial
   // values.

   // Try to move the immediate in one instruction, and if that fails, switch to
   // using multiple instructions.
   if (!TryOneInstrMoveImmediate(rd, imm)) {
     unsigned reg_size = rd.size();

     // Generic immediate case. Imm will be represented by
     //   [imm3, imm2, imm1, imm0], where each imm is 16 bits.
     // A move-zero or move-inverted is generated for the first non-zero or
     // non-0xffff immX, and a move-keep for subsequent non-zero immX.

     // `ignored_halfword` is the background value that the first move leaves in
     // every halfword it does not write: 0x0000 for movz, 0xffff for movn.
     uint64_t ignored_halfword = 0;
     bool invert_move = false;
     // If the number of 0xffff halfwords is greater than the number of 0x0000
     // halfwords, it's more efficient to use move-inverted.
     if (CountClearHalfWords(~imm, reg_size) >
         CountClearHalfWords(imm, reg_size)) {
       ignored_halfword = 0xffff;
       invert_move = true;
     }

     // Mov instructions can't move values into the stack pointer, so set up a
     // temporary register, if needed.
     UseScratchRegisterScope temps(this);
     Register temp = rd.IsSP() ? temps.AcquireSameSizeAs(rd) : rd;

     // Iterate through the halfwords. Use movn/movz for the first non-ignored
     // halfword, and movk for subsequent halfwords.
     VIXL_ASSERT((reg_size % 16) == 0);
     bool first_mov_done = false;
     for (unsigned i = 0; i < (temp.size() / 16); i++) {
       uint64_t imm16 = (imm >> (16 * i)) & 0xffff;
       if (imm16 != ignored_halfword) {
         if (!first_mov_done) {
           if (invert_move) {
             // movn is given the inverse of the halfword being written.
             movn(temp, ~imm16 & 0xffff, 16 * i);
           } else {
             movz(temp, imm16, 16 * i);
           }
           first_mov_done = true;
         } else {
           // Construct a wider constant.
           movk(temp, imm16, 16 * i);
         }
       }
     }

     // At least one halfword must have differed from the background value.
     VIXL_ASSERT(first_mov_done);

     // Move the temporary if the original destination register was the stack
     // pointer.
     if (rd.IsSP()) {
       mov(rd, temp);
     }
   }
 }


 // Counts how many of the 16-bit halfwords in the low `reg_size` bits of `imm`
 // are zero.
 //
 // `reg_size` must be a whole number of halfwords; the loop below examines
 // exactly `reg_size / 16` of them, so any other width would silently ignore
 // the remaining bits.
 unsigned MacroAssembler::CountClearHalfWords(uint64_t imm, unsigned reg_size) {
   VIXL_ASSERT((reg_size % 16) == 0);
   unsigned count = 0;
   for (unsigned i = 0; i < (reg_size / 16); i++) {
     if ((imm & 0xffff) == 0) {
       count++;
     }
     // Bring the next halfword down into the low 16 bits.
     imm >>= 16;
   }
   return count;
 }


 // Returns true when `imm` can be produced by a single movz, i.e. when at most
 // one of its 16-bit halfwords is non-zero (for example 0x00001234 or
 // 0x0000123400000000).
 bool MacroAssembler::IsImmMovz(uint64_t imm, unsigned reg_size) {
   VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
   const unsigned halfword_count = reg_size / 16;
   const unsigned clear_halfwords = CountClearHalfWords(imm, reg_size);
   return clear_halfwords >= (halfword_count - 1);
 }


 // Returns true when `imm` can be produced by a single movn, i.e. when at most
 // one of its 16-bit halfwords differs from 0xffff (for example 0xffff1234 or
 // 0xffff1234ffffffff). This is the movz test applied to the inverted value.
 bool MacroAssembler::IsImmMovn(uint64_t imm, unsigned reg_size) {
   const uint64_t inverted = ~imm;
   return IsImmMovz(inverted, reg_size);
 }


 // Conditional compare macro. A negative immediate is negated and issued as
 // the CCMN form instead; everything else uses CCMP.
 void MacroAssembler::Ccmp(const Register& rn, const Operand& operand,
                           StatusFlags nzcv, Condition cond) {
   VIXL_ASSERT(allow_macro_instructions_);
   const bool negative_immediate =
       operand.IsImmediate() && (operand.immediate() < 0);
   if (!negative_immediate) {
     ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
     return;
   }
   ConditionalCompareMacro(rn, -operand.immediate(), nzcv, cond, CCMN);
 }


 // Conditional compare-negative macro. A negative immediate is negated and
 // issued as the CCMP form instead; everything else uses CCMN.
 void MacroAssembler::Ccmn(const Register& rn, const Operand& operand,
                           StatusFlags nzcv, Condition cond) {
   VIXL_ASSERT(allow_macro_instructions_);
   const bool negative_immediate =
       operand.IsImmediate() && (operand.immediate() < 0);
   if (!negative_immediate) {
     ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
     return;
   }
   ConditionalCompareMacro(rn, -operand.immediate(), nzcv, cond, CCMP);
 }


 // Shared implementation behind Ccmp and Ccmn. Unshifted registers and
 // encodable immediates go straight to the assembler; any other operand is
 // first moved into a scratch register.
 void MacroAssembler::ConditionalCompareMacro(const Register& rn,
                                              const Operand& operand,
                                              StatusFlags nzcv,
                                              Condition cond,
                                              ConditionalCompareOp op) {
   VIXL_ASSERT((cond != al) && (cond != nv));
   // Largest expansion (immediate operand):
   //  * up to 4 instructions to materialise the constant
   //  * 1 instruction for the conditional compare itself
   MacroEmissionCheckScope guard(this);

   const bool unshifted_register =
       operand.IsShiftedRegister() && (operand.shift_amount() == 0);
   const bool directly_encodable =
       unshifted_register ||
       (operand.IsImmediate() && IsImmConditionalCompare(operand.immediate()));

   if (directly_encodable) {
     ConditionalCompare(rn, operand, nzcv, cond, op);
     return;
   }

   // Not supported by the instruction: evaluate the operand into a temporary
   // and compare against that.
   UseScratchRegisterScope temps(this);
   Register temp = temps.AcquireSameSizeAs(rn);
   Mov(temp, operand);
   ConditionalCompare(rn, temp, nzcv, cond, op);
 }


 // Conditional select macro accepting an arbitrary second operand. The
 // immediates 0, 1 and -1 are produced from the zero register with csel, csinc
 // and csinv respectively; other operands that are not an unshifted register
 // are first moved into a scratch register.
 void MacroAssembler::Csel(const Register& rd,
                           const Register& rn,
                           const Operand& operand,
                           Condition cond) {
   VIXL_ASSERT(allow_macro_instructions_);
   VIXL_ASSERT(!rd.IsZero());
   VIXL_ASSERT(!rn.IsZero());
   VIXL_ASSERT((cond != al) && (cond != nv));
   // Largest expansion (immediate operand):
   //  * up to 4 instructions to materialise the constant
   //  * 1 instruction for csel
   MacroEmissionCheckScope guard(this);

   if (operand.IsImmediate()) {
     int64_t imm = operand.immediate();
     Register zr = AppropriateZeroRegFor(rn);
     if (imm == 0) {
       csel(rd, rn, zr, cond);
       return;
     }
     if (imm == 1) {
       csinc(rd, rn, zr, cond);
       return;
     }
     if (imm == -1) {
       csinv(rd, rn, zr, cond);
       return;
     }

     // General immediate: materialise it in a temporary.
     UseScratchRegisterScope temps(this);
     Register temp = temps.AcquireSameSizeAs(rn);
     Mov(temp, operand.immediate());
     csel(rd, rn, temp, cond);
     return;
   }

   if (operand.IsShiftedRegister() && (operand.shift_amount() == 0)) {
     // Unshifted register: usable as-is.
     csel(rd, rn, operand.reg(), cond);
     return;
   }

   // Shifted or extended register: evaluate it into a temporary first.
   UseScratchRegisterScope temps(this);
   Register temp = temps.AcquireSameSizeAs(rn);
   Mov(temp, operand);
   csel(rd, rn, temp, cond);
 }


 // Addition macro without flag update. A negative immediate whose magnitude is
 // an encodable add/sub immediate is emitted as a subtraction of that
 // magnitude instead.
 void MacroAssembler::Add(const Register& rd, const Register& rn,
                          const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   const bool use_negated = operand.IsImmediate() &&
                            (operand.immediate() < 0) &&
                            IsImmAddSub(-operand.immediate());
   if (use_negated) {
     AddSubMacro(rd, rn, -operand.immediate(), LeaveFlags, SUB);
     return;
   }
   AddSubMacro(rd, rn, operand, LeaveFlags, ADD);
 }


 // Addition macro that requests a flag update (SetFlags). A negative immediate
 // whose magnitude is an encodable add/sub immediate is emitted as a
 // subtraction of that magnitude instead.
 void MacroAssembler::Adds(const Register& rd, const Register& rn,
                           const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   const bool use_negated = operand.IsImmediate() &&
                            (operand.immediate() < 0) &&
                            IsImmAddSub(-operand.immediate());
   if (use_negated) {
     AddSubMacro(rd, rn, -operand.immediate(), SetFlags, SUB);
     return;
   }
   AddSubMacro(rd, rn, operand, SetFlags, ADD);
 }


 // Subtraction macro without flag update. A negative immediate whose magnitude
 // is an encodable add/sub immediate is emitted as an addition of that
 // magnitude instead.
 void MacroAssembler::Sub(const Register& rd, const Register& rn,
                          const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   const bool use_negated = operand.IsImmediate() &&
                            (operand.immediate() < 0) &&
                            IsImmAddSub(-operand.immediate());
   if (use_negated) {
     AddSubMacro(rd, rn, -operand.immediate(), LeaveFlags, ADD);
     return;
   }
   AddSubMacro(rd, rn, operand, LeaveFlags, SUB);
 }


 // Subtraction macro that requests a flag update (SetFlags). A negative
 // immediate whose magnitude is an encodable add/sub immediate is emitted as an
 // addition of that magnitude instead.
 void MacroAssembler::Subs(const Register& rd, const Register& rn,
                           const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   const bool use_negated = operand.IsImmediate() &&
                            (operand.immediate() < 0) &&
                            IsImmAddSub(-operand.immediate());
   if (use_negated) {
     AddSubMacro(rd, rn, -operand.immediate(), SetFlags, ADD);
     return;
   }
   AddSubMacro(rd, rn, operand, SetFlags, SUB);
 }


 void MacroAssembler::Cmn(const Register& rn, const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   Adds(AppropriateZeroRegFor(rn), rn, operand);
 }


 void MacroAssembler::Cmp(const Register& rn, const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   Subs(AppropriateZeroRegFor(rn), rn, operand);
 }


 // Compares `fn` against the constant `value`. A value equal to zero is passed
 // to fcmp directly; any other constant is first loaded into a scratch FP
 // register.
 void MacroAssembler::Fcmp(const FPRegister& fn, double value) {
   VIXL_ASSERT(allow_macro_instructions_);
   // Largest expansion:
   //  * 1 to materialise the constant, using literal pool if necessary
   //  * 1 instruction for fcmp
   MacroEmissionCheckScope guard(this);

   if (value == 0.0) {
     fcmp(fn, value);
     return;
   }

   UseScratchRegisterScope temps(this);
   FPRegister tmp = temps.AcquireSameSizeAs(fn);
   Fmov(tmp, value);
   fcmp(fn, tmp);
 }


 // Loads the double constant `imm` into `fd`. A 32-bit destination is handled
 // by the float overload after narrowing the value. Otherwise the constant is
 // encoded in fmov when possible, positive zero is moved from xzr, and anything
 // else is loaded from the literal pool.
 void MacroAssembler::Fmov(FPRegister fd, double imm) {
   VIXL_ASSERT(allow_macro_instructions_);
   MacroEmissionCheckScope guard(this);

   if (fd.Is32Bits()) {
     Fmov(fd, static_cast<float>(imm));
     return;
   }

   VIXL_ASSERT(fd.Is64Bits());
   if (IsImmFP64(imm)) {
     fmov(fd, imm);
     return;
   }

   // copysign distinguishes +0.0 from -0.0, which compare equal.
   const bool is_positive_zero = (imm == 0.0) && (copysign(1.0, imm) == 1.0);
   if (is_positive_zero) {
     fmov(fd, xzr);
     return;
   }

   RawLiteral* literal = literal_pool_.Add(imm);
   ldr(fd, literal);
 }


 // Loads the float constant `imm` into `fd`. A 64-bit destination is handled by
 // the double overload after widening the value. Otherwise the constant is
 // encoded in fmov when possible, positive zero is moved from wzr, and anything
 // else is loaded from the literal pool.
 void MacroAssembler::Fmov(FPRegister fd, float imm) {
   VIXL_ASSERT(allow_macro_instructions_);
   MacroEmissionCheckScope guard(this);

   if (fd.Is64Bits()) {
     Fmov(fd, static_cast<double>(imm));
     return;
   }

   VIXL_ASSERT(fd.Is32Bits());
   if (IsImmFP32(imm)) {
     fmov(fd, imm);
     return;
   }

   // copysign distinguishes +0.0 from -0.0, which compare equal.
   const bool is_positive_zero = (imm == 0.0) && (copysign(1.0, imm) == 1.0);
   if (is_positive_zero) {
     fmov(fd, wzr);
     return;
   }

   RawLiteral* literal = literal_pool_.Add(imm);
   ldr(fd, literal);
 }


 // Negation macro. An immediate is negated here and moved directly; any other
 // operand is subtracted from the zero register.
 void MacroAssembler::Neg(const Register& rd, const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   if (!operand.IsImmediate()) {
     Sub(rd, AppropriateZeroRegFor(rd), operand);
     return;
   }
   Mov(rd, -operand.immediate());
 }


 void MacroAssembler::Negs(const Register& rd,
                           const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   Subs(rd, AppropriateZeroRegFor(rd), operand);
 }


 // Tries to load `imm` into `dst` with exactly one instruction, trying movz,
 // then movn, then an orr with a logical immediate. Returns true if an
 // instruction was emitted, false (with nothing emitted) otherwise.
 bool MacroAssembler::TryOneInstrMoveImmediate(const Register& dst,
                                               int64_t imm) {
   int reg_size = dst.size();

   // Neither movz nor movn can write to the stack pointer.
   if (!dst.IsSP()) {
     if (IsImmMovz(imm, reg_size)) {
       movz(dst, imm);
       return true;
     }
     if (IsImmMovn(imm, reg_size)) {
       // movn takes the inverted value; for W registers only the low word is
       // passed.
       movn(dst, dst.Is64Bits() ? ~imm : (~imm & kWRegMask));
       return true;
     }
   }

   unsigned n, imm_s, imm_r;
   if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) {
     // Representable as a logical immediate: orr it with the zero register.
     VIXL_ASSERT(!dst.IsZero());
     LogicalImmediate(dst, AppropriateZeroRegFor(dst), n, imm_s, imm_r, ORR);
     return true;
   }

   return false;
 }


 // Materialises `imm` in `dst` for use as the second operand of a
 // shifted-register instruction, and returns the operand the caller must use.
 //
 // If `imm` itself cannot be loaded in one instruction, two pre-shifted
 // variants are tried: the value with its trailing zeros shifted out (the
 // returned operand then applies LSL), and the value shifted up to the top of
 // the register with set bits filling the vacated low bits (the returned
 // operand then applies LSR, which discards the filler). If neither works, the
 // general multi-instruction Mov is used and a plain register operand is
 // returned.
 Operand MacroAssembler::MoveImmediateForShiftedOp(const Register& dst,
                                                   int64_t imm) {
   int reg_size = dst.size();

   // Encode the immediate in a single move instruction, if possible.
   if (TryOneInstrMoveImmediate(dst, imm)) {
     // The move was successful; nothing to do here.
   } else {
     // Pre-shift the immediate to the least-significant bits of the register.
     int shift_low = CountTrailingZeros(imm, reg_size);
     int64_t imm_low = imm >> shift_low;

     // Pre-shift the immediate to the most-significant bits of the register,
     // inserting set bits in the least-significant bits.
     //
     // The shift count can exceed 31, so both the mask and the shifted value
     // are built in 64-bit unsigned arithmetic. Shifting a plain `1` (an int)
     // by that amount is undefined and yields a truncated mask.
     int shift_high = CountLeadingZeros(imm, reg_size);
     uint64_t low_ones = (static_cast<uint64_t>(1) << shift_high) - 1;
     int64_t imm_high = static_cast<int64_t>(
         (static_cast<uint64_t>(imm) << shift_high) | low_ones);

     if (TryOneInstrMoveImmediate(dst, imm_low)) {
       // The new immediate has been moved into the destination's low bits:
       // return a new leftward-shifting operand.
       return Operand(dst, LSL, shift_low);
     } else if (TryOneInstrMoveImmediate(dst, imm_high)) {
       // The new immediate has been moved into the destination's high bits:
       // return a new rightward-shifting operand.
       return Operand(dst, LSR, shift_high);
     } else {
       Mov(dst, imm);
     }
   }
   return Operand(dst);
 }


 // Shared implementation behind Add, Adds, Sub and Subs. Operands that the
 // add/sub instruction accepts are passed through; the rest are first
 // materialised in a scratch register.
 void MacroAssembler::AddSubMacro(const Register& rd,
                                  const Register& rn,
                                  const Operand& operand,
                                  FlagsUpdate S,
                                  AddSubOp op) {
   // Largest expansion (immediate operand):
   //  * up to 4 instructions to materialise the constant
   //  * 1 instruction for add/sub
   MacroEmissionCheckScope guard(this);

   // Adding or subtracting zero in place on an X register, without touching the
   // flags, would be a nop: emit nothing.
   if (operand.IsZero() && rd.Is(rn) && rd.Is64Bits() && rn.Is64Bits() &&
       (S == LeaveFlags)) {
     return;
   }

   // A temporary is needed for an unencodable immediate, for a zero-register
   // rn combined with a non-shifted-register operand, and for ROR shifts.
   const bool needs_temp =
       (operand.IsImmediate() && !IsImmAddSub(operand.immediate())) ||
       (rn.IsZero() && !operand.IsShiftedRegister()) ||
       (operand.IsShiftedRegister() && (operand.shift() == ROR));

   if (!needs_temp) {
     AddSub(rd, rn, operand, S, op);
     return;
   }

   UseScratchRegisterScope temps(this);
   Register temp = temps.AcquireSameSizeAs(rn);
   if (!operand.IsImmediate()) {
     Mov(temp, operand);
     AddSub(rd, rn, temp, S, op);
     return;
   }

   // Immediates may come back as a shifted view of the temporary.
   Operand imm_operand = MoveImmediateForShiftedOp(temp, operand.immediate());
   AddSub(rd, rn, imm_operand, S, op);
 }


 // Add-with-carry macro without flag update: forwards to AddSubWithCarryMacro
 // with ADC and LeaveFlags.
 void MacroAssembler::Adc(const Register& rd, const Register& rn,
                          const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, ADC);
 }


 // Add-with-carry macro that requests a flag update: forwards to
 // AddSubWithCarryMacro with ADC and SetFlags.
 void MacroAssembler::Adcs(const Register& rd, const Register& rn,
                           const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   AddSubWithCarryMacro(rd, rn, operand, SetFlags, ADC);
 }


 // Subtract-with-carry macro without flag update: forwards to
 // AddSubWithCarryMacro with SBC and LeaveFlags.
 void MacroAssembler::Sbc(const Register& rd, const Register& rn,
                          const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, SBC);
 }


 // Subtract-with-carry macro that requests a flag update: forwards to
 // AddSubWithCarryMacro with SBC and SetFlags.
 void MacroAssembler::Sbcs(const Register& rd, const Register& rn,
                           const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   AddSubWithCarryMacro(rd, rn, operand, SetFlags, SBC);
 }


 void MacroAssembler::Ngc(const Register& rd,
                          const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   Register zr = AppropriateZeroRegFor(rd);
   Sbc(rd, zr, operand);
 }


 void MacroAssembler::Ngcs(const Register& rd,
                          const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
   Register zr = AppropriateZeroRegFor(rd);
   Sbcs(rd, zr, operand);
 }


 // Shared implementation behind Adc, Adcs, Sbc and Sbcs (and, through them,
 // Ngc and Ngcs).
 //
 // An unshifted register operand is passed to AddSubWithCarry unchanged.
 // Immediates, shifted registers and extended registers are first evaluated
 // into a scratch register, which is then passed as a plain register operand.
 void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
                                           const Register& rn,
                                           const Operand& operand,
                                           FlagsUpdate S,
                                           AddSubWithCarryOp op) {
   VIXL_ASSERT(rd.size() == rn.size());
   // Worst case is adc/sbc immediate:
   //  * up to 4 instructions to materialise the constant
   //  * 1 instruction for add/sub
   MacroEmissionCheckScope guard(this);
   UseScratchRegisterScope temps(this);

   if (operand.IsImmediate() ||
       (operand.IsShiftedRegister() && (operand.shift() == ROR))) {
     // Add/sub with carry (immediate or ROR shifted register.)
     Register temp = temps.AcquireSameSizeAs(rn);
     Mov(temp, operand);
     AddSubWithCarry(rd, rn, Operand(temp), S, op);
   } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) {
     // Add/sub with carry (shifted register).
     VIXL_ASSERT(operand.reg().size() == rd.size());
     VIXL_ASSERT(operand.shift() != ROR);
     VIXL_ASSERT(is_uintn(rd.size() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
                     operand.shift_amount()));
     // Exclude the source register from the scratch set before acquiring the
     // temporary that receives the shifted value.
     temps.Exclude(operand.reg());
     Register temp = temps.AcquireSameSizeAs(rn);
     EmitShift(temp, operand.reg(), operand.shift(), operand.shift_amount());
     AddSubWithCarry(rd, rn, Operand(temp), S, op);
   } else if (operand.IsExtendedRegister()) {
     // Add/sub with carry (extended register).
     VIXL_ASSERT(operand.reg().size() <= rd.size());
     // Add/sub extended supports a shift <= 4. We want to support exactly the
     // same modes.
     VIXL_ASSERT(operand.shift_amount() <= 4);
     VIXL_ASSERT(operand.reg().Is64Bits() ||
            ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
     // Exclude the source register from the scratch set before acquiring the
     // temporary that receives the extended value.
     temps.Exclude(operand.reg());
     Register temp = temps.AcquireSameSizeAs(rn);
     EmitExtendShift(temp, operand.reg(), operand.extend(),
                     operand.shift_amount());
     AddSubWithCarry(rd, rn, Operand(temp), S, op);
   } else {
     // The addressing mode is directly supported by the instruction.
     AddSubWithCarry(rd, rn, operand, S, op);
   }
 }


 // Generate the single-register load/store macro instructions. LS_MACRO_LIST
 // (defined outside this section) invokes DEFINE_FUNCTION once per entry; each
 // generated method asserts that macro instructions are allowed and forwards
 // to LoadStoreMacro with the entry's operation.
 #define DEFINE_FUNCTION(FN, REGTYPE, REG, OP)                         \
 void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) {  \
   VIXL_ASSERT(allow_macro_instructions_);                             \
   LoadStoreMacro(REG, addr, OP);                                      \
 }
 LS_MACRO_LIST(DEFINE_FUNCTION)
 #undef DEFINE_FUNCTION

 void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
                                     const MemOperand& addr,
                                     LoadStoreOp op) {
   // Worst case is ldr/str pre/post index:
   //  * 1 instruction for ldr/str
   //  * up to 4 instructions to materialise the constant
   //  * 1 instruction to update the base
   MacroEmissionCheckScope guard(this);

   int64_t offset = addr.offset();
   LSDataSize size = CalcLSDataSize(op);

   // Check if an immediate offset fits in the immediate field of the
   // appropriate instruction. If not, emit two instructions to perform
   // the operation.
   if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, size) &&
       !IsImmLSUnscaled(offset)) {
     // Immediate offset that can't be encoded using unsigned or unscaled
     // addressing modes.
     UseScratchRegisterScope temps(this);
     Register temp = temps.AcquireSameSizeAs(addr.base());
     Mov(temp, addr.offset());
     LoadStore(rt, MemOperand(addr.base(), temp), op);
   } else if (addr.IsPostIndex() && !IsImmLSUnscaled(offset)) {
     // Post-index beyond unscaled addressing range.
     LoadStore(rt, MemOperand(addr.base()), op);
     Add(addr.base(), addr.base(), Operand(offset));
   } else if (addr.IsPreIndex() && !IsImmLSUnscaled(offset)) {
     // Pre-index beyond unscaled addressing range.
     Add(addr.base(), addr.base(), Operand(offset));
     LoadStore(rt, MemOperand(addr.base()), op);
   } else {
     // Encodable in one load/store instruction.
     LoadStore(rt, addr, op);
   }
 }


 // Generate the load/store-pair macro instructions. LSPAIR_MACRO_LIST (defined
 // outside this section) invokes DEFINE_FUNCTION once per entry; each
 // generated method asserts that macro instructions are allowed and forwards
 // to LoadStorePairMacro with the entry's operation.
 #define DEFINE_FUNCTION(FN, REGTYPE, REG, REG2, OP)  \
 void MacroAssembler::FN(const REGTYPE REG,           \
                         const REGTYPE REG2,          \
                         const MemOperand& addr) {    \
   VIXL_ASSERT(allow_macro_instructions_);            \
   LoadStorePairMacro(REG, REG2, addr, OP);           \
 }
 LSPAIR_MACRO_LIST(DEFINE_FUNCTION)
 #undef DEFINE_FUNCTION

 void MacroAssembler::LoadStorePairMacro(const CPURegister& rt,
                                         const CPURegister& rt2,
                                         const MemOperand& addr,
                                         LoadStorePairOp op) {
   // TODO(all): Should we support register offset for load-store-pair?
   VIXL_ASSERT(!addr.IsRegisterOffset());
   // Worst case is ldp/stp immediate:
   //  * 1 instruction for ldp/stp
   //  * up to 4 instructions to materialise the constant
   //  * 1 instruction to update the base
   MacroEmissionCheckScope guard(this);

   int64_t offset = addr.offset();
   LSDataSize size = CalcLSPairDataSize(op);

   // Check if the offset fits in the immediate field of the appropriate
   // instruction. If not, emit two instructions to perform the operation.
   if (IsImmLSPair(offset, size)) {
     // Encodable in one load/store pair instruction.
     LoadStorePair(rt, rt2, addr, op);
   } else {
     Register base = addr.base();
     if (addr.IsImmediateOffset()) {
       UseScratchRegisterScope temps(this);
       Register temp = temps.AcquireSameSizeAs(base);
       Add(temp, base, offset);
       LoadStorePair(rt, rt2, MemOperand(temp), op);
     } else if (addr.IsPostIndex()) {
       LoadStorePair(rt, rt2, MemOperand(base), op);
       Add(base, base, offset);
     } else {
       VIXL_ASSERT(addr.IsPreIndex());
       Add(base, base, offset);
       LoadStorePair(rt, rt2, MemOperand(base), op);
     }
   }
 }

 // Push between one and four registers of the same size and type. src0 must
 // be valid; the number of registers pushed is one plus the number of valid
 // registers among src1, src2 and src3.
 void MacroAssembler::Push(const CPURegister& src0, const CPURegister& src1,
                           const CPURegister& src2, const CPURegister& src3) {
   VIXL_ASSERT(allow_macro_instructions_);
   VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
   VIXL_ASSERT(src0.IsValid());

   // src0 always counts; each further valid register adds one.
   int reg_count = 1;
   if (src1.IsValid()) reg_count++;
   if (src2.IsValid()) reg_count++;
   if (src3.IsValid()) reg_count++;

   const int reg_size = src0.SizeInBytes();

   PrepareForPush(reg_count, reg_size);
   PushHelper(reg_count, reg_size, src0, src1, src2, src3);
 }


 // Pop between one and four registers of the same size and type. dst0 must be
 // valid; the number of registers popped is one plus the number of valid
 // registers among dst1, dst2 and dst3.
 void MacroAssembler::Pop(const CPURegister& dst0, const CPURegister& dst1,
                          const CPURegister& dst2, const CPURegister& dst3) {
   VIXL_ASSERT(allow_macro_instructions_);
   // It is not valid to pop into the same register more than once in one
   // instruction, not even into the zero register.
   VIXL_ASSERT(!AreAliased(dst0, dst1, dst2, dst3));
   VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
   VIXL_ASSERT(dst0.IsValid());

   // dst0 always counts; each further valid register adds one.
   int reg_count = 1;
   if (dst1.IsValid()) reg_count++;
   if (dst2.IsValid()) reg_count++;
   if (dst3.IsValid()) reg_count++;

   const int reg_size = dst0.SizeInBytes();

   PrepareForPop(reg_count, reg_size);
   PopHelper(reg_count, reg_size, dst0, dst1, dst2, dst3);
 }


 // Push every register in `registers`, highest register index first, in
 // groups of up to four. The list is taken by value because it is consumed
 // while pushing.
 void MacroAssembler::PushCPURegList(CPURegList registers) {
   // Check that macro instructions are permitted before anything is emitted;
   // PrepareForPush may itself emit code through BumpSystemStackPointer.
   VIXL_ASSERT(allow_macro_instructions_);

   int size = registers.RegisterSizeInBytes();

   PrepareForPush(registers.Count(), size);
   // Push up to four registers at a time because if the current stack pointer
   // is sp and the register size is 32 bits, registers must be pushed in
   // blocks of four in order to maintain the 16-byte alignment for sp.
   while (!registers.IsEmpty()) {
     int count_before = registers.Count();
     // Each pop must be its own statement so that the order is well defined.
     // Once the list runs out, the remaining pops are presumably none
     // registers (PushHelper asserts this for the unused slots).
     const CPURegister& src0 = registers.PopHighestIndex();
     const CPURegister& src1 = registers.PopHighestIndex();
     const CPURegister& src2 = registers.PopHighestIndex();
     const CPURegister& src3 = registers.PopHighestIndex();
     // The number of registers actually removed from the list this round.
     int count = count_before - registers.Count();
     PushHelper(count, size, src0, src1, src2, src3);
   }
 }


 // Pop every register in `registers`, lowest register index first, in groups
 // of up to four. The list is taken by value because it is consumed while
 // popping.
 void MacroAssembler::PopCPURegList(CPURegList registers) {
   // Check that macro instructions are permitted before doing anything else,
   // consistently with the other stack macros.
   VIXL_ASSERT(allow_macro_instructions_);

   int size = registers.RegisterSizeInBytes();

   PrepareForPop(registers.Count(), size);
   // Pop up to four registers at a time because if the current stack pointer
   // is sp and the register size is 32 bits, registers must be popped in
   // blocks of four in order to maintain the 16-byte alignment for sp.
   while (!registers.IsEmpty()) {
     int count_before = registers.Count();
     // Each pop must be its own statement so that the order is well defined.
     // Once the list runs out, the remaining pops are presumably none
     // registers (PopHelper asserts this for the unused slots).
     const CPURegister& dst0 = registers.PopLowestIndex();
     const CPURegister& dst1 = registers.PopLowestIndex();
     const CPURegister& dst2 = registers.PopLowestIndex();
     const CPURegister& dst3 = registers.PopLowestIndex();
     // The number of registers actually removed from the list this round.
     int count = count_before - registers.Count();
     PopHelper(count, size, dst0, dst1, dst2, dst3);
   }
 }


 // Push `src` onto the stack `count` times.
 void MacroAssembler::PushMultipleTimes(int count, Register src) {
   VIXL_ASSERT(allow_macro_instructions_);
   const int reg_size = src.SizeInBytes();

   PrepareForPush(count, reg_size);
   // Push up to four registers at a time if possible because if the current
   // stack pointer is sp and the register size is 32, registers must be pushed
   // in blocks of four in order to maintain the 16-byte alignment for sp.
   for (; count >= 4; count -= 4) {
     PushHelper(4, reg_size, src, src, src, src);
   }
   // At most three copies remain: emit a pair, then a single, as needed.
   if (count >= 2) {
     PushHelper(2, reg_size, src, src, NoReg, NoReg);
     count -= 2;
   }
   if (count == 1) {
     PushHelper(1, reg_size, src, NoReg, NoReg, NoReg);
     count -= 1;
   }
   VIXL_ASSERT(count == 0);
 }


 void MacroAssembler::PushHelper(int count, int size,
                                 const CPURegister& src0,
                                 const CPURegister& src1,
                                 const CPURegister& src2,
                                 const CPURegister& src3) {
   // Ensure that we don't unintentionally modify scratch or debug registers.
   // Worst case for size is 2 stp.
   InstructionAccurateScope scope(this, 2,
                                  InstructionAccurateScope::kMaximumSize);

   VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
   VIXL_ASSERT(size == src0.SizeInBytes());

   // When pushing multiple registers, the store order is chosen such that
   // Push(a, b) is equivalent to Push(a) followed by Push(b).
   switch (count) {
     case 1:
       VIXL_ASSERT(src1.IsNone() && src2.IsNone() && src3.IsNone());
       str(src0, MemOperand(StackPointer(), -1 * size, PreIndex));
       break;
     case 2:
       VIXL_ASSERT(src2.IsNone() && src3.IsNone());
       stp(src1, src0, MemOperand(StackPointer(), -2 * size, PreIndex));
       break;
     case 3:
       VIXL_ASSERT(src3.IsNone());
       stp(src2, src1, MemOperand(StackPointer(), -3 * size, PreIndex));
       str(src0, MemOperand(StackPointer(), 2 * size));
       break;
     case 4:
       // Skip over 4 * size, then fill in the gap. This allows four W registers
       // to be pushed using sp, whilst maintaining 16-byte alignment for sp at
       // all times.
       stp(src3, src2, MemOperand(StackPointer(), -4 * size, PreIndex));
       stp(src1, src0, MemOperand(StackPointer(), 2 * size));
       break;
     default:
       VIXL_UNREACHABLE();
   }
 }


 void MacroAssembler::PopHelper(int count, int size,
                                const CPURegister& dst0,
                                const CPURegister& dst1,
                                const CPURegister& dst2,
                                const CPURegister& dst3) {
   // Ensure that we don't unintentionally modify scratch or debug registers.
   // Worst case for size is 2 ldp.
   InstructionAccurateScope scope(this, 2,
                                  InstructionAccurateScope::kMaximumSize);

   VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
   VIXL_ASSERT(size == dst0.SizeInBytes());

   // When popping multiple registers, the load order is chosen such that
   // Pop(a, b) is equivalent to Pop(a) followed by Pop(b).
   switch (count) {
     case 1:
       VIXL_ASSERT(dst1.IsNone() && dst2.IsNone() && dst3.IsNone());
       ldr(dst0, MemOperand(StackPointer(), 1 * size, PostIndex));
       break;
     case 2:
       VIXL_ASSERT(dst2.IsNone() && dst3.IsNone());
       ldp(dst0, dst1, MemOperand(StackPointer(), 2 * size, PostIndex));
       break;
     case 3:
       VIXL_ASSERT(dst3.IsNone());
       ldr(dst2, MemOperand(StackPointer(), 2 * size));
       ldp(dst0, dst1, MemOperand(StackPointer(), 3 * size, PostIndex));
       break;
     case 4:
       // Load the higher addresses first, then load the lower addresses and skip
       // the whole block in the second instruction. This allows four W registers
       // to be popped using sp, whilst maintaining 16-byte alignment for sp at
       // all times.
       ldp(dst2, dst3, MemOperand(StackPointer(), 2 * size));
       ldp(dst0, dst1, MemOperand(StackPointer(), 4 * size, PostIndex));
       break;
     default:
       VIXL_UNREACHABLE();
   }
 }


 // Called before pushing `count` registers of `size` bytes each: either
 // checks the alignment requirement (sp is the stack pointer) or moves the
 // system stack pointer below the area about to be written.
 void MacroAssembler::PrepareForPush(int count, int size) {
   if (!sp.Is(StackPointer())) {
     // Even if the current stack pointer is not the system stack pointer (sp),
     // the system stack pointer will still be modified in order to comply with
     // ABI rules about accessing memory below the system stack pointer.
     BumpSystemStackPointer(count * size);
     return;
   }

   // If the current stack pointer is sp, then it must be aligned to 16 bytes
   // on entry and the total size of the specified registers must also be a
   // multiple of 16 bytes.
   VIXL_ASSERT((count * size) % 16 == 0);
 }


 // Called before popping `count` registers of `size` bytes each. Emits
 // nothing; it only checks the alignment requirement when sp is the stack
 // pointer.
 void MacroAssembler::PrepareForPop(int count, int size) {
   // The parameters are only read by the assertion below.
   USE(count);
   USE(size);
   if (!sp.Is(StackPointer())) return;

   // If the current stack pointer is sp, then it must be aligned to 16 bytes
   // on entry and the total size of the specified registers must also be a
   // multiple of 16 bytes.
   VIXL_ASSERT((count * size) % 16 == 0);
 }

 // Store `src` at `offset` from the current stack pointer. An immediate
 // offset must not be negative.
 void MacroAssembler::Poke(const Register& src, const Operand& offset) {
   VIXL_ASSERT(allow_macro_instructions_);
   VIXL_ASSERT(!offset.IsImmediate() || (offset.immediate() >= 0));

   Str(src, MemOperand(StackPointer(), offset));
 }


 // Load `dst` from `offset` from the current stack pointer. An immediate
 // offset must not be negative.
 void MacroAssembler::Peek(const Register& dst, const Operand& offset) {
   VIXL_ASSERT(allow_macro_instructions_);
   VIXL_ASSERT(!offset.IsImmediate() || (offset.immediate() >= 0));

   Ldr(dst, MemOperand(StackPointer(), offset));
 }


 // Load every register in `registers` from consecutive slots starting at
 // `offset` from the current stack pointer, lowest register index first. The
 // stack pointer itself is not modified by this function.
 void MacroAssembler::PeekCPURegList(CPURegList registers, int offset) {
   VIXL_ASSERT(!registers.IncludesAliasOf(StackPointer()));
   VIXL_ASSERT(offset >= 0);
   const int slot_size = registers.RegisterSizeInBytes();

   // Load in pairs for as long as at least two registers remain.
   for (; registers.Count() >= 2; offset += 2 * slot_size) {
     const CPURegister& first = registers.PopLowestIndex();
     const CPURegister& second = registers.PopLowestIndex();
     Ldp(first, second, MemOperand(StackPointer(), offset));
   }
   // An odd register count leaves one register for a single load.
   if (!registers.IsEmpty()) {
     Ldr(registers.PopLowestIndex(),
         MemOperand(StackPointer(), offset));
   }
 }


 // Store every register in `registers` to consecutive slots starting at
 // `offset` from the current stack pointer, lowest register index first. The
 // stack pointer itself is not modified by this function.
 void MacroAssembler::PokeCPURegList(CPURegList registers, int offset) {
   VIXL_ASSERT(!registers.IncludesAliasOf(StackPointer()));
   VIXL_ASSERT(offset >= 0);
   const int slot_size = registers.RegisterSizeInBytes();

   // Store in pairs for as long as at least two registers remain.
   for (; registers.Count() >= 2; offset += 2 * slot_size) {
     const CPURegister& first = registers.PopLowestIndex();
     const CPURegister& second = registers.PopLowestIndex();
     Stp(first, second, MemOperand(StackPointer(), offset));
   }
   // An odd register count leaves one register for a single store.
   if (!registers.IsEmpty()) {
     Str(registers.PopLowestIndex(),
         MemOperand(StackPointer(), offset));
   }
 }


 // Reserve `size` bytes on the stack by subtracting it from the current stack
 // pointer. An immediate size must be positive, and a multiple of 16 when sp
 // is the current stack pointer.
 void MacroAssembler::Claim(const Operand& size) {
   VIXL_ASSERT(allow_macro_instructions_);

   // A zero-sized claim emits nothing.
   if (size.IsZero()) return;

   const bool using_system_sp = sp.Is(StackPointer());

   if (size.IsImmediate()) {
     VIXL_ASSERT(size.immediate() > 0);
     VIXL_ASSERT(!using_system_sp || ((size.immediate() % 16) == 0));
   }

   // With a different stack pointer in use, move the system stack pointer
   // first.
   if (!using_system_sp) {
     BumpSystemStackPointer(size);
   }

   Sub(StackPointer(), StackPointer(), size);
 }


 // Release `size` bytes of stack by adding it to the current stack pointer.
 // An immediate size must be positive, and a multiple of 16 when sp is the
 // current stack pointer.
 void MacroAssembler::Drop(const Operand& size) {
   VIXL_ASSERT(allow_macro_instructions_);

   // A zero-sized drop emits nothing.
   if (size.IsZero()) return;

   if (size.IsImmediate()) {
     VIXL_ASSERT(size.immediate() > 0);
     VIXL_ASSERT(!sp.Is(StackPointer()) || ((size.immediate() % 16) == 0));
   }

   Add(StackPointer(), StackPointer(), size);
 }


 void MacroAssembler::PushCalleeSavedRegisters() {
   // Ensure that the macro-assembler doesn't use any scratch registers.
   // 10 stp will be emitted.
   // TODO(all): Should we use GetCalleeSaved and SavedFP.
   InstructionAccurateScope scope(this, 10);

   // This method must not be called unless the current stack pointer is sp.
   VIXL_ASSERT(sp.Is(StackPointer()));

   MemOperand tos(sp, -2 * kXRegSizeInBytes, PreIndex);

   stp(x29, x30, tos);
   stp(x27, x28, tos);
   stp(x25, x26, tos);
   stp(x23, x24, tos);
   stp(x21, x22, tos);
   stp(x19, x20, tos);

   stp(d14, d15, tos);
   stp(d12, d13, tos);
   stp(d10, d11, tos);
   stp(d8, d9, tos);
 }


 void MacroAssembler::PopCalleeSavedRegisters() {
   // Ensure that the macro-assembler doesn't use any scratch registers.
   // 10 ldp will be emitted.
   // TODO(all): Should we use GetCalleeSaved and SavedFP.
   InstructionAccurateScope scope(this, 10);

   // This method must not be called unless the current stack pointer is sp.
   VIXL_ASSERT(sp.Is(StackPointer()));

   MemOperand tos(sp, 2 * kXRegSizeInBytes, PostIndex);

   ldp(d8, d9, tos);
   ldp(d10, d11, tos);
   ldp(d12, d13, tos);
   ldp(d14, d15, tos);

   ldp(x19, x20, tos);
   ldp(x21, x22, tos);
   ldp(x23, x24, tos);
   ldp(x25, x26, tos);
   ldp(x27, x28, tos);
   ldp(x29, x30, tos);
 }

 // Set sp to (current stack pointer - space) with a single sub instruction.
 // Only valid when the current stack pointer is not sp.
 void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
   VIXL_ASSERT(!sp.Is(StackPointer()));
   // TODO: Several callers rely on this not using scratch registers, so the
   // raw assembler instruction is used here. As a consequence, large
   // immediate values of 'space' cannot be handled.
   InstructionAccurateScope single_instruction(this, 1);
   sub(sp, StackPointer(), space);
 }


 // This is the main Printf implementation. All callee-saved registers are
 // preserved, but NZCV and the caller-saved registers may be clobbered.
 //
 // `format` is emitted inline in the instruction stream and its address is
 // passed in x0. arg0-arg3 are the values to print; the first argument that
 // is a "none" register ends the argument list.
 void MacroAssembler::PrintfNoPreserve(const char * format,
                                       const CPURegister& arg0,
                                       const CPURegister& arg1,
                                       const CPURegister& arg2,
                                       const CPURegister& arg3) {
   // We cannot handle a caller-saved stack pointer. It doesn't make much sense
   // in most cases anyway, so this restriction shouldn't be too serious.
   VIXL_ASSERT(!kCallerSaved.IncludesAliasOf(StackPointer()));

   // The provided arguments, and their proper PCS registers.
   CPURegister args[kPrintfMaxArgCount] = {arg0, arg1, arg2, arg3};
   CPURegister pcs[kPrintfMaxArgCount];

   // Start by assuming every slot is used; the first pass below lowers this
   // when it meets a none argument.
   int arg_count = kPrintfMaxArgCount;

   // The PCS varargs registers for printf. Note that x0 is used for the printf
   // format string.
   static const CPURegList kPCSVarargs =
       CPURegList(CPURegister::kRegister, kXRegSize, 1, arg_count);
   static const CPURegList kPCSVarargsFP =
       CPURegList(CPURegister::kFPRegister, kDRegSize, 0, arg_count - 1);

   // We can use caller-saved registers as scratch values, except for the
   // arguments and the PCS registers where they might need to go.
   UseScratchRegisterScope temps(this);
   temps.Include(kCallerSaved);
   temps.Include(kCallerSavedFP);
   temps.Exclude(kPCSVarargs);
   temps.Exclude(kPCSVarargsFP);
   temps.Exclude(arg0, arg1, arg2, arg3);

   // Copies of the arg lists that we can iterate through.
   CPURegList pcs_varargs = kPCSVarargs;
   CPURegList pcs_varargs_fp = kPCSVarargsFP;

   // Place the arguments. There are lots of clever tricks and optimizations we
   // could use here, but Printf is a debug tool so instead we just try to keep
   // it simple: Move each input that isn't already in the right place to a
   // scratch register, then move everything back.
   for (unsigned i = 0; i < kPrintfMaxArgCount; i++) {
     // Work out the proper PCS register for this argument.
     if (args[i].IsRegister()) {
       pcs[i] = pcs_varargs.PopLowestIndex().X();
       // We might only need a W register here. We need to know the size of the
       // argument so we can properly encode it for the simulator call.
       if (args[i].Is32Bits()) pcs[i] = pcs[i].W();
     } else if (args[i].IsFPRegister()) {
       // In C, floats are always cast to doubles for varargs calls.
       pcs[i] = pcs_varargs_fp.PopLowestIndex().D();
     } else {
       // A none register terminates the argument list.
       VIXL_ASSERT(args[i].IsNone());
       arg_count = i;
       break;
     }

     // If the argument is already in the right place, leave it where it is.
     if (args[i].Aliases(pcs[i])) continue;

     // Otherwise, if the argument is in a PCS argument register, allocate an
     // appropriate scratch register and then move it out of the way.
     if (kPCSVarargs.IncludesAliasOf(args[i]) ||
         kPCSVarargsFP.IncludesAliasOf(args[i])) {
       if (args[i].IsRegister()) {
         Register old_arg = Register(args[i]);
         Register new_arg = temps.AcquireSameSizeAs(old_arg);
         Mov(new_arg, old_arg);
         args[i] = new_arg;
       } else {
         FPRegister old_arg = FPRegister(args[i]);
         FPRegister new_arg = temps.AcquireSameSizeAs(old_arg);
         Fmov(new_arg, old_arg);
         args[i] = new_arg;
       }
     }
   }

   // Do a second pass to move values into their final positions and perform any
   // conversions that may be required.
   for (int i = 0; i < arg_count; i++) {
     VIXL_ASSERT(pcs[i].type() == args[i].type());
     if (pcs[i].IsRegister()) {
       Mov(Register(pcs[i]), Register(args[i]), kDiscardForSameWReg);
     } else {
       VIXL_ASSERT(pcs[i].IsFPRegister());
       // Same size: plain move. Different size: convert to the PCS register's
       // size.
       if (pcs[i].size() == args[i].size()) {
         Fmov(FPRegister(pcs[i]), FPRegister(args[i]));
       } else {
         Fcvt(FPRegister(pcs[i]), FPRegister(args[i]));
       }
     }
   }

   // Load the format string into x0, as per the procedure-call standard.
   //
   // To make the code as portable as possible, the format string is encoded
   // directly in the instruction stream. It might be cleaner to encode it in a
   // literal pool, but since Printf is usually used for debugging, it is
   // beneficial for it to be minimally dependent on other features.
   temps.Exclude(x0);
   Label format_address;
   Adr(x0, &format_address);

   // Emit the format string directly in the instruction stream.
   {
     BlockLiteralPoolScope scope(this);
     // Data emitted:
     //   branch
     //   strlen(format) + 1 (includes null termination)
     //   padding to next instruction
     //   unreachable
     EmissionCheckScope guard(
         this,
         AlignUp(strlen(format) + 1, kInstructionSize) + 2 * kInstructionSize);
     Label after_data;
     // Generated code branches over the string data.
     B(&after_data);
     Bind(&format_address);
     EmitString(format);
     Unreachable();
     Bind(&after_data);
   }

   // We don't pass any arguments on the stack, but we still need to align the C
   // stack pointer to a 16-byte boundary for PCS compliance.
   if (!sp.Is(StackPointer())) {
     Bic(sp, StackPointer(), 0xf);
   }

   // Actually call printf. This part needs special handling for the simulator,
   // since the system printf function will use a different instruction set and
   // the procedure-call standard will not be compatible.
 #ifdef USE_SIMULATOR
   {
     InstructionAccurateScope scope(this, kPrintfLength / kInstructionSize);
     hlt(kPrintfOpcode);
     dc32(arg_count);          // kPrintfArgCountOffset

     // Determine the argument pattern.
     // One kPrintfArgPatternBits-wide code per argument is packed into a
     // single word, with argument 0 in the lowest bits.
     uint32_t arg_pattern_list = 0;
     for (int i = 0; i < arg_count; i++) {
       uint32_t arg_pattern;
       if (pcs[i].IsRegister()) {
         arg_pattern = pcs[i].Is32Bits() ? kPrintfArgW : kPrintfArgX;
       } else {
         VIXL_ASSERT(pcs[i].Is64Bits());
         arg_pattern = kPrintfArgD;
       }
       VIXL_ASSERT(arg_pattern < (1 << kPrintfArgPatternBits));
       arg_pattern_list |= (arg_pattern << (kPrintfArgPatternBits * i));
     }
     dc32(arg_pattern_list);   // kPrintfArgPatternListOffset
   }
 #else
   // Without the simulator, call the host printf through a scratch register.
   Register tmp = temps.AcquireX();
   Mov(tmp, reinterpret_cast<uintptr_t>(printf));
   Blr(tmp);
 #endif
 }


 // Wrapper around PrintfNoPreserve that saves the caller-saved integer and FP
 // register lists and NZCV before the call and restores them afterwards.
 //
 // The arguments are taken by value because any of them that aliases the
 // current stack pointer is replaced below by a scratch register holding the
 // stack pointer's value from before the registers were pushed.
 void MacroAssembler::Printf(const char * format,
                             CPURegister arg0,
                             CPURegister arg1,
                             CPURegister arg2,
                             CPURegister arg3) {
   // We can only print sp if it is the current stack pointer.
   if (!sp.Is(StackPointer())) {
     VIXL_ASSERT(!sp.Aliases(arg0));
     VIXL_ASSERT(!sp.Aliases(arg1));
     VIXL_ASSERT(!sp.Aliases(arg2));
     VIXL_ASSERT(!sp.Aliases(arg3));
   }

   // Make sure that the macro assembler doesn't try to use any of our arguments
   // as scratch registers.
   UseScratchRegisterScope exclude_all(this);
   exclude_all.ExcludeAll();

   // Preserve all caller-saved registers as well as NZCV.
   // If sp is the stack pointer, PushCPURegList asserts that the size of each
   // list is a multiple of 16 bytes.
   PushCPURegList(kCallerSaved);
   PushCPURegList(kCallerSavedFP);

   { UseScratchRegisterScope temps(this);
     // We can use caller-saved registers as scratch values (except for argN).
     temps.Include(kCallerSaved);
     temps.Include(kCallerSavedFP);
     temps.Exclude(arg0, arg1, arg2, arg3);

     // If any of the arguments are the current stack pointer, allocate a new
     // register for them, and adjust the value to compensate for pushing the
     // caller-saved registers.
     bool arg0_sp = StackPointer().Aliases(arg0);
     bool arg1_sp = StackPointer().Aliases(arg1);
     bool arg2_sp = StackPointer().Aliases(arg2);
     bool arg3_sp = StackPointer().Aliases(arg3);
     if (arg0_sp || arg1_sp || arg2_sp || arg3_sp) {
       // Allocate a register to hold the original stack pointer value, to pass
       // to PrintfNoPreserve as an argument.
       Register arg_sp = temps.AcquireX();
       Add(arg_sp, StackPointer(),
           kCallerSaved.TotalSizeInBytes() + kCallerSavedFP.TotalSizeInBytes());
       // Keep each argument's original size while switching to the new
       // register code.
       if (arg0_sp) arg0 = Register(arg_sp.code(), arg0.size());
       if (arg1_sp) arg1 = Register(arg_sp.code(), arg1.size());
       if (arg2_sp) arg2 = Register(arg_sp.code(), arg2.size());
       if (arg3_sp) arg3 = Register(arg_sp.code(), arg3.size());
     }

     // Preserve NZCV.
     // It is pushed together with xzr so that two X registers are pushed.
     Register tmp = temps.AcquireX();
     Mrs(tmp, NZCV);
     Push(tmp, xzr);
     temps.Release(tmp);

     PrintfNoPreserve(format, arg0, arg1, arg2, arg3);

     // Restore NZCV.
     tmp = temps.AcquireX();
     Pop(xzr, tmp);
     Msr(NZCV, tmp);
     temps.Release(tmp);
   }

   // Restore in the reverse of the order in which the lists were pushed.
   PopCPURegList(kCallerSavedFP);
   PopCPURegList(kCallerSaved);
 }

 // Emit the trace pseudo instruction (simulator builds only): an hlt marker
 // followed by the `parameters` and `command` words. Other builds emit
 // nothing.
 void MacroAssembler::Trace(TraceParameters parameters, TraceCommand command) {
   VIXL_ASSERT(allow_macro_instructions_);

 #ifdef USE_SIMULATOR
   // The arguments to the trace pseudo instruction need to be contiguous in
   // memory, so make sure we don't try to emit a literal pool.
   InstructionAccurateScope scope(this, kTraceLength / kInstructionSize);

   Label sequence_start;
   bind(&sequence_start);

   // Refer to instructions-a64.h for a description of the marker and its
   // arguments.
   hlt(kTraceOpcode);

   // Each argument must land at its expected offset from the marker.
   VIXL_ASSERT(SizeOfCodeGeneratedSince(&sequence_start) == kTraceParamsOffset);
   dc32(parameters);

   VIXL_ASSERT(SizeOfCodeGeneratedSince(&sequence_start) == kTraceCommandOffset);
   dc32(command);
 #else
   // Emit nothing on real hardware.
   USE(parameters);
   USE(command);
 #endif
 }


 // Emit the log pseudo instruction (simulator builds only): an hlt marker
 // followed by the `parameters` word. Other builds emit nothing.
 void MacroAssembler::Log(TraceParameters parameters) {
   VIXL_ASSERT(allow_macro_instructions_);

 #ifdef USE_SIMULATOR
   // The arguments to the log pseudo instruction need to be contiguous in
   // memory, so make sure we don't try to emit a literal pool.
   InstructionAccurateScope scope(this, kLogLength / kInstructionSize);

   Label sequence_start;
   bind(&sequence_start);

   // Refer to instructions-a64.h for a description of the marker and its
   // arguments.
   hlt(kLogOpcode);

   // The argument must land at its expected offset from the marker.
   VIXL_ASSERT(SizeOfCodeGeneratedSince(&sequence_start) == kLogParamsOffset);
   dc32(parameters);
 #else
   // Emit nothing on real hardware.
   USE(parameters);
 #endif
 }


 // Emit the instrumentation "enable" marker: a single movn to xzr carrying
 // InstrumentStateEnable, which must not be a printable character.
 void MacroAssembler::EnableInstrumentation() {
   VIXL_ASSERT(!isprint(InstrumentStateEnable));
   InstructionAccurateScope single_instruction(this, 1);
   movn(xzr, InstrumentStateEnable);
 }


 // Emit the instrumentation "disable" marker: a single movn to xzr carrying
 // InstrumentStateDisable, which must not be a printable character.
 void MacroAssembler::DisableInstrumentation() {
   VIXL_ASSERT(!isprint(InstrumentStateDisable));
   InstructionAccurateScope single_instruction(this, 1);
   movn(xzr, InstrumentStateDisable);
 }


 // Emit an instrumentation marker: a single movn to xzr whose immediate
 // encodes the two-character `marker_name`, with marker_name[0] in the low
 // byte and marker_name[1] in the next byte.
 //
 //   marker_name  A string of exactly two printable characters.
 void MacroAssembler::AnnotateInstrumentation(const char* marker_name) {
   VIXL_ASSERT(strlen(marker_name) == 2);

   // Work on unsigned bytes: passing a negative plain char to isprint, or
   // left-shifting a negative value, is undefined behaviour.
   const unsigned char low_byte = static_cast<unsigned char>(marker_name[0]);
   const unsigned char high_byte = static_cast<unsigned char>(marker_name[1]);

   // We allow only printable characters in the marker names. Unprintable
   // characters are reserved for controlling features of the instrumentation.
   VIXL_ASSERT(isprint(low_byte) && isprint(high_byte));

   InstructionAccurateScope scope(this, 1);
   movn(xzr, (high_byte << 8) | low_byte);
 }


 // Leaving the scope puts both scratch lists back to the contents saved in
 // old_available_ and old_availablefp_.
 UseScratchRegisterScope::~UseScratchRegisterScope() {
   // Integer scratch list.
   available_->set_list(old_available_);
   // Floating-point scratch list.
   availablefp_->set_list(old_availablefp_);
 }


 // Returns true if either scratch list includes an alias of `reg`.
 bool UseScratchRegisterScope::IsAvailable(const CPURegister& reg) const {
   if (available_->IncludesAliasOf(reg)) return true;
   return availablefp_->IncludesAliasOf(reg);
 }


 // Take the next integer scratch register and return it with the size of
 // `reg`.
 Register UseScratchRegisterScope::AcquireSameSizeAs(const Register& reg) {
   const CPURegister scratch = AcquireNextAvailable(available_);
   return Register(scratch.code(), reg.size());
 }


 // Take the next floating-point scratch register and return it with the size
 // of `reg`.
 FPRegister UseScratchRegisterScope::AcquireSameSizeAs(const FPRegister& reg) {
   const CPURegister scratch = AcquireNextAvailable(availablefp_);
   return FPRegister(scratch.code(), reg.size());
 }


 // Put `reg` back into the scratch list that matches its type. A none
 // register is accepted and ignored.
 void UseScratchRegisterScope::Release(const CPURegister& reg) {
   if (reg.IsRegister()) {
     ReleaseByCode(available_, reg.code());
     return;
   }
   if (reg.IsFPRegister()) {
     ReleaseByCode(availablefp_, reg.code());
     return;
   }
   VIXL_ASSERT(reg.IsNone());
 }


 void UseScratchRegisterScope::Include(const CPURegList& list) {
   if (list.type() == CPURegister::kRegister) {
     // Make sure that neither sp nor xzr are included the list.
     IncludeByRegList(available_, list.list() & ~(xzr.Bit() | sp.Bit()));
   } else {
     VIXL_ASSERT(list.type() == CPURegister::kFPRegister);
     IncludeByRegList(availablefp_, list.list());
   }
 }


 void UseScratchRegisterScope::Include(const Register& reg1,
                                       const Register& reg2,
                                       const Register& reg3,
                                       const Register& reg4) {
   RegList include = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit();
   // Make sure that neither sp nor xzr are included the list.
   include &= ~(xzr.Bit() | sp.Bit());

   IncludeByRegList(available_, include);
 }


 void UseScratchRegisterScope::Include(const FPRegister& reg1,
                                       const FPRegister& reg2,
                                       const FPRegister& reg3,
                                       const FPRegister& reg4) {
   RegList include = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit();
   IncludeByRegList(availablefp_, include);
 }


 // Remove every register in `list` from the scratch list that matches the
 // list's type.
 void UseScratchRegisterScope::Exclude(const CPURegList& list) {
   if (list.type() != CPURegister::kRegister) {
     VIXL_ASSERT(list.type() == CPURegister::kFPRegister);
     ExcludeByRegList(availablefp_, list.list());
     return;
   }
   ExcludeByRegList(available_, list.list());
 }


 void UseScratchRegisterScope::Exclude(const Register& reg1,
                                       const Register& reg2,
                                       const Register& reg3,
                                       const Register& reg4) {
   RegList exclude = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit();
   ExcludeByRegList(available_, exclude);
 }


 void UseScratchRegisterScope::Exclude(const FPRegister& reg1,
                                       const FPRegister& reg2,
                                       const FPRegister& reg3,
                                       const FPRegister& reg4) {
   RegList excludefp = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit();
   ExcludeByRegList(availablefp_, excludefp);
 }


 void UseScratchRegisterScope::Exclude(const CPURegister& reg1,
                                       const CPURegister& reg2,
                                       const CPURegister& reg3,
                                       const CPURegister& reg4) {
   RegList exclude = 0;
   RegList excludefp = 0;

   const CPURegister regs[] = {reg1, reg2, reg3, reg4};

   for (unsigned i = 0; i < (sizeof(regs) / sizeof(regs[0])); i++) {
     if (regs[i].IsRegister()) {
       exclude |= regs[i].Bit();
     } else if (regs[i].IsFPRegister()) {
       excludefp |= regs[i].Bit();
     } else {
       VIXL_ASSERT(regs[i].IsNone());
     }
   }

   ExcludeByRegList(available_, exclude);
   ExcludeByRegList(availablefp_, excludefp);
 }


 // Empty both scratch lists by excluding each list's current contents from
 // itself.
 void UseScratchRegisterScope::ExcludeAll() {
   // Integer scratch list.
   ExcludeByRegList(available_, available_->list());
   // Floating-point scratch list.
   ExcludeByRegList(availablefp_, availablefp_->list());
 }


 // Remove and return the lowest-indexed register of `available`. The list
 // must not be empty.
 CPURegister UseScratchRegisterScope::AcquireNextAvailable(
     CPURegList* available) {
   VIXL_CHECK(!available->IsEmpty());
   CPURegister scratch = available->PopLowestIndex();
   // Neither the zero register nor sp may be handed out as a scratch register.
   VIXL_ASSERT(!AreAliased(scratch, xzr, sp));
   return scratch;
 }


 // Put the register with the given code back into `available`.
 void UseScratchRegisterScope::ReleaseByCode(CPURegList* available, int code) {
   const RegList code_bit = static_cast<RegList>(1) << code;
   ReleaseByRegList(available, code_bit);
 }


 // Put every register whose bit is set in `regs` back into `available`.
 void UseScratchRegisterScope::ReleaseByRegList(CPURegList* available,
                                                RegList regs) {
   const RegList merged = available->list() | regs;
   available->set_list(merged);
 }


 // Add every register whose bit is set in `regs` to `available`.
 void UseScratchRegisterScope::IncludeByRegList(CPURegList* available,
                                                RegList regs) {
   const RegList merged = available->list() | regs;
   available->set_list(merged);
 }


 // Remove every register whose bit is set in `exclude` from `available`.
 void UseScratchRegisterScope::ExcludeByRegList(CPURegList* available,
                                                RegList exclude) {
   const RegList remaining = available->list() & ~exclude;
   available->set_list(remaining);
 }

 }  // namespace vixl