//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON", "neon">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsCyclone : Predicate<"Subtarget->isCyclone()">;
//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//
// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>, SDTCisVT<1, i32>]>;
// SDTBinaryArithWithFlagsIn - RES = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisInt<0>,
SDTCisVT<3, i32>]>;
// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>,
SDTCisVT<1, i32>,
SDTCisVT<4, i32>]>;
def SDT_AArch64Brcond : SDTypeProfile<0, 3,
[SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisVT<2, OtherVT>]>;
def SDT_AArch64CSel : SDTypeProfile<1, 4,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisInt<3>,
SDTCisVT<4, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
[SDTCisFP<0>,
SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>,
SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;
def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>]>;
// Generates the general dynamic sequences, i.e.
// adrp x0, :tlsdesc:var
// ldr x1, [x0, #:tlsdesc_lo12:var]
// add x0, x0, #:tlsdesc_lo12:var
// .tlsdesccall var
// blr x1
// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
// The single operand is the TLS variable being accessed.
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
[SDTCisPtrTy<0>]>;
def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
SDTCisSameAs<1, 4>]>;
// Node definitions.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
SDCallSeqStart<[ SDTCisVT<0, i32> ]>,
[SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END",
SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call : SDNode<"AArch64ISD::CALL",
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
[SDNPHasChain]>;
def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
[SDNPHasChain]>;
def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
[SDNPHasChain]>;
def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
[SDNPHasChain]>;
def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
[SDNPHasChain]>;
def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >;
def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut,
[SDNPCommutative]>;
def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
[SDNPCommutative]>;
def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;
def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
def AArch64fmax : SDNode<"AArch64ISD::FMAX", SDTFPBinOp>;
def AArch64fmin : SDNode<"AArch64ISD::FMIN", SDTFPBinOp>;
def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;
def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;
def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;
def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>;
def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>;
def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;
def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;
def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
(AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
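// There is no dedicated CMTST node: a lane is all-ones whenever (LHS & RHS)
// is non-zero, which the fragment above expresses as NOT(CMEQz(AND(LHS, RHS))).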
def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>;
def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
[SDNPHasChain, SDNPSideEffect]>;
def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
SDT_AArch64TLSDescCallSeq,
[SDNPInGlue, SDNPOutGlue, SDNPHasChain,
SDNPVariadic]>;
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;
def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;
def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisSameAs<1, 2>]>;
def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
//
def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">;
def NoZCZ : Predicate<"!Subtarget->hasZeroCycleZeroing()">;
def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">;
def ForCodeSize : Predicate<"ForCodeSize">;
def NotForCodeSize : Predicate<"!ForCodeSize">;
include "AArch64InstrFormats.td"
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//
let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
[(AArch64callseq_start timm:$amt)]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
[(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1
let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions. When that changes, they can be
// removed, along with the AArch64Wrapper node.
let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr),
[(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
Sched<[WriteLDAdr]>;
// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
: Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
[(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
tglobaladdr:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrJT
: Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
[(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
tjumptable:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrCP
: Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
[(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
tconstpool:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrBA
: Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
[(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
tblockaddress:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrTLS
: Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
[(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
tglobaltlsaddr:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrEXT
: Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
[(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
texternalsym:$low))]>,
Sched<[WriteAdrAdr]>;
} // isReMaterializable, isCodeGenOnly
def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
(LOADgot tglobaltlsaddr:$addr)>;
def : Pat<(AArch64LOADgot texternalsym:$addr),
(LOADgot texternalsym:$addr)>;
def : Pat<(AArch64LOADgot tconstpool:$addr),
(LOADgot tconstpool:$addr)>;
//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//
def HINT : HintI<"hint">;
def : InstAlias<"nop", (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe", (HINT 0b010)>;
def : InstAlias<"wfi", (HINT 0b011)>;
def : InstAlias<"sev", (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity.
let mayLoad = ?, mayStore = ? in {
def DMB : CRmSystemI<barrier_op, 0b101, "dmb",
[(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;
def DSB : CRmSystemI<barrier_op, 0b100, "dsb",
[(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;
def ISB : CRmSystemI<barrier_op, 0b110, "isb",
[(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;
}
def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;
def MRS : MRSI;
def MSR : MSRI;
def MSRpstate: MSRpstateI;
// The thread pointer (on Linux, at least, where this has been implemented) is
// TPIDR_EL0.
def : Pat<(AArch64threadpointer), (MRS 0xde82)>;
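// 0xde82 is the system-register encoding of TPIDR_EL0
// (op0=3, op1=3, CRn=c13, CRm=c0, op2=2).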
// Generic system instructions
def SYSxt : SystemXtI<0, "sys">;
def SYSLxt : SystemLXtI<1, "sysl">;
def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
(SYSxt imm0_7:$op1, sys_cr_op:$Cn,
sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
//===----------------------------------------------------------------------===//
// Move immediate instructions.
//===----------------------------------------------------------------------===//
defm MOVK : InsertImmediate<0b11, "movk">;
defm MOVN : MoveImmediate<0b00, "movn">;
let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;
// First group of aliases covers an implicit "lsl #0".
def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>;
// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48)>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32)>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16)>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0)>;
def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;
def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;
def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16)>;
def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0)>;
// Final group of aliases covers true "mov $Rd, $imm" cases.
multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR,
int width, int shift> {
def _asmoperand : AsmOperandClass {
let Name = basename # width # "_lsl" # shift # "MovAlias";
let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
# shift # ">";
let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
}
def _movimm : Operand<i32> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
}
def : InstAlias<"mov $Rd, $imm",
(INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
}
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;
let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
isAsCheapAsAMove = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions. When that changes, we can select
// directly to the real instructions and get rid of these pseudos.
def MOVi32imm
: Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
[(set GPR32:$dst, imm:$src)]>,
Sched<[WriteImm]>;
def MOVi64imm
: Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
[(set GPR64:$dst, imm:$src)]>,
Sched<[WriteImm]>;
} // isReMaterializable, isCodeGenOnly
// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
// eventual expansion code fewer bits to worry about getting right. Marshalling
// the types is a little tricky though:
def i64imm_32bit : ImmLeaf<i64, [{
return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;
def trunc_imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i32);
}]>;
def : Pat<(i64 i64imm_32bit:$src),
(SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
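// Writing a W register implicitly zeroes the upper 32 bits of the X register,
// so a single 32-bit move plus SUBREG_TO_REG covers any 64-bit immediate whose
// upper 32 bits are zero (e.g. 0x00000000deadbeef).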
// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i32);
}]>;
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i64);
}]>;
def : Pat<(f32 fpimm:$in),
(COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
def : Pat<(f64 fpimm:$in),
(COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;
// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
// sequences.
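// The resulting code is expected to look roughly like:
//   movz x0, #:abs_g3:sym
//   movk x0, #:abs_g2_nc:sym
//   movk x0, #:abs_g1_nc:sym
//   movk x0, #:abs_g0_nc:sym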
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
tglobaladdr:$g1, tglobaladdr:$g0),
(MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48),
tglobaladdr:$g2, 32),
tglobaladdr:$g1, 16),
tglobaladdr:$g0, 0)>;
def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
tblockaddress:$g1, tblockaddress:$g0),
(MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48),
tblockaddress:$g2, 32),
tblockaddress:$g1, 16),
tblockaddress:$g0, 0)>;
def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
tconstpool:$g1, tconstpool:$g0),
(MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48),
tconstpool:$g2, 32),
tconstpool:$g1, 16),
tconstpool:$g0, 0)>;
def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
tjumptable:$g1, tjumptable:$g0),
(MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g3, 48),
tjumptable:$g2, 32),
tjumptable:$g1, 16),
tjumptable:$g0, 0)>;
//===----------------------------------------------------------------------===//
// Arithmetic instructions.
//===----------------------------------------------------------------------===//
// Add/subtract with carry.
defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;
def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
// Add/subtract
defm ADD : AddSub<0, "add", add>;
defm SUB : AddSub<1, "sub">;
def : InstAlias<"mov $dst, $src",
(ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
(ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
(ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
(ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;
defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn">;
defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp">;
// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
(SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
(SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
(SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
(SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
(SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
(SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3),
(SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>;
def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3),
(SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>;
// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
// These patterns capture that transformation.
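// For example, (add w0, #-5) is selected below as "subs w0, w0, #5".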
let AddedComplexity = 1 in {
def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
(SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
(SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
(ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
(ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}
// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
// These patterns capture that transformation.
let AddedComplexity = 1 in {
def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
(SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
(SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
(ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
(ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}
def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"neg $dst, $src$shift",
(SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"neg $dst, $src$shift",
(SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"negs $dst, $src$shift",
(SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"negs $dst, $src$shift",
(SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
// Unsigned/Signed divide
defm UDIV : Div<0, "udiv", udiv>;
defm SDIV : Div<1, "sdiv", sdiv>;
let isCodeGenOnly = 1 in {
defm UDIV_Int : Div<0, "udiv", int_aarch64_udiv>;
defm SDIV_Int : Div<1, "sdiv", int_aarch64_sdiv>;
}
// Variable shift
defm ASRV : Shift<0b10, "asr", sra>;
defm LSLV : Shift<0b00, "lsl", shl>;
defm LSRV : Shift<0b01, "lsr", srl>;
defm RORV : Shift<0b11, "ror", rotr>;
def : ShiftAlias<"asrv", ASRVWr, GPR32>;
def : ShiftAlias<"asrv", ASRVXr, GPR64>;
def : ShiftAlias<"lslv", LSLVWr, GPR32>;
def : ShiftAlias<"lslv", LSLVXr, GPR64>;
def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
def : ShiftAlias<"rorv", RORVWr, GPR32>;
def : ShiftAlias<"rorv", RORVXr, GPR64>;
// Multiply-add
let AddedComplexity = 7 in {
defm MADD : MulAccum<0, "madd", add>;
defm MSUB : MulAccum<1, "msub", sub>;
def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
(MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
(MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
(MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
(MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
(MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
(MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
} // AddedComplexity = 7
let AddedComplexity = 5 in {
def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;
def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
(SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
(UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
(SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
(UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
} // AddedComplexity = 5
def : MulAccumWAlias<"mul", MADDWrrr>;
def : MulAccumXAlias<"mul", MADDXrrr>;
def : MulAccumWAlias<"mneg", MSUBWrrr>;
def : MulAccumXAlias<"mneg", MSUBXrrr>;
def : WideMulAccumAlias<"smull", SMADDLrrr>;
def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
def : WideMulAccumAlias<"umull", UMADDLrrr>;
def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;
// Multiply-high
def SMULHrr : MulHi<0b010, "smulh", mulhs>;
def UMULHrr : MulHi<0b110, "umulh", mulhu>;
// CRC32
def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;
def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
//===----------------------------------------------------------------------===//
// Logical instructions.
//===----------------------------------------------------------------------===//
// (immediate)
defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
defm AND : LogicalImm<0b00, "and", and, "bic">;
defm EOR : LogicalImm<0b10, "eor", xor, "eon">;
defm ORR : LogicalImm<0b01, "orr", or, "orn">;
// FIXME: these aliases *are* canonical sometimes (when movz can't be
// used). Actually, it seems to be working right now, but putting logical_immXX
// here is a bit dodgy on the AsmParser side too.
def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
logical_imm32:$imm), 0>;
def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
logical_imm64:$imm), 0>;
// (register)
defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
defm BICS : LogicalRegS<0b11, 1, "bics",
BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
defm AND : LogicalReg<0b00, 0, "and", and>;
defm BIC : LogicalReg<0b00, 1, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
defm EON : LogicalReg<0b10, 1, "eon",
BinOpFrag<(xor node:$LHS, (not node:$RHS))>>;
defm EOR : LogicalReg<0b10, 0, "eor", xor>;
defm ORN : LogicalReg<0b01, 1, "orn",
BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
defm ORR : LogicalReg<0b01, 0, "orr", or>;
def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;
def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;
def : InstAlias<"mvn $Wd, $Wm$sh",
(ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
def : InstAlias<"mvn $Xd, $Xm$sh",
(ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;
def : InstAlias<"tst $src1, $src2",
(ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
def : InstAlias<"tst $src1, $src2",
(ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;
def : InstAlias<"tst $src1, $src2",
(ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
def : InstAlias<"tst $src1, $src2",
(ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;
def : InstAlias<"tst $src1, $src2$sh",
(ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
def : InstAlias<"tst $src1, $src2$sh",
(ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;
def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
//===----------------------------------------------------------------------===//
// One operand data processing instructions.
//===----------------------------------------------------------------------===//
defm CLS : OneOperandData<0b101, "cls">;
defm CLZ : OneOperandData<0b100, "clz", ctlz>;
defm RBIT : OneOperandData<0b000, "rbit">;
def : Pat<(int_aarch64_rbit GPR32:$Rn), (RBITWr $Rn)>;
def : Pat<(int_aarch64_rbit GPR64:$Rn), (RBITXr $Rn)>;
def REV16Wr : OneWRegData<0b001, "rev16",
UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def REV16Xr : OneXRegData<0b001, "rev16", null_frag>;
def : Pat<(cttz GPR32:$Rn),
(CLZWr (RBITWr GPR32:$Rn))>;
def : Pat<(cttz GPR64:$Rn),
(CLZXr (RBITXr GPR64:$Rn))>;
def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
(i32 1))),
(CLSWr GPR32:$Rn)>;
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
(i64 1))),
(CLSXr GPR64:$Rn)>;
// Unlike the other one operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit, but actually use different
// opcode bits for the different sizes.
def REVWr : OneWRegData<0b010, "rev", bswap>;
def REVXr : OneXRegData<0b011, "rev", bswap>;
def REV32Xr : OneXRegData<0b010, "rev32",
UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
(EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
def : InstAlias<"ror $dst, $src, $shift",
(EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;
def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
(EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
(EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">;
defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
}
def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;
def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;
// min(7, 31 - shift_amt)
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
enc = enc > 7 ? 7 : enc;
return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;
// min(15, 31 - shift_amt)
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
enc = enc > 15 ? 15 : enc;
return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;
def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;
def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;
// min(7, 63 - shift_amt)
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 7 ? 7 : enc;
return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;
// min(15, 63 - shift_amt)
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 15 ? 15 : enc;
return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;
// min(31, 63 - shift_amt)
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 31 ? 31 : enc;
return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;
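// Example: for a 32-bit (shl x, 3), i32shift_a gives immr = (32-3) & 31 = 29
// and i32shift_b gives imms = 31-3 = 28, so the pattern below produces
// "ubfm w0, w0, #29, #28", the canonical encoding of "lsl w0, w0, #3".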
def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
(UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
(i64 (i32shift_b imm0_31:$imm)))>;
def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
(UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
(i64 (i64shift_b imm0_63:$imm)))>;
let AddedComplexity = 10 in {
def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
(SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
(SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
}
def : InstAlias<"asr $dst, $src, $shift",
(SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"asr $dst, $src, $shift",
(SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
(UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
(UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
def : InstAlias<"lsr $dst, $src, $shift",
(UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"lsr $dst, $src, $shift",
(UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
//===----------------------------------------------------------------------===//
// Conditionally set flags instructions.
//===----------------------------------------------------------------------===//
defm CCMN : CondSetFlagsImm<0, "ccmn">;
defm CCMP : CondSetFlagsImm<1, "ccmp">;
defm CCMN : CondSetFlagsReg<0, "ccmn">;
defm CCMP : CondSetFlagsReg<1, "ccmp">;
//===----------------------------------------------------------------------===//
// Conditional select instructions.
//===----------------------------------------------------------------------===//
defm CSEL : CondSelect<0, 0b00, "csel">;
def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
(CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
(CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
(CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
(CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
(CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
(CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
(CSINCWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
(CSINCXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
(CSINVWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
(CSINVXr XZR, XZR, (i32 imm:$cc))>;
// The inverse of the condition code from the alias instruction is what is used
// in the aliased instruction. The parser already inverts the condition code
// for these aliases.
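// For example, "cset w0, eq" is accepted here and encoded as
// CSINC w0, wzr, wzr, ne.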
def : InstAlias<"cset $dst, $cc",
(CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"cset $dst, $cc",
(CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
(CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
(CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
(CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
(CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
(CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
(CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
(CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
(CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
//===----------------------------------------------------------------------===//
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR : ADRI<0, "adr", adrlabel, []>;
} // hasSideEffects = 0
def ADRP : ADRI<1, "adrp", adrplabel,
[(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
} // isReMaterializable = 1
// Page address of a constant pool entry or block address.
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1
// Default to the LR register.
def : InstAlias<"ret", (RET LR)>;
let isCall = 1, Defs = [LR], Uses = [SP] in {
def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>;
} // isCall
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch
// Create a separate pseudo-instruction for codegen to use so that we don't
// flag lr as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]> {
let isTerminator = 1;
let isBarrier = 1;
let isReturn = 1;
}
// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> {
let AsmString = ".tlsdesccall $sym";
}
// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects be dropped?
let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
: Pseudo<(outs), (ins i64imm:$sym),
[(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>;
def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
(TLSDESC_CALLSEQ texternalsym:$sym)>;
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
def Bcc : BranchCond;
//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
//===----------------------------------------------------------------------===//
defm CBZ : CmpBranch<0, "cbz", AArch64cbz>;
defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;
//===----------------------------------------------------------------------===//
// Test-bit-and-branch instructions.
//===----------------------------------------------------------------------===//
defm TBZ : TestBranch<0, "tbz", AArch64tbz>;
defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;
//===----------------------------------------------------------------------===//
// Unconditional branch (immediate) instructions.
//===----------------------------------------------------------------------===//
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def B : BranchImm<0, "b", [(br bb:$addr)]>;
} // isBranch, isTerminator, isBarrier
let isCall = 1, Defs = [LR], Uses = [SP] in {
def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
} // isCall
def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;
//===----------------------------------------------------------------------===//
// Exception generation instructions.
//===----------------------------------------------------------------------===//
def BRK : ExceptionGeneration<0b001, 0b00, "brk">;
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">;
def HLT : ExceptionGeneration<0b010, 0b00, "hlt">;
def HVC : ExceptionGeneration<0b000, 0b10, "hvc">;
def SMC : ExceptionGeneration<0b000, 0b11, "smc">;
def SVC : ExceptionGeneration<0b000, 0b01, "svc">;
// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>;
//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//
// Pair (indexed, offset)
defm LDPW : LoadPairOffset<0b00, 0, GPR32, simm7s4, "ldp">;
defm LDPX : LoadPairOffset<0b10, 0, GPR64, simm7s8, "ldp">;
defm LDPS : LoadPairOffset<0b00, 1, FPR32, simm7s4, "ldp">;
defm LDPD : LoadPairOffset<0b01, 1, FPR64, simm7s8, "ldp">;
defm LDPQ : LoadPairOffset<0b10, 1, FPR128, simm7s16, "ldp">;
defm LDPSW : LoadPairOffset<0b01, 0, GPR64, simm7s4, "ldpsw">;
// Pair (pre-indexed)
def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32, simm7s4, "ldp">;
def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64, simm7s8, "ldp">;
def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32, simm7s4, "ldp">;
def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64, simm7s8, "ldp">;
def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128, simm7s16, "ldp">;
def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64, simm7s4, "ldpsw">;
// Pair (post-indexed)
def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32, simm7s4, "ldp">;
def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64, simm7s8, "ldp">;
def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32, simm7s4, "ldp">;
def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64, simm7s8, "ldp">;
def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128, simm7s16, "ldp">;
def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64, simm7s4, "ldpsw">;
// Pair (no allocate)
defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32, simm7s4, "ldnp">;
defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64, simm7s8, "ldnp">;
defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128, simm7s16, "ldnp">;
//---
// (register offset)
//---
// Integer
defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;
// Floating-point
defm LDRB : Load8RO<0b00, 1, 0b01, FPR8, "ldr", untyped, load>;
defm LDRH : Load16RO<0b01, 1, 0b01, FPR16, "ldr", f16, load>;
defm LDRS : Load32RO<0b10, 1, 0b01, FPR32, "ldr", f32, load>;
defm LDRD : Load64RO<0b11, 1, 0b01, FPR64, "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128, "ldr", f128, load>;
// Load sign-extended half-word
defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;
// Load sign-extended byte
defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;
// Load sign-extended word
defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;
// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
// For regular loads, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
ValueType ScalTy, ValueType VecTy,
Instruction LOADW, Instruction LOADX,
SubRegIndex sub> {
def : Pat<(VecTy (scalar_to_vector (ScalTy
(loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
(INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
(LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
sub)>;
def : Pat<(VecTy (scalar_to_vector (ScalTy
(loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
(INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
(LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
sub)>;
}
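// For example, the first instantiation below turns
// (v8i8 (scalar_to_vector (i32 (extloadi8 addr)))) into a single LDRBro{W,X}
// loading directly into the bsub lane of an IMPLICIT_DEF vector.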
let AddedComplexity = 10 in {
defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v8i8, LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v16i8, LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, load, i32, v4f16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, load, i32, v8f16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, f32, v2f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, f32, v4f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>;
defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>;
def : Pat <(v1i64 (scalar_to_vector (i64
(load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend64:$extend))))),
(LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
def : Pat <(v1i64 (scalar_to_vector (i64
(load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend64:$extend))))),
(LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}
// Match all 64-bit-wide loads whose type is compatible with FPR64.
multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
Instruction LOADW, Instruction LOADX> {
def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
(LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
(LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
// We must do vector loads with LD1 in big-endian.
defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
}
defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>;
// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
// We must do vector loads with LD1 in big-endian.
defm : VecROLoadPat<ro128, v2i64, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v2f64, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v4i32, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v8f16, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10
// zextload -> i64
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
Instruction INSTW, Instruction INSTX> {
def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
(SUBREG_TO_REG (i64 0),
(INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
sub_32)>;
def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
(SUBREG_TO_REG (i64 0),
(INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
sub_32)>;
}
let AddedComplexity = 10 in {
defm : ExtLoadTo64ROPat<ro8, zextloadi8, LDRBBroW, LDRBBroX>;
defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW, LDRWroX>;
// zextloadi1 -> zextloadi8
defm : ExtLoadTo64ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
// extload -> zextload
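// (an anyext load leaves the upper bits unspecified, so the zero-extending
// form is always a correct choice.)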
defm : ExtLoadTo64ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>;
defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW, LDRWroX>;
// extloadi1 -> zextloadi8
defm : ExtLoadTo64ROPat<ro8, extloadi1, LDRBBroW, LDRBBroX>;
}
// extload/zextload -> i32
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
Instruction INSTW, Instruction INSTX> {
def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
(INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
(INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 10 in {
// extload -> zextload
defm : ExtLoadTo32ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>;
defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW, LDRWroX>;
// zextloadi1 -> zextloadi8
defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
}
//---
// (unsigned immediate)
//---
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64, uimm12s8, "ldr",
[(set GPR64:$Rt,
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32, uimm12s4, "ldr",
[(set GPR32:$Rt,
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8, uimm12s1, "ldr",
[(set FPR8:$Rt,
(load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16, uimm12s2, "ldr",
[(set (f16 FPR16:$Rt),
(load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32, uimm12s4, "ldr",
[(set (f32 FPR32:$Rt),
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64, uimm12s8, "ldr",
[(set (f64 FPR64:$Rt),
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128, uimm12s16, "ldr",
[(set (f128 FPR128:$Rt),
(load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;
// For regular loads, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
def : Pat <(v8i8 (scalar_to_vector (i32
(extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
(INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
(LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v16i8 (scalar_to_vector (i32
(extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v4i16 (scalar_to_vector (i32
(extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
(INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
(LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v8i16 (scalar_to_vector (i32
(extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
(LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v2i32 (scalar_to_vector (i32
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
(INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
(LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v4i32 (scalar_to_vector (i32
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v1i64 (scalar_to_vector (i64
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat <(v2i64 (scalar_to_vector (i64
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;
// Match all 64-bit-wide loads whose type is compatible with FPR64.
let Predicates = [IsLE] in {
// We must use LD1 to perform vector loads in big-endian.
def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
// Match all 128-bit-wide loads whose type is compatible with FPR128
let Predicates = [IsLE] in {
// We must use LD1 to perform vector loads in big-endian.
def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
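// The IsLE guards above exist because a plain LDR of a D/Q register produces
// a different in-register element ordering than LD1 on big-endian targets;
// big-endian vector loads are therefore handled with LD1 patterns instead.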
defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
[(set GPR32:$Rt,
(zextloadi16 (am_indexed16 GPR64sp:$Rn,
uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
[(set GPR32:$Rt,
(zextloadi8 (am_indexed8 GPR64sp:$Rn,
uimm12s1:$offset)))]>;
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
(SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
(SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
(LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
(SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
// extload -> zextload
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
(LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
(LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
(LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
(SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
(SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
(SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
(SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
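// The (SUBREG_TO_REG (i64 0), ..., sub_32) form is used because a load into a
// W register implicitly zeroes the upper 32 bits of the X register, so the
// zero- or any-extension to i64 costs no extra instruction.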
// load sign-extended half-word
defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
[(set GPR32:$Rt,
(sextloadi16 (am_indexed16 GPR64sp:$Rn,
uimm12s2:$offset)))]>;
defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
[(set GPR64:$Rt,
(sextloadi16 (am_indexed16 GPR64sp:$Rn,
uimm12s2:$offset)))]>;
// load sign-extended byte
defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
[(set GPR32:$Rt,
(sextloadi8 (am_indexed8 GPR64sp:$Rn,
uimm12s1:$offset)))]>;
defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
[(set GPR64:$Rt,
(sextloadi8 (am_indexed8 GPR64sp:$Rn,
uimm12s1:$offset)))]>;
// load sign-extended word
defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
[(set GPR64:$Rt,
(sextloadi32 (am_indexed32 GPR64sp:$Rn,
uimm12s4:$offset)))]>;
// load zero-extended word
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
(SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
[(AArch64Prefetch imm:$Rt,
(am_indexed64 GPR64sp:$Rn,
uimm12s8:$offset))]>;
def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
//---
// (literal)
def LDRWl : LoadLiteral<0b00, 0, GPR32, "ldr">;
def LDRXl : LoadLiteral<0b01, 0, GPR64, "ldr">;
def LDRSl : LoadLiteral<0b00, 1, FPR32, "ldr">;
def LDRDl : LoadLiteral<0b01, 1, FPR64, "ldr">;
def LDRQl : LoadLiteral<0b10, 1, FPR128, "ldr">;
// load sign-extended word
def LDRSWl : LoadLiteral<0b10, 0, GPR64, "ldrsw">;
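// Literal loads are PC-relative: e.g. "ldr x0, label" loads from an address
// within +/-1MiB of the current PC, which is how constant-pool and similar
// entries are typically materialized.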
// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
// [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;
//---
// (unscaled immediate)
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64, "ldur",
[(set GPR64:$Rt,
(load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32, "ldur",
[(set GPR32:$Rt,
(load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8, "ldur",
[(set FPR8:$Rt,
(load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16, "ldur",
[(set FPR16:$Rt,
(load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32, "ldur",
[(set (f32 FPR32:$Rt),
(load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64, "ldur",
[(set (f64 FPR64:$Rt),
(load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128, "ldur",
[(set (f128 FPR128:$Rt),
(load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURHH
: LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
[(set GPR32:$Rt,
(zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURBB
: LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
[(set GPR32:$Rt,
(zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
// Match all 64-bit-wide loads whose type is compatible with FPR64
let Predicates = [IsLE] in {
def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
// Match all 128-bit-wide loads whose type is compatible with FPR128
let Predicates = [IsLE] in {
def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
}
// anyext -> zext
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
(LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
// unscaled zext
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
(LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
//---
// LDR mnemonics fall back to LDUR for negative or unaligned offsets.
// Define new assembler match classes, as we only want these to match when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
// associate a DiagnosticType either, as we want the diagnostic for the
// canonical form (the scaled operand) to take precedence.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
let Name = "SImm9OffsetFB" # Width;
let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
let RenderMethod = "addImmOperands";
}
def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;
def simm9_offset_fb8 : Operand<i64> {
let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
let ParserMatchClass = SImm9OffsetFB128Operand;
}
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
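// With these aliases, assembler input such as "ldr x0, [x1, #1]" (an offset
// that is not a multiple of the 8-byte access size and so has no scaled LDR
// encoding) is matched against the unscaled form and encoded as
// "ldur x0, [x1, #1]".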
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
// load sign-extended half-word
defm LDURSHW
: LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
[(set GPR32:$Rt,
(sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
: LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
[(set GPR64:$Rt,
(sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
// load sign-extended byte
defm LDURSBW
: LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
[(set GPR32:$Rt,
(sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
: LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
[(set GPR64:$Rt,
(sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
// load sign-extended word
defm LDURSW
: LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
[(set GPR64:$Rt,
(sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
// Zero- and sign-extending aliases from the generic LDR* mnemonics to LDUR*.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
(LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
(LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
(LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
(LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
(LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
(LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
(LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
[(AArch64Prefetch imm:$Rt,
(am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
//---
// (unscaled immediate, unprivileged)
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;
defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;
// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;
// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;
// load sign-extended word
defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;
//---
// (immediate pre-indexed)
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64, "ldr">;
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8, "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128, "ldr">;
// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64, "ldrsh">;
// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64, "ldrsb">;
// load zero-extended byte
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32, "ldrh">;
// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64, "ldrsw">;
//---
// (immediate post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64, "ldr">;
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8, "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128, "ldr">;
// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64, "ldrsh">;
// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64, "ldrsb">;
// load zero-extended byte
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32, "ldrh">;
// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64, "ldrsw">;
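// Both pre-indexed and post-indexed forms write the updated address back to
// the base register; the difference is whether the access uses the updated
// address ("ldr x0, [x1, #8]!") or the original one ("ldr x0, [x1], #8").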
//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//
// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64, simm7s8, "stp">;
defm STPS : StorePairOffset<0b00, 1, FPR32, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128, simm7s16, "stp">;
// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64, simm7s8, "stp">;
def STPSpre : StorePairPreIdx<0b00, 1, FPR32, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128, simm7s16, "stp">;
// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64, simm7s8, "stp">;
def STPSpost : StorePairPostIdx<0b00, 1, FPR32, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128, simm7s16, "stp">;
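// The three STP variants cover the standard addressing syntaxes, e.g.:
//   stp x19, x20, [sp, #16]    // signed offset
//   stp x29, x30, [sp, #-16]!  // pre-indexed with writeback
//   stp x0, x1, [sp], #16      // post-indexed with writeback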
// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64, simm7s8, "stnp">;
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128, simm7s16, "stnp">;
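// STNP is the same pairwise store but carries a non-temporal hint, telling the
// memory system the data is unlikely to be reused soon.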
//---
// (Register offset)
// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>;
defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>;
// Floating-point
defm STRB : Store8RO< 0b00, 1, 0b00, FPR8, "str", untyped, store>;
defm STRH : Store16RO<0b01, 1, 0b00, FPR16, "str", f16, store>;
defm STRS : Store32RO<0b10, 1, 0b00, FPR32, "str", f32, store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64, "str", f64, store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128, "str", f128, store>;
multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
Instruction STRW, Instruction STRX> {
def : Pat<(storeop GPR64:$Rt,
(ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
(STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
def : Pat<(storeop GPR64:$Rt,
(ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
(STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 10 in {
// truncstore i64
defm : TruncStoreFrom64ROPat<ro8, truncstorei8, STRBBroW, STRBBroX>;
defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW, STRWroX>;
}
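// Truncating stores of an i64 value reuse the 32-bit/halfword/byte store
// instructions by extracting the low word with (EXTRACT_SUBREG ..., sub_32);
// no separate truncation instruction is needed.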
multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
Instruction STRW, Instruction STRX> {
def : Pat<(store (VecTy FPR:$Rt),
(ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
(STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
def : Pat<(store (VecTy FPR:$Rt),
(ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
(STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 10 in {
// Match all 64-bit-wide stores whose type is compatible with FPR64
let Predicates = [IsLE] in {
// We must use ST1 to store vectors in big-endian.
defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
}
defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;
// Match all 128-bit-wide stores whose type is compatible with FPR128
let Predicates = [IsLE] in {
// We must use ST1 to store vectors in big-endian.
defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10
// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
ValueType VecTy, ValueType STy,
SubRegIndex SubRegIdx,
Instruction STRW, Instruction STRX> {
def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
(ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
(STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
(ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
(STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 19 in {
defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
defm : VecROStoreLane0Pat<ro16, store , v8i16, i16, hsub, STRHroW, STRHroX>;
defm : VecROStoreLane0Pat<ro32, truncstorei32, v4i32, i32, ssub, STRSroW, STRSroX>;
defm : VecROStoreLane0Pat<ro32, store , v4i32, i32, ssub, STRSroW, STRSroX>;
defm : VecROStoreLane0Pat<ro32, store , v4f32, f32, ssub, STRSroW, STRSroX>;
defm : VecROStoreLane0Pat<ro64, store , v2i64, i64, dsub, STRDroW, STRDroX>;
defm : VecROStoreLane0Pat<ro64, store , v2f64, f64, dsub, STRDroW, STRDroX>;
}
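// Storing lane 0 of a 128-bit vector needs no explicit lane extract: lane 0
// lives in the h/s/d subregister of the Q register, so the patterns above
// store that subregister directly with the corresponding FP store.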
//---
// (unsigned immediate)
defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str",
[(store GPR64:$Rt,
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUI<0b10, 0, 0b00, GPR32, uimm12s4, "str",
[(store GPR32:$Rt,
(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRB : StoreUI<0b00, 1, 0b00, FPR8, uimm12s1, "str",
[(store FPR8:$Rt,
(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
defm STRH : StoreUI<0b01, 1, 0b00, FPR16, uimm12s2, "str",
[(store (f16 FPR16:$Rt),
(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRS : StoreUI<0b10, 1, 0b00, FPR32, uimm12s4, "str",
[(store (f32 FPR32:$Rt),
(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRD : StoreUI<0b11, 1, 0b00, FPR64, uimm12s8, "str",
[(store (f64 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128, uimm12s16, "str", []>;
defm STRHH : StoreUI<0b01, 0, 0b00, GPR32, uimm12s2, "strh",
[(truncstorei16 GPR32:$Rt,
(am_indexed16 GPR64sp:$Rn,
uimm12s2:$offset))]>;
defm STRBB : StoreUI<0b00, 0, 0b00, GPR32, uimm12s1, "strb",
[(truncstorei8 GPR32:$Rt,
(am_indexed8 GPR64sp:$Rn,
uimm12s1:$offset))]>;
// Match all 64-bit-wide stores whose type is compatible with FPR64
let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
// We must use ST1 to store vectors in big-endian.
def : Pat<(store (v2f32 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v8i8 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v4i16 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v2i32 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v4f16 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(store (v1f64 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
// Match all 128-bit-wide stores whose type is compatible with FPR128
let Predicates = [IsLE] in {
// We must use ST1 to store vectors in big-endian.
def : Pat<(store (v4f32 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(store (v2f64 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(store (v16i8 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(store (v8i16 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;