[patches] Cherry pick CLs for: Fix 2 SVE issues
8901f8beea3 AArch64 SVE restore SVE registers after expression
dc9f65be455 [AArch64][SVE] Fix handling of stack protection with SVE
Test: N/A
Change-Id: I3bd5141166dfba61c0840ef0ec54cce790f74834
diff --git a/patches/PATCHES.json b/patches/PATCHES.json
index 7460cb2..244412a 100644
--- a/patches/PATCHES.json
+++ b/patches/PATCHES.json
@@ -38,6 +38,18 @@
"start_version": 437112
},
{
+ "end_version": 437539,
+ "metadata": {
+ "info": [],
+ "title": "[UPSTREAM] AArch64 SVE restore SVE registers after expression"
+ },
+ "platforms": [
+ "android"
+ ],
+ "rel_patch_path": "cherry/8901f8beea3a70f92be8c0b80313260502f03727.patch",
+ "start_version": 437112
+ },
+ {
"end_version": 438499,
"metadata": {
"info": [
@@ -184,6 +196,18 @@
"start_version": 437112
},
{
+ "end_version": 446311,
+ "metadata": {
+ "info": [],
+ "title": "[UPSTREAM] [AArch64][SVE] Fix handling of stack protection with SVE"
+ },
+ "platforms": [
+ "android"
+ ],
+ "rel_patch_path": "cherry/dc9f65be4555406262ff693c8bac5f1f0b960a97.patch",
+ "start_version": 437112
+ },
+ {
"end_version": 446420,
"metadata": {
"info": [],
diff --git a/patches/cherry/8901f8beea3a70f92be8c0b80313260502f03727.patch b/patches/cherry/8901f8beea3a70f92be8c0b80313260502f03727.patch
new file mode 100644
index 0000000..4391810
--- /dev/null
+++ b/patches/cherry/8901f8beea3a70f92be8c0b80313260502f03727.patch
@@ -0,0 +1,410 @@
+From 8901f8beea3a70f92be8c0b80313260502f03727 Mon Sep 17 00:00:00 2001
+From: Muhammad Omair Javaid <omair.javaid@linaro.org>
+Date: Thu, 9 Sep 2021 16:04:43 +0500
+Subject: [PATCH] AArch64 SVE restore SVE registers after expression
+
+This patch fixes register save/restore on expression call to also include SVE registers.
+
+This will fix expression calls like:
+
+re re p1
+
+<Register Value P1 before expression>
+
+p <var-name or function call>
+
+re re p1
+
+<Register Value P1 after expression>
+
+In above example register P1 should remain the same before and after the expression evaluation.
+
+Reviewed By: DavidSpickett
+
+Differential Revision: https://reviews.llvm.org/D108739
+---
+ .../NativeRegisterContextLinux_arm64.cpp | 142 ++++++++++++++----
+ .../Linux/NativeRegisterContextLinux_arm64.h | 2 +-
+ .../Process/Utility/RegisterInfoPOSIX_arm64.h | 1 +
+ .../TestSVERegisters.py | 91 ++++++-----
+ .../rw_access_static_config/main.c | 19 ++-
+ 5 files changed, 189 insertions(+), 66 deletions(-)
+
+diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
+index ebde0a499acf..f28bddcb9a99 100644
+--- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
++++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
+@@ -46,8 +46,6 @@
+ #define HWCAP_PACA (1 << 30)
+ #define HWCAP2_MTE (1 << 18)
+
+-#define REG_CONTEXT_SIZE (GetGPRSize() + GetFPRSize())
+-
+ using namespace lldb;
+ using namespace lldb_private;
+ using namespace lldb_private::process_linux;
+@@ -452,30 +450,73 @@ Status NativeRegisterContextLinux_arm64::WriteRegister(
+
+ Status NativeRegisterContextLinux_arm64::ReadAllRegisterValues(
+ lldb::DataBufferSP &data_sp) {
+- Status error;
++ // AArch64 register data must contain GPRs, either FPR or SVE registers
++ // and optional MTE register. Pointer Authentication (PAC) registers are
++ // read-only and will be skiped.
+
+- data_sp.reset(new DataBufferHeap(REG_CONTEXT_SIZE, 0));
++ // In order to create register data checkpoint we first read all register
++ // values if not done already and calculate total size of register set data.
++ // We store all register values in data_sp by copying full PTrace data that
++ // corresponds to register sets enabled by current register context.
+
++ Status error;
++ uint32_t reg_data_byte_size = GetGPRBufferSize();
+ error = ReadGPR();
+ if (error.Fail())
+ return error;
+
+- error = ReadFPR();
++ // If SVE is enabled we need not copy FPR separately.
++ if (GetRegisterInfo().IsSVEEnabled()) {
++ reg_data_byte_size += GetSVEBufferSize();
++ error = ReadAllSVE();
++ } else {
++ reg_data_byte_size += GetFPRSize();
++ error = ReadFPR();
++ }
+ if (error.Fail())
+ return error;
+
++ if (GetRegisterInfo().IsMTEEnabled()) {
++ reg_data_byte_size += GetMTEControlSize();
++ error = ReadMTEControl();
++ if (error.Fail())
++ return error;
++ }
++
++ data_sp.reset(new DataBufferHeap(reg_data_byte_size, 0));
+ uint8_t *dst = data_sp->GetBytes();
+- ::memcpy(dst, GetGPRBuffer(), GetGPRSize());
+- dst += GetGPRSize();
+- ::memcpy(dst, GetFPRBuffer(), GetFPRSize());
++
++ ::memcpy(dst, GetGPRBuffer(), GetGPRBufferSize());
++ dst += GetGPRBufferSize();
++
++ if (GetRegisterInfo().IsSVEEnabled()) {
++ ::memcpy(dst, GetSVEBuffer(), GetSVEBufferSize());
++ dst += GetSVEBufferSize();
++ } else {
++ ::memcpy(dst, GetFPRBuffer(), GetFPRSize());
++ dst += GetFPRSize();
++ }
++
++ if (GetRegisterInfo().IsMTEEnabled())
++ ::memcpy(dst, GetMTEControl(), GetMTEControlSize());
+
+ return error;
+ }
+
+ Status NativeRegisterContextLinux_arm64::WriteAllRegisterValues(
+ const lldb::DataBufferSP &data_sp) {
+- Status error;
++ // AArch64 register data must contain GPRs, either FPR or SVE registers
++ // and optional MTE register. Pointer Authentication (PAC) registers are
++ // read-only and will be skiped.
++
++ // We store all register values in data_sp by copying full PTrace data that
++ // corresponds to register sets enabled by current register context. In order
++ // to restore from register data checkpoint we will first restore GPRs, based
++ // on size of remaining register data either SVE or FPRs should be restored
++ // next. SVE is not enabled if we have register data size less than or equal
++ // to size of GPR + FPR + MTE.
+
++ Status error;
+ if (!data_sp) {
+ error.SetErrorStringWithFormat(
+ "NativeRegisterContextLinux_arm64::%s invalid data_sp provided",
+@@ -483,14 +524,6 @@ Status NativeRegisterContextLinux_arm64::WriteAllRegisterValues(
+ return error;
+ }
+
+- if (data_sp->GetByteSize() != REG_CONTEXT_SIZE) {
+- error.SetErrorStringWithFormat(
+- "NativeRegisterContextLinux_arm64::%s data_sp contained mismatched "
+- "data size, expected %" PRIu64 ", actual %" PRIu64,
+- __FUNCTION__, REG_CONTEXT_SIZE, data_sp->GetByteSize());
+- return error;
+- }
+-
+ uint8_t *src = data_sp->GetBytes();
+ if (src == nullptr) {
+ error.SetErrorStringWithFormat("NativeRegisterContextLinux_arm64::%s "
+@@ -499,19 +532,79 @@ Status NativeRegisterContextLinux_arm64::WriteAllRegisterValues(
+ __FUNCTION__);
+ return error;
+ }
+- ::memcpy(GetGPRBuffer(), src, GetRegisterInfoInterface().GetGPRSize());
++
++ uint64_t reg_data_min_size = GetGPRBufferSize() + GetFPRSize();
++ if (data_sp->GetByteSize() < reg_data_min_size) {
++ error.SetErrorStringWithFormat(
++ "NativeRegisterContextLinux_arm64::%s data_sp contained insufficient "
++ "register data bytes, expected at least %" PRIu64 ", actual %" PRIu64,
++ __FUNCTION__, reg_data_min_size, data_sp->GetByteSize());
++ return error;
++ }
++
++ // Register data starts with GPRs
++ ::memcpy(GetGPRBuffer(), src, GetGPRBufferSize());
++ m_gpr_is_valid = true;
+
+ error = WriteGPR();
+ if (error.Fail())
+ return error;
+
+- src += GetRegisterInfoInterface().GetGPRSize();
+- ::memcpy(GetFPRBuffer(), src, GetFPRSize());
++ src += GetGPRBufferSize();
++
++ // Verify if register data may contain SVE register values.
++ bool contains_sve_reg_data =
++ (data_sp->GetByteSize() > (reg_data_min_size + GetSVEHeaderSize()));
++
++ if (contains_sve_reg_data) {
++ // We have SVE register data first write SVE header.
++ ::memcpy(GetSVEHeader(), src, GetSVEHeaderSize());
++ if (!sve_vl_valid(m_sve_header.vl)) {
++ m_sve_header_is_valid = false;
++ error.SetErrorStringWithFormat("NativeRegisterContextLinux_arm64::%s "
++ "Invalid SVE header in data_sp",
++ __FUNCTION__);
++ return error;
++ }
++ m_sve_header_is_valid = true;
++ error = WriteSVEHeader();
++ if (error.Fail())
++ return error;
++
++ // SVE header has been written configure SVE vector length if needed.
++ ConfigureRegisterContext();
++
++ // Make sure data_sp contains sufficient data to write all SVE registers.
++ reg_data_min_size = GetGPRBufferSize() + GetSVEBufferSize();
++ if (data_sp->GetByteSize() < reg_data_min_size) {
++ error.SetErrorStringWithFormat(
++ "NativeRegisterContextLinux_arm64::%s data_sp contained insufficient "
++ "register data bytes, expected %" PRIu64 ", actual %" PRIu64,
++ __FUNCTION__, reg_data_min_size, data_sp->GetByteSize());
++ return error;
++ }
++
++ ::memcpy(GetSVEBuffer(), src, GetSVEBufferSize());
++ m_sve_buffer_is_valid = true;
++ error = WriteAllSVE();
++ src += GetSVEBufferSize();
++ } else {
++ ::memcpy(GetFPRBuffer(), src, GetFPRSize());
++ m_fpu_is_valid = true;
++ error = WriteFPR();
++ src += GetFPRSize();
++ }
+
+- error = WriteFPR();
+ if (error.Fail())
+ return error;
+
++ if (GetRegisterInfo().IsMTEEnabled() &&
++ data_sp->GetByteSize() > reg_data_min_size) {
++ ::memcpy(GetMTEControl(), src, GetMTEControlSize());
++ m_mte_ctrl_is_valid = true;
++ error = WriteMTEControl();
++ }
++
+ return error;
+ }
+
+@@ -864,13 +957,6 @@ uint32_t NativeRegisterContextLinux_arm64::CalculateSVEOffset(
+ return sve_reg_offset;
+ }
+
+-void *NativeRegisterContextLinux_arm64::GetSVEBuffer() {
+- if (m_sve_state == SVEState::FPSIMD)
+- return m_sve_ptrace_payload.data() + sve::ptrace_fpsimd_offset;
+-
+- return m_sve_ptrace_payload.data();
+-}
+-
+ std::vector<uint32_t> NativeRegisterContextLinux_arm64::GetExpeditedRegisters(
+ ExpeditedRegs expType) const {
+ std::vector<uint32_t> expedited_reg_nums =
+diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
+index 4dfc78b5b282..2f8a4a601181 100644
+--- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
++++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
+@@ -139,7 +139,7 @@ private:
+
+ void *GetMTEControl() { return &m_mte_ctrl_reg; }
+
+- void *GetSVEBuffer();
++ void *GetSVEBuffer() { return m_sve_ptrace_payload.data(); };
+
+ size_t GetSVEHeaderSize() { return sizeof(m_sve_header); }
+
+diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
+index ba873ba4436b..96cab49d5ac8 100644
+--- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
++++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
+@@ -110,6 +110,7 @@ public:
+
+ bool IsSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSVE); }
+ bool IsPAuthEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); }
++ bool IsMTEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); }
+
+ bool IsSVEReg(unsigned reg) const;
+ bool IsSVEZReg(unsigned reg) const;
+diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py
+index b243a6692d85..dd2277b92273 100644
+--- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py
++++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py
+@@ -17,6 +17,51 @@ class RegisterCommandsTestCase(TestBase):
+ self.assertEqual(reg_value.GetByteSize(), expected,
+ 'Verify "%s" == %i' % (name, expected))
+
++ def check_sve_regs_read(self, z_reg_size):
++ p_reg_size = int(z_reg_size / 8)
++
++ for i in range(32):
++ z_regs_value = '{' + \
++ ' '.join('0x{:02x}'.format(i + 1)
++ for _ in range(z_reg_size)) + '}'
++ self.expect("register read " + 'z%i' %
++ (i), substrs=[z_regs_value])
++
++ p_value_bytes = ['0xff', '0x55', '0x11', '0x01', '0x00']
++ for i in range(16):
++ p_regs_value = '{' + \
++ ' '.join(p_value_bytes[i % 5] for _ in range(p_reg_size)) + '}'
++ self.expect("register read " + 'p%i' % (i), substrs=[p_regs_value])
++
++ self.expect("register read ffr", substrs=[p_regs_value])
++
++ def check_sve_regs_read_after_write(self, z_reg_size):
++ p_reg_size = int(z_reg_size / 8)
++
++ z_regs_value = '{' + \
++ ' '.join(('0x9d' for _ in range(z_reg_size))) + '}'
++
++ p_regs_value = '{' + \
++ ' '.join(('0xee' for _ in range(p_reg_size))) + '}'
++
++ for i in range(32):
++ self.runCmd('register write ' + 'z%i' %
++ (i) + " '" + z_regs_value + "'")
++
++ for i in range(32):
++ self.expect("register read " + 'z%i' % (i), substrs=[z_regs_value])
++
++ for i in range(16):
++ self.runCmd('register write ' + 'p%i' %
++ (i) + " '" + p_regs_value + "'")
++
++ for i in range(16):
++ self.expect("register read " + 'p%i' % (i), substrs=[p_regs_value])
++
++ self.runCmd('register write ' + 'ffr ' + "'" + p_regs_value + "'")
++
++ self.expect("register read " + 'ffr', substrs=[p_regs_value])
++
+ mydir = TestBase.compute_mydir(__file__)
+
+ @no_debug_info_test
+@@ -117,43 +162,17 @@ class RegisterCommandsTestCase(TestBase):
+
+ z_reg_size = vg_reg_value * 8
+
+- p_reg_size = int(z_reg_size / 8)
+-
+- for i in range(32):
+- z_regs_value = '{' + \
+- ' '.join('0x{:02x}'.format(i + 1)
+- for _ in range(z_reg_size)) + '}'
+- self.expect("register read " + 'z%i' %
+- (i), substrs=[z_regs_value])
++ self.check_sve_regs_read(z_reg_size)
+
+- p_value_bytes = ['0xff', '0x55', '0x11', '0x01', '0x00']
+- for i in range(16):
+- p_regs_value = '{' + \
+- ' '.join(p_value_bytes[i % 5] for _ in range(p_reg_size)) + '}'
+- self.expect("register read " + 'p%i' % (i), substrs=[p_regs_value])
++ # Evaluate simple expression and print function expr_eval_func address.
++ self.expect("p expr_eval_func", substrs=["= 0x"])
+
+- self.expect("register read ffr", substrs=[p_regs_value])
++ # Evaluate expression call function expr_eval_func.
++ self.expect_expr("expr_eval_func()",
++ result_type="int", result_value="1")
+
+- z_regs_value = '{' + \
+- ' '.join(('0x9d' for _ in range(z_reg_size))) + '}'
++ # We called a jitted function above which must not have changed SVE
++ # vector length or register values.
++ self.check_sve_regs_read(z_reg_size)
+
+- p_regs_value = '{' + \
+- ' '.join(('0xee' for _ in range(p_reg_size))) + '}'
+-
+- for i in range(32):
+- self.runCmd('register write ' + 'z%i' %
+- (i) + " '" + z_regs_value + "'")
+-
+- for i in range(32):
+- self.expect("register read " + 'z%i' % (i), substrs=[z_regs_value])
+-
+- for i in range(16):
+- self.runCmd('register write ' + 'p%i' %
+- (i) + " '" + p_regs_value + "'")
+-
+- for i in range(16):
+- self.expect("register read " + 'p%i' % (i), substrs=[p_regs_value])
+-
+- self.runCmd('register write ' + 'ffr ' + "'" + p_regs_value + "'")
+-
+- self.expect("register read " + 'ffr', substrs=[p_regs_value])
++ self.check_sve_regs_read_after_write(z_reg_size)
+diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c
+index 0c2573864eeb..79ff587ab732 100644
+--- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c
++++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c
+@@ -1,4 +1,6 @@
+-int main() {
++#include <sys/prctl.h>
++
++void write_sve_regs() {
+ asm volatile("setffr\n\t");
+ asm volatile("ptrue p0.b\n\t");
+ asm volatile("ptrue p1.h\n\t");
+@@ -49,5 +51,20 @@ int main() {
+ asm volatile("cpy z29.b, p5/z, #30\n\t");
+ asm volatile("cpy z30.b, p10/z, #31\n\t");
+ asm volatile("cpy z31.b, p15/z, #32\n\t");
++}
++
++// This function will be called using jitted expression call. We change vector
++// length and write SVE registers. Our program context should restore to
++// orignal vector length and register values after expression evaluation.
++int expr_eval_func() {
++ prctl(PR_SVE_SET_VL, 8 * 2);
++ write_sve_regs();
++ prctl(PR_SVE_SET_VL, 8 * 4);
++ write_sve_regs();
++ return 1;
++}
++
++int main() {
++ write_sve_regs();
+ return 0; // Set a break point here.
+ }
+--
+2.34.1.703.g22d0c6ccf7-goog
+
diff --git a/patches/cherry/dc9f65be4555406262ff693c8bac5f1f0b960a97.patch b/patches/cherry/dc9f65be4555406262ff693c8bac5f1f0b960a97.patch
new file mode 100644
index 0000000..2d452d9
--- /dev/null
+++ b/patches/cherry/dc9f65be4555406262ff693c8bac5f1f0b960a97.patch
@@ -0,0 +1,594 @@
+From dc9f65be4555406262ff693c8bac5f1f0b960a97 Mon Sep 17 00:00:00 2001
+From: John Brawn <john.brawn@arm.com>
+Date: Tue, 14 Dec 2021 11:11:41 +0000
+Subject: [PATCH] [AArch64][SVE] Fix handling of stack protection with SVE
+
+Fix a couple of things that were causing stack protection to not work
+correctly in functions that have scalable vectors on the stack:
+ * Use TypeSize when determining if accesses to a variable are
+ considered out-of-bounds so that the behaviour is correct for
+ scalable vectors.
+ * When stack protection is enabled move the stack protector location
+ to the top of the SVE locals, so that any overflow in them (or the
+ other locals which are below that) will be detected.
+
+Fixes: https://github.com/llvm/llvm-project/issues/51137
+
+Differential Revision: https://reviews.llvm.org/D111631
+---
+ llvm/include/llvm/CodeGen/StackProtector.h | 2 +-
+ llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 6 +-
+ llvm/lib/CodeGen/PrologEpilogInserter.cpp | 14 +-
+ llvm/lib/CodeGen/StackProtector.cpp | 21 +-
+ .../Target/AArch64/AArch64FrameLowering.cpp | 11 +
+ .../Target/AArch64/AArch64ISelLowering.cpp | 20 +-
+ .../AArch64/stack-guard-reassign-sve.mir | 47 +++
+ llvm/test/CodeGen/AArch64/stack-guard-sve.ll | 338 ++++++++++++++++++
+ 8 files changed, 447 insertions(+), 12 deletions(-)
+ create mode 100644 llvm/test/CodeGen/AArch64/stack-guard-reassign-sve.mir
+ create mode 100644 llvm/test/CodeGen/AArch64/stack-guard-sve.ll
+
+diff --git a/llvm/include/llvm/CodeGen/StackProtector.h b/llvm/include/llvm/CodeGen/StackProtector.h
+index f6513e8d4ea0..57456b3f6c16 100644
+--- a/llvm/include/llvm/CodeGen/StackProtector.h
++++ b/llvm/include/llvm/CodeGen/StackProtector.h
+@@ -95,7 +95,7 @@ private:
+ bool InStruct = false) const;
+
+ /// Check whether a stack allocation has its address taken.
+- bool HasAddressTaken(const Instruction *AI, uint64_t AllocSize);
++ bool HasAddressTaken(const Instruction *AI, TypeSize AllocSize);
+
+ /// RequiresStackProtector - Check whether or not this function needs a
+ /// stack protector based upon the stack protector level.
+diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+index ee2387d1e8e6..37fd3e4853ac 100644
+--- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
++++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+@@ -210,7 +210,11 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ StackObjSet SmallArrayObjs;
+ StackObjSet AddrOfObjs;
+
+- AdjustStackOffset(MFI, StackProtectorFI, Offset, StackGrowsDown, MaxAlign);
++ // Only place the stack protector in the local stack area if the target
++ // allows it.
++ if (TFI.isStackIdSafeForLocalArea(MFI.getStackID(StackProtectorFI)))
++ AdjustStackOffset(MFI, StackProtectorFI, Offset, StackGrowsDown,
++ MaxAlign);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+index 29a88480fd9f..8d8a6126dad0 100644
+--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
++++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+@@ -953,12 +953,22 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
+ // LocalStackSlotPass didn't already allocate a slot for it.
+ // If we are told to use the LocalStackAllocationBlock, the stack protector
+ // is expected to be already pre-allocated.
+- if (!MFI.getUseLocalStackAllocationBlock())
++ if (MFI.getStackID(StackProtectorFI) != TargetStackID::Default) {
++ // If the stack protector isn't on the default stack then it's up to the
++ // target to set the stack offset.
++ assert(MFI.getObjectOffset(StackProtectorFI) != 0 &&
++ "Offset of stack protector on non-default stack expected to be "
++ "already set.");
++ assert(!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()) &&
++ "Stack protector on non-default stack expected to not be "
++ "pre-allocated by LocalStackSlotPass.");
++ } else if (!MFI.getUseLocalStackAllocationBlock()) {
+ AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset, MaxAlign,
+ Skew);
+- else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()))
++ } else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex())) {
+ llvm_unreachable(
+ "Stack protector not pre-allocated by LocalStackSlotPass.");
++ }
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
+index 7445f77c955d..6765fd274686 100644
+--- a/llvm/lib/CodeGen/StackProtector.cpp
++++ b/llvm/lib/CodeGen/StackProtector.cpp
+@@ -162,7 +162,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
+ }
+
+ bool StackProtector::HasAddressTaken(const Instruction *AI,
+- uint64_t AllocSize) {
++ TypeSize AllocSize) {
+ const DataLayout &DL = M->getDataLayout();
+ for (const User *U : AI->users()) {
+ const auto *I = cast<Instruction>(U);
+@@ -170,7 +170,8 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
+ // the bounds of the allocated object.
+ Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I);
+ if (MemLoc.hasValue() && MemLoc->Size.hasValue() &&
+- MemLoc->Size.getValue() > AllocSize)
++ !TypeSize::isKnownGE(AllocSize,
++ TypeSize::getFixed(MemLoc->Size.getValue())))
+ return true;
+ switch (I->getOpcode()) {
+ case Instruction::Store:
+@@ -203,13 +204,19 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
+ // would use it could also be out-of-bounds meaning stack protection is
+ // required.
+ const GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
+- unsigned TypeSize = DL.getIndexTypeSizeInBits(I->getType());
+- APInt Offset(TypeSize, 0);
+- APInt MaxOffset(TypeSize, AllocSize);
+- if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.ugt(MaxOffset))
++ unsigned IndexSize = DL.getIndexTypeSizeInBits(I->getType());
++ APInt Offset(IndexSize, 0);
++ if (!GEP->accumulateConstantOffset(DL, Offset))
++ return true;
++ TypeSize OffsetSize = TypeSize::Fixed(Offset.getLimitedValue());
++ if (!TypeSize::isKnownGT(AllocSize, OffsetSize))
+ return true;
+ // Adjust AllocSize to be the space remaining after this offset.
+- if (HasAddressTaken(I, AllocSize - Offset.getLimitedValue()))
++ // We can't subtract a fixed size from a scalable one, so in that case
++ // assume the scalable value is of minimum size.
++ TypeSize NewAllocSize =
++ TypeSize::Fixed(AllocSize.getKnownMinValue()) - OffsetSize;
++ if (HasAddressTaken(I, NewAllocSize))
+ return true;
+ break;
+ }
+diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+index b630f4f0df5f..638e45b30d99 100644
+--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
++++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+@@ -3041,10 +3041,21 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
+
+ // Create a buffer of SVE objects to allocate and sort it.
+ SmallVector<int, 8> ObjectsToAllocate;
++ // If we have a stack protector, and we've previously decided that we have SVE
++ // objects on the stack and thus need it to go in the SVE stack area, then it
++ // needs to go first.
++ int StackProtectorFI = -1;
++ if (MFI.hasStackProtectorIndex()) {
++ StackProtectorFI = MFI.getStackProtectorIndex();
++ if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector)
++ ObjectsToAllocate.push_back(StackProtectorFI);
++ }
+ for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
+ unsigned StackID = MFI.getStackID(I);
+ if (StackID != TargetStackID::ScalableVector)
+ continue;
++ if (I == StackProtectorFI)
++ continue;
+ if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
+ continue;
+ if (MFI.isDeadObjectIndex(I))
+diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+index 792e268137b1..e313c72ec7b2 100644
+--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
++++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+@@ -18584,7 +18584,25 @@ AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
+ }
+
+ void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
+- MF.getFrameInfo().computeMaxCallFrameSize(MF);
++ MachineFrameInfo &MFI = MF.getFrameInfo();
++ // If we have any vulnerable SVE stack objects then the stack protector
++ // needs to be placed at the top of the SVE stack area, as the SVE locals
++ // are placed above the other locals, so we allocate it as if it were a
++ // scalable vector.
++ // FIXME: It may be worthwhile having a specific interface for this rather
++ // than doing it here in finalizeLowering.
++ if (MFI.hasStackProtectorIndex()) {
++ for (unsigned int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
++ if (MFI.getStackID(i) == TargetStackID::ScalableVector &&
++ MFI.getObjectSSPLayout(i) != MachineFrameInfo::SSPLK_None) {
++ MFI.setStackID(MFI.getStackProtectorIndex(),
++ TargetStackID::ScalableVector);
++ MFI.setObjectAlignment(MFI.getStackProtectorIndex(), Align(16));
++ break;
++ }
++ }
++ }
++ MFI.computeMaxCallFrameSize(MF);
+ TargetLoweringBase::finalizeLowering(MF);
+ }
+
+diff --git a/llvm/test/CodeGen/AArch64/stack-guard-reassign-sve.mir b/llvm/test/CodeGen/AArch64/stack-guard-reassign-sve.mir
+new file mode 100644
+index 000000000000..6af66df29030
+--- /dev/null
++++ b/llvm/test/CodeGen/AArch64/stack-guard-reassign-sve.mir
+@@ -0,0 +1,47 @@
++# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -start-before=localstackalloc -stop-after=prologepilog -o - %s | FileCheck %s
++
++--- |
++ @__stack_chk_guard = external global i8*
++ define i32 @main(i32, i8**) {
++ %StackGuardSlot = alloca i8*
++ unreachable
++ }
++...
++---
++name: main
++tracksRegLiveness: true
++frameInfo:
++# CHECK: stackSize: 544
++# CHECK: localFrameSize: 516
++ stackProtector: '%stack.3.StackGuardSlot'
++stack:
++# Stack objects 0 and 1 should end up in the local stack area, objects 2 and 3
++# should end up in the SVE stack area with 3 (the stack guard) on top.
++ - { id: 0, size: 512, alignment: 1, stack-id: default }
++# CHECK: - { id: 0, name: '', type: default, offset: -528, size: 512, alignment: 1,
++# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
++# CHECK-NEXT: local-offset: -512, debug-info-variable: '', debug-info-expression: '',
++# CHECK-NEXT: debug-info-location: '' }
++ - { id: 1, size: 4, alignment: 4, stack-id: default }
++# CHECK: - { id: 1, name: '', type: default, offset: -532, size: 4, alignment: 4,
++# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
++# CHECK-NEXT: local-offset: -516, debug-info-variable: '', debug-info-expression: '',
++# CHECK-NEXT: debug-info-location: '' }
++ - { id: 2, size: 16, alignment: 16, stack-id: scalable-vector }
++# CHECK: - { id: 2, name: '', type: default, offset: -32, size: 16, alignment: 16,
++# CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true,
++# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
++ - { id: 3, name: StackGuardSlot, size: 8, alignment: 16, stack-id: scalable-vector }
++# CHECK: - { id: 3, name: StackGuardSlot, type: default, offset: -16, size: 8,
++# CHECK-NEXT: alignment: 16, stack-id: scalable-vector, callee-saved-register: '',
++# CHECK-NEXT: callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '',
++# CHECK-NEXT: debug-info-location: '' }
++body: |
++ bb.0:
++ %25:gpr64common = LOAD_STACK_GUARD :: (dereferenceable invariant load (s64) from @__stack_chk_guard)
++ STRXui killed %25, %stack.3.StackGuardSlot, 0 :: (volatile store (s64) into %stack.3.StackGuardSlot)
++ %28:gpr64 = LDRXui %stack.3.StackGuardSlot, 0 :: (volatile load (s64) from %stack.3.StackGuardSlot)
++ %29:gpr64common = LOAD_STACK_GUARD :: (dereferenceable invariant load (s64) from @__stack_chk_guard)
++ RET_ReallyLR implicit undef $w0, implicit killed %28, implicit killed %29
++
++...
+diff --git a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
+new file mode 100644
+index 000000000000..32669e411e8c
+--- /dev/null
++++ b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
+@@ -0,0 +1,338 @@
++; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
++
++declare dso_local void @val_fn(<vscale x 4 x float>)
++declare dso_local void @ptr_fn(<vscale x 4 x float>*)
++
++; An alloca of a scalable vector shouldn't trigger stack protection.
++
++; CHECK-LABEL: call_value:
++; CHECK-NOT: mov x19, sp
++; CHECK: addvl sp, sp, #-1
++; CHECK-NOT: __stack_chk_guard
++; CHECK: st1w { {{z[0-9]+.s}} }, {{p[0-9]+}}, [x29, #-1, mul vl]
++define void @call_value() #0 {
++entry:
++ %x = alloca <vscale x 4 x float>, align 16
++ store <vscale x 4 x float> zeroinitializer, <vscale x 4 x float>* %x, align 16
++ %0 = load <vscale x 4 x float>, <vscale x 4 x float>* %x, align 16
++ call void @val_fn(<vscale x 4 x float> %0)
++ ret void
++}
++
++; CHECK-LABEL: call_value_strong:
++; CHECK-NOT: mov x19, sp
++; CHECK: addvl sp, sp, #-1
++; CHECK-NOT: __stack_chk_guard
++; CHECK: st1w { {{z[0-9]+.s}} }, {{p[0-9]+}}, [x29, #-1, mul vl]
++define void @call_value_strong() #1 {
++entry:
++ %x = alloca <vscale x 4 x float>, align 16
++ store <vscale x 4 x float> zeroinitializer, <vscale x 4 x float>* %x, align 16
++ %0 = load <vscale x 4 x float>, <vscale x 4 x float>* %x, align 16
++ call void @val_fn(<vscale x 4 x float> %0)
++ ret void
++}
++
++; Address-taking of a scalable vector should trigger stack protection only with
++; sspstrong, and the scalable vector should be be placed below the stack guard.
++
++; CHECK-LABEL: call_ptr:
++; CHECK-NOT: mov x19, sp
++; CHECK: addvl sp, sp, #-1
++; CHECK-NOT: __stack_chk_guard
++; CHECK: addvl x0, x29, #-1
++; CHECK: bl ptr_fn
++define void @call_ptr() #0 {
++entry:
++ %x = alloca <vscale x 4 x float>, align 16
++ call void @ptr_fn(<vscale x 4 x float>* %x)
++ ret void
++}
++
++; CHECK-LABEL: call_ptr_strong:
++; CHECK: mov x29, sp
++; CHECK: addvl sp, sp, #-2
++; CHECK-DAG: addvl [[ADDR:x[0-9]+]], x29, #-1
++; CHECK-DAG: ldr [[VAL:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
++; CHECK-DAG: str [[VAL]], {{\[}}[[ADDR]]]
++; CHECK-DAG: addvl x0, x29, #-2
++; CHECK: bl ptr_fn
++define void @call_ptr_strong() #1 {
++entry:
++ %x = alloca <vscale x 4 x float>, align 16
++ call void @ptr_fn(<vscale x 4 x float>* %x)
++ ret void
++}
++
++; Check that both variables are addressed in the same way
++
++; CHECK-LABEL: call_both:
++; CHECK: mov x29, sp
++; CHECK: addvl sp, sp, #-2
++; CHECK-NOT: __stack_chk_guard
++; CHECK: st1w { {{z[0-9]+.s}} }, {{p[0-9]+}}, [x29, #-1, mul vl]
++; CHECK: bl val_fn
++; CHECK: addvl x0, x29, #-2
++; CHECK: bl ptr_fn
++define void @call_both() #0 {
++entry:
++ %x = alloca <vscale x 4 x float>, align 16
++ %y = alloca <vscale x 4 x float>, align 16
++ store <vscale x 4 x float> zeroinitializer, <vscale x 4 x float>* %x, align 16
++ %0 = load <vscale x 4 x float>, <vscale x 4 x float>* %x, align 16
++ call void @val_fn(<vscale x 4 x float> %0)
++ call void @ptr_fn(<vscale x 4 x float>* %y)
++ ret void
++}
++
++; CHECK-LABEL: call_both_strong:
++; CHECK: mov x29, sp
++; CHECK: addvl sp, sp, #-3
++; CHECK-DAG: addvl [[ADDR:x[0-9]+]], x29, #-1
++; CHECK-DAG: ldr [[VAL:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
++; CHECK-DAG: str [[VAL]], {{\[}}[[ADDR]]]
++; CHECK-DAG: st1w { {{z[0-9]+.s}} }, {{p[0-9]+}}, [x29, #-2, mul vl]
++; CHECK: bl val_fn
++; CHECK: addvl x0, x29, #-3
++; CHECK: bl ptr_fn
++define void @call_both_strong() #1 {
++entry:
++ %x = alloca <vscale x 4 x float>, align 16
++ %y = alloca <vscale x 4 x float>, align 16
++ store <vscale x 4 x float> zeroinitializer, <vscale x 4 x float>* %x, align 16
++ %0 = load <vscale x 4 x float>, <vscale x 4 x float>* %x, align 16
++ call void @val_fn(<vscale x 4 x float> %0)
++ call void @ptr_fn(<vscale x 4 x float>* %y)
++ ret void
++}
++
++; Pushed callee-saved regs should be above the stack guard
++
++; CHECK-LABEL: callee_save:
++; CHECK: mov x29, sp
++; CHECK: addvl sp, sp, #-18
++; CHECK: str {{z[0-9]+}}, [sp, #{{[0-9]+}}, mul vl]
++; CHECK-NOT: mov x29, sp
++; CHECK: addvl sp, sp, #-1
++; CHECK-NOT: __stack_chk_guard
++; CHECK: addvl [[REG:x[0-9]+]], x29, #-11
++; CHECK: st1w { {{z[0-9]+.s}} }, {{p[0-9]+}}, {{\[}}[[REG]], #-8, mul vl]
++define void @callee_save(<vscale x 4 x float> %x) #0 {
++entry:
++ %x.addr = alloca <vscale x 4 x float>, align 16
++ store <vscale x 4 x float> %x, <vscale x 4 x float>* %x.addr, align 16
++ call void @ptr_fn(<vscale x 4 x float>* %x.addr)
++ ret void
++}
++
++; CHECK-LABEL: callee_save_strong:
++; CHECK: mov x29, sp
++; CHECK: addvl sp, sp, #-18
++; CHECK: str {{z[0-9]+}}, [sp, #{{[0-9]+}}, mul vl]
++; CHECK: addvl sp, sp, #-2
++; CHECK-DAG: addvl [[ADDR:x[0-9]+]], x29, #-19
++; CHECK-DAG: ldr [[VAL:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
++; CHECK-DAG: str [[VAL]], {{\[}}[[ADDR]]]
++; CHECK-DAG: addvl [[ADDR2:x[0-9]+]], x29, #-12
++; CHECK-DAG: st1w { z0.s }, p0, {{\[}}[[ADDR2]], #-8, mul vl]
++define void @callee_save_strong(<vscale x 4 x float> %x) #1 {
++entry:
++ %x.addr = alloca <vscale x 4 x float>, align 16
++ store <vscale x 4 x float> %x, <vscale x 4 x float>* %x.addr, align 16
++ call void @ptr_fn(<vscale x 4 x float>* %x.addr)
++ ret void
++}
++
++; Check that local stack allocation works correctly both when we have a stack
++; guard but no vulnerable SVE objects, and when we do have such objects.
++
++; CHECK-LABEL: local_stack_alloc:
++; CHECK: mov x29, sp
++; CHECK: addvl sp, sp, #-2
++; CHECK: sub sp, sp, #16, lsl #12
++; CHECK: sub sp, sp, #16
++
++; Stack guard is placed below the SVE stack area
++; CHECK-DAG: ldr [[STACK_GUARD:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
++; CHECK-DAG: addvl [[STACK_GUARD_POS:x[0-9]+]], x29, #-2
++; CHECK-DAG: stur [[STACK_GUARD]], {{\[}}[[STACK_GUARD_POS]], #-8]
++
++; char_arr is below the stack guard
++; CHECK-DAG: sub [[CHAR_ARR_1:x[0-9]+]], x29, #16
++; CHECK-DAG: addvl [[CHAR_ARR_2:x[0-9]+]], [[CHAR_ARR_1]], #-2
++; CHECK-DAG: strb wzr, {{\[}}[[CHAR_ARR_2]]]
++
++; large1 is accessed via a virtual base register
++; CHECK-DAG: add [[LARGE1:x[0-9]+]], sp, #8, lsl #12
++; CHECK-DAG: stp x0, x0, {{\[}}[[LARGE1]]]
++
++; large2 is at the bottom of the stack
++; CHECK-DAG: stp x0, x0, [sp]
++
++; vec1 and vec2 are in the SVE stack immediately below fp
++; CHECK-DAG: addvl x0, x29, #-1
++; CHECK-DAG: bl ptr_fn
++; CHECK-DAG: addvl x0, x29, #-2
++; CHECK-DAG: bl ptr_fn
++define void @local_stack_alloc(i64 %val) #0 {
++entry:
++ %char_arr = alloca [8 x i8], align 4
++ %gep0 = getelementptr [8 x i8], [8 x i8]* %char_arr, i64 0, i64 0
++ store i8 0, i8* %gep0, align 8
++ %large1 = alloca [4096 x i64], align 8
++ %large2 = alloca [4096 x i64], align 8
++ %vec_1 = alloca <vscale x 4 x float>, align 16
++ %vec_2 = alloca <vscale x 4 x float>, align 16
++ %gep1 = getelementptr [4096 x i64], [4096 x i64]* %large1, i64 0, i64 0
++ %gep2 = getelementptr [4096 x i64], [4096 x i64]* %large1, i64 0, i64 1
++ store i64 %val, i64* %gep1, align 8
++ store i64 %val, i64* %gep2, align 8
++ %gep3 = getelementptr [4096 x i64], [4096 x i64]* %large2, i64 0, i64 0
++ %gep4 = getelementptr [4096 x i64], [4096 x i64]* %large2, i64 0, i64 1
++ store i64 %val, i64* %gep3, align 8
++ store i64 %val, i64* %gep4, align 8
++ call void @ptr_fn(<vscale x 4 x float>* %vec_1)
++ call void @ptr_fn(<vscale x 4 x float>* %vec_2)
++ ret void
++}
++
++; CHECK-LABEL: local_stack_alloc_strong:
++; CHECK: mov x29, sp
++; CHECK: addvl sp, sp, #-3
++; CHECK: sub sp, sp, #16, lsl #12
++; CHECK: sub sp, sp, #16
++
++; Stack guard is placed at the top of the SVE stack area
++; CHECK-DAG: ldr [[STACK_GUARD:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
++; CHECK-DAG: addvl [[STACK_GUARD_POS:x[0-9]+]], x29, #-1
++; CHECK-DAG: str [[STACK_GUARD]], {{\[}}[[STACK_GUARD_POS]]]
++
++; char_arr is below the SVE stack area
++; CHECK-DAG: addvl [[CHAR_ARR:x[0-9]+]], x29, #-3
++; CHECK-DAG: sturb wzr, {{\[}}[[CHAR_ARR]], #-8]
++
++; large1 is accessed via a virtual base register
++; CHECK-DAG: add [[LARGE1:x[0-9]+]], sp, #8, lsl #12
++; CHECK-DAG: stp x0, x0, {{\[}}[[LARGE1]], #8]
++
++; large2 is at the bottom of the stack
++; CHECK-DAG: stp x0, x0, [sp, #8]
++
++; vec1 and vec2 are in the SVE stack area below the stack guard
++; CHECK-DAG: addvl x0, x29, #-2
++; CHECK-DAG: bl ptr_fn
++; CHECK-DAG: addvl x0, x29, #-3
++; CHECK-DAG: bl ptr_fn
++define void @local_stack_alloc_strong(i64 %val) #1 {
++entry:
++ %char_arr = alloca [8 x i8], align 4
++ %gep0 = getelementptr [8 x i8], [8 x i8]* %char_arr, i64 0, i64 0
++ store i8 0, i8* %gep0, align 8
++ %large1 = alloca [4096 x i64], align 8
++ %large2 = alloca [4096 x i64], align 8
++ %vec_1 = alloca <vscale x 4 x float>, align 16
++ %vec_2 = alloca <vscale x 4 x float>, align 16
++ %gep1 = getelementptr [4096 x i64], [4096 x i64]* %large1, i64 0, i64 0
++ %gep2 = getelementptr [4096 x i64], [4096 x i64]* %large1, i64 0, i64 1
++ store i64 %val, i64* %gep1, align 8
++ store i64 %val, i64* %gep2, align 8
++ %gep3 = getelementptr [4096 x i64], [4096 x i64]* %large2, i64 0, i64 0
++ %gep4 = getelementptr [4096 x i64], [4096 x i64]* %large2, i64 0, i64 1
++ store i64 %val, i64* %gep3, align 8
++ store i64 %val, i64* %gep4, align 8
++ call void @ptr_fn(<vscale x 4 x float>* %vec_1)
++ call void @ptr_fn(<vscale x 4 x float>* %vec_2)
++ ret void
++}
++
++; A GEP addressing into a vector of <vscale x 4 x float> is in-bounds for
++; offsets up to 3, but out-of-bounds (and so triggers stack protection with
++; sspstrong) after that.
++
++; CHECK-LABEL: vector_gep_3:
++; CHECK-NOT: __stack_chk_guard
++define void @vector_gep_3() #0 {
++entry:
++ %vec = alloca <vscale x 4 x float>, align 16
++ %gep = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %vec, i64 0, i64 3
++ store float 0.0, float* %gep, align 4
++ ret void
++}
++
++; CHECK-LABEL: vector_gep_4:
++; CHECK-NOT: __stack_chk_guard
++define void @vector_gep_4() #0 {
++entry:
++ %vec = alloca <vscale x 4 x float>, align 16
++ %gep = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %vec, i64 0, i64 4
++ store float 0.0, float* %gep, align 4
++ ret void
++}
++
++; CHECK-LABEL: vector_gep_twice:
++; CHECK-NOT: __stack_chk_guard
++define void @vector_gep_twice() #0 {
++entry:
++ %vec = alloca <vscale x 4 x float>, align 16
++ %gep1 = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %vec, i64 0, i64 3
++ store float 0.0, float* %gep1, align 4
++ %gep2 = getelementptr float, float* %gep1, i64 1
++ store float 0.0, float* %gep2, align 4
++ ret void
++}
++
++; CHECK-LABEL: vector_gep_n:
++; CHECK-NOT: __stack_chk_guard
++define void @vector_gep_n(i64 %n) #0 {
++entry:
++ %vec = alloca <vscale x 4 x float>, align 16
++ %gep = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %vec, i64 0, i64 %n
++ store float 0.0, float* %gep, align 4
++ ret void
++}
++
++; CHECK-LABEL: vector_gep_3_strong:
++; CHECK-NOT: __stack_chk_guard
++define void @vector_gep_3_strong() #1 {
++entry:
++ %vec = alloca <vscale x 4 x float>, align 16
++ %gep = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %vec, i64 0, i64 3
++ store float 0.0, float* %gep, align 4
++ ret void
++}
++
++; CHECK-LABEL: vector_gep_4_strong:
++; CHECK: __stack_chk_guard
++define void @vector_gep_4_strong(i64 %val) #1 {
++entry:
++ %vec = alloca <vscale x 4 x float>, align 16
++ %gep = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %vec, i64 0, i64 4
++ store float 0.0, float* %gep, align 4
++ ret void
++}
++
++
++; CHECK-LABEL: vector_gep_twice_strong:
++; CHECK: __stack_chk_guard
++define void @vector_gep_twice_strong() #1 {
++entry:
++ %vec = alloca <vscale x 4 x float>, align 16
++ %gep1 = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %vec, i64 0, i64 3
++ store float 0.0, float* %gep1, align 4
++ %gep2 = getelementptr float, float* %gep1, i64 1
++ store float 0.0, float* %gep2, align 4
++ ret void
++}
++
++; CHECK-LABEL: vector_gep_n_strong:
++; CHECK: __stack_chk_guard
++define void @vector_gep_n_strong(i64 %n) #1 {
++entry:
++ %vec = alloca <vscale x 4 x float>, align 16
++ %gep = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %vec, i64 0, i64 %n
++ store float 0.0, float* %gep, align 4
++ ret void
++}
++
++attributes #0 = { ssp "frame-pointer"="non-leaf" }
++attributes #1 = { sspstrong "frame-pointer"="non-leaf" }
+--
+2.34.1.703.g22d0c6ccf7-goog
+