diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -336,7 +336,8 @@
         }
 
         bool IsLarge = false;
-        if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) {
+        Type *AllocType = AI->getAllocatedType();
+        if (ContainsProtectableArray(AllocType, IsLarge, Strong)) {
           Layout.insert(std::make_pair(AI, IsLarge
                                                ? MachineFrameInfo::SSPLK_LargeArray
                                                : MachineFrameInfo::SSPLK_SmallArray));
@@ -351,8 +352,12 @@
           continue;
         }
 
-        if (Strong && HasAddressTaken(AI, M->getDataLayout().getTypeAllocSize(
-                                              AI->getAllocatedType()))) {
+        // The exact size of scalable vectors isn't known, so assume the worst
+        // case of a single element.
+        bool IsScalable = isa<ScalableVectorType>(AllocType);
+        uint64_t AllocSize = M->getDataLayout().getTypeAllocSize(
+            IsScalable ? AllocType->getScalarType() : AllocType);
+        if (Strong && HasAddressTaken(AI, AllocSize)) {
           ++NumAddrTaken;
           Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));
           ORE.emit([&]() {
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -103,6 +103,11 @@
 // layout, we don't need to add an unscaled offset to the framepointer before
 // accessing the SVE object in the frame.
 //
+// When stack protection is enabled we need to place SVE stack objects below
+// the local variables of fixed size, as that's where the stack guard variable
+// will be placed. Therefore they are moved below the base pointer, so that
+// VL-scaled addressing below bp can be used to access them.
+//
 // In some cases when a base pointer is not strictly needed, it is generated
 // anyway when offsets from the frame pointer to access local variables become
 // so large that the offset can't be encoded in the immediate fields of loads
@@ -1118,6 +1123,8 @@
 
   bool IsFunclet = MBB.isEHFuncletEntry();
 
+  bool SVEBelowBP = MFI.getStackProtectorIndex() >= 0;
+
   // At this point, we're going to decide whether or not the function uses a
   // redzone. In most cases, the function doesn't have a redzone so let's
   // assume that's false and set it to true in the case that there's a redzone.
@@ -1418,7 +1425,7 @@
     NumBytes = 0;
   }
 
-  StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
+  StackOffset AllocateBefore = {}, AllocateAfter = SVEStackSize;
   MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
 
   // Process the SVE callee-saves to determine what space needs to be
@@ -1440,10 +1447,12 @@
                   -AllocateBefore, TII,
                   MachineInstr::FrameSetup);
 
-  // Finally allocate remaining SVE stack space.
-  emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
-                  -AllocateAfter, TII,
-                  MachineInstr::FrameSetup);
+  if (!SVEBelowBP) {
+    // Finally allocate remaining SVE stack space.
+    emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
+                    -AllocateAfter, TII,
+                    MachineInstr::FrameSetup);
+  }
 
   // Allocate space for the rest of the frame.
   if (NumBytes) {
@@ -1513,6 +1522,13 @@
     }
   }
 
+  if (SVEBelowBP) {
+    // SVE locals are placed below the base pointer.
+    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+                    -AllocateAfter, TII,
+                    MachineInstr::FrameSetup);
+  }
+
   // The very last FrameSetup instruction indicates the end of prologue. Emit a
   // SEH opcode indicating the prologue end.
   if (NeedsWinCFI && HasWinCFI) {
@@ -2125,6 +2141,21 @@
          "non-argument/CSR objects cannot be accessed through the frame pointer");
 
   if (isSVE) {
+    if (MFI.getStackProtectorIndex() >= 0) {
+      // SVE locals, but not SVE callee-saved registers, are placed below the
+      // base pointer, and the callee-saved registers shouldn't be accessed
+      // this way. Therefore the offset is reduced by the callee-saved size,
+      // done as an addition because ObjectOffset is a negative offset.
+      int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize();
+      assert(-ObjectOffset >= CalleeSavedSize &&
+             "Unexpected access of callee-saved SVE register");
+      assert(RegInfo->hasBasePointer(MF) &&
+             "Base pointer expected to be available in functions with SVE "
+             "locals and stack protection");
+      FrameReg = RegInfo->getBaseRegister();
+      return StackOffset::getScalable(ObjectOffset + CalleeSavedSize);
+    }
+
     StackOffset FPOffset =
         StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
     StackOffset SPOffset =
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -384,6 +384,12 @@
 bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
 
+  // When stack protection is used, SVE locals are placed below the base pointer.
+  if (MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() > 0 &&
+      MFI.getStackProtectorIndex() >= 0) {
+    return true;
+  }
+
   // In the presence of variable sized objects or funclets, if the fixed stack
   // size is large enough that referencing from the FP won't result in things
   // being in range relatively often, we can use a base pointer to allow access
diff --git a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
@@ -0,0 +1,148 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+declare dso_local void @val_fn(<vscale x 4 x float>)
+declare dso_local void @ptr_fn(<vscale x 4 x float>*)
+
+; An alloca of a scalable vector shouldn't trigger stack protection.
+
+; CHECK-LABEL: call_value:
+; CHECK-NOT: mov x19, sp
+; CHECK: addvl sp, sp, #-1
+; CHECK-NOT: __stack_chk_guard
+; CHECK: st1w { {{z[0-9]+.s}} }, {{p[0-9]+}}, [x29, #-1, mul vl]
+define void @call_value() #0 {
+entry:
+  %x = alloca <vscale x 4 x float>, align 16
+  store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float>* %x, align 16
+  %0 = load <vscale x 4 x float>, <vscale x 4 x float>* %x, align 16
+  call void @val_fn(<vscale x 4 x float> %0)
+  ret void
+}
+
+; CHECK-LABEL: call_value_strong:
+; CHECK-NOT: mov x19, sp
+; CHECK: addvl sp, sp, #-1
+; CHECK-NOT: __stack_chk_guard
+; CHECK: st1w { {{z[0-9]+.s}} }, {{p[0-9]+}}, [x29, #-1, mul vl]
+define void @call_value_strong() #1 {
+entry:
+  %x = alloca <vscale x 4 x float>, align 16
+  store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float>* %x, align 16
+  %0 = load <vscale x 4 x float>, <vscale x 4 x float>* %x, align 16
+  call void @val_fn(<vscale x 4 x float> %0)
+  ret void
+}
+
+; Address-taking of a scalable vector should trigger stack protection only with
+; sspstrong, and the scalable vector should be placed below the stack guard.
+
+; CHECK-LABEL: call_ptr:
+; CHECK-NOT: mov x19, sp
+; CHECK: addvl sp, sp, #-1
+; CHECK-NOT: __stack_chk_guard
+; CHECK: addvl x0, x29, #-1
+; CHECK: bl ptr_fn
+define void @call_ptr() #0 {
+entry:
+  %x = alloca <vscale x 4 x float>, align 16
+  call void @ptr_fn(<vscale x 4 x float>* %x)
+  ret void
+}
+
+; CHECK-LABEL: call_ptr_strong:
+; CHECK: sub sp, sp, #16
+; CHECK: mov x19, sp
+; CHECK: addvl sp, sp, #-1
+; CHECK-DAG: ldr [[REG:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
+; CHECK-DAG: str [[REG]], [x19, #8]
+; CHECK-DAG: addvl x0, x19, #-1
+; CHECK: bl ptr_fn
+define void @call_ptr_strong() #1 {
+entry:
+  %x = alloca <vscale x 4 x float>, align 16
+  call void @ptr_fn(<vscale x 4 x float>* %x)
+  ret void
+}
+
+; Check that both variables are addressed in the same way (fp-relative with
+; ssp, bp-relative with sspstrong).
+
+; CHECK-LABEL: call_both:
+; CHECK-NOT: mov x19, sp
+; CHECK: addvl sp, sp, #-2
+; CHECK-NOT: __stack_chk_guard
+; CHECK: st1w { {{z[0-9]+.s}} }, {{p[0-9]+}}, [x29, #-1, mul vl]
+; CHECK: bl val_fn
+; CHECK: addvl x0, x29, #-2
+; CHECK: bl ptr_fn
+define void @call_both() #0 {
+entry:
+  %x = alloca <vscale x 4 x float>, align 16
+  %y = alloca <vscale x 4 x float>, align 16
+  store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float>* %x, align 16
+  %0 = load <vscale x 4 x float>, <vscale x 4 x float>* %x, align 16
+  call void @val_fn(<vscale x 4 x float> %0)
+  call void @ptr_fn(<vscale x 4 x float>* %y)
+  ret void
+}
+
+; CHECK-LABEL: call_both_strong:
+; CHECK: sub sp, sp, #16
+; CHECK: mov x19, sp
+; CHECK: addvl sp, sp, #-2
+; CHECK-DAG: ldr [[REG:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
+; CHECK-DAG: str [[REG]], [x19, #8]
+; CHECK-DAG: st1w { {{z[0-9]+.s}} }, {{p[0-9]+}}, [x19, #-1, mul vl]
+; CHECK: bl val_fn
+; CHECK: addvl x0, x19, #-2
+; CHECK: bl ptr_fn
+define void @call_both_strong() #1 {
+entry:
+  %x = alloca <vscale x 4 x float>, align 16
+  %y = alloca <vscale x 4 x float>, align 16
+  store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float>* %x, align 16
+  %0 = load <vscale x 4 x float>, <vscale x 4 x float>* %x, align 16
+  call void @val_fn(<vscale x 4 x float> %0)
+  call void @ptr_fn(<vscale x 4 x float>* %y)
+  ret void
+}
+
+; Pushed callee-saved regs should be above the stack guard.
+
+; CHECK-LABEL: callee_save:
+; CHECK: mov x29, sp
+; CHECK: addvl sp, sp, #-18
+; CHECK: str {{z[0-9]+}}, [sp, #{{[0-9]+}}, mul vl]
+; CHECK-NOT: mov x19, sp
+; CHECK: addvl sp, sp, #-1
+; CHECK-NOT: __stack_chk_guard
+; CHECK: addvl [[REG:x[0-9]+]], x29, #-11
+; CHECK: st1w { {{z[0-9]+.s}} }, {{p[0-9]+}}, {{\[}}[[REG]], #-8, mul vl]
+define void @callee_save(<vscale x 4 x float> %x) #0 {
+entry:
+  %x.addr = alloca <vscale x 4 x float>, align 16
+  store <vscale x 4 x float> %x, <vscale x 4 x float>* %x.addr, align 16
+  call void @ptr_fn(<vscale x 4 x float>* %x.addr)
+  ret void
+}
+
+; CHECK-LABEL: callee_save_strong:
+; CHECK: mov x29, sp
+; CHECK: addvl sp, sp, #-18
+; CHECK: str {{z[0-9]+}}, [sp, #{{[0-9]+}}, mul vl]
+; CHECK: sub sp, sp, #16
+; CHECK: mov x19, sp
+; CHECK: addvl sp, sp, #-1
+; CHECK-DAG: ldr [[REG:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
+; CHECK-DAG: str [[REG]], [x19, #8]
+; CHECK-DAG: st1w { {{z[0-9]+.s}} }, {{p[0-9]+}}, [x19, #-1, mul vl]
+define void @callee_save_strong(<vscale x 4 x float> %x) #1 {
+entry:
+  %x.addr = alloca <vscale x 4 x float>, align 16
+  store <vscale x 4 x float> %x, <vscale x 4 x float>* %x.addr, align 16
+  call void @ptr_fn(<vscale x 4 x float>* %x.addr)
+  ret void
+}
+
+attributes #0 = { ssp "frame-pointer"="non-leaf" }
+attributes #1 = { sspstrong "frame-pointer"="non-leaf" }
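
For reference, a minimal source-level sketch of the case the call_ptr_strong
test exercises, assuming a clang with SVE support (the names consume and demo
are made up for this example). Built with
"clang --target=aarch64-linux-gnu -march=armv8-a+sve -fstack-protector-strong -S",
the address-taken SVE local becomes an address-taken scalable alloca, so with
this patch it is placed below the stack guard and addressed via the base
pointer (x19):

  #include <arm_sve.h>

  void consume(svfloat32_t *p);  /* made-up external consumer of the address */

  void demo(void) {
    svfloat32_t v = svdup_f32(0.0f); /* scalable vector local on the stack */
    consume(&v);                     /* address taken -> SSPLK_AddrOf layout */
  }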