diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -3429,7 +3429,11 @@ Register BaseReg = FrameReg; int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed(); if (BaseRegOffsetBytes < kMinOffset || - BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) { + BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset || + // BaseReg can be FP, which is not necessarily aligned to 16 bytes. In + // that case, BaseRegOffsetBytes will not be aligned to 16 bytes, which + // is required for the offset of ST2G. + BaseRegOffsetBytes % 16 != 0) { Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass); emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg, StackOffset::getFixed(BaseRegOffsetBytes), TII); @@ -3444,6 +3448,7 @@ InstrSize == 16 ? (ZeroData ? AArch64::STZGOffset : AArch64::STGOffset) : (ZeroData ? AArch64::STZ2GOffset : AArch64::ST2GOffset); + assert(BaseRegOffsetBytes % 16 == 0); MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode)) .addReg(AArch64::SP) .addReg(BaseReg) diff --git a/llvm/test/CodeGen/AArch64/settag-merge-nonaligned-fp.ll b/llvm/test/CodeGen/AArch64/settag-merge-nonaligned-fp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/settag-merge-nonaligned-fp.ll @@ -0,0 +1,55 @@ +; RUN: llc < %s -aarch64-order-frame-objects=0 | FileCheck %s +; Regression test for a bug that occurred with FP that was not 16-byte aligned. +; We would miscalculate the offset for the st2g. 
+ +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-android10000" + +%class.testcls = type <{ %class.testsubcls.1, %class.testsubcls.6, %class.testsubcls, %class.testsubcls.7, %class.testsubcls.8, %class.testsubcls.9, %struct.teststruct, float, float, %union.anon, [4 x i8] }> +%class.testsubcls.1 = type { ptr } +%class.testsubcls.6 = type { ptr } +%class.testsubcls = type { ptr } +%class.testsubcls.7 = type { ptr } +%class.testsubcls.8 = type { ptr } +%class.testsubcls.9 = type { ptr } +%struct.teststruct = type { float, float, float, float } +%union.anon = type { %struct.anon } +%struct.anon = type { i32 } +%class.testcls2 = type { %class.testcls3, %class.testsubcls, %class.testsubcls.1, %class.testsubcls.2 } +%class.testcls3 = type <{ ptr, [124 x i8], [4 x i8] }> +%class.testsubcls.2 = type { ptr } + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #0 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #0 + +; Function Attrs: sanitize_memtag sspstrong +define void @test(ptr %agg.result, float %call) #1 personality ptr null { +entry: + %pathPaint = alloca %class.testcls, align 8 + %ref.tmp = alloca %class.testcls2, align 8 + call void @test1(ptr %pathPaint) + call void @llvm.lifetime.start.p0(i64 1, ptr %ref.tmp) + store i32 0, ptr %ref.tmp, align 4 + store float %call, ptr %agg.result, align 8 + call void @llvm.lifetime.end.p0(i64 0, ptr %ref.tmp) + %0 = atomicrmw add ptr null, i32 0 monotonic, align 4 + ret void +} + +; CHECK-LABEL: test +; CHECK: sub x8, x29, #104 +; CHECK: st2g sp, [x8, #32] +; CHECK: stg sp, [x8, #64] +; CHECK: st2g sp, [x8] + + +declare void @test1(ptr) + +; uselistorder directives +uselistorder ptr null, { 1, 2, 3, 0 } + +attributes #0 = { nocallback nofree nosync 
nounwind willreturn memory(argmem: readwrite) } +attributes #1 = { sanitize_memtag sspstrong "frame-pointer"="non-leaf" "target-features"="+crc,+crypto,+dotprod,+fix-cortex-a53-835769,+fp-armv8,+fullfp16,+lse,+mte,+neon,+outline-atomics,+ras,+rcpc,+rdm,+v8a" }