diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1024,10 +1024,6 @@ F.needsUnwindTableEntry(); } -static bool isTargetDarwin(const MachineFunction &MF) { - return MF.getSubtarget().isTargetDarwin(); -} - static bool isTargetWindows(const MachineFunction &MF) { return MF.getSubtarget().isTargetWindows(); } @@ -1185,7 +1181,7 @@ // For funclets the FP belongs to the containing function. if (!IsFunclet && HasFP) { // Only set up FP if we actually need to. - int64_t FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0; + int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset(); if (CombineSPBump) FPOffset += AFI->getLocalStackSize(); @@ -1409,11 +1405,6 @@ } if (needsFrameMoves) { - const DataLayout &TD = MF.getDataLayout(); - const int StackGrowth = isTargetDarwin(MF) - ? (2 * -TD.getPointerSize(0)) - : -AFI->getCalleeSavedStackSize(); - Register FramePtr = RegInfo->getFrameRegister(MF); // An example of the prologue: // // .globl __foo @@ -1481,10 +1472,15 @@ // .cfi_offset w28, -32 if (HasFP) { + const int OffsetToFirstCalleeSaveFromFP = + AFI->getCalleeSaveBaseToFrameRecordOffset() - + AFI->getCalleeSavedStackSize(); + Register FramePtr = RegInfo->getFrameRegister(MF); + // Define the current CFA rule to use the provided FP. unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - StackGrowth)); + MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -1775,10 +1771,8 @@ // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) { - int64_t OffsetToFrameRecord = - isTargetDarwin(MF) ? (-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0; emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, - {OffsetToFrameRecord, MVT::i8}, + {-AFI->getCalleeSaveBaseToFrameRecordOffset(), MVT::i8}, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); } else if (NumBytes) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, @@ -1839,11 +1833,11 @@ const auto &Subtarget = MF.getSubtarget(); bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false); - unsigned FPAdjust = isTargetDarwin(MF) - ? 16 : AFI->getCalleeSavedStackSize(MF.getFrameInfo()); + int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo()); + int64_t FPAdjust = + CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset(); return {ObjectOffset + FixedObject + FPAdjust, MVT::i8}; } @@ -2231,6 +2225,14 @@ (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) && "Offset out of bounds for LDP/STP immediate"); + // Save the offset to frame record so that the FP register can point to the + // innermost frame record (spilled FP and LR registers). + if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR && + RPI.Reg2 == AArch64::FP) || + (IsWindows && RPI.Reg1 == AArch64::FP && + RPI.Reg2 == AArch64::LR))) + AFI->setCalleeSaveBaseToFrameRecordOffset(Offset); + RegPairs.push_back(RPI); if (RPI.isPaired()) ++i; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3442,8 +3442,8 @@ // First emit non-scalable frame offsets, or a simple 'mov'. if (Bytes || (!Offset && SrcReg != DestReg)) { - assert((DestReg != AArch64::SP || Bytes % 16 == 0) && - "SP increment/decrement not 16-byte aligned"); + assert((DestReg != AArch64::SP || Bytes % 8 == 0) && + "SP increment/decrement not 8-byte aligned"); unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri; if (Bytes < 0) { Bytes = -Bytes; diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -135,6 +135,10 @@ /// e.g. Tail Call, Thunk, or Function if none apply. Optional OutliningStyle; + // Offset from SP-after-callee-saved-spills (i.e. SP-at-entry minus + // CalleeSavedStackSize) to the address of the frame record. + int CalleeSaveBaseToFrameRecordOffset = 0; + public: AArch64FunctionInfo() = default; @@ -338,6 +342,13 @@ TaggedBasePointerOffset = Offset; } + int getCalleeSaveBaseToFrameRecordOffset() const { + return CalleeSaveBaseToFrameRecordOffset; + } + void setCalleeSaveBaseToFrameRecordOffset(int Offset) { + CalleeSaveBaseToFrameRecordOffset = Offset; + } + private: // Hold the lists of LOHs. MILOHContainer LOHContainerSet; diff --git a/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll b/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll @@ -0,0 +1,22 @@ +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra --frame-pointer=all < %s | FileCheck %s + +; The purpose of this test is to verify that frame pointer (x29) +; is correctly setup in the presence of callee-saved floating +; point registers. The frame pointer should point to the frame +; record, which is located 16 bytes above the end of the CSR +; space when a single FP CSR is in use. +define void @test1(i32) #26 { +entry: + call void asm sideeffect "nop", "~{d8}"() #26 + ret void +} +; CHECK-LABEL: test1: +; CHECK: str d8, [sp, #-32]! +; CHECK-NEXT: stp x29, x30, [sp, #16] +; CHECK-NEXT: add x29, sp, #16 +; CHECK: nop +; CHECK: ldp x29, x30, [sp, #16] +; CHECK-NEXT: ldr d8, [sp], #32 +; CHECK-NEXT: ret + +attributes #26 = { nounwind } diff --git a/llvm/test/CodeGen/AArch64/framelayout-frame-record.mir b/llvm/test/CodeGen/AArch64/framelayout-frame-record.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/framelayout-frame-record.mir @@ -0,0 +1,29 @@ +# RUN: llc -mtriple=aarch64-linux-gnu -start-before prologepilog %s -o - | FileCheck %s + +--- +name: TestFrameRecordLocation +tracksRegLiveness: true +frameInfo: + isFrameAddressTaken: true +body: | + bb.0: + $d8 = IMPLICIT_DEF + $d9 = IMPLICIT_DEF + $x19 = IMPLICIT_DEF + RET_ReallyLR + +# CHECK-LABEL: TestFrameRecordLocation + +# CHECK: stp d9, d8, [sp, #-48]! +# CHECK: stp x29, x30, [sp, #16] +# CHECK: str x19, [sp, #32] + +# CHECK: add x29, sp, #16 + +# CHECK: .cfi_def_cfa w29, 32 +# CHECK: .cfi_offset w19, -16 +# CHECK: .cfi_offset w30, -24 +# CHECK: .cfi_offset w29, -32 +# CHECK: .cfi_offset b8, -40 +# CHECK: .cfi_offset b9, -48 +... diff --git a/llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll b/llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll @@ -0,0 +1,42 @@ +; RUN: llc -verify-machineinstrs < %s | FileCheck %s + +; The purpose of this test is to construct a scenario where an odd number +; of callee-saved GPRs as well as an odd number of callee-saved FPRs are +; used. This caused the frame pointer to be aligned to a multiple of 8 +; on non-Darwin platforms, rather than a multiple of 16 as usual. + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@a = global i64 0, align 4 + + +define i64 @b() { +entry: + %call = tail call i64 @d() + %0 = alloca i8, i64 ptrtoint (i64 ()* @d to i64), align 16 + %1 = ptrtoint i8* %0 to i64 + store i64 %1, i64* @a, align 4 + %call1 = call i64 @e() + %conv = sitofp i64 %call1 to float + %2 = load i64, i64* @a, align 4 + %call2 = call i64 @f(i64 %2) + %conv3 = fptosi float %conv to i64 + ret i64 %conv3 +} + +; CHECK-LABEL: b: +; CHECK: str d8, [sp, #-32]! +; CHECK-NEXT: stp x29, x30, [sp, #8] +; CHECK-NEXT: str x19, [sp, #24] +; CHECK-NEXT: add x29, sp, #8 + +; CHECK: sub sp, x29, #8 +; CHECK-NEXT: ldr x19, [sp, #24] +; CHECK-NEXT: ldp x29, x30, [sp, #8] +; CHECK-NEXT: ldr d8, [sp], #32 +; CHECK-NEXT: ret + +declare i64 @d() +declare i64 @e() +declare i64 @f(i64)