diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -71,6 +71,33 @@ } } +static inline bool isSplitFPArea1Register(unsigned Reg, + bool SplitFramePushPop) { /* Area 1 of the split-frame-pointer layout: returns true for R0-R10, R12, SP and PC, false for every other register. SplitFramePushPop is accepted but never read here. */ + using namespace ARM; + + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + case R8: case R9: case R10: case R12: + case SP: case PC: + return true; + default: + return false; + } +} + +static inline bool isSplitFPArea2Register(unsigned Reg, + bool SplitFramePushPop) { /* Area 2 of the split-frame-pointer layout: returns true only for R11 and LR. SplitFramePushPop is accepted but never read here. */ + using namespace ARM; + + switch (Reg) { + case R11: case LR: + return true; + default: + return false; + } +} + static inline bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop) { using namespace ARM; diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -73,6 +73,8 @@ // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around return CSR_NoRegs_SaveList; + } else if (STI.splitFramePointerPush(*MF)) { + return CSR_Win_SplitFP_SaveList; } else if (F.getCallingConv() == CallingConv::CFGuard_Check) { return CSR_Win_AAPCS_CFGuard_Check_SaveList; } else if (F.getCallingConv() == CallingConv::SwiftTail) { diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td --- a/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/llvm/lib/Target/ARM/ARMCallingConv.td @@ -289,6 +289,10 @@ R11, R10, R9, R8, (sequence "D%u", 15, 8))>; +def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4, + (sequence "D%u", 15, 8), + LR, R11)>; + // R8 is used to pass swifterror, remove it from CSR. 
def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush, R8)>; diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -300,7 +300,6 @@ break; case ARM::t2ADDri: // add.w r11, sp, #xx case ARM::t2ADDri12: // add.w r11, sp, #xx - case ARM::t2SUBri: // sub.w r4, r11, #xx case ARM::t2MOVTi16: // movt r4, #xx case ARM::t2MOVi16: // movw r4, #xx case ARM::tBL: // bl __chkstk @@ -633,15 +632,23 @@ /// Unfortunately we cannot determine this value in determineCalleeSaves() yet /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use /// this to produce a conservative estimate that we check in an assert() later. -static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI) { +static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, + const MachineFunction &MF) { /* MF is only used for the STI.splitFramePointerPush(MF) query below, which selects the larger register-area estimate. */ // For Thumb1, push.w isn't available, so the first push will always push // r7 and lr onto the stack first. if (AFI.isThumb1OnlyFunction()) return -AFI.getArgRegsSaveSize() - (2 * 4); // This is a conservative estimation: Assume the frame pointer being r7 and // pc("r15") up to r8 getting spilled before (= 8 registers). - int FPCXTSaveSize = (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0; - return - FPCXTSaveSize - AFI.getArgRegsSaveSize() - (8 * 4); + int MaxRegBytes = 8 * 4; + if (STI.splitFramePointerPush(MF)) { + // Here, r11 can be stored below all of r4-r15 (3 registers more than + // above), plus d8-d15. + MaxRegBytes = 11 * 4 + 8 * 8; /* 11 four-byte GPR slots plus 8 eight-byte D-register slots = 108 bytes. */ + } + int FPCXTSaveSize = + (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0; + return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes; } void ARMFrameLowering::emitPrologue(MachineFunction &MF, @@ -704,42 +711,80 @@ } // Determine spill area sizes. 
- for (const CalleeSavedInfo &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: - if (STI.splitFramePushPop(MF)) { + if (STI.splitFramePointerPush(MF)) { + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R11: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; GPRCS2Size += 4; break; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R12: + GPRCS1Size += 4; + break; + case ARM::FPCXTNS: + FPCXTSaveSize = 4; + break; + default: + // This is a DPR. Exclude the aligned DPRCS2 spills. + if (Reg == ARM::D8) + D8SpillFI = FI; + if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) + DPRCSSize += 8; + } + } + } else { + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.splitFramePushPop(MF)) { + GPRCS2Size += 4; + break; + } + LLVM_FALLTHROUGH; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + GPRCS1Size += 4; + break; + case ARM::FPCXTNS: + FPCXTSaveSize = 4; + break; + default: + // This is a DPR. Exclude the aligned DPRCS2 spills. 
+ if (Reg == ARM::D8) + D8SpillFI = FI; + if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) + DPRCSSize += 8; } - LLVM_FALLTHROUGH; - case ARM::R0: - case ARM::R1: - case ARM::R2: - case ARM::R3: - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - GPRCS1Size += 4; - break; - case ARM::FPCXTNS: - FPCXTSaveSize = 4; - break; - default: - // This is a DPR. Exclude the aligned DPRCS2 spills. - if (Reg == ARM::D8) - D8SpillFI = FI; - if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) - DPRCSSize += 8; } } @@ -774,15 +819,23 @@ unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size; unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4); - unsigned DPRGapSize = - (GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) % - DPRAlign.value(); + unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize; + if (!STI.splitFramePointerPush(MF)) { + DPRGapSize += GPRCS2Size; + } + DPRGapSize %= DPRAlign.value(); - unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; + unsigned DPRCSOffset; + if (STI.splitFramePointerPush(MF)) { + DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize; + GPRCS2Offset = DPRCSOffset - GPRCS2Size; + } else { + DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; + } int FramePtrOffsetInPush = 0; if (HasFP) { int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); - assert(getMaxFPOffset(STI, *AFI) <= FPOffset && + assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset && "Max FP estimation is wrong"); FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize; AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + @@ -793,7 +846,7 @@ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); // Move past area 2. 
- if (GPRCS2Size > 0) { + if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) { GPRCS2Push = LastPush = MBBI++; DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); } @@ -833,6 +886,15 @@ } else NumBytes = DPRCSOffset; + if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) { + GPRCS2Push = LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); + } + + bool NeedsWinCFIStackAlloc = NeedsWinCFI; + if (STI.splitFramePointerPush(MF) && HasFP) + NeedsWinCFIStackAlloc = false; + if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) { uint32_t NumWords = NumBytes >> 2; @@ -888,7 +950,7 @@ .setMIFlags(MachineInstr::FrameSetup) .add(predOps(ARMCC::AL)) .add(condCodeOp()); - if (NeedsWinCFI) { + if (NeedsWinCFIStackAlloc) { SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc)) .addImm(NumBytes) .addImm(/*Wide=*/1) @@ -927,13 +989,20 @@ // into spill area 1, including the FP in R11. In either case, it // is in area one and the adjustment needs to take place just after // that push. + MachineBasicBlock::iterator AfterPush; if (HasFP) { - MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push); + AfterPush = std::next(GPRCS1Push); unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push); - emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, - dl, TII, FramePtr, ARM::SP, - PushSize + FramePtrOffsetInPush, - MachineInstr::FrameSetup); + int FPOffset = PushSize + FramePtrOffsetInPush; + if (STI.splitFramePointerPush(MF)) { + AfterPush = std::next(GPRCS2Push); + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, + FramePtr, ARM::SP, 0, MachineInstr::FrameSetup); + } else { + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, + FramePtr, ARM::SP, FPOffset, + MachineInstr::FrameSetup); + } if (!NeedsWinCFI) { if (FramePtrOffsetInPush + PushSize != 0) { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( @@ -956,8 +1025,11 @@ // Emit a SEH opcode indicating the prologue end. 
The rest of the prologue // instructions below don't need to be replayed to unwind the stack. if (NeedsWinCFI && MBBI != MBB.begin()) { - insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup); - BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd)) + MachineBasicBlock::iterator End = MBBI; + if (HasFP && STI.splitFramePointerPush(MF)) + End = AfterPush; + insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup); + BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd)) .setMIFlag(MachineInstr::FrameSetup); MF.setHasWinCFI(true); } @@ -1483,7 +1555,8 @@ continue; if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 && - STI.hasV5TOps() && MBB.succ_empty() && !hasPAC) { + STI.hasV5TOps() && MBB.succ_empty() && !hasPAC && + !STI.splitFramePointerPush(MF)) { Reg = ARM::PC; // Fold the return instruction into the LDM. DeleteRet = true; @@ -1847,12 +1920,21 @@ .addImm(-4) .add(predOps(ARMCC::AL)); } - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0, - MachineInstr::FrameSetup); - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0, - MachineInstr::FrameSetup); - emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, - NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); + if (STI.splitFramePointerPush(MF)) { + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, + &isSplitFPArea1Register, 0, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, + NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, + &isSplitFPArea2Register, 0, MachineInstr::FrameSetup); + } else { + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, + 0, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, + 0, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, FltOpc, 0, true, 
&isARMArea3Register, + NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); + } // The code above does not insert spill code for the aligned DPRCS2 registers. // The stack realignment code will be inserted between the push instructions @@ -1880,14 +1962,24 @@ emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; - unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM; + unsigned LdrOpc = + AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; unsigned FltOpc = ARM::VLDMDIA_UPD; - emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, - NumAlignedDPRCS2Regs); - emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, - &isARMArea2Register, 0); - emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, - &isARMArea1Register, 0); + if (STI.splitFramePointerPush(MF)) { + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isSplitFPArea2Register, 0); + emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, + NumAlignedDPRCS2Regs); + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isSplitFPArea1Register, 0); + } else { + emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, + NumAlignedDPRCS2Regs); + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isARMArea2Register, 0); + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isARMArea1Register, 0); + } return true; } @@ -2287,7 +2379,7 @@ // // We could do slightly better on Thumb1; in some cases, an sp-relative // offset would be legal even though an fp-relative offset is not. 
- int MaxFPOffset = getMaxFPOffset(STI, *AFI); + int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF); bool HasLargeArgumentList = HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -447,6 +447,8 @@ isThumb1Only(); } + bool splitFramePointerPush(const MachineFunction &MF) const; + bool useStride4VFPs() const; bool useMovt() const; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -491,3 +492,12 @@ return isThumb2() && MF.getFunction().hasMinSize() && ARM::GPRRegClass.contains(PhysReg); } + +bool ARMSubtarget::splitFramePointerPush(const MachineFunction &MF) const { /* Returns false unless the target's MCAsmInfo uses Windows CFI and the function needs an unwind table entry; in that case returns true when the frame has variable-sized objects or the stack needs realignment. */ + const Function &F = MF.getFunction(); + if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || + !F.needsUnwindTableEntry()) + return false; + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF); +} diff --git a/llvm/test/CodeGen/ARM/Windows/wineh-framepointer.ll b/llvm/test/CodeGen/ARM/Windows/wineh-framepointer.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/Windows/wineh-framepointer.ll @@ -0,0 +1,62 @@ +;; Check that this produces the expected assembly output +; RUN: llc -mtriple=thumbv7-windows -o - %s -verify-machineinstrs | FileCheck %s +;; Also try to write an object file, which verifies that the SEH opcodes +;; match the actual prologue/epilogue length. 
+; RUN: llc -mtriple=thumbv7-windows -filetype=obj -o %t.obj %s -verify-machineinstrs + +; CHECK-LABEL: alloc_local: +; CHECK-NEXT: .seh_proc alloc_local +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10} +; CHECK-NEXT: .seh_save_regs_w {r4-r10} +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .seh_stackalloc 4 +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .seh_save_fregs {d8-d15} +; CHECK-NEXT: push.w {r11, lr} +; CHECK-NEXT: .seh_save_regs_w {r11, lr} +; CHECK-NEXT: mov r11, sp +; CHECK-NEXT: .seh_save_sp r11 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: movw r4, #1256 +; CHECK-NEXT: bl __chkstk +; CHECK-NEXT: sub.w sp, sp, r4 +; CHECK-NEXT: mov r4, sp +; CHECK-NEXT: bfc r4, #0, #4 +; CHECK-NEXT: mov sp, r4 + +; CHECK: ldr.w [[TMP:r[0-9]]], [r11, #104] +; CHECK: mov r0, [[TMP]] + +; CHECK: .seh_startepilogue +; CHECK-NEXT: mov sp, r11 +; CHECK-NEXT: .seh_save_sp r11 +; CHECK-NEXT: pop.w {r11, lr} +; CHECK-NEXT: .seh_save_regs_w {r11, lr} +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .seh_save_fregs {d8-d15} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: .seh_stackalloc 4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10} +; CHECK-NEXT: .seh_save_regs_w {r4-r10} +; CHECK-NEXT: bx lr +; CHECK-NEXT: .seh_nop +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: .seh_endproc + +define arm_aapcs_vfpcc void @alloc_local(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e) uwtable { +entry: + %buf2 = alloca [5000 x i8], align 16 + %vla = alloca i8, i32 %a, align 1 + call void @llvm.lifetime.start.p0(i64 5000, ptr nonnull %buf2) #3 + call arm_aapcs_vfpcc void @other(i32 noundef %e, ptr noundef nonnull %vla, ptr noundef nonnull %buf2) + call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12}"() + call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() + call void @llvm.lifetime.end.p0(i64 5000, ptr nonnull %buf2) 
#3 + ret void +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) + +declare arm_aapcs_vfpcc void @other(i32 noundef, ptr noundef, ptr noundef)