diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -71,6 +71,39 @@
   }
 }
 
+/// Returns true if \p Reg belongs to the first GPR push/pop area used when
+/// ARMSubtarget::splitFramePointerPush() is in effect: r0-r10, r12, sp and
+/// pc, i.e. everything except r11 and lr. \p SplitFramePushPop is unused; it
+/// only keeps the signature compatible with the isARMArea*Register
+/// predicates, which are handed to emitPushInst/emitPopInst as function pointers.
+static inline bool isSplitFPArea1Register(unsigned Reg, bool SplitFramePushPop) {
+  using namespace ARM;
+
+  switch (Reg) {
+    case R0: case R1: case R2: case R3:
+    case R4: case R5: case R6: case R7:
+    case R8: case R9: case R10: case R12:
+    case SP: case PC:
+      return true;
+    default:
+      return false;
+  }
+}
+
+/// Returns true if \p Reg belongs to the second GPR push/pop area used when
+/// ARMSubtarget::splitFramePointerPush() is in effect: r11 and lr.
+/// \p SplitFramePushPop is unused, see isSplitFPArea1Register.
+static inline bool isSplitFPArea2Register(unsigned Reg, bool SplitFramePushPop) {
+  using namespace ARM;
+
+  switch (Reg) {
+    case R11: case LR:
+      return true;
+    default:
+      return false;
+  }
+}
+
 static inline bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop) {
   using namespace ARM;
 
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -73,6 +73,8 @@
     // GHC set of callee saved regs is empty as all those regs are
     // used for passing STG regs around
     return CSR_NoRegs_SaveList;
+  } else if (STI.splitFramePointerPush(*MF)) {
+    return CSR_Win_SplitFP_SaveList;
   } else if (F.getCallingConv() == CallingConv::CFGuard_Check) {
     return CSR_Win_AAPCS_CFGuard_Check_SaveList;
   } else if (F.getCallingConv() == CallingConv::SwiftTail) {
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td
--- a/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -289,6 +289,10 @@
                                                R11, R10, R9, R8,
                                                (sequence "D%u", 15, 8))>;
 
+def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
+                                               LR, R11,
+                                               (sequence "D%u", 15, 8))>;
+
 // R8 is used to pass swifterror, remove it from CSR.
 def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush,
                                                       R8)>;
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -656,8 +656,12 @@
     return -AFI.getArgRegsSaveSize() - (2 * 4);
   // This is a conservative estimation: Assume the frame pointer being r7 and
   // pc("r15") up to r8 getting spilled before (= 8 registers).
-  int FPCXTSaveSize = (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
-  return - FPCXTSaveSize - AFI.getArgRegsSaveSize() - (8 * 4);
+  int MaxRegsBeforeFP = 8;
+  if (STI.isTargetWindows())
+    MaxRegsBeforeFP = 10; // FP can be stored below everything of r4-r15
+  int FPCXTSaveSize =
+      (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
+  return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - (MaxRegsBeforeFP * 4);
 }
 
 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
@@ -720,42 +724,80 @@
   }
 
   // Determine spill area sizes.
-  for (const CalleeSavedInfo &I : CSI) {
-    Register Reg = I.getReg();
-    int FI = I.getFrameIdx();
-    switch (Reg) {
-    case ARM::R8:
-    case ARM::R9:
-    case ARM::R10:
-    case ARM::R11:
-    case ARM::R12:
-      if (STI.splitFramePushPop(MF)) {
+  if (STI.splitFramePointerPush(MF)) {
+    for (const CalleeSavedInfo &I : CSI) {
+      Register Reg = I.getReg();
+      int FI = I.getFrameIdx();
+      switch (Reg) {
+      case ARM::R11:
+      case ARM::LR:
+        if (Reg == FramePtr)
+          FramePtrSpillFI = FI;
         GPRCS2Size += 4;
         break;
+      case ARM::R0:
+      case ARM::R1:
+      case ARM::R2:
+      case ARM::R3:
+      case ARM::R4:
+      case ARM::R5:
+      case ARM::R6:
+      case ARM::R7:
+      case ARM::R8:
+      case ARM::R9:
+      case ARM::R10:
+      case ARM::R12:
+        GPRCS1Size += 4;
+        break;
+      case ARM::FPCXTNS:
+        FPCXTSaveSize = 4;
+        break;
+      default:
+        // This is a DPR. Exclude the aligned DPRCS2 spills.
+        if (Reg == ARM::D8)
+          D8SpillFI = FI;
+        if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
+          DPRCSSize += 8;
+      }
+    }
+  } else {
+    for (const CalleeSavedInfo &I : CSI) {
+      Register Reg = I.getReg();
+      int FI = I.getFrameIdx();
+      switch (Reg) {
+      case ARM::R8:
+      case ARM::R9:
+      case ARM::R10:
+      case ARM::R11:
+      case ARM::R12:
+        if (STI.splitFramePushPop(MF)) {
+          GPRCS2Size += 4;
+          break;
+        }
+        LLVM_FALLTHROUGH;
+      case ARM::R0:
+      case ARM::R1:
+      case ARM::R2:
+      case ARM::R3:
+      case ARM::R4:
+      case ARM::R5:
+      case ARM::R6:
+      case ARM::R7:
+      case ARM::LR:
+        if (Reg == FramePtr)
+          FramePtrSpillFI = FI;
+        GPRCS1Size += 4;
+        break;
+      case ARM::FPCXTNS:
+        FPCXTSaveSize = 4;
+        break;
+      default:
+        // This is a DPR. Exclude the aligned DPRCS2 spills.
+        if (Reg == ARM::D8)
+          D8SpillFI = FI;
+        if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
+          DPRCSSize += 8;
       }
-      LLVM_FALLTHROUGH;
-    case ARM::R0:
-    case ARM::R1:
-    case ARM::R2:
-    case ARM::R3:
-    case ARM::R4:
-    case ARM::R5:
-    case ARM::R6:
-    case ARM::R7:
-    case ARM::LR:
-      if (Reg == FramePtr)
-        FramePtrSpillFI = FI;
-      GPRCS1Size += 4;
-      break;
-    case ARM::FPCXTNS:
-      FPCXTSaveSize = 4;
-      break;
-    default:
-      // This is a DPR. Exclude the aligned DPRCS2 spills.
-      if (Reg == ARM::D8)
-        D8SpillFI = FI;
-      if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
-        DPRCSSize += 8;
     }
   }
 
@@ -981,10 +1023,16 @@
   if (HasFP) {
     MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
     unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
-    emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
-                         dl, TII, FramePtr, ARM::SP,
-                         PushSize + FramePtrOffsetInPush,
-                         MachineInstr::FrameSetup);
+    int FPOffset = PushSize + FramePtrOffsetInPush;
+    if (STI.splitFramePointerPush(MF)) {
+      AfterPush = std::next(GPRCS2Push);
+      emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
+                           FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
+    } else {
+      emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
+                           FramePtr, ARM::SP, FPOffset,
+                           MachineInstr::FrameSetup);
+    }
     if (!NeedsWinCFI) {
       if (FramePtrOffsetInPush + PushSize != 0) {
         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
@@ -1541,7 +1589,8 @@
         continue;
       if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
           !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
-          STI.hasV5TOps() && MBB.succ_empty() && !hasPAC) {
+          STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
+          !STI.splitFramePointerPush(MF)) {
         Reg = ARM::PC;
         // Fold the return instruction into the LDM.
         DeleteRet = true;
@@ -1905,9 +1954,16 @@
         .addImm(-4)
         .add(predOps(ARMCC::AL));
   }
-  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
+  bool SplitFramePointerPush = STI.splitFramePointerPush(MF);
+  bool (*SplitFunc1)(unsigned, bool) = &isARMArea1Register;
+  bool (*SplitFunc2)(unsigned, bool) = &isARMArea2Register;
+  if (SplitFramePointerPush) {
+    SplitFunc1 = &isSplitFPArea1Register;
+    SplitFunc2 = &isSplitFPArea2Register;
+  }
+  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, SplitFunc1, 0,
                MachineInstr::FrameSetup);
-  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
+  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, SplitFunc2, 0,
                MachineInstr::FrameSetup);
   emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
                NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
@@ -1942,10 +1998,16 @@
   unsigned FltOpc = ARM::VLDMDIA_UPD;
   emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
               NumAlignedDPRCS2Regs);
-  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
-              &isARMArea2Register, 0);
-  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
-              &isARMArea1Register, 0);
+
+  bool SplitFramePointerPush = STI.splitFramePointerPush(MF);
+  bool (*SplitFunc1)(unsigned, bool) = &isARMArea1Register;
+  bool (*SplitFunc2)(unsigned, bool) = &isARMArea2Register;
+  if (SplitFramePointerPush) {
+    SplitFunc1 = &isSplitFPArea1Register;
+    SplitFunc2 = &isSplitFPArea2Register;
+  }
+  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, SplitFunc2, 0);
+  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, SplitFunc1, 0);
 
   return true;
 }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -447,6 +447,11 @@
            isThumb1Only();
   }
 
+  /// Returns true if the prologue pushes r11 and lr in a push of their own,
+  /// after the other GPRs, and points the frame pointer at the stack pointer
+  /// right after that push. See the definition for the exact conditions.
+  bool splitFramePointerPush(const MachineFunction &MF) const;
+
   bool useStride4VFPs() const;
 
   bool useMovt() const;
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -27,6 +27,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
@@ -494,3 +495,15 @@
   return isThumb2() && MF.getFunction().hasMinSize() &&
          ARM::GPRRegClass.contains(PhysReg);
 }
+
+// The split frame pointer push is only used for functions that use Windows
+// CFI and need an unwind table entry, and only if the frame has
+// variable-sized objects or the stack has to be realigned.
+bool ARMSubtarget::splitFramePointerPush(const MachineFunction &MF) const {
+  const Function &F = MF.getFunction();
+  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() ||
+      !F.needsUnwindTableEntry())
+    return false;
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  return MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF);
+}
diff --git a/llvm/test/CodeGen/ARM/Windows/wineh-framepointer.ll b/llvm/test/CodeGen/ARM/Windows/wineh-framepointer.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/Windows/wineh-framepointer.ll
@@ -0,0 +1,87 @@
+;; Check that this produces the expected assembly output
+; RUN: llc -mtriple=thumbv7-windows -o - %s | FileCheck %s
+;; Also try to write an object file, which verifies that the SEH opcodes
+;; match the actual prologue/epilogue length.
+; RUN: llc -mtriple=thumbv7-windows -filetype=obj -o %t.obj %s
+
+; CHECK-LABEL: alloc_local:
+; CHECK-NEXT: .seh_proc alloc_local
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10}
+; CHECK-NEXT: .seh_save_regs_w {r4-r10}
+; CHECK-NEXT: push.w {r11, lr}
+; CHECK-NEXT: .seh_save_regs_w {r11, lr}
+; CHECK-NEXT: mov r11, sp
+; CHECK-NEXT: .seh_set_fp r11
+; CHECK-NEXT: sub sp, #36
+; CHECK-NEXT: .seh_stackalloc 36
+; CHECK-NEXT: .seh_endprologue
+
+; CHECK: ldr.w [[TMP:r[0-9]]], [r11, #36]
+; CHECK: mov r0, [[TMP]]
+
+; CHECK: .seh_startepilogue
+; CHECK-NEXT: mov sp, r11
+; CHECK-NEXT: .seh_set_fp r11
+; CHECK-NEXT: pop.w {r11, lr}
+; CHECK-NEXT: .seh_save_regs_w {r11, lr}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10}
+; CHECK-NEXT: .seh_save_regs_w {r4-r10}
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .seh_endepilogue_nop
+; CHECK-NEXT: .seh_endproc
+
+define arm_aapcs_vfpcc void @alloc_local(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e) uwtable {
+entry:
+  %buf2 = alloca [28 x i8], align 1
+  %0 = alloca i8, i32 %a, align 8
+  call void @llvm.lifetime.start.p0(i64 28, ptr nonnull %buf2)
+  call arm_aapcs_vfpcc void @other(i32 noundef %e, ptr noundef nonnull %0, ptr noundef nonnull %buf2)
+  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12}"()
+  call void @llvm.lifetime.end.p0(i64 28, ptr nonnull %buf2)
+  ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+declare arm_aapcs_vfpcc void @other(i32 noundef, ptr noundef, ptr noundef)
+
+; CHECK-LABEL: align:
+; CHECK-NEXT: .seh_proc align
+; CHECK-NEXT: @ %bb.0: @ %entry
+; CHECK-NEXT: push {r4, r7}
+; CHECK-NEXT: .seh_save_regs {r4, r7}
+; CHECK-NEXT: push.w {r11, lr}
+; CHECK-NEXT: .seh_save_regs_w {r11, lr}
+; CHECK-NEXT: mov r11, sp
+; CHECK-NEXT: .seh_set_fp r11
+; CHECK-NEXT: sub sp, #32
+; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: mov r4, sp
+; CHECK-NEXT: .seh_nop
+; CHECK-NEXT: bfc r4, #0, #4
+; CHECK-NEXT: .seh_nop_w
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: .seh_nop
+; CHECK-NEXT: .seh_endprologue
+
+; CHECK: .seh_startepilogue
+; CHECK-NEXT: mov sp, r11
+; CHECK-NEXT: .seh_set_fp r11
+; CHECK-NEXT: pop.w {r11, lr}
+; CHECK-NEXT: .seh_save_regs_w {r11, lr}
+; CHECK-NEXT: pop {r4, r7}
+; CHECK-NEXT: .seh_save_regs {r4, r7}
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .seh_endepilogue_nop
+; CHECK-NEXT: .seh_endproc
+
+define arm_aapcs_vfpcc void @align(i32 noundef %a) uwtable {
+entry:
+  %buf = alloca [32 x i8], align 16
+  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %buf)
+  call arm_aapcs_vfpcc void @other(i32 noundef %a, ptr noundef nonnull %buf, ptr noundef nonnull %buf)
+  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %buf)
+  ret void
+}