Index: lib/Target/ARM/ARMBaseRegisterInfo.cpp =================================================================== --- lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -49,18 +49,13 @@ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {} static unsigned getFramePointerReg(const ARMSubtarget &STI) { - if (STI.isTargetMachO()) - return ARM::R7; - else if (STI.isTargetWindows()) - return ARM::R11; - else // ARM EABI - return STI.isThumb() ? ARM::R7 : ARM::R11; + return STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11; } const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const ARMSubtarget &STI = MF->getSubtarget(); - bool UseSplitPush = STI.splitFramePushPop(); + bool UseSplitPush = STI.splitFramePushPop(*MF); const MCPhysReg *RegList = STI.isTargetDarwin() ? CSR_iOS_SaveList Index: lib/Target/ARM/ARMFrameLowering.h =================================================================== --- lib/Target/ARM/ARMFrameLowering.h +++ lib/Target/ARM/ARMFrameLowering.h @@ -43,6 +43,7 @@ bool noFramePointerElim(const MachineFunction &MF) const override; + bool hasABIFP(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; Index: lib/Target/ARM/ARMFrameLowering.cpp =================================================================== --- lib/Target/ARM/ARMFrameLowering.cpp +++ lib/Target/ARM/ARMFrameLowering.cpp @@ -52,14 +52,26 @@ MF.getSubtarget().useFastISel(); } +/// hasABIFP - Return true if the specified function should have a dedicated +/// frame pointer register, which must be compliant with the ABI. 
This is +/// true if frame pointer elimination is disabled for this function, and means +/// that the compiler may not change the layout of the frame record, and must +/// ensure that the frame pointer register is always valid. +bool ARMFrameLowering::hasABIFP(const MachineFunction &MF) const { + // iOS requires FP not to be clobbered for backtracing purpose. + return (STI.isTargetIOS() || + STI.isTargetWatchOS() || + MF.getTarget().Options.DisableFramePointerElim(MF)); +} + /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - // iOS requires FP not to be clobbered for backtracing purpose. - if (STI.isTargetIOS() || STI.isTargetWatchOS()) + // ABI-required frame pointer. + if (hasABIFP(MF)) return true; const MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -352,7 +364,7 @@ case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) { + if (STI.splitFramePushPop(MF)) { GPRCS2Size += 4; break; } @@ -556,7 +568,7 @@ case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) + if (STI.splitFramePushPop(MF)) break; // fallthrough case ARM::R0: @@ -589,7 +601,7 @@ case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) { + if (STI.splitFramePushPop(MF)) { unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); unsigned Offset = MFI.getObjectOffset(FI); unsigned CFIIndex = MMI.addFrameInst( @@ -901,7 +913,7 @@ unsigned LastReg = 0; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.splitFramePushPop())) continue; + if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue; // D-registers in the aligned area DPRCS2 are NOT spilled here. 
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -982,7 +994,7 @@ bool DeleteRet = false; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.splitFramePushPop())) continue; + if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue; // The aligned reloads from area DPRCS2 are not inserted here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -1546,7 +1558,7 @@ if (Spilled) { NumGPRSpills++; - if (!STI.splitFramePushPop()) { + if (!STI.splitFramePushPop(MF)) { if (Reg == ARM::LR) LRSpilled = true; CS1Spilled = true; @@ -1568,7 +1580,7 @@ break; } } else { - if (!STI.splitFramePushPop()) { + if (!STI.splitFramePushPop(MF)) { UnspilledCS1GPRs.push_back(Reg); continue; } @@ -1633,6 +1645,23 @@ if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); + if (hasFP(MF)) { + SavedRegs.set(FramePtr); + // If the frame pointer is required by the ABI, also spill LR so that we + // emit a complete frame record. + if (hasABIFP(MF) && !LRSpilled) { + SavedRegs.set(ARM::LR); + LRSpilled = true; + NumGPRSpills++; + } + auto FPPos = find(UnspilledCS1GPRs, FramePtr); + if (FPPos != UnspilledCS1GPRs.end()) + UnspilledCS1GPRs.erase(FPPos); + NumGPRSpills++; + if (FramePtr == ARM::R7) + CS1Spilled = true; + } + // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. // Spill LR as well so we can fold BX_RET to the registers restore (LDM). if (!LRSpilled && CS1Spilled) { @@ -1647,14 +1676,6 @@ ExtraCSSpill = true; } - if (hasFP(MF)) { - SavedRegs.set(FramePtr); - auto FPPos = find(UnspilledCS1GPRs, FramePtr); - if (FPPos != UnspilledCS1GPRs.end()) - UnspilledCS1GPRs.erase(FPPos); - NumGPRSpills++; - } - // If stack and double are 8-byte aligned and we are spilling an odd number // of GPRs, spill one extra callee save GPR so we won't have to pad between // the integer and double callee save areas. 
Index: lib/Target/ARM/ARMSubtarget.h =================================================================== --- lib/Target/ARM/ARMSubtarget.h +++ lib/Target/ARM/ARMSubtarget.h @@ -560,11 +560,20 @@ return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9; } + bool useR7AsFramePointer() const { + return isTargetMachO() || (!isTargetWindows() && isThumb()); + } /// Returns true if the frame setup is split into two separate pushes (first /// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent /// to lr. - bool splitFramePushPop() const { - return isTargetMachO(); + bool splitFramePushPop(const MachineFunction &MF) const { + // iOS always uses split push/pop + if (isTargetMachO()) + return true; + + // For other targets, we use split push/pop if the frame pointer is r7, and + // we are required to emit an ABI-compliant frame record. + return useR7AsFramePointer() && getFrameLowering()->hasABIFP(MF); } bool useStride4VFPs(const MachineFunction &MF) const; Index: lib/Target/ARM/Thumb1FrameLowering.cpp =================================================================== --- lib/Target/ARM/Thumb1FrameLowering.cpp +++ lib/Target/ARM/Thumb1FrameLowering.cpp @@ -150,7 +150,7 @@ case ARM::R9: case ARM::R10: case ARM::R11: - if (STI.splitFramePushPop()) { + if (STI.splitFramePushPop(MF)) { GPRCS2Size += 4; break; } @@ -212,7 +212,7 @@ case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) + if (STI.splitFramePushPop(MF)) break; // fallthough case ARM::R0: Index: test/CodeGen/ARM/debug-frame-large-stack.ll =================================================================== --- test/CodeGen/ARM/debug-frame-large-stack.ll +++ test/CodeGen/ARM/debug-frame-large-stack.ll @@ -23,13 +23,16 @@ ; CHECK-ARM-LABEL: test2: ; CHECK-ARM: .cfi_startproc -; CHECK-ARM: push {r4, r5} -; CHECK-ARM: .cfi_def_cfa_offset 8 -; CHECK-ARM: .cfi_offset r5, -4 -; CHECK-ARM: .cfi_offset r4, -8 +; CHECK-ARM: push {r4, r5, r11, lr} +; CHECK-ARM: .cfi_def_cfa_offset 
16 +; CHECK-ARM: .cfi_offset lr, -4 +; CHECK-ARM: .cfi_offset r11, -8 +; CHECK-ARM: .cfi_offset r5, -12 +; CHECK-ARM: .cfi_offset r4, -16 +; CHECK-ARM: add r11, sp, #8 +; CHECK-ARM: .cfi_def_cfa r11, 8 ; CHECK-ARM: sub sp, sp, #72 ; CHECK-ARM: sub sp, sp, #4096 -; CHECK-ARM: .cfi_def_cfa_offset 4176 ; CHECK-ARM: .cfi_endproc ; CHECK-ARM-FP_ELIM-LABEL: test2: @@ -54,14 +57,15 @@ ; CHECK-ARM-LABEL: test3: ; CHECK-ARM: .cfi_startproc -; CHECK-ARM: push {r4, r5, r11} -; CHECK-ARM: .cfi_def_cfa_offset 12 -; CHECK-ARM: .cfi_offset r11, -4 -; CHECK-ARM: .cfi_offset r5, -8 -; CHECK-ARM: .cfi_offset r4, -12 +; CHECK-ARM: push {r4, r5, r11, lr} +; CHECK-ARM: .cfi_def_cfa_offset 16 +; CHECK-ARM: .cfi_offset lr, -4 +; CHECK-ARM: .cfi_offset r11, -8 +; CHECK-ARM: .cfi_offset r5, -12 +; CHECK-ARM: .cfi_offset r4, -16 ; CHECK-ARM: add r11, sp, #8 -; CHECK-ARM: .cfi_def_cfa r11, 4 -; CHECK-ARM: sub sp, sp, #20 +; CHECK-ARM: .cfi_def_cfa r11, 8 +; CHECK-ARM: sub sp, sp, #16 ; CHECK-ARM: sub sp, sp, #805306368 ; CHECK-ARM: bic sp, sp, #15 ; CHECK-ARM: .cfi_endproc Index: test/CodeGen/ARM/dwarf-unwind.ll =================================================================== --- test/CodeGen/ARM/dwarf-unwind.ll +++ test/CodeGen/ARM/dwarf-unwind.ll @@ -71,12 +71,14 @@ define void @test_frame_pointer_offset() minsize "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_frame_pointer_offset: -; CHECK: push.w {r3, r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK: .cfi_def_cfa_offset 40 -; CHECK: add r7, sp, #16 -; CHECK: .cfi_def_cfa r7, 24 +; CHECK: push {r4, r5, r6, r7, lr} +; CHECK: .cfi_def_cfa_offset 20 +; CHECK: add r7, sp, #12 +; CHECK: .cfi_def_cfa r7, 8 +; CHECK-NOT: .cfi_def_cfa_offset +; CHECK: push.w {r7, r8, r9, r10, r11} ; CHECK-NOT: .cfi_def_cfa_offset call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{d8}"() call void @bar() ret void -} \ No newline at end of file +} Index: test/CodeGen/ARM/fast-isel-frameaddr.ll 
=================================================================== --- test/CodeGen/ARM/fast-isel-frameaddr.ll +++ test/CodeGen/ARM/fast-isel-frameaddr.ll @@ -6,22 +6,22 @@ define i8* @frameaddr_index0() nounwind { entry: ; DARWIN-ARM-LABEL: frameaddr_index0: -; DARWIN-ARM: push {r7} +; DARWIN-ARM: push {r7, lr} ; DARWIN-ARM: mov r7, sp ; DARWIN-ARM: mov r0, r7 ; DARWIN-THUMB2-LABEL: frameaddr_index0: -; DARWIN-THUMB2: str r7, [sp, #-4]! +; DARWIN-THUMB2: push {r7, lr} ; DARWIN-THUMB2: mov r7, sp ; DARWIN-THUMB2: mov r0, r7 ; LINUX-ARM-LABEL: frameaddr_index0: -; LINUX-ARM: push {r11} +; LINUX-ARM: push {r11, lr} ; LINUX-ARM: mov r11, sp ; LINUX-ARM: mov r0, r11 ; LINUX-THUMB2-LABEL: frameaddr_index0: -; LINUX-THUMB2: str r7, [sp, #-4]! +; LINUX-THUMB2: push {r7, lr} ; LINUX-THUMB2: mov r7, sp ; LINUX-THUMB2: mov r0, r7 @@ -32,22 +32,22 @@ define i8* @frameaddr_index1() nounwind { entry: ; DARWIN-ARM-LABEL: frameaddr_index1: -; DARWIN-ARM: push {r7} +; DARWIN-ARM: push {r7, lr} ; DARWIN-ARM: mov r7, sp ; DARWIN-ARM: ldr r0, [r7] ; DARWIN-THUMB2-LABEL: frameaddr_index1: -; DARWIN-THUMB2: str r7, [sp, #-4]! +; DARWIN-THUMB2: push {r7, lr} ; DARWIN-THUMB2: mov r7, sp ; DARWIN-THUMB2: ldr r0, [r7] ; LINUX-ARM-LABEL: frameaddr_index1: -; LINUX-ARM: push {r11} +; LINUX-ARM: push {r11, lr} ; LINUX-ARM: mov r11, sp ; LINUX-ARM: ldr r0, [r11] ; LINUX-THUMB2-LABEL: frameaddr_index1: -; LINUX-THUMB2: str r7, [sp, #-4]! +; LINUX-THUMB2: push {r7, lr} ; LINUX-THUMB2: mov r7, sp ; LINUX-THUMB2: mov r0, r7 ; LINUX-THUMB2: ldr r0, [r0] @@ -59,28 +59,28 @@ define i8* @frameaddr_index3() nounwind { entry: ; DARWIN-ARM-LABEL: frameaddr_index3: -; DARWIN-ARM: push {r7} +; DARWIN-ARM: push {r7, lr} ; DARWIN-ARM: mov r7, sp ; DARWIN-ARM: ldr r0, [r7] ; DARWIN-ARM: ldr r0, [r0] ; DARWIN-ARM: ldr r0, [r0] ; DARWIN-THUMB2-LABEL: frameaddr_index3: -; DARWIN-THUMB2: str r7, [sp, #-4]! 
+; DARWIN-THUMB2: push {r7, lr} ; DARWIN-THUMB2: mov r7, sp ; DARWIN-THUMB2: ldr r0, [r7] ; DARWIN-THUMB2: ldr r0, [r0] ; DARWIN-THUMB2: ldr r0, [r0] ; LINUX-ARM-LABEL: frameaddr_index3: -; LINUX-ARM: push {r11} +; LINUX-ARM: push {r11, lr} ; LINUX-ARM: mov r11, sp ; LINUX-ARM: ldr r0, [r11] ; LINUX-ARM: ldr r0, [r0] ; LINUX-ARM: ldr r0, [r0] ; LINUX-THUMB2-LABEL: frameaddr_index3: -; LINUX-THUMB2: str r7, [sp, #-4]! +; LINUX-THUMB2: push {r7, lr} ; LINUX-THUMB2: mov r7, sp ; LINUX-THUMB2: mov r0, r7 ; LINUX-THUMB2: ldr r0, [r0] Index: test/CodeGen/Thumb/push.ll =================================================================== --- test/CodeGen/Thumb/push.ll +++ test/CodeGen/Thumb/push.ll @@ -3,7 +3,7 @@ define void @t() nounwind { ; CHECK-LABEL: t: -; CHECK: push {r7} +; CHECK: push {r7, lr} entry: call void asm sideeffect alignstack ".long 0xe7ffdefe", ""() nounwind ret void Index: test/CodeGen/Thumb2/frame-pointer.ll =================================================================== --- /dev/null +++ test/CodeGen/Thumb2/frame-pointer.ll @@ -0,0 +1,83 @@ +; RUN: llc -mtriple=thumbv7m-none-eabi -o - %s | FileCheck %s + +declare void @foo() + +; Has a call, but no need for a frame pointer. +define void @call() { +; CHECK-LABEL: call: +; CHECK: push {[[DUMMYREG:r[0-9]+]], lr} +; CHECK-NOT: sp +; CHECK: bl foo +; CHECK: pop {[[DUMMYREG]], pc} + call void @foo() + ret void +} + +; Has a call, and frame pointer requested. +define void @call_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: call_nofpelim: +; CHECK: push {r7, lr} +; CHECK: mov r7, sp +; CHECK: bl foo +; CHECK: pop {r7, pc} + call void @foo() + ret void +} + +; Has a high register clobbered, no need for a frame pointer. +define void @highreg() { +; CHECK-LABEL: highreg: +; CHECK: push.w {r8, lr} +; CHECK-NOT: sp +; CHECK: bl foo +; CHECK: pop.w {r8, pc} + call void asm sideeffect "", "~{r8}" () + call void @foo() + ret void +} + +; Has a high register clobbered, frame pointer requested. 
We need to split the +; push into two, to ensure that r7 and lr are adjacent on the stack. +define void @highreg_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: highreg_nofpelim: +; CHECK: push {[[DUMMYREG:r[0-9]+]], r7, lr} +; CHECK: add r7, sp, #4 +; CHECK: str r8, [sp, #-4]! +; CHECK: bl foo +; CHECK: ldr r8, [sp], #4 +; CHECK: pop {[[DUMMYREG]], r7, pc} + call void asm sideeffect "", "~{r8}" () + call void @foo() + ret void +} + +; Has a high register clobbered, frame required due to variable-sized alloca. +; We need a frame pointer to correctly restore the stack, but don't need to +; split the push/pop here, as the frame pointer is not required by the ABI. +define void @highreg_alloca(i32 %a) { +; CHECK-LABEL: highreg_alloca: +; CHECK: push.w {[[SOMEREGS:.*]], r7, r8, lr} +; CHECK: add r7, sp, #{{[0-9]+}} +; CHECK: bl foo +; CHECK: pop.w {[[SOMEREGS]], r7, r8, pc} + %alloca = alloca i32, i32 %a, align 4 + call void @foo() + call void asm sideeffect "", "~{r8}" () + ret void +} + +; Has a high register clobbered, frame required due to both variable-sized +; alloca and ABI. We do need to split the push/pop here. +define void @highreg_alloca_nofpelim(i32 %a) "no-frame-pointer-elim"="true" { +; CHECK-LABEL: highreg_alloca_nofpelim: +; CHECK: push {[[SOMEREGS:.*]], r7, lr} +; CHECK: add r7, sp, #{{[0-9]+}} +; CHECK: str r8, [sp, #-4]! +; CHECK: bl foo +; CHECK: ldr r8, [sp], #4 +; CHECK: pop {[[SOMEREGS]], r7, pc} + %alloca = alloca i32, i32 %a, align 4 + call void @foo() + call void asm sideeffect "", "~{r8}" () + ret void +}