Index: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -49,18 +49,13 @@ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {} static unsigned getFramePointerReg(const ARMSubtarget &STI) { - if (STI.isTargetMachO()) - return ARM::R7; - else if (STI.isTargetWindows()) - return ARM::R11; - else // ARM EABI - return STI.isThumb() ? ARM::R7 : ARM::R11; + return STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11; } const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const ARMSubtarget &STI = MF->getSubtarget(); - bool UseSplitPush = STI.splitFramePushPop(); + bool UseSplitPush = STI.splitFramePushPop(*MF); const MCPhysReg *RegList = STI.isTargetDarwin() ? CSR_iOS_SaveList Index: llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp @@ -57,16 +57,14 @@ /// or if frame pointer elimination is disabled. bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); - // iOS requires FP not to be clobbered for backtracing purpose. - if (STI.isTargetIOS() || STI.isTargetWatchOS()) + // ABI-required frame pointer. + if (MF.getTarget().Options.DisableFramePointerElim(MF)) return true; - const MachineFrameInfo &MFI = MF.getFrameInfo(); - // Always eliminate non-leaf frame pointers. - return ((MF.getTarget().Options.DisableFramePointerElim(MF) && - MFI.hasCalls()) || - RegInfo->needsStackRealignment(MF) || + // Frame pointer required for use within this function. 
+ return (RegInfo->needsStackRealignment(MF) || MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken()); } @@ -352,7 +350,7 @@ case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) { + if (STI.splitFramePushPop(MF)) { GPRCS2Size += 4; break; } @@ -557,7 +555,7 @@ case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) + if (STI.splitFramePushPop(MF)) break; LLVM_FALLTHROUGH; case ARM::R0: @@ -590,7 +588,7 @@ case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) { + if (STI.splitFramePushPop(MF)) { unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); unsigned Offset = MFI.getObjectOffset(FI); unsigned CFIIndex = MMI.addFrameInst( @@ -902,7 +900,7 @@ unsigned LastReg = 0; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.splitFramePushPop())) continue; + if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue; // D-registers in the aligned area DPRCS2 are NOT spilled here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -983,7 +981,7 @@ bool DeleteRet = false; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.splitFramePushPop())) continue; + if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue; // The aligned reloads from area DPRCS2 are not inserted here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -1547,7 +1545,7 @@ if (Spilled) { NumGPRSpills++; - if (!STI.splitFramePushPop()) { + if (!STI.splitFramePushPop(MF)) { if (Reg == ARM::LR) LRSpilled = true; CS1Spilled = true; @@ -1569,7 +1567,7 @@ break; } } else { - if (!STI.splitFramePushPop()) { + if (!STI.splitFramePushPop(MF)) { UnspilledCS1GPRs.push_back(Reg); continue; } @@ -1634,6 +1632,27 @@ if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); + if (hasFP(MF)) { + SavedRegs.set(FramePtr); + // If the frame pointer is required by the ABI, also spill LR so that we + // emit a complete frame record. 
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) { + SavedRegs.set(ARM::LR); + LRSpilled = true; + NumGPRSpills++; + // LR is spilled now, so it must not be chosen again as an "extra" spill. + auto LRPos = find(UnspilledCS1GPRs, ARM::LR); + if (LRPos != UnspilledCS1GPRs.end()) + UnspilledCS1GPRs.erase(LRPos); + } + auto FPPos = find(UnspilledCS1GPRs, FramePtr); + if (FPPos != UnspilledCS1GPRs.end()) + UnspilledCS1GPRs.erase(FPPos); + NumGPRSpills++; + if (FramePtr == ARM::R7) + CS1Spilled = true; + } + // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. // Spill LR as well so we can fold BX_RET to the registers restore (LDM). if (!LRSpilled && CS1Spilled) { @@ -1648,14 +1667,6 @@ ExtraCSSpill = true; } - if (hasFP(MF)) { - SavedRegs.set(FramePtr); - auto FPPos = find(UnspilledCS1GPRs, FramePtr); - if (FPPos != UnspilledCS1GPRs.end()) - UnspilledCS1GPRs.erase(FPPos); - NumGPRSpills++; - } - // If stack and double are 8-byte aligned and we are spilling an odd number // of GPRs, spill one extra callee save GPR so we won't have to pad between // the integer and double callee save areas. Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.h =================================================================== --- llvm/trunk/lib/Target/ARM/ARMSubtarget.h +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h @@ -560,11 +560,15 @@ return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9; } + bool useR7AsFramePointer() const { + return isTargetDarwin() || (!isTargetWindows() && isThumb()); + } /// Returns true if the frame setup is split into two separate pushes (first /// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent /// to lr. 
- bool splitFramePushPop() const { - return isTargetMachO(); + bool splitFramePushPop(const MachineFunction &MF) const { + return useR7AsFramePointer() && + MF.getTarget().Options.DisableFramePointerElim(MF); } bool useStride4VFPs(const MachineFunction &MF) const; Index: llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp +++ llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -150,7 +150,7 @@ case ARM::R9: case ARM::R10: case ARM::R11: - if (STI.splitFramePushPop()) { + if (STI.splitFramePushPop(MF)) { GPRCS2Size += 4; break; } @@ -212,7 +212,7 @@ case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) + if (STI.splitFramePushPop(MF)) break; // fallthough case ARM::R0: Index: llvm/trunk/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll +++ llvm/trunk/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll @@ -9,7 +9,7 @@ @A = external global [4 x [4 x i32]] ; <[4 x [4 x i32]]*> [#uses=1] ; CHECK-LABEL: dct_luma_sp: -define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) { +define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) "no-frame-pointer-elim"="true" { entry: ; Make sure to use base-updating stores for saving callee-saved registers. 
; CHECK: push Index: llvm/trunk/test/CodeGen/ARM/2010-11-29-PrologueBug.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/2010-11-29-PrologueBug.ll +++ llvm/trunk/test/CodeGen/ARM/2010-11-29-PrologueBug.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s ; rdar://8690640 -define i32* @t(i32* %x) nounwind { +define i32* @t(i32* %x) nounwind "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: t: ; CHECK: push Index: llvm/trunk/test/CodeGen/ARM/2010-12-07-PEIBug.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/2010-12-07-PEIBug.ll +++ llvm/trunk/test/CodeGen/ARM/2010-12-07-PEIBug.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s ; rdar://8728956 -define hidden void @foo() nounwind ssp { +define hidden void @foo() nounwind ssp "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: foo: ; CHECK: mov r7, sp Index: llvm/trunk/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll +++ llvm/trunk/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll @@ -14,7 +14,7 @@ declare void @foo(i32) declare i32 @bar(i32) -define i32 @test(i32 %in1, i32 %in2) nounwind { +define i32 @test(i32 %in1, i32 %in2) nounwind "no-frame-pointer-elim"="true" { entry: %call = tail call zeroext i1 @getbool() nounwind br i1 %call, label %sw.bb18, label %sw.bb2 Index: llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping.ll +++ llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping.ll @@ -59,7 +59,7 @@ ; DISABLE-NEXT: pop {r7, pc} ; ; ENABLE-NEXT: bx lr -define i32 @foo(i32 %a, i32 %b) { +define i32 @foo(i32 %a, i32 %b) "no-frame-pointer-elim"="true" { %tmp = alloca i32, align 4 %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, 
label %true, label %false @@ -124,7 +124,7 @@ ; DISABLE-NEXT: pop {r4, r7, pc} ; ; ENABLE-NEXT: bx lr -define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { +define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -178,7 +178,7 @@ ; CHECK: @ %for.exit ; CHECK: nop ; CHECK: pop {r4 -define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { +define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) "no-frame-pointer-elim"="true" { entry: br label %for.preheader @@ -248,7 +248,7 @@ ; DISABLE-NEXT: pop {r4, r7, pc} ; ; ENABLE-NEXT: bx lr -define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { +define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -327,7 +327,7 @@ ; DISABLE-NEXT: pop {r4, r7, pc} ; ; ENABLE-NEXT: bx lr -define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 { +define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" #0 { entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then @@ -405,7 +405,7 @@ ; DISABLE-NEXT: pop {r4, r7, pc} ; ; ENABLE-NEXT: bx lr -define i32 @inlineAsm(i32 %cond, i32 %N) { +define i32 @inlineAsm(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -474,7 +474,7 @@ ; ARM-DISABLE-NEXT: mov sp, r7 ; THUMB-DISABLE-NEXT: add sp, #12 ; DISABLE-NEXT: pop {r7, pc} -define i32 @callVariadicFunc(i32 %cond, i32 %N) { +define i32 @callVariadicFunc(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then @@ -521,7 +521,7 @@ ; ; CHECK: bl{{x?}} _abort ; ENABLE-NOT: pop -define i32 @noreturn(i8 signext %bad_thing) { +define i32 @noreturn(i8 signext 
%bad_thing) "no-frame-pointer-elim"="true" { entry: %tobool = icmp eq i8 %bad_thing, 0 br i1 %tobool, label %if.end, label %if.abort @@ -548,7 +548,7 @@ ; The only condition for this test is the compilation finishes correctly. ; CHECK-LABEL: infiniteloop ; CHECK: pop -define void @infiniteloop() { +define void @infiniteloop() "no-frame-pointer-elim"="true" { entry: br i1 undef, label %if.then, label %if.end @@ -570,7 +570,7 @@ ; Another infinite loop test this time with a body bigger than just one block. ; CHECK-LABEL: infiniteloop2 ; CHECK: pop -define void @infiniteloop2() { +define void @infiniteloop2() "no-frame-pointer-elim"="true" { entry: br i1 undef, label %if.then, label %if.end @@ -600,7 +600,7 @@ ; Another infinite loop test this time with two nested infinite loop. ; CHECK-LABEL: infiniteloop3 ; CHECK: bx lr -define void @infiniteloop3() { +define void @infiniteloop3() "no-frame-pointer-elim"="true" { entry: br i1 undef, label %loop2a, label %body @@ -657,7 +657,7 @@ ; DISABLE: pop ; ; CHECK: bl -define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %tmp) { +define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %tmp) "no-frame-pointer-elim"="true" { bb: br i1 %or.cond, label %bb3, label %bb13 Index: llvm/trunk/test/CodeGen/ARM/call-tc.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/call-tc.ll +++ llvm/trunk/test/CodeGen/ARM/call-tc.ll @@ -10,14 +10,14 @@ declare void @g(i32, i32, i32, i32) -define void @t1() { +define void @t1() "no-frame-pointer-elim"="true" { ; CHECKELF-LABEL: t1: ; CHECKELF: bl g call void @g( i32 1, i32 2, i32 3, i32 4 ) ret void } -define void @t2() { +define void @t2() "no-frame-pointer-elim"="true" { ; CHECKV6-LABEL: t2: ; CHECKV6: bx r0 ; CHECKT2D-LABEL: t2: @@ -29,7 +29,7 @@ ret void } -define void @t3() { +define void @t3() "no-frame-pointer-elim"="true" { ; CHECKV6-LABEL: t3: ; CHECKV6: b _t2 ; CHECKELF-LABEL: t3: @@ -42,7 
+42,7 @@ } ; Sibcall optimization of expanded libcalls. rdar://8707777 -define double @t4(double %a) nounwind readonly ssp { +define double @t4(double %a) nounwind readonly ssp "no-frame-pointer-elim"="true" { entry: ; CHECKV6-LABEL: t4: ; CHECKV6: b _sin @@ -52,7 +52,7 @@ ret double %0 } -define float @t5(float %a) nounwind readonly ssp { +define float @t5(float %a) nounwind readonly ssp "no-frame-pointer-elim"="true" { entry: ; CHECKV6-LABEL: t5: ; CHECKV6: b _sinf @@ -66,7 +66,7 @@ declare double @sin(double) nounwind readonly -define i32 @t6(i32 %a, i32 %b) nounwind readnone { +define i32 @t6(i32 %a, i32 %b) nounwind readnone "no-frame-pointer-elim"="true" { entry: ; CHECKV6-LABEL: t6: ; CHECKV6: b ___divsi3 @@ -80,7 +80,7 @@ ; rdar://8309338 declare void @foo() nounwind -define void @t7() nounwind { +define void @t7() nounwind "no-frame-pointer-elim"="true" { entry: ; CHECKT2D-LABEL: t7: ; CHECKT2D: it ne @@ -101,7 +101,7 @@ ; Make sure codegenprep is duplicating ret instructions to enable tail calls. ; rdar://11140249 -define i32 @t8(i32 %x) nounwind ssp { +define i32 @t8(i32 %x) nounwind ssp "no-frame-pointer-elim"="true" { entry: ; CHECKT2D-LABEL: t8: ; CHECKT2D-NOT: push @@ -148,7 +148,7 @@ @x = external global i32, align 4 -define i32 @t9() nounwind { +define i32 @t9() nounwind "no-frame-pointer-elim"="true" { ; CHECKT2D-LABEL: t9: ; CHECKT2D: bl __ZN9MutexLockC1Ev ; CHECKT2D: bl __ZN9MutexLockD1Ev @@ -168,7 +168,7 @@ ; rdar://13827621 ; Correctly preserve the input chain for the tailcall node in the bitcast case, ; otherwise the call to floorf is lost. 
-define float @libcall_tc_test2(float* nocapture %a, float %b) { +define float @libcall_tc_test2(float* nocapture %a, float %b) "no-frame-pointer-elim"="true" { ; CHECKT2D-LABEL: libcall_tc_test2: ; CHECKT2D: bl _floorf ; CHECKT2D: b.w _truncf Index: llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll +++ llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll @@ -33,7 +33,7 @@ ; THUMB: blx ; THUMB: r4 ; THUMB: pop {{.*}}r4 -define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind { +define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind "no-frame-pointer-elim"="true" { %.b.i = load i1, i1* @__tls_guard, align 1 br i1 %.b.i, label %__tls_init.exit, label %init.i @@ -95,7 +95,7 @@ ; CHECK-O0-NOT: vpop ; CHECK-O0-NOT: vldr ; CHECK-O0: pop -define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { +define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind "no-frame-pointer-elim"="true" { ret i32* @sum1 } @@ -109,7 +109,7 @@ ; CHECK-O0-NOT: vldr ; CHECK-O0: pop declare cxx_fast_tlscc void @tls_helper() -define cxx_fast_tlscc %class.C* @tls_test2() #1 { +define cxx_fast_tlscc %class.C* @tls_test2() #1 "no-frame-pointer-elim"="true" { call cxx_fast_tlscc void @tls_helper() ret %class.C* @tC } @@ -119,7 +119,7 @@ declare %class.C* @_ZN1CD1Ev(%class.C* readnone returned %this) ; CHECK-LABEL: tls_test ; CHECK: bl __tlv_atexit -define cxx_fast_tlscc void @__tls_test() { +define cxx_fast_tlscc void @__tls_test() "no-frame-pointer-elim"="true" { entry: store i32 0, i32* getelementptr inbounds (%class.C, %class.C* @tC, i64 0, i32 0), align 4 %0 = tail call i32 @_tlv_atexit(void (i8*)* bitcast (%class.C* (%class.C*)* @_ZN1CD1Ev to void (i8*)*), i8* bitcast (%class.C* @tC to i8*), i8* nonnull @__dso_handle) #1 @@ -127,7 +127,7 @@ } declare void @somefunc() -define cxx_fast_tlscc void @test_ccmismatch_notail() { +define cxx_fast_tlscc void @test_ccmismatch_notail() 
"no-frame-pointer-elim"="true" { ; A tail call is not possible here because somefunc does not preserve enough ; registers. ; CHECK-LABEL: test_ccmismatch_notail: @@ -138,7 +138,7 @@ } declare cxx_fast_tlscc void @some_fast_tls_func() -define void @test_ccmismatch_tail() { +define void @test_ccmismatch_tail() "no-frame-pointer-elim"="true" { ; We can perform a tail call here because some_fast_tls_func preserves all ; necessary registers (and more). ; CHECK-LABEL: test_ccmismatch_tail: Index: llvm/trunk/test/CodeGen/ARM/debug-frame-large-stack.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/debug-frame-large-stack.ll +++ llvm/trunk/test/CodeGen/ARM/debug-frame-large-stack.ll @@ -23,13 +23,16 @@ ; CHECK-ARM-LABEL: test2: ; CHECK-ARM: .cfi_startproc -; CHECK-ARM: push {r4, r5} -; CHECK-ARM: .cfi_def_cfa_offset 8 -; CHECK-ARM: .cfi_offset r5, -4 -; CHECK-ARM: .cfi_offset r4, -8 +; CHECK-ARM: push {r4, r5, r11, lr} +; CHECK-ARM: .cfi_def_cfa_offset 16 +; CHECK-ARM: .cfi_offset lr, -4 +; CHECK-ARM: .cfi_offset r11, -8 +; CHECK-ARM: .cfi_offset r5, -12 +; CHECK-ARM: .cfi_offset r4, -16 +; CHECK-ARM: add r11, sp, #8 +; CHECK-ARM: .cfi_def_cfa r11, 8 ; CHECK-ARM: sub sp, sp, #72 ; CHECK-ARM: sub sp, sp, #4096 -; CHECK-ARM: .cfi_def_cfa_offset 4176 ; CHECK-ARM: .cfi_endproc ; CHECK-ARM-FP_ELIM-LABEL: test2: @@ -54,14 +57,15 @@ ; CHECK-ARM-LABEL: test3: ; CHECK-ARM: .cfi_startproc -; CHECK-ARM: push {r4, r5, r11} -; CHECK-ARM: .cfi_def_cfa_offset 12 -; CHECK-ARM: .cfi_offset r11, -4 -; CHECK-ARM: .cfi_offset r5, -8 -; CHECK-ARM: .cfi_offset r4, -12 +; CHECK-ARM: push {r4, r5, r11, lr} +; CHECK-ARM: .cfi_def_cfa_offset 16 +; CHECK-ARM: .cfi_offset lr, -4 +; CHECK-ARM: .cfi_offset r11, -8 +; CHECK-ARM: .cfi_offset r5, -12 +; CHECK-ARM: .cfi_offset r4, -16 ; CHECK-ARM: add r11, sp, #8 -; CHECK-ARM: .cfi_def_cfa r11, 4 -; CHECK-ARM: sub sp, sp, #20 +; CHECK-ARM: .cfi_def_cfa r11, 8 +; CHECK-ARM: sub sp, sp, #16 ; CHECK-ARM: 
sub sp, sp, #805306368 ; CHECK-ARM: bic sp, sp, #15 ; CHECK-ARM: .cfi_endproc Index: llvm/trunk/test/CodeGen/ARM/debug-info-arg.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/debug-info-arg.ll +++ llvm/trunk/test/CodeGen/ARM/debug-info-arg.ll @@ -6,7 +6,7 @@ %struct.tag_s = type { i32, i32, i32 } -define void @foo(%struct.tag_s* nocapture %this, %struct.tag_s* %c, i64 %x, i64 %y, %struct.tag_s* nocapture %ptr1, %struct.tag_s* nocapture %ptr2) nounwind ssp !dbg !1 { +define void @foo(%struct.tag_s* nocapture %this, %struct.tag_s* %c, i64 %x, i64 %y, %struct.tag_s* nocapture %ptr1, %struct.tag_s* nocapture %ptr2) nounwind ssp "no-frame-pointer-elim"="true" !dbg !1 { tail call void @llvm.dbg.value(metadata %struct.tag_s* %this, i64 0, metadata !5, metadata !DIExpression()), !dbg !20 tail call void @llvm.dbg.value(metadata %struct.tag_s* %c, i64 0, metadata !13, metadata !DIExpression()), !dbg !21 tail call void @llvm.dbg.value(metadata i64 %x, i64 0, metadata !14, metadata !DIExpression()), !dbg !22 Index: llvm/trunk/test/CodeGen/ARM/dwarf-unwind.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/dwarf-unwind.ll +++ llvm/trunk/test/CodeGen/ARM/dwarf-unwind.ll @@ -71,12 +71,14 @@ define void @test_frame_pointer_offset() minsize "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_frame_pointer_offset: -; CHECK: push.w {r3, r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK: .cfi_def_cfa_offset 40 -; CHECK: add r7, sp, #16 -; CHECK: .cfi_def_cfa r7, 24 +; CHECK: push {r4, r5, r6, r7, lr} +; CHECK: .cfi_def_cfa_offset 20 +; CHECK: add r7, sp, #12 +; CHECK: .cfi_def_cfa r7, 8 +; CHECK-NOT: .cfi_def_cfa_offset +; CHECK: push.w {r7, r8, r9, r10, r11} ; CHECK-NOT: .cfi_def_cfa_offset call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{d8}"() call void @bar() ret void -} \ No newline at end of file +} Index: 
llvm/trunk/test/CodeGen/ARM/fast-isel-frameaddr.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/fast-isel-frameaddr.ll +++ llvm/trunk/test/CodeGen/ARM/fast-isel-frameaddr.ll @@ -6,22 +6,22 @@ define i8* @frameaddr_index0() nounwind { entry: ; DARWIN-ARM-LABEL: frameaddr_index0: -; DARWIN-ARM: push {r7} +; DARWIN-ARM: push {r7, lr} ; DARWIN-ARM: mov r7, sp ; DARWIN-ARM: mov r0, r7 ; DARWIN-THUMB2-LABEL: frameaddr_index0: -; DARWIN-THUMB2: str r7, [sp, #-4]! +; DARWIN-THUMB2: push {r7, lr} ; DARWIN-THUMB2: mov r7, sp ; DARWIN-THUMB2: mov r0, r7 ; LINUX-ARM-LABEL: frameaddr_index0: -; LINUX-ARM: push {r11} +; LINUX-ARM: push {r11, lr} ; LINUX-ARM: mov r11, sp ; LINUX-ARM: mov r0, r11 ; LINUX-THUMB2-LABEL: frameaddr_index0: -; LINUX-THUMB2: str r7, [sp, #-4]! +; LINUX-THUMB2: push {r7, lr} ; LINUX-THUMB2: mov r7, sp ; LINUX-THUMB2: mov r0, r7 @@ -32,22 +32,22 @@ define i8* @frameaddr_index1() nounwind { entry: ; DARWIN-ARM-LABEL: frameaddr_index1: -; DARWIN-ARM: push {r7} +; DARWIN-ARM: push {r7, lr} ; DARWIN-ARM: mov r7, sp ; DARWIN-ARM: ldr r0, [r7] ; DARWIN-THUMB2-LABEL: frameaddr_index1: -; DARWIN-THUMB2: str r7, [sp, #-4]! +; DARWIN-THUMB2: push {r7, lr} ; DARWIN-THUMB2: mov r7, sp ; DARWIN-THUMB2: ldr r0, [r7] ; LINUX-ARM-LABEL: frameaddr_index1: -; LINUX-ARM: push {r11} +; LINUX-ARM: push {r11, lr} ; LINUX-ARM: mov r11, sp ; LINUX-ARM: ldr r0, [r11] ; LINUX-THUMB2-LABEL: frameaddr_index1: -; LINUX-THUMB2: str r7, [sp, #-4]! +; LINUX-THUMB2: push {r7, lr} ; LINUX-THUMB2: mov r7, sp ; LINUX-THUMB2: mov r0, r7 ; LINUX-THUMB2: ldr r0, [r0] @@ -59,28 +59,28 @@ define i8* @frameaddr_index3() nounwind { entry: ; DARWIN-ARM-LABEL: frameaddr_index3: -; DARWIN-ARM: push {r7} +; DARWIN-ARM: push {r7, lr} ; DARWIN-ARM: mov r7, sp ; DARWIN-ARM: ldr r0, [r7] ; DARWIN-ARM: ldr r0, [r0] ; DARWIN-ARM: ldr r0, [r0] ; DARWIN-THUMB2-LABEL: frameaddr_index3: -; DARWIN-THUMB2: str r7, [sp, #-4]! 
+; DARWIN-THUMB2: push {r7, lr} ; DARWIN-THUMB2: mov r7, sp ; DARWIN-THUMB2: ldr r0, [r7] ; DARWIN-THUMB2: ldr r0, [r0] ; DARWIN-THUMB2: ldr r0, [r0] ; LINUX-ARM-LABEL: frameaddr_index3: -; LINUX-ARM: push {r11} +; LINUX-ARM: push {r11, lr} ; LINUX-ARM: mov r11, sp ; LINUX-ARM: ldr r0, [r11] ; LINUX-ARM: ldr r0, [r0] ; LINUX-ARM: ldr r0, [r0] ; LINUX-THUMB2-LABEL: frameaddr_index3: -; LINUX-THUMB2: str r7, [sp, #-4]! +; LINUX-THUMB2: push {r7, lr} ; LINUX-THUMB2: mov r7, sp ; LINUX-THUMB2: mov r0, r7 ; LINUX-THUMB2: ldr r0, [r0] Index: llvm/trunk/test/CodeGen/ARM/hello.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/hello.ll +++ llvm/trunk/test/CodeGen/ARM/hello.ll @@ -9,7 +9,7 @@ @str = internal constant [12 x i8] c"Hello World\00" -define i32 @main() { +define i32 @main() "no-frame-pointer-elim"="true" { %tmp = call i32 @puts( i8* getelementptr ([12 x i8], [12 x i8]* @str, i32 0, i64 0) ) ; [#uses=0] ret i32 0 } @@ -17,7 +17,10 @@ declare i32 @puts(i8*) ; CHECK-LABEL: main -; CHECK: mov +; CHECK-NOT: mov +; CHECK: mov r11, sp +; CHECK-NOT: mov +; CHECK: mov r0, #0 ; CHECK-NOT: mov ; CHECK-FP-ELIM-LABEL: main Index: llvm/trunk/test/CodeGen/ARM/ifcvt-iter-indbr.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/ifcvt-iter-indbr.ll +++ llvm/trunk/test/CodeGen/ARM/ifcvt-iter-indbr.ll @@ -16,11 +16,12 @@ ; CHECK-NEXT: moveq pc ; CHECK-NEXT: LBB{{[0-9_]+}}: ; CHECK-NEXT: cmp {{.*}}, #42 -; CHECK-NEXT: itt ne -; CHECK-NEXT: strne.w -; CHECK-NEXT: movne pc +; CHECK-NEXT: beq [[CALL_FOO_1234:LBB[0-9_]+]] +; CHECK-NEXT: ldr {{.*}}[sp +; CHECK-NEXT: str +; CHECK-NEXT: mov pc ; CHECK-NEXT: Ltmp -; CHECK-NEXT: LBB0_2: +; CHECK-NEXT: [[CALL_FOO_1234]]: ; CHECK-NEXT: movw r0, #1234 ; CHECK-NEXT: b [[FOOCALL:LBB[0-9_]+]] ; CHECK-NEXT: Ltmp @@ -30,11 +31,11 @@ ; CHECK-NEXT: bl _foo ; ; CHECK-PROB: BB#0: -; CHECK-PROB: Successors according to CFG: BB#1({{[0-9a-fx/= 
]+}}50.00%) BB#2({{[0-9a-fx/= ]+}}25.00%) BB#4({{[0-9a-fx/= ]+}}25.00%) -; CHECK-PROB: BB#1: -; CHECK-PROB: Successors according to CFG: BB#2({{[0-9a-fx/= ]+}}75.00%) BB#4({{[0-9a-fx/= ]+}}25.00%) +; CHECK-PROB: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}50.00%) BB#3({{[0-9a-fx/= ]+}}25.00%) BB#5({{[0-9a-fx/= ]+}}25.00%) +; CHECK-PROB: BB#2: +; CHECK-PROB: Successors according to CFG: BB#3({{[0-9a-fx/= ]+}}50.00%) BB#5({{[0-9a-fx/= ]+}}50.00%) -define i32 @test(i32 %a, i32 %a2, i32* %p, i32* %p2) { +define i32 @test(i32 %a, i32 %a2, i32* %p, i32* %p2) "no-frame-pointer-elim"="true" { entry: %dst1 = call i8* @bar(i32 1, i8* blockaddress(@test, %bb1), i8* blockaddress(@test, %bb2)) %dst2 = call i8* @bar(i32 2, i8* blockaddress(@test, %bb1), i8* blockaddress(@test, %bb2)) Index: llvm/trunk/test/CodeGen/ARM/ifcvt10.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/ifcvt10.ll +++ llvm/trunk/test/CodeGen/ARM/ifcvt10.ll @@ -4,7 +4,7 @@ ; micro-coded and would have long issue latency even if predicated on ; false predicate. 
-define void @t(double %a, double %b, double %c, double %d, i32* nocapture %solutions, double* nocapture %x) nounwind { +define void @t(double %a, double %b, double %c, double %d, i32* nocapture %solutions, double* nocapture %x) nounwind "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: t: ; CHECK: vpop {d8} Index: llvm/trunk/test/CodeGen/ARM/ifcvt5.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/ifcvt5.ll +++ llvm/trunk/test/CodeGen/ARM/ifcvt5.ll @@ -4,14 +4,14 @@ @x = external global i32* ; [#uses=1] -define void @foo(i32 %a) { +define void @foo(i32 %a) "no-frame-pointer-elim"="true" { entry: %tmp = load i32*, i32** @x ; [#uses=1] store i32 %a, i32* %tmp ret void } -define i32 @t1(i32 %a, i32 %b) { +define i32 @t1(i32 %a, i32 %b) "no-frame-pointer-elim"="true" { ; A8-LABEL: t1: ; A8: bxlt lr Index: llvm/trunk/test/CodeGen/ARM/insn-sched1.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/insn-sched1.ll +++ llvm/trunk/test/CodeGen/ARM/insn-sched1.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o /dev/null ; RUN: llc -mtriple=arm-apple-ios -mattr=+v6 %s -o - | FileCheck %s -define i32 @test(i32 %x) { +define i32 @test(i32 %x) "no-frame-pointer-elim"="true" { %tmp = trunc i32 %x to i16 ; [#uses=1] %tmp2 = call i32 @f( i32 1, i16 %tmp ) ; [#uses=1] ret i32 %tmp2 Index: llvm/trunk/test/CodeGen/ARM/ldrd.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/ldrd.ll +++ llvm/trunk/test/CodeGen/ARM/ldrd.ll @@ -15,7 +15,7 @@ declare i64* @get_ptr() declare void @use_i64(i64 %v) -define void @test_ldrd(i64 %a) nounwind readonly { +define void @test_ldrd(i64 %a) nounwind readonly "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_ldrd: ; NORMAL: bl{{x?}} _get_ptr ; A8: ldrd r0, r1, [r0] @@ -49,7 +49,7 @@ ; GREEDY: %bb ; GREEDY: ldrd ; GREEDY: str -define void @f(i32* nocapture %a, i32* nocapture %b, 
i32 %n) nounwind { +define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind "no-frame-pointer-elim"="true" { entry: %0 = add nsw i32 %n, -1 ; [#uses=2] %1 = icmp sgt i32 %0, 0 ; [#uses=1] @@ -79,7 +79,7 @@ @TestVar = external global %struct.Test ; CHECK-LABEL: Func1: -define void @Func1() nounwind ssp { +define void @Func1() nounwind ssp "no-frame-pointer-elim"="true" { entry: ; A8: movw [[BASE:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}} ; A8: movt [[BASE]], :upper16:{{.*}}TestVar{{.*}} @@ -104,7 +104,7 @@ ; A8: ldrd ; CHECK: bl{{x?}} _extfunc ; A8: pop -define void @Func2(i32* %p) { +define void @Func2(i32* %p) "no-frame-pointer-elim"="true" { entry: %addr0 = getelementptr i32, i32* %p, i32 0 %addr1 = getelementptr i32, i32* %p, i32 1 @@ -129,7 +129,7 @@ ; GREEDY: ldrd r1, r2, [sp] ; CONSERVATIVE: ldrd r1, r2, [sp] ; CHECK: bl{{x?}} _extfunc -define void @strd_spill_ldrd_reload(i32 %v0, i32 %v1) { +define void @strd_spill_ldrd_reload(i32 %v0, i32 %v1) "no-frame-pointer-elim"="true" { ; force %v0 and %v1 to be spilled call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{lr}"() ; force the reloaded %v0, %v1 into different registers @@ -143,7 +143,7 @@ ; NORMAL: ldrd r1, r2, [r0], #-8 ; CONSERVATIVE-NOT: ldrd ; CHECK: bl{{x?}} _extfunc -define void @ldrd_postupdate_dec(i32* %p0) { +define void @ldrd_postupdate_dec(i32* %p0) "no-frame-pointer-elim"="true" { %p0.1 = getelementptr i32, i32* %p0, i32 1 %v0 = load i32, i32* %p0 %v1 = load i32, i32* %p0.1 @@ -156,7 +156,7 @@ ; NORMAL: ldrd r1, r2, [r0], #8 ; CONSERVATIVE-NOT: ldrd ; CHECK: bl{{x?}} _extfunc -define void @ldrd_postupdate_inc(i32* %p0) { +define void @ldrd_postupdate_inc(i32* %p0) "no-frame-pointer-elim"="true" { %p0.1 = getelementptr i32, i32* %p0, i32 1 %v0 = load i32, i32* %p0 %v1 = load i32, i32* %p0.1 @@ -169,7 +169,7 @@ ; NORMAL: strd r1, r2, [r0], #-8 ; CONSERVATIVE-NOT: strd ; CHECK: bx lr -define i32* @strd_postupdate_dec(i32* 
%p0, i32 %v0, i32 %v1) { +define i32* @strd_postupdate_dec(i32* %p0, i32 %v0, i32 %v1) "no-frame-pointer-elim"="true" { %p0.1 = getelementptr i32, i32* %p0, i32 1 store i32 %v0, i32* %p0 store i32 %v1, i32* %p0.1 @@ -181,7 +181,7 @@ ; NORMAL: strd r1, r2, [r0], #8 ; CONSERVATIVE-NOT: strd ; CHECK: bx lr -define i32* @strd_postupdate_inc(i32* %p0, i32 %v0, i32 %v1) { +define i32* @strd_postupdate_inc(i32* %p0, i32 %v0, i32 %v1) "no-frame-pointer-elim"="true" { %p0.1 = getelementptr i32, i32* %p0, i32 1 store i32 %v0, i32* %p0 store i32 %v1, i32* %p0.1 Index: llvm/trunk/test/CodeGen/ARM/lsr-unfolded-offset.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/lsr-unfolded-offset.ll +++ llvm/trunk/test/CodeGen/ARM/lsr-unfolded-offset.ll @@ -15,7 +15,7 @@ %struct.partition_entry = type { i32, i32, i64, i64 } -define i32 @partition_overlap_check(%struct.partition_entry* nocapture %part, i32 %num_entries) nounwind readonly optsize ssp { +define i32 @partition_overlap_check(%struct.partition_entry* nocapture %part, i32 %num_entries) nounwind readonly optsize ssp "no-frame-pointer-elim"="true" { entry: %cmp79 = icmp sgt i32 %num_entries, 0 br i1 %cmp79, label %outer.loop, label %for.end72 Index: llvm/trunk/test/CodeGen/ARM/memfunc.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/memfunc.ll +++ llvm/trunk/test/CodeGen/ARM/memfunc.ll @@ -8,7 +8,7 @@ ; RUN: llc < %s -mtriple=arm-none-musleabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI --check-prefix=CHECK ; RUN: llc < %s -mtriple=arm-none-musleabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI --check-prefix=CHECK -define void @f1(i8* %dest, i8* %src) { +define void @f1(i8* %dest, i8* %src) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f1 @@ -98,7 +98,7 @@ } ; Check that alloca arguments to memory intrinsics are automatically aligned if at least 8 bytes in size -define 
void @f2(i8* %dest, i32 %n) { +define void @f2(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f2 @@ -107,9 +107,9 @@ ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: add r1, sp, #28 ; CHECK-DARWIN: bl _memmove - ; CHECK-EABI: add r1, sp, #28 + ; CHECK-EABI: {{add r1, sp, #28|sub r1, r(7|11), #20}} ; CHECK-EABI: bl __aeabi_memmove - ; CHECK-GNUEABI: add r1, sp, #28 + ; CHECK-GNUEABI: {{add r1, sp, #28|sub r1, r(7|11), #20}} ; CHECK-GNUEABI: bl memmove %arr0 = alloca [9 x i8], align 1 %0 = bitcast [9 x i8]* %arr0 to i8* @@ -144,11 +144,11 @@ } ; Check that alloca arguments are not aligned if less than 8 bytes in size -define void @f3(i8* %dest, i32 %n) { +define void @f3(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f3 - ; CHECK: {{add(.w)? r1, sp, #17|sub(.w)? r1, r7, #15}} + ; CHECK: {{add(.w)? r1, sp, #17|sub(.w)? r1, r(7|11), #15}} ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove @@ -157,7 +157,7 @@ %0 = bitcast [7 x i8]* %arr0 to i8* call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r1, sp, #10}} + ; CHECK: {{add(.w)? r1, sp, #10|sub(.w)? r1, r(7|11), #22}} ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy @@ -166,7 +166,7 @@ %1 = bitcast [7 x i8]* %arr1 to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r0, sp, #3}} + ; CHECK: {{add(.w)? r0, sp, #3|sub(.w)? r0, r(7|11), #29}} ; CHECK-IOS: mov r1, #1 ; CHECK-IOS: bl _memset ; CHECK-DARWIN: movs r1, #1 @@ -183,11 +183,11 @@ } ; Check that alloca arguments are not aligned if size+offset is less than 8 bytes -define void @f4(i8* %dest, i32 %n) { +define void @f4(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f4 - ; CHECK: {{add(.w)? r., sp, #23|sub(.w)? r., r7, #17}} + ; CHECK: {{add(.w)? r., sp, #23|sub(.w)? 
r., r(7|11), #17}} ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove @@ -196,7 +196,7 @@ %0 = getelementptr inbounds [9 x i8], [9 x i8]* %arr0, i32 0, i32 4 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(10|14)}} + ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #26}} ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy @@ -205,7 +205,7 @@ %1 = getelementptr inbounds [9 x i8], [9 x i8]* %arr1, i32 0, i32 4 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(1|5)}} + ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #35}} ; CHECK-IOS: mov r1, #1 ; CHECK-IOS: bl _memset ; CHECK-DARWIN: movs r1, #1 @@ -222,11 +222,11 @@ } ; Check that alloca arguments are not aligned if the offset is not a multiple of 4 -define void @f5(i8* %dest, i32 %n) { +define void @f5(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f5 - ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}} + ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r(7|11), #21}} ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove @@ -235,7 +235,7 @@ %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 1 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(10|14)}} + ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #34}} ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy @@ -244,7 +244,7 @@ %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 1 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(1|5)}} + ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)?
r., r(7|11), #47}} ; CHECK-IOS: mov r1, #1 ; CHECK-IOS: bl _memset ; CHECK-DARWIN: movs r1, #1 @@ -261,11 +261,11 @@ } ; Check that alloca arguments are not aligned if the offset is unknown -define void @f6(i8* %dest, i32 %n, i32 %i) { +define void @f6(i8* %dest, i32 %n, i32 %i) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f6 - ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #25}} + ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r(7|11), #(25|29)}} ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove @@ -274,7 +274,7 @@ %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 %i call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(10|14)}} + ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #42}} ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy @@ -283,7 +283,7 @@ %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 %i call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(1|5)}} + ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #55}} ; CHECK-IOS: mov r1, #1 ; CHECK-IOS: bl _memset ; CHECK-DARWIN: movs r1, #1 @@ -300,11 +300,11 @@ } ; Check that alloca arguments are not aligned if the GEP is not inbounds -define void @f7(i8* %dest, i32 %n) { +define void @f7(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f7 - ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}} + ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r(7|11), #21}} ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove @@ -313,7 +313,7 @@ %0 = getelementptr [13 x i8], [13 x i8]* %arr0, i32 0, i32 4 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(10|14)}} + ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? 
r., r(7|11), #34}} ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy @@ -322,7 +322,7 @@ %1 = getelementptr [13 x i8], [13 x i8]* %arr1, i32 0, i32 4 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(1|5)}} + ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #47}} ; CHECK-IOS: mov r1, #1 ; CHECK-IOS: bl _memset ; CHECK-DARWIN: movs r1, #1 @@ -339,11 +339,11 @@ } ; Check that alloca arguments are not aligned when the offset is past the end of the allocation -define void @f8(i8* %dest, i32 %n) { +define void @f8(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f8 - ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}} + ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r(7|11), #21}} ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove @@ -352,7 +352,7 @@ %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 16 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(10|14)}} + ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #34}} ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy @@ -361,7 +361,7 @@ %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 16 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(1|5)}} + ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? 
r., r(7|11), #47}} ; CHECK-IOS: mov r1, #1 ; CHECK-IOS: bl _memset ; CHECK-DARWIN: movs r1, #1 @@ -388,7 +388,7 @@ @arr7 = external global [7 x i8], align 1 @arr8 = internal global [128 x i8] undef @arr9 = weak_odr global [128 x i8] undef -define void @f9(i8* %dest, i32 %n) { +define void @f9(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr1, i32 0, i32 0), i32 %n, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr2, i32 0, i32 0), i32 %n, i32 1, i1 false) Index: llvm/trunk/test/CodeGen/ARM/noreturn.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/noreturn.ll +++ llvm/trunk/test/CodeGen/ARM/noreturn.ll @@ -1,6 +1,6 @@ ; RUN: llc -O3 -o - %s | FileCheck %s ; Test case from PR16882. -target triple = "thumbv7s-apple-ios" +target triple = "thumbv7a-none-eabi" define i32 @test1() { ; CHECK-LABEL: @test1 @@ -60,6 +60,61 @@ unreachable } + +define i32 @test1_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: @test1_nofpelim +; CHECK: push +entry: + tail call void @overflow() #0 + unreachable +} + +define i32 @test2_nofpelim(i32 %x, i32 %y) "no-frame-pointer-elim"="true" { +; CHECK-LABEL: @test2_nofpelim +; CHECK: push +entry: + %conv = sext i32 %x to i64 + %conv1 = sext i32 %y to i64 + %mul = mul nsw i64 %conv1, %conv + %conv2 = trunc i64 %mul to i32 + %conv3 = sext i32 %conv2 to i64 + %cmp = icmp eq i64 %mul, %conv3 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @overflow() #0 + unreachable + +if.end: ; preds = %entry + ret i32 %conv2 +} + +; Test case for PR17825. 
+define i32 @test3_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: @test3_nofpelim +; CHECK: push +entry: + tail call void @overflow_with_unwind() #1 + unreachable +} + +; Test case for uwtable +define i32 @test4_nofpelim() uwtable "no-frame-pointer-elim"="true" { +; CHECK-LABEL: @test4_nofpelim +; CHECK: push +entry: + tail call void @overflow() #0 + unreachable +} + +define i32 @test5_nofpelim() uwtable "no-frame-pointer-elim"="true" { +; CHECK-LABEL: @test5_nofpelim +; CHECK: push +entry: + tail call void @overflow_with_unwind() #1 + unreachable +} + ; Function Attrs: noreturn declare void @overflow_with_unwind() #1 Index: llvm/trunk/test/CodeGen/ARM/swiftself.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/swiftself.ll +++ llvm/trunk/test/CodeGen/ARM/swiftself.ll @@ -7,7 +7,7 @@ ; Parameter with swiftself should be allocated to r10. ; CHECK-LABEL: swiftself_param: ; CHECK: mov r0, r10 -define i8 *@swiftself_param(i8* swiftself %addr0) { +define i8 *@swiftself_param(i8* swiftself %addr0) "no-frame-pointer-elim"="true" { ret i8 *%addr0 } @@ -15,7 +15,7 @@ ; CHECK-LABEL: call_swiftself: ; CHECK: mov r10, r0 ; CHECK: bl {{_?}}swiftself_param -define i8 *@call_swiftself(i8* %arg) { +define i8 *@call_swiftself(i8* %arg) "no-frame-pointer-elim"="true" { %res = call i8 *@swiftself_param(i8* swiftself %arg) ret i8 *%res } @@ -25,7 +25,7 @@ ; CHECK: push {r10} ; ... 
; CHECK: pop {r10} -define i8 *@swiftself_clobber(i8* swiftself %addr0) { +define i8 *@swiftself_clobber(i8* swiftself %addr0) "no-frame-pointer-elim"="true" { call void asm sideeffect "", "~{r10}"() ret i8 *%addr0 } @@ -37,7 +37,7 @@ ; OPT: bl {{_?}}swiftself_param ; OPT-NOT: mov{{.*}}r10 ; OPT-NEXT: bl {{_?}}swiftself_param -define void @swiftself_passthrough(i8* swiftself %addr0) { +define void @swiftself_passthrough(i8* swiftself %addr0) "no-frame-pointer-elim"="true" { call i8 *@swiftself_param(i8* swiftself %addr0) call i8 *@swiftself_param(i8* swiftself %addr0) ret void @@ -47,7 +47,7 @@ ; CHECK-LABEL: swiftself_tail: ; TAILCALL: b {{_?}}swiftself_param ; TAILCALL-NOT: pop -define i8* @swiftself_tail(i8* swiftself %addr0) { +define i8* @swiftself_tail(i8* swiftself %addr0) "no-frame-pointer-elim"="true" { call void asm sideeffect "", "~{r10}"() %res = tail call i8* @swiftself_param(i8* swiftself %addr0) ret i8* %res @@ -59,7 +59,7 @@ ; CHECK: mov r10, r0 ; CHECK: bl {{_?}}swiftself_param ; CHECK: pop -define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind { +define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind "no-frame-pointer-elim"="true" { %res = tail call i8* @swiftself_param(i8* swiftself %addr1) ret i8* %res } Index: llvm/trunk/test/CodeGen/ARM/v7k-abi-align.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/v7k-abi-align.ll +++ llvm/trunk/test/CodeGen/ARM/v7k-abi-align.ll @@ -2,25 +2,25 @@ %struct = type { i8, i64, i8, double, i8, <2 x float>, i8, <4 x float> } -define i32 @test_i64_align() { +define i32 @test_i64_align() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_i64_align: ; CHECL: movs r0, #8 ret i32 ptrtoint(i64* getelementptr(%struct, %struct* null, i32 0, i32 1) to i32) } -define i32 @test_f64_align() { +define i32 @test_f64_align() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_f64_align: ; CHECL: movs r0, #24 ret i32 ptrtoint(double* 
getelementptr(%struct, %struct* null, i32 0, i32 3) to i32) } -define i32 @test_v2f32_align() { +define i32 @test_v2f32_align() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_v2f32_align: ; CHECL: movs r0, #40 ret i32 ptrtoint(<2 x float>* getelementptr(%struct, %struct* null, i32 0, i32 5) to i32) } -define i32 @test_v4f32_align() { +define i32 @test_v4f32_align() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_v4f32_align: ; CHECL: movs r0, #64 ret i32 ptrtoint(<4 x float>* getelementptr(%struct, %struct* null, i32 0, i32 7) to i32) @@ -28,7 +28,7 @@ ; Key point here is than an extra register has to be saved so that the DPRs end ; up in an aligned location (as prologue/epilogue inserter had calculated). -define void @test_dpr_unwind_align() { +define void @test_dpr_unwind_align() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_dpr_unwind_align: ; CHECK: push {r5, r6, r7, lr} ; CHECK-NOT: sub sp @@ -51,7 +51,7 @@ ; This time, there's no viable way to tack CS-registers onto the list: a real SP ; adjustment needs to be performed to put d8 and d9 where they should be. 
-define void @test_dpr_unwind_align_manually() { +define void @test_dpr_unwind_align_manually() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_dpr_unwind_align_manually: ; CHECK: push {r4, r5, r6, r7, lr} ; CHECK-NOT: sub sp @@ -76,7 +76,7 @@ } ; If there's only a CS1 area, the sub should be in the right place: -define void @test_dpr_unwind_align_just_cs1() { +define void @test_dpr_unwind_align_just_cs1() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_dpr_unwind_align_just_cs1: ; CHECK: push {r4, r5, r6, r7, lr} ; CHECK: sub sp, #4 @@ -99,7 +99,7 @@ } ; If there are no DPRs, we shouldn't try to align the stack in stages anyway -define void @test_dpr_unwind_align_no_dprs() { +define void @test_dpr_unwind_align_no_dprs() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_dpr_unwind_align_no_dprs: ; CHECK: push {r4, r5, r6, r7, lr} ; CHECK: sub sp, #12 @@ -117,7 +117,7 @@ ; 128-bit vectors should use 128-bit (i.e. correctly aligned) slots on ; the stack. -define <4 x float> @test_v128_stack_pass([8 x double], float, <4 x float> %in) { +define <4 x float> @test_v128_stack_pass([8 x double], float, <4 x float> %in) "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_v128_stack_pass: ; CHECK: add r[[ADDR:[0-9]+]], sp, #16 ; CHECK: vld1.64 {d0, d1}, [r[[ADDR]]:128] @@ -129,7 +129,7 @@ ; When varargs are enabled, we go down a different route. Still want 128-bit ; alignment though. 
-define void @test_v128_stack_pass_varargs(<4 x float> %in) { +define void @test_v128_stack_pass_varargs(<4 x float> %in) "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_v128_stack_pass_varargs: ; CHECK: add r[[ADDR:[0-9]+]], sp, #16 ; CHECK: vst1.64 {d0, d1}, [r[[ADDR]]:128] @@ -140,7 +140,7 @@ ; To be compatible with AAPCS's va_start model (store r0-r3 at incoming SP, give ; a single pointer), 64-bit quantities must be pass -define i64 @test_64bit_gpr_align(i32, i64 %r2_r3, i32 %sp) { +define i64 @test_64bit_gpr_align(i32, i64 %r2_r3, i32 %sp) "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_64bit_gpr_align: ; CHECK: ldr [[RHS:r[0-9]+]], [sp] ; CHECK: adds r0, [[RHS]], r2 Index: llvm/trunk/test/CodeGen/ARM/warn-stack.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/warn-stack.ll +++ llvm/trunk/test/CodeGen/ARM/warn-stack.ll @@ -4,7 +4,7 @@ ; ; CHECK-NOT: nowarn -define void @nowarn() nounwind ssp { +define void @nowarn() nounwind ssp "no-frame-pointer-elim"="true" { entry: %buffer = alloca [12 x i8], align 1 %arraydecay = getelementptr inbounds [12 x i8], [12 x i8]* %buffer, i64 0, i64 0 @@ -13,7 +13,7 @@ } ; CHECK: warning: stack size limit exceeded (92) in warn -define void @warn() nounwind ssp { +define void @warn() nounwind ssp "no-frame-pointer-elim"="true" { entry: %buffer = alloca [80 x i8], align 1 %arraydecay = getelementptr inbounds [80 x i8], [80 x i8]* %buffer, i64 0, i64 0 Index: llvm/trunk/test/CodeGen/Thumb/large-stack.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb/large-stack.ll +++ llvm/trunk/test/CodeGen/Thumb/large-stack.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s --check-prefix=CHECK --check-prefix=IOS -; RUN: llc < %s -mtriple=thumb-none-eabi | FileCheck %s --check-prefix=CHECK --check-prefix=EABI +; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s +; RUN: llc < %s 
-mtriple=thumb-none-eabi | FileCheck %s ; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-apple-ios -; RUN: llvm-objdump -triple=thumbv6-apple-ios -d %t | FileCheck %s --check-prefix=CHECK --check-prefix=IOS +; RUN: llvm-objdump -triple=thumbv6-apple-ios -d %t | FileCheck %s ; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-none-eabi -; RUN: llvm-objdump -triple=thumbv6-none-eabi -d %t | FileCheck %s --check-prefix=CHECK --check-prefix=EABI +; RUN: llvm-objdump -triple=thumbv6-none-eabi -d %t | FileCheck %s ; Largest stack for which a single tADDspi/tSUBspi is enough define void @test1() { @@ -20,11 +20,21 @@ ; CHECK: sub sp, #508 ; CHECK: sub sp, #508 ; CHECK: sub sp, #508 -; EABI: add sp, #508 -; EABI: add sp, #508 -; EABI: add sp, #508 -; IOS: subs r4, r7, #4 -; IOS: mov sp, r4 +; CHECK: add sp, #508 +; CHECK: add sp, #508 +; CHECK: add sp, #508 + %tmp = alloca [ 1524 x i8 ] , align 4 + ret void +} + +; Largest stack for which three tADDspi/tSUBspis are enough +define void @test100_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: test100_nofpelim: +; CHECK: sub sp, #508 +; CHECK: sub sp, #508 +; CHECK: sub sp, #508 +; CHECK: subs r4, r7, #4 +; CHECK: mov sp, r4 %tmp = alloca [ 1524 x i8 ] , align 4 ret void } @@ -34,10 +44,19 @@ ; CHECK-LABEL: test2: ; CHECK: ldr [[TEMP:r[0-7]]], ; CHECK: add sp, [[TEMP]] -; EABI: ldr [[TEMP:r[0-7]]], -; EABI: add sp, [[TEMP]] -; IOS: subs r4, r7, #4 -; IOS: mov sp, r4 +; CHECK: ldr [[TEMP:r[0-7]]], +; CHECK: add sp, [[TEMP]] + %tmp = alloca [ 1528 x i8 ] , align 4 + ret void +} + +; Smallest stack for which we use a constant pool +define void @test2_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: test2_nofpelim: +; CHECK: ldr [[TEMP:r[0-7]]], +; CHECK: add sp, [[TEMP]] +; CHECK: subs r4, r7, #4 +; CHECK: mov sp, r4 %tmp = alloca [ 1528 x i8 ] , align 4 ret void } @@ -48,10 +67,24 @@ ; CHECK: add sp, [[TEMP]] ; CHECK: ldr [[TEMP]], ; CHECK: add [[TEMP]], sp -; EABI: ldr [[TEMP:r[0-7]]], -; EABI: add 
sp, [[TEMP]] -; IOS: subs r4, r7, #4 -; IOS: mov sp, r4 +; CHECK: ldr [[TEMP:r[0-7]]], +; CHECK: add sp, [[TEMP]] + %retval = alloca i32, align 4 + %tmp = alloca i32, align 4 + %a = alloca [805306369 x i8], align 16 + store i32 0, i32* %tmp + %tmp1 = load i32, i32* %tmp + ret i32 %tmp1 +} + +define i32 @test3_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: test3_nofpelim: +; CHECK: ldr [[TEMP:r[0-7]]], +; CHECK: add sp, [[TEMP]] +; CHECK: ldr [[TEMP]], +; CHECK: add [[TEMP]], sp +; CHECK: subs r4, r7, +; CHECK: mov sp, r4 %retval = alloca i32, align 4 %tmp = alloca i32, align 4 %a = alloca [805306369 x i8], align 16 Index: llvm/trunk/test/CodeGen/Thumb/push.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb/push.ll +++ llvm/trunk/test/CodeGen/Thumb/push.ll @@ -3,7 +3,7 @@ define void @t() nounwind { ; CHECK-LABEL: t: -; CHECK: push {r7} +; CHECK: push {r7, lr} entry: call void asm sideeffect alignstack ".long 0xe7ffdefe", ""() nounwind ret void Index: llvm/trunk/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll +++ llvm/trunk/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll @@ -3,7 +3,7 @@ @"\01LC" = external constant [36 x i8], align 1 ; <[36 x i8]*> [#uses=1] -define i32 @t(i32, ...) nounwind { +define i32 @t(i32, ...) 
nounwind "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: t: ; CHECK: add r7, sp, #12 Index: llvm/trunk/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll +++ llvm/trunk/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll @@ -5,7 +5,7 @@ declare void @bar() nounwind optsize -define void @foo() nounwind optsize { +define void @foo() nounwind optsize "no-frame-pointer-elim"="true" { ; CHECK-LABEL: foo: ; CHECK: push ; CHECK: mov r7, sp Index: llvm/trunk/test/CodeGen/Thumb2/aligned-spill.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/aligned-spill.ll +++ llvm/trunk/test/CodeGen/Thumb2/aligned-spill.ll @@ -11,7 +11,7 @@ ; CHECK: push {r4, r7, lr} ; CHECK: bfc r4, #0, #3 ; CHECK: mov sp, r4 -define void @f(double* nocapture %p) nounwind ssp { +define void @f(double* nocapture %p) nounwind ssp "no-frame-pointer-elim"="true" { entry: %0 = load double, double* %p, align 4 tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind @@ -45,7 +45,7 @@ declare void @g() ; Spill 7 d-registers. -define void @f7(double* nocapture %p) nounwind ssp { +define void @f7(double* nocapture %p) nounwind ssp "no-frame-pointer-elim"="true" { entry: tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14}"() nounwind ret void @@ -69,7 +69,7 @@ ; NEON: pop ; Spill 7 d-registers, leave a hole. 
-define void @f3plus4(double* nocapture %p) nounwind ssp { +define void @f3plus4(double* nocapture %p) nounwind ssp "no-frame-pointer-elim"="true" { entry: tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d12},~{d13},~{d14},~{d15}"() nounwind ret void Index: llvm/trunk/test/CodeGen/Thumb2/frame-pointer.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/frame-pointer.ll +++ llvm/trunk/test/CodeGen/Thumb2/frame-pointer.ll @@ -0,0 +1,152 @@ +; RUN: llc -mtriple=thumbv7m-none-eabi -o - %s | FileCheck %s + +declare void @foo() + +; Leaf function, no frame so no need for a frame pointer. +define void @leaf() { +; CHECK-LABEL: leaf: +; CHECK-NOT: push +; CHECK-NOT: sp +; CHECK-NOT: pop +; CHECK: bx lr + ret void +} + +; Leaf function, frame pointer is requested but we don't need any stack frame, +; so don't create a frame pointer. +define void @leaf_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: leaf_nofpelim: +; CHECK-NOT: push +; CHECK-NOT: sp +; CHECK-NOT: pop +; CHECK: bx lr + ret void +} + +; Leaf function, frame pointer is requested and we need a stack frame, so we +; need to use a frame pointer. +define void @leaf_lowreg_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: leaf_lowreg_nofpelim: +; CHECK: push {r4, r7, lr} +; CHECK: add r7, sp, #4 +; CHECK: pop {r4, r7, pc} + call void asm sideeffect "", "~{r4}" () + ret void +} + +; Leaf function, frame pointer is requested and we need a stack frame, so we +; need to use a frame pointer. A high register is pushed to the stack, so we +; must use two push/pop instructions to ensure that fp and lr are adjacent on +; the stack. +define void @leaf_highreg_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: leaf_highreg_nofpelim: +; CHECK: push {r7, lr} +; CHECK: mov r7, sp +; CHECK: str r8, [sp, #-4]!
+; CHECK: ldr r8, [sp], #4 +; CHECK: pop {r7, pc} + call void asm sideeffect "", "~{r8}" () + ret void +} + +; Leaf function, frame pointer requested for non-leaf functions only, so no +; need for a stack frame. +define void @leaf_nononleaffpelim() "no-frame-pointer-elim-non-leaf" { +; CHECK-LABEL: leaf_nononleaffpelim: +; CHECK-NOT: push +; CHECK-NOT: sp +; CHECK-NOT: pop +; CHECK: bx lr + ret void +} + +; Has a call, but still no need for a frame pointer. +define void @call() { +; CHECK-LABEL: call: +; CHECK: push {[[DUMMYREG:r[0-9]+]], lr} +; CHECK-NOT: sp +; CHECK: bl foo +; CHECK: pop {[[DUMMYREG]], pc} + call void @foo() + ret void +} + +; Has a call, and frame pointer requested. +define void @call_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: call_nofpelim: +; CHECK: push {r7, lr} +; CHECK: mov r7, sp +; CHECK: bl foo +; CHECK: pop {r7, pc} + call void @foo() + ret void +} + +; Has a call, and frame pointer requested for non-leaf function. +define void @call_nononleaffpelim() "no-frame-pointer-elim-non-leaf" { +; CHECK-LABEL: call_nononleaffpelim: +; CHECK: push {r7, lr} +; CHECK: mov r7, sp +; CHECK: bl foo +; CHECK: pop {r7, pc} + call void @foo() + ret void +} + +; Has a high register clobbered, no need for a frame pointer. +define void @highreg() { +; CHECK-LABEL: highreg: +; CHECK: push.w {r8, lr} +; CHECK-NOT: sp +; CHECK: bl foo +; CHECK: pop.w {r8, pc} + call void asm sideeffect "", "~{r8}" () + call void @foo() + ret void +} + +; Has a high register clobbered, frame pointer requested. We need to split the +; push into two, to ensure that r7 and lr are adjacent on the stack. +define void @highreg_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: highreg_nofpelim: +; CHECK: push {[[DUMMYREG:r[0-9]+]], r7, lr} +; CHECK: add r7, sp, #4 +; CHECK: str r8, [sp, #-4]!
+; CHECK: bl foo +; CHECK: ldr r8, [sp], #4 +; CHECK: pop {[[DUMMYREG]], r7, pc} + call void asm sideeffect "", "~{r8}" () + call void @foo() + ret void +} + +; Has a high register clobbered, frame required due to variable-sized alloca. +; We need a frame pointer to correctly restore the stack, but don't need to +; split the push/pop here, because the frame pointer is not required by the ABI. +define void @highreg_alloca(i32 %a) { +; CHECK-LABEL: highreg_alloca: +; CHECK: push.w {[[SOMEREGS:.*]], r7, r8, lr} +; CHECK: add r7, sp, #{{[0-9]+}} +; CHECK: bl foo +; CHECK: pop.w {[[SOMEREGS]], r7, r8, pc} + %alloca = alloca i32, i32 %a, align 4 + call void @foo() + call void asm sideeffect "", "~{r8}" () + ret void +} + +; Has a high register clobbered, frame required due to both variable-sized +; alloca and ABI. We do need to split the push/pop here. +define void @highreg_alloca_nofpelim(i32 %a) "no-frame-pointer-elim"="true" { +; CHECK-LABEL: highreg_alloca_nofpelim: +; CHECK: push {[[SOMEREGS:.*]], r7, lr} +; CHECK: add r7, sp, #{{[0-9]+}} +; CHECK: str r8, [sp, #-4]!
+; CHECK: bl foo +; CHECK: ldr r8, [sp], #4 +; CHECK: pop {[[SOMEREGS]], r7, pc} + %alloca = alloca i32, i32 %a, align 4 + call void @foo() + call void asm sideeffect "", "~{r8}" () + ret void +} Index: llvm/trunk/test/CodeGen/Thumb2/thumb2-ldm.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/thumb2-ldm.ll +++ llvm/trunk/test/CodeGen/Thumb2/thumb2-ldm.ll @@ -3,7 +3,7 @@ @X = external global [0 x i32] ; <[0 x i32]*> [#uses=5] -define i32 @t1() { +define i32 @t1() "no-frame-pointer-elim"="true" { ; ALL-LABEL: t1: ; ALL: push {r7, lr} ; CHECK: ldrd @@ -16,7 +16,7 @@ ret i32 %tmp4 } -define i32 @t2() { +define i32 @t2() "no-frame-pointer-elim"="true" { ; ALL-LABEL: t2: ; ALL: push {r7, lr} ; CHECK: ldm @@ -30,7 +30,7 @@ ret i32 %tmp6 } -define i32 @t3() { +define i32 @t3() "no-frame-pointer-elim"="true" { ; ALL-LABEL: t3: ; ALL: push {r7, lr} ; CHECK: ldm @@ -46,7 +46,7 @@ @g = common global i32* null -define void @t4(i32 %a0, i32 %a1, i32 %a2) { +define void @t4(i32 %a0, i32 %a1, i32 %a2) "no-frame-pointer-elim"="true" { ; ALL-LABEL: t4: ; ALL: stm.w sp, {r0, r1, r2} ; ALL: bl _ext Index: llvm/trunk/test/DebugInfo/ARM/PR16736.ll =================================================================== --- llvm/trunk/test/DebugInfo/ARM/PR16736.ll +++ llvm/trunk/test/DebugInfo/ARM/PR16736.ll @@ -13,7 +13,7 @@ target triple = "thumbv7-apple-ios" ; Function Attrs: nounwind -define arm_aapcscc void @_Z1hiiiif(i32, i32, i32, i32, float %x) #0 !dbg !4 { +define arm_aapcscc void @_Z1hiiiif(i32, i32, i32, i32, float %x) #0 "no-frame-pointer-elim"="true" !dbg !4 { entry: tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !12, metadata !DIExpression()), !dbg !18 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !13, metadata !DIExpression()), !dbg !18