Index: llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp +++ llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -611,6 +611,12 @@ unsigned TemporaryReg = 0; BitVector PopFriendly = TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID)); + // R7 may be used as a frame pointer, hence marked as not generally + // allocatable, however there's no reason to not use it as a temporary for + // restoring LR. + if (STI.useR7AsFramePointer()) + PopFriendly.set(ARM::R7); + assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); // Rebuild the GPRs from the high registers because they are removed // form the GPR reg class for thumb1. @@ -622,17 +628,20 @@ GPRsNoLRSP.reset(ARM::PC); findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg); - // If we couldn't find a pop-friendly register, restore LR before popping the - // other callee-saved registers, so we can use one of them as a temporary. + // If we couldn't find a pop-friendly register, try restoring LR before + // popping the other callee-saved registers, so we could use one of them as a + // temporary. 
bool UseLDRSP = false; if (!PopReg && MBBI != MBB.begin()) { auto PrevMBBI = MBBI; PrevMBBI--; if (PrevMBBI->getOpcode() == ARM::tPOP) { - MBBI = PrevMBBI; - UsedRegs.stepBackward(*MBBI); + UsedRegs.stepBackward(*PrevMBBI); findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg); - UseLDRSP = true; + if (PopReg) { + MBBI = PrevMBBI; + UseLDRSP = true; + } } } Index: llvm/trunk/test/CodeGen/Thumb/PR35481.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb/PR35481.ll +++ llvm/trunk/test/CodeGen/Thumb/PR35481.ll @@ -0,0 +1,25 @@ +; RUN: llc -mtriple thumbv4t-eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V4T +; RUN: llc -mtriple armv8m.base-eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V8M + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +; Function Attrs: nounwind +define <4 x i32> @f() local_unnamed_addr #0 { +entry: + %call = tail call i32 @h(i32 1) + %call1 = tail call <4 x i32> @g(i32 %call, i32 2, i32 3, i32 4) + ret <4 x i32> %call1 +; Expect LR to be reloaded through r7 before r7 is popped, then a target-specific branch. +; CHECK: ldr r7, [sp, #4] +; CHECK-NEXT: mov lr, r7 +; CHECK-NEXT: pop {r7} +; CHECK-NEXT: add sp, #4 +; CHECK-V4T: bx lr +; CHECK-V8M: b g +} + +declare <4 x i32> @g(i32, i32, i32, i32) local_unnamed_addr + +declare i32 @h(i32) local_unnamed_addr + +attributes #0 = { "disable-tail-calls"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }