Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -5711,6 +5711,37 @@ return 0u; } +// Return true if LR is not live at the point reached after walking the +// interval [I, E) *backward* through instructions. Used only for return +// basic blocks, which do not end with a tail call. +static bool isLRAvailable(const TargetRegisterInfo &TRI, + MachineBasicBlock::reverse_iterator I, + MachineBasicBlock::reverse_iterator E) { + // At the end of the function LR is dead. + bool Live = false; + for (; I != E; ++I) { + const MachineInstr &MI = *I; + + // Check defs of LR. + if (MI.modifiesRegister(ARM::LR, &TRI)) + Live = false; + + // Check uses of LR. + unsigned Opcode = MI.getOpcode(); + if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR || + Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET || + Opcode == ARM::tBXNS_RET) { + // These instructions use LR, but it's not an (explicit or implicit) + // operand. + Live = true; + continue; + } + if (MI.readsRegister(ARM::LR, &TRI)) + Live = true; + } + return !Live; +} + outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs) const { outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; @@ -5803,8 +5834,15 @@ for (outliner::Candidate &C : RepeatedSequenceLocs) { C.initLRU(TRI); - // Is LR available? If so, we don't need a save. - if (C.LRU.available(ARM::LR)) { + // LR liveness is overestimated in return blocks, unless they end with a + // tail call. + const auto Last = C.getMBB()->rbegin(); + const bool LRIsAvailable = + C.getMBB()->isReturnBlock() && !Last->isCall() + ? 
isLRAvailable(TRI, Last, + (MachineBasicBlock::reverse_iterator)C.front()) + : C.LRU.available(ARM::LR); + if (LRIsAvailable) { FrameID = MachineOutlinerNoLRSave; NumBytesNoStackCalls += Costs.CallNoLRSave; C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave); @@ -5915,7 +5953,13 @@ if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); })) Flags |= MachineOutlinerMBBFlags::HasCalls; - if (!LRU.available(ARM::LR)) + // LR liveness is overestimated in return blocks not ending in a tail call. + + bool LRIsAvailable = + MBB.isReturnBlock() && !MBB.back().isCall() + ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend()) + : LRU.available(ARM::LR); + if (!LRIsAvailable) Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; return true; Index: llvm/test/CodeGen/ARM/machine-outliner-return-1.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/machine-outliner-return-1.ll @@ -0,0 +1,57 @@ +; RUN: llc --verify-machineinstrs %s -o - | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-unknown-unknown-eabi" + +declare dso_local i32 @h0(i32, i32) local_unnamed_addr #1 + +define dso_local i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: + %add = add nsw i32 %a, 1 + %sub = add nsw i32 %b, -1 + %call = tail call i32 @h0(i32 %add, i32 %sub) #0 + %add1 = add nsw i32 %c, %b + %mul = shl nsw i32 %call, 1 + %add2 = add nsw i32 %mul, %add1 + %sub3 = sub nsw i32 %c, %d + %mul4 = mul nsw i32 %add2, %sub3 + %sub5 = sub nsw i32 %call, %add1 + %div = sdiv i32 %mul4, %sub5 + %add6 = add nsw i32 %d, %c + %mul7 = mul nsw i32 %div, %add6 + %add8 = add nsw i32 %mul7, 1 + ret i32 %add8 +} +; CHECK-LABEL: f: +; CHECK: bl h0 +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: pop {r4, r5, r6, pc} + + +define dso_local i32 @g(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: + %sub = add nsw i32 %a, -1 + %add = add nsw 
i32 %b, 1 + %call = tail call i32 @h0(i32 %sub, i32 %add) #0 + %add1 = add nsw i32 %c, %b + %mul = shl nsw i32 %call, 1 + %add2 = add nsw i32 %mul, %add1 + %sub3 = sub nsw i32 %c, %d + %mul4 = mul nsw i32 %add2, %sub3 + %sub5 = sub nsw i32 %call, %add1 + %div = sdiv i32 %mul4, %sub5 + %add6 = add nsw i32 %d, %c + %mul7 = mul nsw i32 %div, %add6 + %add8 = add nsw i32 %mul7, 2 + ret i32 %add8 +} +; CHECK-LABEL: g: +; CHECK: bl h0 +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: pop {r4, r5, r6, pc} + + +attributes #0 = { minsize nounwind optsize } +attributes #1 = { minsize optsize } Index: llvm/test/CodeGen/ARM/machine-outliner-return-2.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/machine-outliner-return-2.ll @@ -0,0 +1,52 @@ +; RUN: llc -verify-machineinstrs %s -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-unknown-unknown-eabi" + +declare dso_local i32 @t(i32) local_unnamed_addr #0 + +define dso_local i32 @f(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 { +entry: + %mul = mul nsw i32 %a, 3 + %add = add nsw i32 %mul, 1 + %sub = add nsw i32 %b, -1 + %div = sdiv i32 %add, %sub + %sub1 = sub nsw i32 %a, %c + %div2 = sdiv i32 %div, %sub1 + %mul3 = mul nsw i32 %div2, %b + %add4 = add nsw i32 %mul3, 1 + %call = tail call i32 @t(i32 %add4) #0 + ret i32 %call +} +; CHECK-LABEL: f: +; CHECK: str lr, [sp, #-8]! 
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: ldr lr, [sp], #8 +; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: b t + +define dso_local i32 @g(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 { +entry: + %mul = mul nsw i32 %a, 3 + %add = add nsw i32 %mul, 1 + %sub = add nsw i32 %b, -1 + %div = sdiv i32 %add, %sub + %sub1 = sub nsw i32 %a, %c + %div2 = sdiv i32 %div, %sub1 + %mul3 = mul nsw i32 %div2, %b + %add4 = add nsw i32 %mul3, 3 + %call = tail call i32 @t(i32 %add4) #0 + ret i32 %call +} + +; CHECK-LABEL: g: +; CHECK: str lr, [sp, #-8]! +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: ldr lr, [sp], #8 +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: b t + +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK-NOT: lr +; CHECK: bx lr + +attributes #0 = { minsize nounwind optsize }