diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1153,6 +1153,7 @@
                                MachineBasicBlock::iterator MBBI,
                                MachineBasicBlock::iterator &NextMBBI) {
   MachineInstr &MI = *MBBI;
+  LLVM_DEBUG(dbgs() << "ARMExpandPseudo::ExpandMI: " << MI << "\n");
   unsigned Opcode = MI.getOpcode();
   switch (Opcode) {
     default:
@@ -1915,6 +1916,38 @@
 
     case ARM::CMP_SWAP_64:
       return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
+
+    case ARM::tBL_PUSHLR:
+    case ARM::BL_PUSHLR: {
+      // Both pseudos expand to "push LR, then call"; only the opcodes of the
+      // two emitted instructions differ between Thumb and ARM.
+      const bool IsThumb = Opcode == ARM::tBL_PUSHLR;
+
+      if (IsThumb) {
+        // push {lr}
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH))
+            .add(predOps(ARMCC::AL))
+            .addReg(ARM::LR);
+      } else {
+        // stmdb sp!, {lr}
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD))
+            .addReg(ARM::SP, RegState::Define)
+            .addReg(ARM::SP)
+            .add(predOps(ARMCC::AL))
+            .addReg(ARM::LR);
+      }
+
+      // Replace the pseudo instruction with the real call, forwarding all of
+      // its operands and memory references.
+      MachineInstrBuilder MIB =
+          BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                  TII->get(IsThumb ? ARM::tBL : ARM::BL));
+      MIB.cloneMemRefs(MI);
+      for (const MachineOperand &MO : MI.operands())
+        MIB.add(MO);
+      MI.eraseFromParent();
+      return true;
+    }
   }
 }
 
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -208,7 +208,7 @@
     unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
     unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
     unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
-    unsigned ARMSelectCallOp(bool UseReg);
+    unsigned ARMSelectCallOp(bool UseReg, bool PushLR = false);
     unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
 
     const TargetLowering *getTargetLowering() { return &TLI; }
@@ -2182,7 +2182,13 @@
   return true;
 }
 
-unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
+unsigned ARMFastISel::ARMSelectCallOp(bool UseReg, bool PushLR) {
+  // PushLR selects the call pseudo that pushes LR before the call.
+  // NOTE(review): this takes precedence over UseReg, yet both *_PUSHLR pseudos
+  // are declared with a direct bl-target operand; confirm no caller sets
+  // UseReg and PushLR together.
+  if (PushLR)
+    return isThumb2 ? ARM::tBL_PUSHLR : ARM::BL_PUSHLR;
   if (UseReg)
     return isThumb2 ? ARM::tBLXr : ARM::BLX;
   else
@@ -2412,7 +2418,9 @@
   }
 
   // Issue the call.
-  unsigned CallOpc = ARMSelectCallOp(UseReg);
+  // Calls to __gnu_mcount_nc use the call pseudo that pushes LR first.
+  const bool PushLR = Callee && Callee->getName() == "\01__gnu_mcount_nc";
+  unsigned CallOpc = ARMSelectCallOp(UseReg, PushLR);
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                     DbgLoc, TII.get(CallOpc));
 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -68,6 +68,7 @@
       CALL,         // Function call.
       CALL_PRED,    // Function call that's predicable.
       CALL_NOLINK,  // Function call with branch not branch-and-link.
+      CALL_PUSHLR,  // Function call that pushes LR before the call.
       BRCOND,       // Conditional branch.
       BR_JT,        // Jumptable branch.
       BR2_JT,       // Jumptable branch (2 level - jumptable entry is a jump).
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1421,6 +1421,7 @@
   case ARMISD::CALL:          return "ARMISD::CALL";
   case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
   case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
+  case ARMISD::CALL_PUSHLR:   return "ARMISD::CALL_PUSHLR";
   case ARMISD::BRCOND:        return "ARMISD::BRCOND";
   case ARMISD::BR_JT:         return "ARMISD::BR_JT";
   case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
@@ -2289,6 +2290,14 @@
       CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
   }
 
+  // Calls to __gnu_mcount_nc are lowered to the call node that pushes LR
+  // before the call.
+  if (CLI.CS) {
+    const Value *CalledValue = CLI.CS.getCalledValue();
+    if (CalledValue && CalledValue->getName() == "\01__gnu_mcount_nc")
+      CallOpc = ARMISD::CALL_PUSHLR;
+  }
+
   std::vector<SDValue> Ops;
   Ops.push_back(Chain);
   Ops.push_back(Callee);
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -148,6 +148,9 @@
 def ARMcall_nolink   : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
                               [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                SDNPVariadic]>;
+def ARMcall_pushlr   : SDNode<"ARMISD::CALL_PUSHLR", SDT_ARMcall,
+                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                               SDNPVariadic]>;
 
 def ARMretflag       : SDNode<"ARMISD::RET_FLAG", SDTNone,
                               [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -2350,6 +2353,13 @@
   def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins arm_bl_target:$func),
                                8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
                       Requires<[IsARM]>, Sched<[WriteBr]>;
+
+  // Call pseudo that pushes LR before the call; ARMExpandPseudo expands it to
+  // "stmdb sp!, {lr}" followed by "bl".
+  def BL_PUSHLR : ARMPseudoInst<(outs), (ins arm_bl_target:$func),
+                  4, IIC_Br,
+                  [(ARMcall_pushlr tglobaladdr:$func)]>,
+             Requires<[IsARM]>, Sched<[WriteBr]>;
 }
 
 let isBranch = 1, isTerminator = 1 in {
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -565,6 +565,13 @@
                   4, IIC_Br,
                   [(ARMcall_nolink tGPR:$func)]>,
             Requires<[IsThumb, IsThumb1Only]>, Sched<[WriteBr]>;
+
+  // Call pseudo that pushes LR before the call; ARMExpandPseudo expands it to
+  // "push {lr}" followed by "bl". Also used for Thumb2.
+  def tBL_PUSHLR : tPseudoInst<(outs), (ins pred:$p, thumb_bl_target:$func),
+                  4, IIC_Br,
+                  [(ARMcall_pushlr tglobaladdr:$func)]>,
+             Requires<[IsThumb]>, Sched<[WriteBr]>;
 }
 
 let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
diff --git a/llvm/test/CodeGen/ARM/gnu_mcount_nc.ll b/llvm/test/CodeGen/ARM/gnu_mcount_nc.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/gnu_mcount_nc.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=armv7a-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK-ARM
+; RUN: llc -mtriple=thumbv7a-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK-THUMB
+
+; The call to __gnu_mcount_nc must directly follow a single push of LR.
+define dso_local i32 @foo(i64) local_unnamed_addr #0 {
+; CHECK-ARM: stmdb sp!, {lr}
+; CHECK-ARM-NOT: stmdb sp!, {lr}
+; CHECK-ARM-NEXT: bl __gnu_mcount_nc
+; CHECK-THUMB: push {lr}
+; CHECK-THUMB-NOT: push {lr}
+; CHECK-THUMB-NEXT: bl __gnu_mcount_nc
+  %2 = mul nsw i64 %0, %0
+  %3 = trunc i64 %2 to i32
+  ret i32 %3
+}
+
+attributes #0 = { nofree nounwind "instrument-function-entry-inlined"="\01__gnu_mcount_nc" }