diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -321,7 +321,7 @@
   if (Triple.getOS() == llvm::Triple::Linux ||
       Triple.getOS() == llvm::Triple::UnknownOS)
     this->MCountName = Opts.EABIVersion == llvm::EABI::GNU
-                           ? "\01__gnu_mcount_nc"
+                           ? "llvm.arm.gnu.eabi.mcount"
                            : "\01mcount";
 
   SoftFloatABI = llvm::is_contained(Opts.FeaturesAsWritten, "+soft-float-abi");
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -778,4 +778,9 @@
 
 def int_arm_neon_sdot : Neon_Dot_Intrinsic;
 
+// GNU EABI mcount: lowered to a call to __gnu_mcount_nc with LR pushed first.
+def int_arm_gnu_eabi_mcount : Intrinsic<[],
+                                        [],
+                                        [IntrReadMem, IntrWriteMem]>;
+
 } // end TargetPrefix
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6803,6 +6803,24 @@
     // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
     // delete it now.
     return;
+
+  case Intrinsic::arm_gnu_eabi_mcount: {
+    const auto &CI = cast<CallInst>(I);
+    SDValue Callee = getValue(CI.getCalledValue());
+    bool isTailCall = CI.isTailCall() && isInTailCallPosition(&CI, DAG.getTarget());
+
+    // Emit a call.
+    TargetLowering::CallLoweringInfo CLI(DAG);
+    CLI.setDebugLoc(sdl)
+      .setChain(getRoot())
+      .setCallee(CI.getCallingConv(), CI.getCalledFunction()->getReturnType(), Callee, {})
+      .setTailCall(isTailCall)
+      .setConvergent(CI.isConvergent());
+
+    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+    DAG.setRoot(Result.second);
+    return;
+  }
   }
 }
 
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1153,6 +1153,7 @@
                                MachineBasicBlock::iterator MBBI,
                                MachineBasicBlock::iterator &NextMBBI) {
   MachineInstr &MI = *MBBI;
+  LLVM_DEBUG(dbgs() << "ARMExpandPseudo::ExpandMI: " << MI << "\n");
   unsigned Opcode = MI.getOpcode();
   switch (Opcode) {
     default:
@@ -1915,6 +1916,45 @@
 
     case ARM::CMP_SWAP_64:
       return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
+
+    case ARM::tBL_PUSHLR:
+    case ARM::BL_PUSHLR: {
+      const bool Thumb = Opcode == ARM::tBL_PUSHLR;
+      MachineInstrBuilder MIB;
+      if (Thumb) {
+        // Insert push {lr} before the call
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH))
+            .add(predOps(ARMCC::AL))
+            .addReg(ARM::LR);
+
+        // Replace the pseudo instruction with a call instruction
+        MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                      TII->get(ARM::tBL));
+      } else {
+        // Insert stmdb sp!, {lr}
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD))
+            .addReg(ARM::SP, RegState::Define)
+            .addReg(ARM::SP)
+            .add(predOps(ARMCC::AL))
+            .addReg(ARM::LR);
+
+        // Replace the pseudo instruction with a call instruction
+        MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                      TII->get(ARM::BL));
+      }
+      MIB.cloneMemRefs(MI);
+      for (const MachineOperand &MO : MI.operands()) {
+        if (MO.isGlobal())
+          if (const GlobalValue *GV = MO.getGlobal())
+            if (GV->getName() == "llvm.arm.gnu.eabi.mcount") {
+              MIB.addExternalSymbol("\01__gnu_mcount_nc");
+              continue;
+            }
+        MIB.add(MO);
+      }
+      MI.eraseFromParent();
+      return true;
+    }
   }
 }
 
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -177,7 +177,7 @@
     bool SelectFPToI(const Instruction *I, bool isSigned);
     bool SelectDiv(const Instruction *I, bool isSigned);
     bool SelectRem(const Instruction *I, bool isSigned);
-    bool SelectCall(const Instruction *I, const char *IntrMemName);
+    bool SelectCall(const Instruction *I, const char *IntrinsicName);
     bool SelectIntrinsicCall(const IntrinsicInst &I);
     bool SelectSelect(const Instruction *I);
     bool SelectRet(const Instruction *I);
@@ -208,7 +208,7 @@
     unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
     unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
     unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
-    unsigned ARMSelectCallOp(bool UseReg);
+    unsigned ARMSelectCallOp(bool UseReg, bool PushLR);
     unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
 
     const TargetLowering *getTargetLowering() { return &TLI; }
@@ -2182,7 +2182,9 @@
   return true;
 }
 
-unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
+unsigned ARMFastISel::ARMSelectCallOp(bool UseReg, bool PushLR) {
+  if (PushLR)
+    return isThumb2 ? ARM::tBL_PUSHLR : ARM::BL_PUSHLR;
   if (UseReg)
     return isThumb2 ? ARM::tBLXr : ARM::BLX;
   else
@@ -2270,7 +2272,7 @@
   }
 
   // Issue the call.
-  unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls());
+  unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls(), false);
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                     DbgLoc, TII.get(CallOpc));
   // BL / BLX don't take a predicate, but tBL / tBLX do.
@@ -2300,7 +2302,7 @@
 }
 
 bool ARMFastISel::SelectCall(const Instruction *I,
-                             const char *IntrMemName = nullptr) {
+                             const char *IntrinsicName = nullptr) {
   const CallInst *CI = cast<CallInst>(I);
   const Value *Callee = CI->getCalledValue();
 
@@ -2352,7 +2354,7 @@
        i != e; ++i) {
     // If we're lowering a memory intrinsic instead of a regular call, skip the
     // last argument, which shouldn't be passed to the underlying function.
-    if (IntrMemName && e - i <= 1)
+    if (IntrinsicName && e - i <= 1)
       break;
 
     ISD::ArgFlagsTy Flags;
@@ -2403,8 +2405,8 @@
 
   unsigned CalleeReg = 0;
   if (UseReg) {
-    if (IntrMemName)
-      CalleeReg = getLibcallReg(IntrMemName);
+    if (IntrinsicName)
+      CalleeReg = getLibcallReg(IntrinsicName);
     else
       CalleeReg = getRegForValue(Callee);
 
@@ -2412,7 +2414,8 @@
   }
 
   // Issue the call.
-  unsigned CallOpc = ARMSelectCallOp(UseReg);
+  bool PushLR = IntrinsicName && !strcmp(IntrinsicName, "\01__gnu_mcount_nc");
+  unsigned CallOpc = ARMSelectCallOp(UseReg, PushLR);
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                     DbgLoc, TII.get(CallOpc));
 
@@ -2421,10 +2424,10 @@
     MIB.add(predOps(ARMCC::AL));
   if (UseReg)
     MIB.addReg(CalleeReg);
-  else if (!IntrMemName)
+  else if (!IntrinsicName)
     MIB.addGlobalAddress(GV, 0, 0);
   else
-    MIB.addExternalSymbol(IntrMemName, 0);
+    MIB.addExternalSymbol(IntrinsicName, 0);
 
   // Add implicit physical register uses to the call.
   for (unsigned R : RegArgs)
@@ -2558,8 +2561,8 @@
     if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
       return false;
 
-    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
-    return SelectCall(&I, IntrMemName);
+    const char *IntrinsicName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
+    return SelectCall(&I, IntrinsicName);
   }
   case Intrinsic::memset: {
     const MemSetInst &MSI = cast<MemSetInst>(I);
@@ -2580,6 +2583,8 @@
       Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
     return true;
   }
+  case Intrinsic::arm_gnu_eabi_mcount:
+    return SelectCall(&I, "\01__gnu_mcount_nc");
   }
 }
 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -68,6 +68,7 @@
       CALL,         // Function call.
       CALL_PRED,    // Function call that's predicable.
       CALL_NOLINK,  // Function call with branch not branch-and-link.
+      CALL_PUSHLR,  // Function call that pushes LR before the call.
       BRCOND,       // Conditional branch.
       BR_JT,        // Jumptable branch.
       BR2_JT,       // Jumptable branch (2 level - jumptable entry is a jump).
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1421,6 +1421,7 @@
   case ARMISD::CALL:          return "ARMISD::CALL";
   case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
   case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
+  case ARMISD::CALL_PUSHLR:   return "ARMISD::CALL_PUSHLR";
   case ARMISD::BRCOND:        return "ARMISD::BRCOND";
   case ARMISD::BR_JT:         return "ARMISD::BR_JT";
   case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
@@ -2289,6 +2290,11 @@
       CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
   }
 
+  if (auto *GA = dyn_cast<GlobalAddressSDNode>(Callee))
+    if (const GlobalValue *GV = GA->getGlobal())
+      if (Subtarget->isTargetGNUAEABI() && GV->getName() == "llvm.arm.gnu.eabi.mcount")
+        CallOpc = ARMISD::CALL_PUSHLR;
+
   std::vector<SDValue> Ops;
   Ops.push_back(Chain);
   Ops.push_back(Callee);
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -148,6 +148,9 @@
 def ARMcall_nolink   : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
                               [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                SDNPVariadic]>;
+def ARMcall_pushlr   : SDNode<"ARMISD::CALL_PUSHLR", SDT_ARMcall,
+                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                               SDNPVariadic]>;
 
 def ARMretflag       : SDNode<"ARMISD::RET_FLAG", SDTNone,
                               [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -2350,6 +2353,12 @@
   def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins arm_bl_target:$func),
                    8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
                    Requires<[IsARM]>, Sched<[WriteBr]>;
+
+  // push lr before the call
+  def BL_PUSHLR : ARMPseudoInst<(outs), (ins arm_bl_target:$func),
+                  4, IIC_Br,
+                  [(ARMcall_pushlr tglobaladdr:$func)]>,
+                  Requires<[IsARM]>, Sched<[WriteBr]>;
 }
 
 let isBranch = 1, isTerminator = 1 in {
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -565,6 +565,13 @@
                    4, IIC_Br,
                    [(ARMcall_nolink tGPR:$func)]>,
             Requires<[IsThumb, IsThumb1Only]>, Sched<[WriteBr]>;
+
+  // Also used for Thumb2
+  // push lr before the call
+  def tBL_PUSHLR : tPseudoInst<(outs), (ins pred:$p, thumb_bl_target:$func),
+                  4, IIC_Br,
+                  [(ARMcall_pushlr tglobaladdr:$func)]>,
+                  Requires<[IsThumb]>, Sched<[WriteBr]>;
 }
 
 let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
--- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -24,7 +24,7 @@
 
   if (Func == "mcount" ||
       Func == ".mcount" ||
-      Func == "\01__gnu_mcount_nc" ||
+      Func == "llvm.arm.gnu.eabi.mcount" ||
       Func == "\01_mcount" ||
       Func == "\01mcount" ||
       Func == "__mcount" ||
diff --git a/llvm/test/CodeGen/ARM/gnu_mcount_nc.ll b/llvm/test/CodeGen/ARM/gnu_mcount_nc.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/gnu_mcount_nc.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=armv7a-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK-ARM
+; RUN: llc -mtriple=thumbv7a-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK-THUMB
+
+define dso_local void @callee() #0 {
+; CHECK-ARM: stmdb sp!, {lr}
+; CHECK-ARM-NEXT: bl __gnu_mcount_nc
+; CHECK-THUMB: push {lr}
+; CHECK-THUMB-NEXT: bl __gnu_mcount_nc
+  ret void
+}
+
+define dso_local void @caller() #0 {
+; CHECK-ARM: stmdb sp!, {lr}
+; CHECK-ARM-NEXT: bl __gnu_mcount_nc
+; CHECK-THUMB: push {lr}
+; CHECK-THUMB-NEXT: bl __gnu_mcount_nc
+  call void @callee()
+  ret void
+}
+
+attributes #0 = { nofree nounwind "instrument-function-entry-inlined"="llvm.arm.gnu.eabi.mcount" }