[ARM] Route GNU EABI mcount instrumentation through an intrinsic that pushes LR

Overview of the hunks below:

  * clang/lib/Basic/Targets/ARM.cpp
      For Linux / unknown-OS triples using the GNU EABI, MCountName becomes
      "llvm.arm.gnu.eabi.mcount" instead of "\01__gnu_mcount_nc".

  * llvm/include/llvm/IR/IntrinsicsARM.td
      Declares int_arm_gnu_eabi_mcount: no results, no operands, marked as
      reading and writing memory.

  * llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
      The list of recognised mcount names now contains
      "llvm.arm.gnu.eabi.mcount" in place of "\01__gnu_mcount_nc".

  * llvm/lib/Target/ARM/ARMISelLowering.{h,cpp}
      INTRINSIC_VOID is custom-lowered.  LowerINTRINSIC_VOID turns the mcount
      intrinsic into a BL_PUSHLR (ARM) or tBL_PUSHLR (Thumb) machine node whose
      operands are LR (added as a function live-in), the external symbol
      "\01__gnu_mcount_nc", the C calling-convention register mask and the
      chain.  Every other intrinsic returns SDValue() and is not custom-lowered.

  * llvm/lib/Target/ARM/ARMInstrInfo.td, ARMInstrThumb.td
      Define the BL_PUSHLR and tBL_PUSHLR pseudo instructions (no patterns).

  * llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
      Expands the pseudos into "stmdb sp!, {lr}" (ARM) or "push {lr}" (Thumb)
      followed by a BL / tBL that receives operands 1..N of the pseudo
      (operand 0 is the LR register and is asserted to be ARM::LR).

  * llvm/test/CodeGen/ARM/gnu_mcount_nc.ll
      Checks the push + bl sequence for ARM and Thumb under SelectionDAG,
      FastISel and GlobalISel (with -global-isel-abort=2).

diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -321,7 +321,7 @@
   if (Triple.getOS() == llvm::Triple::Linux ||
       Triple.getOS() == llvm::Triple::UnknownOS)
     this->MCountName = Opts.EABIVersion == llvm::EABI::GNU
-                           ? "\01__gnu_mcount_nc"
+                           ? "llvm.arm.gnu.eabi.mcount"
                            : "\01mcount";
 
   SoftFloatABI = llvm::is_contained(Opts.FeaturesAsWritten, "+soft-float-abi");
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -778,4 +778,9 @@
 
 def int_arm_neon_sdot : Neon_Dot_Intrinsic;
 
+// GNU eabi mcount
+def int_arm_gnu_eabi_mcount : Intrinsic<[],
+                                    [],
+                                    [IntrReadMem, IntrWriteMem]>;
+
 } // end TargetPrefix
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1915,6 +1915,37 @@
 
     case ARM::CMP_SWAP_64:
       return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
+
+    case ARM::tBL_PUSHLR:
+    case ARM::BL_PUSHLR: {
+      const bool Thumb = Opcode == ARM::tBL_PUSHLR;
+      Register Reg = MI.getOperand(0).getReg();
+      assert(Reg == ARM::LR && "expect LR register!");
+      MachineInstrBuilder MIB;
+      if (Thumb) {
+        // push {lr}
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH))
+            .add(predOps(ARMCC::AL))
+            .addReg(Reg);
+
+        // bl __gnu_mcount_nc
+        MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL));
+      } else {
+        // stmdb sp!, {lr}
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD))
+            .addReg(ARM::SP, RegState::Define)
+            .addReg(ARM::SP)
+            .add(predOps(ARMCC::AL))
+            .addReg(Reg);
+
+        // bl __gnu_mcount_nc
+        MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL));
+      }
+      MIB.cloneMemRefs(MI);
+      for (unsigned i = 1; i < MI.getNumOperands(); ++i) MIB.add(MI.getOperand(i));
+      MI.eraseFromParent();
+      return true;
+    }
   }
 }
 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -666,6 +666,8 @@
     SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG,
+                                const ARMSubtarget *Subtarget) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                     const ARMSubtarget *Subtarget) const;
     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -955,6 +955,7 @@
   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
   setOperationAction(ISD::SRL, MVT::i64, Custom);
   setOperationAction(ISD::SRA, MVT::i64, Custom);
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
 
   // MVE lowers 64 bit shifts to lsll and lsrl
@@ -3477,6 +3478,48 @@
                      Op.getOperand(0));
 }
 
+SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
+    SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
+  unsigned IntNo =
+      cast<ConstantSDNode>(
+          Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other))
+          ->getZExtValue();
+  switch (IntNo) {
+  default:
+    return SDValue(); // Don't custom lower most intrinsics.
+  case Intrinsic::arm_gnu_eabi_mcount: {
+    MachineFunction &MF = DAG.getMachineFunction();
+    EVT PtrVT = getPointerTy(DAG.getDataLayout());
+    SDLoc dl(Op);
+    SDValue Chain = Op.getOperand(0);
+    // call "\01__gnu_mcount_nc"
+    const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
+    const uint32_t *Mask =
+        ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
+    assert(Mask && "Missing call preserved mask for calling convention");
+    // Mark LR an implicit live-in.
+    unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
+    SDValue ReturnAddress =
+        DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
+    std::vector<EVT> ResultTys = {MVT::Other, MVT::Glue};
+    SDValue Callee =
+        DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
+    SDValue RegisterMask = DAG.getRegisterMask(Mask);
+    if (Subtarget->isThumb())
+      return SDValue(
+          DAG.getMachineNode(
+              ARM::tBL_PUSHLR, dl, ResultTys,
+              {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
+               DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
+          0);
+    return SDValue(
+        DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
+                           {ReturnAddress, Callee, RegisterMask, Chain}),
+        0);
+  }
+  }
+}
+
 SDValue
 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                           const ARMSubtarget *Subtarget) const {
@@ -8331,6 +8374,7 @@
   case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
   case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
   case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
+  case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
                                                                Subtarget);
   case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -2350,6 +2350,12 @@
   def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins arm_bl_target:$func),
                    8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
                    Requires<[IsARM]>, Sched<[WriteBr]>;
+
+  // push lr before the call
+  def BL_PUSHLR : ARMPseudoInst<(outs), (ins GPRlr:$ra, arm_bl_target:$func),
+                  4, IIC_Br,
+                  []>,
+             Requires<[IsARM]>, Sched<[WriteBr]>;
 }
 
 let isBranch = 1, isTerminator = 1 in {
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -565,6 +565,13 @@
                   4, IIC_Br,
                   [(ARMcall_nolink tGPR:$func)]>,
             Requires<[IsThumb, IsThumb1Only]>, Sched<[WriteBr]>;
+
+  // Also used for Thumb2
+  // push lr before the call
+  def tBL_PUSHLR : tPseudoInst<(outs), (ins GPRlr:$ra, pred:$p, thumb_bl_target:$func),
+                  4, IIC_Br,
+                  []>,
+             Requires<[IsThumb]>, Sched<[WriteBr]>;
 }
 
 let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
--- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -24,7 +24,7 @@
 
   if (Func == "mcount" ||
       Func == ".mcount" ||
-      Func == "\01__gnu_mcount_nc" ||
+      Func == "llvm.arm.gnu.eabi.mcount" ||
       Func == "\01_mcount" ||
       Func == "\01mcount" ||
       Func == "__mcount" ||
diff --git a/llvm/test/CodeGen/ARM/gnu_mcount_nc.ll b/llvm/test/CodeGen/ARM/gnu_mcount_nc.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/gnu_mcount_nc.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=armv7a-linux-gnueabihf -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-ARM
+; RUN: llc -mtriple=armv7a-linux-gnueabihf -verify-machineinstrs -fast-isel %s -o - | FileCheck %s --check-prefix=CHECK-ARM-FAST-ISEL
+; RUN: llc -mtriple=armv7a-linux-gnueabihf -verify-machineinstrs -global-isel -global-isel-abort=2 %s -o - | FileCheck %s --check-prefix=CHECK-ARM-GLOBAL-ISEL
+; RUN: llc -mtriple=thumbv7a-linux-gnueabihf -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-THUMB
+; RUN: llc -mtriple=thumbv7a-linux-gnueabihf -verify-machineinstrs -fast-isel %s -o - | FileCheck %s --check-prefix=CHECK-THUMB-FAST-ISEL
+; RUN: llc -mtriple=thumbv7a-linux-gnueabihf -verify-machineinstrs -global-isel -global-isel-abort=2 %s -o - | FileCheck %s --check-prefix=CHECK-THUMB-GLOBAL-ISEL
+
+define dso_local void @callee() #0 {
+; CHECK-ARM: stmdb sp!, {lr}
+; CHECK-ARM-NEXT: bl __gnu_mcount_nc
+; CHECK-ARM-FAST-ISEL: stmdb sp!, {lr}
+; CHECK-ARM-FAST-ISEL-NEXT: bl __gnu_mcount_nc
+; CHECK-ARM-GLOBAL-ISEL: stmdb sp!, {lr}
+; CHECK-ARM-GLOBAL-ISEL-NEXT: bl __gnu_mcount_nc
+; CHECK-THUMB: push {lr}
+; CHECK-THUMB-NEXT: bl __gnu_mcount_nc
+; CHECK-THUMB-FAST-ISEL: push {lr}
+; CHECK-THUMB-FAST-ISEL-NEXT: bl __gnu_mcount_nc
+; CHECK-THUMB-GLOBAL-ISEL: push {lr}
+; CHECK-THUMB-GLOBAL-ISEL-NEXT: bl __gnu_mcount_nc
+  ret void
+}
+
+define dso_local void @caller() #0 {
+; CHECK-ARM: stmdb sp!, {lr}
+; CHECK-ARM-NEXT: bl __gnu_mcount_nc
+; CHECK-ARM-FAST-ISEL: stmdb sp!, {lr}
+; CHECK-ARM-FAST-ISEL-NEXT: bl __gnu_mcount_nc
+; CHECK-ARM-GLOBAL-ISEL: stmdb sp!, {lr}
+; CHECK-ARM-GLOBAL-ISEL-NEXT: bl __gnu_mcount_nc
+; CHECK-THUMB: push {lr}
+; CHECK-THUMB-NEXT: bl __gnu_mcount_nc
+; CHECK-THUMB-FAST-ISEL: push {lr}
+; CHECK-THUMB-FAST-ISEL-NEXT: bl __gnu_mcount_nc
+; CHECK-THUMB-GLOBAL-ISEL: push {lr}
+; CHECK-THUMB-GLOBAL-ISEL-NEXT: bl __gnu_mcount_nc
+  call void @callee()
+  ret void
+}
+
+attributes #0 = { nofree nounwind "instrument-function-entry-inlined"="llvm.arm.gnu.eabi.mcount" }