Index: llvm/include/llvm/CodeGen/MachineInstr.h =================================================================== --- llvm/include/llvm/CodeGen/MachineInstr.h +++ llvm/include/llvm/CodeGen/MachineInstr.h @@ -106,6 +106,7 @@ // known to be exact. NoFPExcept = 1 << 14, // Instruction does not raise // floatint-point exceptions. + NoMerge = 1 << 15, // Instruction should not be merged }; private: Index: llvm/include/llvm/CodeGen/SelectionDAG.h =================================================================== --- llvm/include/llvm/CodeGen/SelectionDAG.h +++ llvm/include/llvm/CodeGen/SelectionDAG.h @@ -278,6 +278,7 @@ struct CallSiteDbgInfo { CallSiteInfo CSInfo; MDNode *HeapAllocSite = nullptr; + bool NoMerge = false; }; DenseMap<const SDNode *, CallSiteDbgInfo> SDCallSiteDbgInfo; @@ -1912,6 +1913,20 @@ return It->second.HeapAllocSite; } + /// Record NoMerge for \p Node. Nothing is stored unless \p NoMerge is true. + void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge) { + if (NoMerge) + SDCallSiteDbgInfo[Node].NoMerge = NoMerge; + } + + /// Return the NoMerge value recorded for \p Node, or false if \p Node has no entry. + bool getNoMergeSiteInfo(const SDNode *Node) { + auto I = SDCallSiteDbgInfo.find(Node); + if (I == SDCallSiteDbgInfo.end()) + return false; + return I->second.NoMerge; + } + /// Return the current function's default denormal handling kind for the given /// floating point type. DenormalMode getDenormalMode(EVT VT) const { Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -3609,6 +3609,7 @@ bool IsReturnValueUsed : 1; bool IsConvergent : 1; bool IsPatchPoint : 1; + bool NoMerge : 1; // IsTailCall should be modified by implementations of // TargetLowering::LowerCall that perform tail call conversions. 
@@ -3632,7 +3633,7 @@ CallLoweringInfo(SelectionDAG &DAG) : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), - IsPatchPoint(false), DAG(DAG) {} + IsPatchPoint(false), NoMerge(false), DAG(DAG) {} CallLoweringInfo &setDebugLoc(const SDLoc &dl) { DL = dl; @@ -3681,6 +3682,7 @@ IsReturnValueUsed = !Call.use_empty(); RetSExt = Call.hasRetAttr(Attribute::SExt); RetZExt = Call.hasRetAttr(Attribute::ZExt); + NoMerge = Call.hasFnAttr(Attribute::NoMerge); Callee = Target; Index: llvm/lib/CodeGen/BranchFolding.cpp =================================================================== --- llvm/lib/CodeGen/BranchFolding.cpp +++ llvm/lib/CodeGen/BranchFolding.cpp @@ -348,6 +348,9 @@ MBBI1->isInlineAsm()) { break; } + if (MBBI1->getFlag(MachineInstr::NoMerge) || + MBBI2->getFlag(MachineInstr::NoMerge)) + break; ++TailLen; I1 = MBBI1; I2 = MBBI2; Index: llvm/lib/CodeGen/MIRPrinter.cpp =================================================================== --- llvm/lib/CodeGen/MIRPrinter.cpp +++ llvm/lib/CodeGen/MIRPrinter.cpp @@ -778,6 +778,8 @@ OS << "exact "; if (MI.getFlag(MachineInstr::NoFPExcept)) OS << "nofpexcept "; + if (MI.getFlag(MachineInstr::NoMerge)) + OS << "nomerge "; OS << TII->getName(MI.getOpcode()); if (I < E) Index: llvm/lib/CodeGen/MachineInstr.cpp =================================================================== --- llvm/lib/CodeGen/MachineInstr.cpp +++ llvm/lib/CodeGen/MachineInstr.cpp @@ -1595,6 +1595,8 @@ OS << "exact "; if (getFlag(MachineInstr::NoFPExcept)) OS << "nofpexcept "; + if (getFlag(MachineInstr::NoMerge)) + OS << "nomerge "; // Print the opcode name. 
if (TII) Index: llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -871,6 +871,10 @@ DAG->getTarget().Options.EmitCallSiteInfo) MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node)); + if (DAG->getNoMergeSiteInfo(Node)) { + MI->setFlag(MachineInstr::MIFlag::NoMerge); + } + return MI; }; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4524,6 +4524,7 @@ // Returns a chain and a flag for retval copy to use. Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops); + DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); InFlag = Chain.getValue(1); DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2500,6 +2500,7 @@ // Returns a chain and a flag for retval copy to use. 
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); + DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); InFlag = Chain.getValue(1); DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); Index: llvm/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -986,12 +986,13 @@ const bool IsPatchPoint : 1; const bool IsIndirect : 1; const bool HasNest : 1; + const bool NoMerge : 1; CallFlags(CallingConv::ID CC, bool IsTailCall, bool IsVarArg, - bool IsPatchPoint, bool IsIndirect, bool HasNest) + bool IsPatchPoint, bool IsIndirect, bool HasNest, bool NoMerge) : CallConv(CC), IsTailCall(IsTailCall), IsVarArg(IsVarArg), IsPatchPoint(IsPatchPoint), IsIndirect(IsIndirect), - HasNest(HasNest) {} + HasNest(HasNest), NoMerge(NoMerge) {} }; private: Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5574,6 +5574,7 @@ std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}}; Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops); + DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge); Glue = Chain.getValue(1); // When performing tail call optimization the callee pops its arguments off @@ -5655,7 +5656,8 @@ isIndirectCall(Callee, DAG, Subtarget, isPatchPoint), // hasNest Subtarget.is64BitELFABI() && - any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); })); + any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }), + CLI.NoMerge); if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG, Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ 
llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2353,6 +2353,7 @@ } Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); + DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); Glue = Chain.getValue(1); // Mark the end of the call, which is glued to the call itself. Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4318,6 +4318,7 @@ Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops); } else { Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops); } + DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); InFlag = Chain.getValue(1); DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); Index: llvm/test/CodeGen/AArch64/nomerge.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/nomerge.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=aarch64 -o - | FileCheck %s + +define void @foo(i32 %i) { +entry: + switch i32 %i, label %if.end3 [ + i32 5, label %if.then + i32 7, label %if.then2 + ] + +if.then: + tail call void @bar() #0 + br label %if.end3 + +if.then2: + tail call void @bar() #0 + br label %if.end3 + +if.end3: + tail call void @bar() #0 + ret void +} + +declare void @bar() + +attributes #0 = { nomerge } + +; CHECK-LABEL: foo: +; CHECK: // %bb.0: // %entry +; CHECK: // %bb.1: // %entry +; CHECK: // %bb.2: // %if.then +; CHECK: bl bar +; CHECK: b bar +; CHECK: .LBB0_3: // %if.then2 +; CHECK: bl bar +; CHECK: .LBB0_4: // %if.end3 +; CHECK: b bar Index: llvm/test/CodeGen/ARM/nomerge.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/nomerge.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=arm -o - | FileCheck %s + +define void @foo(i32 %i) { +entry: + switch i32 %i, label %if.end3 [ + i32 5, label %if.then + i32 7, label %if.then2 + ] + +if.then: + tail call void @bar() #0 + br label %if.end3 + 
+if.then2: + tail call void @bar() #0 + br label %if.end3 + +if.end3: + tail call void @bar() #0 + ret void +} + +declare void @bar() + +attributes #0 = { nomerge } + +; CHECK-LABEL: foo: +; CHECK: @ %bb.0: @ %entry +; CHECK: @ %bb.1: @ %entry +; CHECK: @ %bb.2: @ %if.then +; CHECK: bl bar +; CHECK: b bar +; CHECK: .LBB0_3: @ %if.then2 +; CHECK: bl bar +; CHECK: .LBB0_4: @ %if.end3 +; CHECK: b bar Index: llvm/test/CodeGen/PowerPC/nomerge.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/nomerge.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -mtriple=powerpc -o - | FileCheck %s + +define void @foo(i32 %i) { +entry: + switch i32 %i, label %if.end3 [ + i32 5, label %if.then + i32 7, label %if.then2 + ] + +if.then: + tail call void @bar() #0 + br label %if.end3 + +if.then2: + tail call void @bar() #0 + br label %if.end3 + +if.end3: + tail call void @bar() #0 + ret void +} + +declare void @bar() + +attributes #0 = { nomerge } + +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK: # %bb.1: # %entry +; CHECK: # %bb.2: # %if.then +; CHECK: bl bar +; CHECK: .LBB0_3: # %if.then2 +; CHECK: bl bar +; CHECK: .LBB0_4: # %if.end3 +; CHECK: bl bar Index: llvm/test/CodeGen/RISCV/nomerge.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/RISCV/nomerge.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -mtriple=riscv64 -o - | FileCheck %s + +define void @foo(i32 %i) { +entry: + switch i32 %i, label %if.end3 [ + i32 5, label %if.then + i32 7, label %if.then2 + ] + +if.then: + tail call void @bar() #0 + br label %if.end3 + +if.then2: + tail call void @bar() #0 + br label %if.end3 + +if.end3: + tail call void @bar() #0 + ret void +} + +declare void @bar() + +attributes #0 = { nomerge } + +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK: # %bb.1: # %entry +; CHECK: # %bb.2: # %if.then +; CHECK: call bar +; CHECK: .LBB0_3: # %if.then2 +; CHECK: call bar +; CHECK: .LBB0_4: # 
%if.end3 +; CHECK: tail bar Index: llvm/test/CodeGen/X86/nomerge.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/nomerge.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=x86_64 -o - | FileCheck %s + +define void @foo(i32 %i) { +entry: + switch i32 %i, label %if.end3 [ + i32 5, label %if.then + i32 7, label %if.then2 + ] + +if.then: + tail call void @bar() #0 + br label %if.end3 + +if.then2: + tail call void @bar() #0 + br label %if.end3 + +if.end3: + tail call void @bar() #0 + ret void +} + +declare void @bar() + +attributes #0 = { nomerge } + +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK: # %bb.1: # %entry +; CHECK: # %bb.2: # %if.then +; CHECK: callq bar +; CHECK: jmp bar # TAILCALL +; CHECK: .LBB0_3: # %if.then2 +; CHECK: callq bar +; CHECK: .LBB0_4: # %if.end3 +; CHECK: jmp bar # TAILCALL