diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1320,6 +1320,7 @@ case TargetOpcode::LIFETIME_START: case TargetOpcode::LIFETIME_END: case TargetOpcode::PSEUDO_PROBE: + case TargetOpcode::COMPILER_BARRIER: return true; } } diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1466,6 +1466,10 @@ SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg); + /// A convenience function for creating TargetOpcode::COMPILER_BARRIER nodes. + SDValue getCompilerBarrier(const SDLoc &DL, AtomicOrdering Ordering, + SyncScope::ID Scope, SDValue Chain); + /// Get the specified node if it's already available, or else return NULL. SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops, const SDNodeFlags Flags); diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -217,6 +217,11 @@ HANDLE_TARGET_OPCODE(ICALL_BRANCH_FUNNEL) +// This is a fence with the singlethread scope. It has two operands: the +// ordering requested and the sync scope. This instruction has `mayLoad` and +// `mayStore`, so that memory operations are not moved around it. +HANDLE_TARGET_OPCODE(COMPILER_BARRIER) + /// The following generic opcodes are not supposed to appear after ISel. /// This is something we might want to relax, but for now, this is convenient /// to produce diagnostics. 
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1318,6 +1318,13 @@ let AsmString = ""; let hasSideEffects = true; } +def COMPILER_BARRIER : StandardPseudoInstruction { + let OutOperandList = (outs); + let InOperandList = (ins i32imm:$ordering, i32imm:$scope); + let AsmString = ""; + let hasSideEffects = true; + let Size = 0; +} // Generic opcodes used in GlobalISel. include "llvm/Target/GenericOpcodes.td" diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -899,6 +899,24 @@ OutStreamer->AddBlankLine(); } +static void emitCompilerBarrierComment(const MachineInstr *MI, AsmPrinter &AP) { + auto Ordering = static_cast<AtomicOrdering>(MI->getOperand(0).getImm()); + auto Scope = static_cast<SyncScope::ID>(MI->getOperand(1).getImm()); + + SmallVector<StringRef> SSNames; + LLVMContext &Ctx = AP.MMI->getModule()->getContext(); + Ctx.getSyncScopeNames(SSNames); + + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << " Compiler Barrier: " << toIRString(Ordering); + if (Scope != SyncScope::System) { + OS << " syncscope(" << SSNames[Scope] << ")"; + } + + AP.OutStreamer->emitRawComment(OS.str()); +} + static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { std::string Str; raw_string_ostream OS(Str); @@ -1332,6 +1350,9 @@ case TargetOpcode::PSEUDO_PROBE: emitPseudoProbe(MI); break; + case TargetOpcode::COMPILER_BARRIER: + emitCompilerBarrierComment(&MI, *this); + break; default: emitInstruction(&MI); if (CanDoExtraAnalysis) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8651,6 +8651,16 @@ return SDValue(Result, 0); } +SDValue SelectionDAG::getCompilerBarrier(const 
SDLoc &DL, + AtomicOrdering Ordering, + SyncScope::ID Scope, SDValue Chain) { + SDValue OrderingVal = getTargetConstant((uint64_t)Ordering, DL, MVT::i32); + SDValue ScopeVal = getTargetConstant((uint64_t)Scope, DL, MVT::i32); + SDNode *Result = getMachineNode(TargetOpcode::COMPILER_BARRIER, DL, + MVT::Other, {OrderingVal, ScopeVal, Chain}); + return SDValue(Result, 0); +} + /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td --- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td @@ -13,9 +13,8 @@ //===---------------------------------- // Atomic fences //===---------------------------------- -let AddedComplexity = 15, Size = 0 in -def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering), - [(atomic_fence timm:$ordering, 0)]>, Sched<[]>; +let AddedComplexity = 15 in +def : Pat<(atomic_fence (timm:$ordering), 0), (COMPILER_BARRIER i32:$ordering, (i32 0))>; def : Pat<(atomic_fence (i64 4), (timm)), (DMB (i32 0x9))>; def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -284,12 +284,6 @@ } } - if (Opcode == AArch64::CompilerBarrier) { - O << '\t' << MAI.getCommentString() << " COMPILER BARRIER"; - printAnnotation(O, Annot); - return; - } - if (Opcode == AArch64::SPACE) { O << '\t' << MAI.getCommentString() << " SPACE " << MI->getOperand(1).getImm(); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp --- 
a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -608,9 +608,7 @@ return; } - if (MI.getOpcode() == AArch64::CompilerBarrier || - MI.getOpcode() == AArch64::SPACE) { - // CompilerBarrier just prevents the compiler from reordering accesses, and + if (MI.getOpcode() == AArch64::SPACE) { // SPACE just increases basic block size, in both cases no actual code. return; } diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -6431,10 +6431,4 @@ NoItinerary, []>, Sched<[]>; } -def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary, - [(atomic_fence timm:$ordering, 0)]> { - let hasSideEffects = 1; - let Size = 0; - let AsmString = "@ COMPILER BARRIER"; - let hasNoSchedulingInfo = 1; -} +def : Pat<(atomic_fence (timm:$ordering), 0), (COMPILER_BARRIER i32:$ordering, (i32 0))>; diff --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td --- a/llvm/lib/Target/ARM/ARMScheduleA57.td +++ b/llvm/lib/Target/ARM/ARMScheduleA57.td @@ -119,8 +119,7 @@ "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$", "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE", "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "(t2|t)?UDF$", "t2DCPS", "t2SG", - "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier", - "t__brkdiv0")>; + "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "t__brkdiv0")>; def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>; @@ -1495,4 +1494,3 @@ def : ReadAdvance; } // SchedModel = CortexA57Model - diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1146,6 +1146,9 @@ // Refer to Table A.6 in the version 2.3 draft of the RISC-V Instruction Set // 
Manual: Volume I. +// Scope of 0 is for singlethread fences, which are not generated into instructions. +def : Pat<(atomic_fence (timm:$ordering), 0), (COMPILER_BARRIER i32:$ordering, (i32 0))>; + // fence acquire -> fence r, rw def : Pat<(atomic_fence (XLenVT 4), (timm)), (FENCE 0b10, 0b11)>; // fence release -> fence rw, w diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -494,11 +494,6 @@ .addImm(15).addReg(SystemZ::R0D); break; - // Emit nothing here but a comment if we can. - case SystemZ::MemBarrier: - OutStreamer->emitRawComment("MEMBARRIER"); - return; - // We want to emit "j .+2" for traps, jumping to the relative immediate field // of the jump instruction, which is an illegal instruction. We cannot emit a // "." symbol, so create and emit a temp label before the instruction and use diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -151,9 +151,6 @@ // Store the CC value in bits 29 and 28 of an integer. IPM, - // Compiler barrier only; generate a no-op. - MEMBARRIER, - // Transaction begin. The first operand is the chain, the second // the TDB pointer, and the third the immediate control field. // Returns CC value and chain. diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -3910,8 +3910,7 @@ 0); } - // MEMBARRIER is a compiler barrier; it codegens to a no-op. - return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); + return DAG.getCompilerBarrier(DL, FenceOrdering, FenceSSID, Op.getOperand(0)); } // Op is an atomic load. 
Lower it into a normal volatile load. @@ -5647,7 +5646,6 @@ OPCODE(STRCMP); OPCODE(SEARCH_STRING); OPCODE(IPM); - OPCODE(MEMBARRIER); OPCODE(TBEGIN); OPCODE(TBEGIN_NOFLOAT); OPCODE(TEND); diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1690,10 +1690,6 @@ let hasSideEffects = 1 in def Serialize : Alias<2, (outs), (ins), []>; -// A pseudo instruction that serves as a compiler barrier. -let hasSideEffects = 1, hasNoSchedulingInfo = 1 in -def MemBarrier : Pseudo<(outs), (ins), [(z_membarrier)]>; - let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in { def LAA : LoadAndOpRSY<"laa", 0xEBF8, atomic_load_add_32, GR32>; def LAAG : LoadAndOpRSY<"laag", 0xEBE8, atomic_load_add_64, GR64>; diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -286,9 +286,6 @@ def z_addcarry_1 : SDNode<"SystemZISD::ADDCARRY", SDT_ZBinaryWithCarry>; def z_subcarry_1 : SDNode<"SystemZISD::SUBCARRY", SDT_ZBinaryWithCarry>; -def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone, - [SDNPHasChain, SDNPSideEffect]>; - def z_loadbswap : SDNode<"SystemZISD::LRV", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def z_storebswap : SDNode<"SystemZISD::STRV", SDTStore, diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -35,7 +35,6 @@ GLOBAL_BASE_REG, // Global base reg for PIC. Hi, // Hi/Lo operations, typically on a global address. Lo, // Hi/Lo operations, typically on a global address. - MEMBARRIER, // Compiler barrier only; generate a no-op. RET_FLAG, // Return with a flag operand. TS1AM, // A TS1AM instruction used for 1/2 bytes swap. 
VEC_BROADCAST, // A vector broadcast instruction. diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -893,7 +893,6 @@ TARGET_NODE_CASE(GLOBAL_BASE_REG) TARGET_NODE_CASE(Hi) TARGET_NODE_CASE(Lo) - TARGET_NODE_CASE(MEMBARRIER) TARGET_NODE_CASE(RET_FLAG) TARGET_NODE_CASE(TS1AM) TARGET_NODE_CASE(VEC_BROADCAST) @@ -1078,8 +1077,7 @@ } } - // MEMBARRIER is a compiler barrier; it codegens to a no-op. - return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); + return DAG.getCompilerBarrier(DL, FenceOrdering, FenceSSID, Op.getOperand(0)); } TargetLowering::AtomicExpansionKind diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -469,10 +469,6 @@ def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone, [SDNPHasChain, SDNPSideEffect]>; -// MEMBARRIER -def MemBarrier : SDNode<"VEISD::MEMBARRIER", SDTNone, - [SDNPHasChain, SDNPSideEffect]>; - // TS1AM def SDT_TS1AM : SDTypeProfile<1, 3, [SDTCisSameAs<0, 3>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisInt<3>]>; @@ -2014,10 +2010,6 @@ "# GET STACK TOP", [(set iPTR:$dst, (GetStackTop))]>; -// MEMBARRIER -let hasSideEffects = 1 in -def MEMBARRIER : Pseudo<(outs), (ins), "# MEMBARRIER", [(MemBarrier)] >; - //===----------------------------------------------------------------------===// // Other patterns //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -644,7 +644,6 @@ PROBED_ALLOCA, // Memory barriers. - MEMBARRIER, MFENCE, // Get a random integer and indicate whether it is valid in CF. 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29200,8 +29200,8 @@ return emitLockedStackOp(DAG, Subtarget, Chain, dl); } - // MEMBARRIER is a compiler barrier; it codegens to a no-op. - return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); + // COMPILER_BARRIER codegens to a 0-byte instruction. + return DAG.getCompilerBarrier(dl, FenceOrdering, FenceSSID, Op.getOperand(0)); } static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget, @@ -29745,8 +29745,9 @@ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), DAG.getUNDEF(VT), NewChain); } - // MEMBARRIER is a compiler barrier; it codegens to a no-op. - SDValue NewChain = DAG.getNode(X86ISD::MEMBARRIER, DL, MVT::Other, Chain); + + // COMPILER_BARRIER is a compiler barrier; it codegens to a no-op. + SDValue NewChain = DAG.getCompilerBarrier(DL, AN->getOrdering(), AN->getSyncScopeID(), Chain); assert(!N->hasAnyUseOfValue(0)); // NOTE: The getUNDEF is needed to give something for the unused result 0. 
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), @@ -31498,7 +31499,6 @@ NODE_NAME_CASE(VAARG_64) NODE_NAME_CASE(VAARG_X32) NODE_NAME_CASE(WIN_ALLOCA) - NODE_NAME_CASE(MEMBARRIER) NODE_NAME_CASE(MFENCE) NODE_NAME_CASE(SEG_ALLOCA) NODE_NAME_CASE(PROBED_ALLOCA) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -656,11 +656,6 @@ Requires<[Not64BitMode]>, OpSize32, LOCK, Sched<[WriteALURMW]>; -let hasSideEffects = 1 in -def Int_MemBarrier : I<0, Pseudo, (outs), (ins), - "#MEMBARRIER", - [(X86MemBarrier)]>, Sched<[WriteLoad]>; - // RegOpc corresponds to the mr version of the instruction // ImmOpc corresponds to the mi version of the instruction // ImmOpc8 corresponds to the mi8 version of the instruction diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -146,6 +146,7 @@ case TargetOpcode::COPY: case TargetOpcode::INSERT_SUBREG: case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::COMPILER_BARRIER: return true; // On x86 it is believed that imul is constant time w.r.t. the loaded data. @@ -8953,7 +8954,7 @@ // FIXME: x86 doesn't implement getInstSizeInBytes, so // we can't tell the cost. Just assume each instruction // is one byte. 
- if (MI.isDebugInstr() || MI.isKill()) + if (MI.isMetaInstruction()) return Sum; return Sum + 1; }); diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -122,7 +122,7 @@ def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; -def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>; +def SDT_X86MFENCE : SDTypeProfile<0, 0, []>; def SDT_X86ENQCMD : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisSameAs<1, 2>]>; @@ -132,9 +132,7 @@ SDTCisVT<2, v2i64>, SDTCisPtrTy<3>]>; -def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER, - [SDNPHasChain,SDNPSideEffect]>; -def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER, +def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MFENCE, [SDNPHasChain]>; diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -2386,11 +2386,6 @@ case TargetOpcode::DBG_VALUE: llvm_unreachable("Should be handled target independently"); - // Emit nothing here but a comment if we can. - case X86::Int_MemBarrier: - OutStreamer->emitRawComment("MEMBARRIER"); - return; - case X86::EH_RETURN: case X86::EH_RETURN64: { // Lower these as normal, but add some comments. diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.h b/llvm/lib/Target/XCore/XCoreISelLowering.h --- a/llvm/lib/Target/XCore/XCoreISelLowering.h +++ b/llvm/lib/Target/XCore/XCoreISelLowering.h @@ -79,9 +79,6 @@ // Exception handler return. The stack is restored to the first // followed by a jump to the second argument. EH_RETURN, - - // Memory barrier. 
- MEMBARRIER }; } @@ -183,7 +180,6 @@ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp --- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -64,7 +64,6 @@ case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32"; case XCoreISD::FRAME_TO_ARGS_OFFSET : return "XCoreISD::FRAME_TO_ARGS_OFFSET"; case XCoreISD::EH_RETURN : return "XCoreISD::EH_RETURN"; - case XCoreISD::MEMBARRIER : return "XCoreISD::MEMBARRIER"; } return nullptr; } @@ -150,8 +149,9 @@ // Atomic operations // We request a fence for ATOMIC_* instructions, to reduce them to Monotonic. - // As we are always Sequential Consistent, an ATOMIC_FENCE becomes a no OP. - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + // As we are always Sequential Consistent, an ATOMIC_FENCE becomes a compiler + // barrier. 
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Legal); setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); @@ -218,7 +218,6 @@ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); case ISD::ATOMIC_LOAD: return LowerATOMIC_LOAD(Op, DAG); case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG); default: @@ -927,12 +926,6 @@ return SDValue(); } -SDValue XCoreTargetLowering:: -LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - return DAG.getNode(XCoreISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); -} - SDValue XCoreTargetLowering:: LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { AtomicSDNode *N = cast<AtomicSDNode>(Op); diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.td b/llvm/lib/Target/XCore/XCoreInstrInfo.td --- a/llvm/lib/Target/XCore/XCoreInstrInfo.td +++ b/llvm/lib/Target/XCore/XCoreInstrInfo.td @@ -82,11 +82,6 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_XCoreCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def SDT_XCoreMEMBARRIER : SDTypeProfile<0, 0, []>; - -def XCoreMemBarrier : SDNode<"XCoreISD::MEMBARRIER", SDT_XCoreMEMBARRIER, - [SDNPHasChain]>; - //===----------------------------------------------------------------------===// // Instruction Pattern Stuff //===----------------------------------------------------------------------===// @@ -363,9 +358,10 @@ (select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>; } -let hasSideEffects = 1 in -def Int_MemBarrier : PseudoInstXCore<(outs), (ins), "#MEMBARRIER", - [(XCoreMemBarrier)]>; +// As we are always Sequential Consistent, an ATOMIC_FENCE becomes a compiler +// barrier (no instruction). 
+def : Pat<(atomic_fence (timm:$ordering), (timm:$scope)), + (COMPILER_BARRIER i32:$ordering, i32:$scope)>; //===----------------------------------------------------------------------===// // Instructions diff --git a/llvm/test/CodeGen/AArch64/fence-singlethread.ll b/llvm/test/CodeGen/AArch64/fence-singlethread.ll --- a/llvm/test/CodeGen/AArch64/fence-singlethread.ll +++ b/llvm/test/CodeGen/AArch64/fence-singlethread.ll @@ -8,12 +8,12 @@ define void @fence_singlethread() { ; LINUX-LABEL: fence_singlethread: ; LINUX-NOT: dmb -; LINUX: // COMPILER BARRIER +; LINUX: // Compiler Barrier: seq_cst syncscope(singlethread) ; LINUX-NOT: dmb ; IOS-LABEL: fence_singlethread: ; IOS-NOT: dmb -; IOS: ; COMPILER BARRIER +; IOS: ; Compiler Barrier: seq_cst syncscope(singlethread) ; IOS-NOT: dmb fence syncscope("singlethread") seq_cst diff --git a/llvm/test/CodeGen/ARM/fence-singlethread.ll b/llvm/test/CodeGen/ARM/fence-singlethread.ll --- a/llvm/test/CodeGen/ARM/fence-singlethread.ll +++ b/llvm/test/CodeGen/ARM/fence-singlethread.ll @@ -8,7 +8,7 @@ define void @fence_singlethread() { ; CHECK-LABEL: fence_singlethread: ; CHECK-NOT: dmb -; CHECK: @ COMPILER BARRIER +; CHECK: @ Compiler Barrier: seq_cst syncscope(singlethread) ; CHECK-NOT: dmb fence syncscope("singlethread") seq_cst diff --git a/llvm/test/CodeGen/RISCV/atomic-fence.ll b/llvm/test/CodeGen/RISCV/atomic-fence.ll --- a/llvm/test/CodeGen/RISCV/atomic-fence.ll +++ b/llvm/test/CodeGen/RISCV/atomic-fence.ll @@ -63,3 +63,59 @@ fence seq_cst ret void } + +define void @fence_st_acquire() nounwind { +; RV32I-LABEL: fence_st_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: # Compiler Barrier: acquire syncscope(singlethread) +; RV32I-NEXT: ret +; +; RV64I-LABEL: fence_st_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: # Compiler Barrier: acquire syncscope(singlethread) +; RV64I-NEXT: ret + fence syncscope("singlethread") acquire + ret void +} + +define void @fence_st_release() nounwind { +; RV32I-LABEL: fence_st_release: +; 
RV32I: # %bb.0: +; RV32I-NEXT: # Compiler Barrier: release syncscope(singlethread) +; RV32I-NEXT: ret +; +; RV64I-LABEL: fence_st_release: +; RV64I: # %bb.0: +; RV64I-NEXT: # Compiler Barrier: release syncscope(singlethread) +; RV64I-NEXT: ret + fence syncscope("singlethread") release + ret void +} + +define void @fence_st_acq_rel() nounwind { +; RV32I-LABEL: fence_st_acq_rel: +; RV32I: # %bb.0: +; RV32I-NEXT: # Compiler Barrier: acq_rel syncscope(singlethread) +; RV32I-NEXT: ret +; +; RV64I-LABEL: fence_st_acq_rel: +; RV64I: # %bb.0: +; RV64I-NEXT: # Compiler Barrier: acq_rel syncscope(singlethread) +; RV64I-NEXT: ret + fence syncscope("singlethread") acq_rel + ret void +} + +define void @fence_st_seq_cst() nounwind { +; RV32I-LABEL: fence_st_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: # Compiler Barrier: seq_cst syncscope(singlethread) +; RV32I-NEXT: ret +; +; RV64I-LABEL: fence_st_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: # Compiler Barrier: seq_cst syncscope(singlethread) +; RV64I-NEXT: ret + fence syncscope("singlethread") seq_cst + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/atomic-fence-02.ll b/llvm/test/CodeGen/SystemZ/atomic-fence-02.ll --- a/llvm/test/CodeGen/SystemZ/atomic-fence-02.ll +++ b/llvm/test/CodeGen/SystemZ/atomic-fence-02.ll @@ -3,11 +3,11 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s define void @test() { -; CHECK: #MEMBARRIER +; CHECK: # Compiler Barrier: acquire fence acquire -; CHECK: #MEMBARRIER +; CHECK: # Compiler Barrier: release fence release -; CHECK: #MEMBARRIER +; CHECK: # Compiler Barrier: acq_rel fence acq_rel ret void } diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll --- a/llvm/test/CodeGen/X86/atomic-idempotent.ll +++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll @@ -342,17 +342,19 @@ define void @or32_nouse_monotonic(i32* %p) { ; X64-LABEL: or32_nouse_monotonic: ; X64: # %bb.0: -; X64-NEXT: #MEMBARRIER +; X64-NEXT: # Compiler Barrier: monotonic ; X64-NEXT: 
retq ; ; X86-GENERIC-LABEL: or32_nouse_monotonic: ; X86-GENERIC: # %bb.0: -; X86-GENERIC-NEXT: #MEMBARRIER +; X86-GENERIC-NEXT: # Compiler Barrier: monotonic ; X86-GENERIC-NEXT: retl ; ; X86-ATOM-LABEL: or32_nouse_monotonic: ; X86-ATOM: # %bb.0: -; X86-ATOM-NEXT: #MEMBARRIER +; X86-ATOM-NEXT: # Compiler Barrier: monotonic +; X86-ATOM-NEXT: nop +; X86-ATOM-NEXT: nop ; X86-ATOM-NEXT: nop ; X86-ATOM-NEXT: nop ; X86-ATOM-NEXT: nop @@ -368,17 +370,19 @@ define void @or32_nouse_acquire(i32* %p) { ; X64-LABEL: or32_nouse_acquire: ; X64: # %bb.0: -; X64-NEXT: #MEMBARRIER +; X64-NEXT: # Compiler Barrier: acquire ; X64-NEXT: retq ; ; X86-GENERIC-LABEL: or32_nouse_acquire: ; X86-GENERIC: # %bb.0: -; X86-GENERIC-NEXT: #MEMBARRIER +; X86-GENERIC-NEXT: # Compiler Barrier: acquire ; X86-GENERIC-NEXT: retl ; ; X86-ATOM-LABEL: or32_nouse_acquire: ; X86-ATOM: # %bb.0: -; X86-ATOM-NEXT: #MEMBARRIER +; X86-ATOM-NEXT: # Compiler Barrier: acquire +; X86-ATOM-NEXT: nop +; X86-ATOM-NEXT: nop ; X86-ATOM-NEXT: nop ; X86-ATOM-NEXT: nop ; X86-ATOM-NEXT: nop @@ -393,17 +397,19 @@ define void @or32_nouse_release(i32* %p) { ; X64-LABEL: or32_nouse_release: ; X64: # %bb.0: -; X64-NEXT: #MEMBARRIER +; X64-NEXT: # Compiler Barrier: release ; X64-NEXT: retq ; ; X86-GENERIC-LABEL: or32_nouse_release: ; X86-GENERIC: # %bb.0: -; X86-GENERIC-NEXT: #MEMBARRIER +; X86-GENERIC-NEXT: # Compiler Barrier: release ; X86-GENERIC-NEXT: retl ; ; X86-ATOM-LABEL: or32_nouse_release: ; X86-ATOM: # %bb.0: -; X86-ATOM-NEXT: #MEMBARRIER +; X86-ATOM-NEXT: # Compiler Barrier: release +; X86-ATOM-NEXT: nop +; X86-ATOM-NEXT: nop ; X86-ATOM-NEXT: nop ; X86-ATOM-NEXT: nop ; X86-ATOM-NEXT: nop @@ -418,17 +424,19 @@ define void @or32_nouse_acq_rel(i32* %p) { ; X64-LABEL: or32_nouse_acq_rel: ; X64: # %bb.0: -; X64-NEXT: #MEMBARRIER +; X64-NEXT: # Compiler Barrier: acq_rel ; X64-NEXT: retq ; ; X86-GENERIC-LABEL: or32_nouse_acq_rel: ; X86-GENERIC: # %bb.0: -; X86-GENERIC-NEXT: #MEMBARRIER +; X86-GENERIC-NEXT: # Compiler Barrier: 
acq_rel ; X86-GENERIC-NEXT: retl ; ; X86-ATOM-LABEL: or32_nouse_acq_rel: ; X86-ATOM: # %bb.0: -; X86-ATOM-NEXT: #MEMBARRIER +; X86-ATOM-NEXT: # Compiler Barrier: acq_rel +; X86-ATOM-NEXT: nop +; X86-ATOM-NEXT: nop ; X86-ATOM-NEXT: nop ; X86-ATOM-NEXT: nop ; X86-ATOM-NEXT: nop diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll --- a/llvm/test/CodeGen/X86/atomic-unordered.ll +++ b/llvm/test/CodeGen/X86/atomic-unordered.ll @@ -2342,7 +2342,7 @@ ; CHECK-LABEL: nofold_fence_acquire: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: acquire ; CHECK-NEXT: addq $15, %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 @@ -2356,7 +2356,7 @@ ; CHECK-LABEL: nofold_stfence: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: seq_cst syncscope(singlethread) ; CHECK-NEXT: addq $15, %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 diff --git a/llvm/test/CodeGen/X86/barrier-sse.ll b/llvm/test/CodeGen/X86/barrier-sse.ll --- a/llvm/test/CodeGen/X86/barrier-sse.ll +++ b/llvm/test/CodeGen/X86/barrier-sse.ll @@ -1,14 +1,15 @@ ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s define void @test() { +; CHECK-LABEL: test fence acquire - ; CHECK: #MEMBARRIER + ; CHECK: ## Compiler Barrier: acquire fence release - ; CHECK: #MEMBARRIER + ; CHECK: ## Compiler Barrier: release fence acq_rel - ; CHECK: #MEMBARRIER + ; CHECK: ## Compiler Barrier: acq_rel ret void } diff --git a/llvm/test/CodeGen/X86/implicit-null-check.ll b/llvm/test/CodeGen/X86/implicit-null-check.ll --- a/llvm/test/CodeGen/X86/implicit-null-check.ll +++ b/llvm/test/CodeGen/X86/implicit-null-check.ll @@ -441,7 +441,7 @@ ; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: je LBB16_1 ; CHECK-NEXT: ## %bb.2: ## %not_null -; CHECK-NEXT: ##MEMBARRIER +; CHECK-NEXT: ## Compiler Barrier: acquire ; CHECK-NEXT: 
movl (%rdi), %eax ; CHECK-NEXT: retq ; CHECK-NEXT: LBB16_1: ## %is_null diff --git a/llvm/test/CodeGen/XCore/atomic.ll b/llvm/test/CodeGen/XCore/atomic.ll --- a/llvm/test/CodeGen/XCore/atomic.ll +++ b/llvm/test/CodeGen/XCore/atomic.ll @@ -1,10 +1,10 @@ ; RUN: llc < %s -march=xcore | FileCheck %s ; CHECK-LABEL: atomic_fence -; CHECK: #MEMBARRIER -; CHECK: #MEMBARRIER -; CHECK: #MEMBARRIER -; CHECK: #MEMBARRIER +; CHECK: # Compiler Barrier: acquire +; CHECK: # Compiler Barrier: release +; CHECK: # Compiler Barrier: acq_rel +; CHECK: # Compiler Barrier: seq_cst ; CHECK: retsp 0 define void @atomic_fence() nounwind { entry: @@ -23,55 +23,55 @@ ; CHECK: ldw r[[R0:[0-9]+]], dp[pool] ; CHECK-NEXT: ldaw r[[R1:[0-9]+]], dp[pool] -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: ; CHECK-NEXT: ldc r[[R2:[0-9]+]], 0 %0 = load atomic i32, i32* bitcast (i64* @pool to i32*) acquire, align 4 ; CHECK-NEXT: ld16s r3, r[[R1]][r[[R2]]] -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: %1 = load atomic i16, i16* bitcast (i64* @pool to i16*) acquire, align 2 ; CHECK-NEXT: ld8u r11, r[[R1]][r[[R2]]] -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: %2 = load atomic i8, i8* bitcast (i64* @pool to i8*) acquire, align 1 ; CHECK-NEXT: ldw r4, dp[pool] -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: %3 = load atomic i32, i32* bitcast (i64* @pool to i32*) seq_cst, align 4 ; CHECK-NEXT: ld16s r5, r[[R1]][r[[R2]]] -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: %4 = load atomic i16, i16* bitcast (i64* @pool to i16*) seq_cst, align 2 ; CHECK-NEXT: ld8u r6, r[[R1]][r[[R2]]] -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: %5 = load atomic i8, i8* bitcast (i64* @pool to i8*) seq_cst, align 1 -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: ; CHECK-NEXT: stw r[[R0]], dp[pool] store atomic i32 %0, i32* bitcast (i64* @pool to i32*) release, align 4 -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: ; 
CHECK-NEXT: st16 r3, r[[R1]][r[[R2]]] store atomic i16 %1, i16* bitcast (i64* @pool to i16*) release, align 2 -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: ; CHECK-NEXT: st8 r11, r[[R1]][r[[R2]]] store atomic i8 %2, i8* bitcast (i64* @pool to i8*) release, align 1 -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: ; CHECK-NEXT: stw r4, dp[pool] -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: store atomic i32 %3, i32* bitcast (i64* @pool to i32*) seq_cst, align 4 -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: ; CHECK-NEXT: st16 r5, r[[R1]][r[[R2]]] -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: store atomic i16 %4, i16* bitcast (i64* @pool to i16*) seq_cst, align 2 -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: ; CHECK-NEXT: st8 r6, r[[R1]][r[[R2]]] -; CHECK-NEXT: #MEMBARRIER +; CHECK-NEXT: # Compiler Barrier: store atomic i8 %5, i8* bitcast (i64* @pool to i8*) seq_cst, align 1 ; CHECK-NEXT: ldw r[[R0]], dp[pool]