Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -967,28 +967,51 @@
   /// It is called by AtomicExpandPass before expanding an
   /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad.
   /// RMW and CmpXchg set both IsStore and IsLoad to true.
-  /// Backends with !getInsertFencesForAtomic() should keep a no-op here.
   /// This function should either return a nullptr, or a pointer to an IR-level
   /// Instruction*. Even complex fence sequences can be represented by a
   /// single Instruction* through an intrinsic to be lowered later.
+  /// Backends with !getInsertFencesForAtomic() should keep a no-op here.
+  /// Backends should override this method to produce a target-specific
+  /// intrinsic for their fences.
+  /// FIXME: Please note that the default implementation here in terms of
+  /// IR-level fences exists for historical/compatibility reasons and is
+  /// *unsound*! Fences cannot, in general, be used to restore sequential
+  /// consistency. For example, consider the following:
+  ///   atomic x = y = 0;
+  ///   int r1, r2, r3, r4;
+  ///   Thread 0:
+  ///     x.store(1);
+  ///   Thread 1:
+  ///     y.store(1);
+  ///   Thread 2:
+  ///     r1 = x.load();
+  ///     r2 = y.load();
+  ///   Thread 3:
+  ///     r3 = y.load();
+  ///     r4 = x.load();
+  /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all
+  /// seq_cst. But if they are lowered to monotonic accesses, no amount of
+  /// IR-level fences can prevent it.
   virtual Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
           bool IsStore, bool IsLoad) const {
-    assert(!getInsertFencesForAtomic());
-    return nullptr;
+    if (!getInsertFencesForAtomic())
+      return nullptr;
+
+    if (isAtLeastRelease(Ord) && IsStore)
+      return Builder.CreateFence(Ord);
+    else
+      return nullptr;
   }
 
-  /// Inserts in the IR a target-specific intrinsic specifying a fence.
-  /// It is called by AtomicExpandPass after expanding an
-  /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad.
-  /// RMW and CmpXchg set both IsStore and IsLoad to true.
-  /// Backends with !getInsertFencesForAtomic() should keep a no-op here.
-  /// This function should either return a nullptr, or a pointer to an IR-level
-  /// Instruction*. Even complex fence sequences can be represented by a
-  /// single Instruction* through an intrinsic to be lowered later.
   virtual Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
           bool IsStore, bool IsLoad) const {
-    assert(!getInsertFencesForAtomic());
-    return nullptr;
+    if (!getInsertFencesForAtomic())
+      return nullptr;
+
+    if (isAtLeastAcquire(Ord))
+      return Builder.CreateFence(Ord);
+    else
+      return nullptr;
   }
 
   /// Returns true if the given (atomic) store should be expanded by the
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3640,30 +3640,6 @@
   DAG.setRoot(StoreNode);
 }
 
-static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
-                                    SynchronizationScope Scope,
-                                    bool Before, SDLoc dl,
-                                    SelectionDAG &DAG,
-                                    const TargetLowering &TLI) {
-  // Fence, if necessary
-  if (Before) {
-    if (Order == AcquireRelease || Order == SequentiallyConsistent)
-      Order = Release;
-    else if (Order == Acquire || Order == Monotonic || Order == Unordered)
-      return Chain;
-  } else {
-    if (Order == AcquireRelease)
-      Order = Acquire;
-    else if (Order == Release || Order == Monotonic || Order == Unordered)
-      return Chain;
-  }
-  SDValue Ops[3];
-  Ops[0] = Chain;
-  Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
-  Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
-  return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops);
-}
-
 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
   SDLoc dl = getCurSDLoc();
   AtomicOrdering SuccessOrder = I.getSuccessOrdering();
@@ -3672,11 +3648,6 @@
 
   SDValue InChain = getRoot();
 
-  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
-  if (TLI->getInsertFencesForAtomic())
-    InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl,
-                                   DAG, *TLI);
-
   MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
   SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
   SDValue L = DAG.getAtomicCmpSwap(
@@ -3684,15 +3655,10 @@
       getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
       getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
       0 /* Alignment */,
-      TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
-      TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope);
+      SuccessOrder, FailureOrder, Scope);
 
   SDValue OutChain = L.getValue(2);
 
-  if (TLI->getInsertFencesForAtomic())
-    OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl,
-                                    DAG, *TLI);
-
   setValue(&I, L);
   DAG.setRoot(OutChain);
 }
@@ -3719,11 +3685,6 @@
 
   SDValue InChain = getRoot();
 
-  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
-  if (TLI->getInsertFencesForAtomic())
-    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
-                                   DAG, *TLI);
-
   SDValue L =
     DAG.getAtomic(NT, dl,
                   getValue(I.getValOperand()).getSimpleValueType(),
@@ -3731,15 +3692,10 @@
                   InChain,
                   getValue(I.getPointerOperand()),
                   getValue(I.getValOperand()),
                   I.getPointerOperand(), 0 /* Alignment */,
-                  TLI->getInsertFencesForAtomic() ? Monotonic : Order,
-                  Scope);
+                  Order, Scope);
 
   SDValue OutChain = L.getValue(1);
 
-  if (TLI->getInsertFencesForAtomic())
-    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
-                                    DAG, *TLI);
-
   setValue(&I, L);
   DAG.setRoot(OutChain);
 }
@@ -3780,15 +3736,10 @@
   SDValue L =
       DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
                     getValue(I.getPointerOperand()), MMO,
-                    TLI->getInsertFencesForAtomic() ? Monotonic : Order,
-                    Scope);
+                    Order, Scope);
 
   SDValue OutChain = L.getValue(1);
 
-  if (TLI->getInsertFencesForAtomic())
-    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
-                                    DAG, *TLI);
-
   setValue(&I, L);
   DAG.setRoot(OutChain);
 }
@@ -3807,22 +3758,13 @@
   if (I.getAlignment() < VT.getSizeInBits() / 8)
     report_fatal_error("Cannot generate unaligned atomic store");
 
-  if (TLI->getInsertFencesForAtomic())
-    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
-                                   DAG, *TLI);
-
   SDValue OutChain =
     DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
                   InChain,
                   getValue(I.getPointerOperand()),
                   getValue(I.getValueOperand()),
                   I.getPointerOperand(), I.getAlignment(),
-                  TLI->getInsertFencesForAtomic() ? Monotonic : Order,
-                  Scope);
-
-  if (TLI->getInsertFencesForAtomic())
-    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
-                                    DAG, *TLI);
+                  Order, Scope);
 
   DAG.setRoot(OutChain);
 }
Index: lib/Target/Mips/MipsTargetMachine.cpp
===================================================================
--- lib/Target/Mips/MipsTargetMachine.cpp
+++ lib/Target/Mips/MipsTargetMachine.cpp
@@ -138,6 +138,7 @@
 
 void MipsPassConfig::addIRPasses() {
   TargetPassConfig::addIRPasses();
+  addPass(createAtomicExpandPass(&getMipsTargetMachine()));
   if (getMipsSubtarget().os16())
     addPass(createMipsOs16(getMipsTargetMachine()));
   if (getMipsSubtarget().inMips16HardFloat())
Index: lib/Target/Sparc/SparcTargetMachine.cpp
===================================================================
--- lib/Target/Sparc/SparcTargetMachine.cpp
+++ lib/Target/Sparc/SparcTargetMachine.cpp
@@ -47,6 +47,7 @@
     return getTM<SparcTargetMachine>();
   }
 
+  void addIRPasses() override;
   bool addInstSelector() override;
   bool addPreEmitPass() override;
 };
@@ -56,6 +57,12 @@
   return new SparcPassConfig(this, PM);
 }
 
+void SparcPassConfig::addIRPasses() {
+  addPass(createAtomicExpandPass(&getSparcTargetMachine()));
+
+  TargetPassConfig::addIRPasses();
+}
+
 bool SparcPassConfig::addInstSelector() {
   addPass(createSparcISelDag(getSparcTargetMachine()));
   return false;
Index: lib/Target/XCore/XCoreTargetMachine.cpp
===================================================================
--- lib/Target/XCore/XCoreTargetMachine.cpp
+++ lib/Target/XCore/XCoreTargetMachine.cpp
@@ -41,6 +41,7 @@
     return getTM<XCoreTargetMachine>();
   }
 
+  void addIRPasses() override;
   bool addPreISel() override;
   bool addInstSelector() override;
   bool addPreEmitPass() override;
@@ -51,6 +52,12 @@
   return new XCorePassConfig(this, PM);
 }
 
+void XCorePassConfig::addIRPasses() {
+  addPass(createAtomicExpandPass(&getXCoreTargetMachine()));
+
+  TargetPassConfig::addIRPasses();
+}
+
 bool XCorePassConfig::addPreISel() {
   addPass(createXCoreLowerThreadLocalPass());
   return false;
Index: test/CodeGen/XCore/atomic.ll
===================================================================
--- test/CodeGen/XCore/atomic.ll
+++ test/CodeGen/XCore/atomic.ll
@@ -22,11 +22,10 @@
 ; CHECK-LABEL: atomicloadstore
 
 ; CHECK: ldw r[[R0:[0-9]+]], dp[pool]
-; CHECK-NEXT: #MEMBARRIER
-  %0 = load atomic i32* bitcast (i64* @pool to i32*) acquire, align 4
-
 ; CHECK-NEXT: ldaw r[[R1:[0-9]+]], dp[pool]
+; CHECK-NEXT: #MEMBARRIER
 ; CHECK-NEXT: ldc r[[R2:[0-9]+]], 0
+  %0 = load atomic i32* bitcast (i64* @pool to i32*) acquire, align 4
 
 ; CHECK-NEXT: ld16s r3, r[[R1]][r[[R2]]]
 ; CHECK-NEXT: #MEMBARRIER
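
Not part of the diff above, only an illustrative sketch: the new doc comment says backends should override emitLeadingFence/emitTrailingFence to emit a target-specific intrinsic rather than rely on the (unsound, as documented) IR-fence default. A weakly-ordered backend that keeps getInsertFencesForAtomic() set might do something along these lines; "MyTarget", MyTargetTargetLowering, and Intrinsic::mytarget_membar are placeholder names, not existing LLVM entities, and the usual backend headers (IRBuilder.h, Intrinsics.h) are assumed.

  // Hypothetical override in MyTargetISelLowering.cpp; the intrinsic is a
  // stand-in for whatever barrier instruction the target actually provides.
  Instruction *MyTargetTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                        AtomicOrdering Ord,
                                                        bool IsStore,
                                                        bool IsLoad) const {
    if (!getInsertFencesForAtomic())
      return nullptr;
    // A leading barrier is only needed before releasing stores.
    if (isAtLeastRelease(Ord) && IsStore) {
      Module *M = Builder.GetInsertBlock()->getParent()->getParent();
      return Builder.CreateCall(
          Intrinsic::getDeclaration(M, Intrinsic::mytarget_membar));
    }
    return nullptr;
  }

  Instruction *MyTargetTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                         AtomicOrdering Ord,
                                                         bool IsStore,
                                                         bool IsLoad) const {
    if (!getInsertFencesForAtomic())
      return nullptr;
    // A trailing barrier is only needed after acquiring operations.
    if (isAtLeastAcquire(Ord)) {
      Module *M = Builder.GetInsertBlock()->getParent()->getParent();
      return Builder.CreateCall(
          Intrinsic::getDeclaration(M, Intrinsic::mytarget_membar));
    }
    return nullptr;
  }

AtomicExpandPass invokes the leading hook just before and the trailing hook just after the atomic operation it expands, so the intrinsic calls end up bracketing the now-monotonic access; the target then lowers the intrinsic to its native barrier during instruction selection.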