Index: llvm/trunk/include/llvm/Target/TargetLowering.h
===================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h
+++ llvm/trunk/include/llvm/Target/TargetLowering.h
@@ -964,29 +964,54 @@
   /// It is called by AtomicExpandPass before expanding an
   ///   AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad.
   /// RMW and CmpXchg set both IsStore and IsLoad to true.
-  /// Backends with !getInsertFencesForAtomic() should keep a no-op here.
   /// This function should either return a nullptr, or a pointer to an IR-level
   ///   Instruction*. Even complex fence sequences can be represented by a
   ///   single Instruction* through an intrinsic to be lowered later.
+  /// Backends with !getInsertFencesForAtomic() should keep a no-op here.
+  /// Backends should override this method to produce a target-specific
+  ///   intrinsic for their fences.
+  /// FIXME: Please note that the default implementation here in terms of
+  ///   IR-level fences exists for historical/compatibility reasons and is
+  ///   *unsound*! Fences cannot, in general, be used to restore sequential
+  ///   consistency. For example, consider the following scenario:
+  /// atomic<int> x = y = 0;
+  /// int r1, r2, r3, r4;
+  /// Thread 0:
+  ///   x.store(1);
+  /// Thread 1:
+  ///   y.store(1);
+  /// Thread 2:
+  ///   r1 = x.load();
+  ///   r2 = y.load();
+  /// Thread 3:
+  ///   r3 = y.load();
+  ///   r4 = x.load();
+  /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all
+  ///   seq_cst. But if they are lowered to monotonic accesses, no amount of
+  ///   IR-level fences can prevent it.
+  /// @{
   virtual Instruction* emitLeadingFence(IRBuilder<> &Builder,
                                         AtomicOrdering Ord, bool IsStore,
                                         bool IsLoad) const {
-    assert(!getInsertFencesForAtomic());
-    return nullptr;
+    if (!getInsertFencesForAtomic())
+      return nullptr;
+
+    if (isAtLeastRelease(Ord) && IsStore)
+      return Builder.CreateFence(Ord);
+    else
+      return nullptr;
   }
-  /// Inserts in the IR a target-specific intrinsic specifying a fence.
-  /// It is called by AtomicExpandPass after expanding an
-  ///   AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad.
-  /// RMW and CmpXchg set both IsStore and IsLoad to true.
-  /// Backends with !getInsertFencesForAtomic() should keep a no-op here.
-  /// This function should either return a nullptr, or a pointer to an IR-level
-  ///   Instruction*. Even complex fence sequences can be represented by a
-  ///   single Instruction* through an intrinsic to be lowered later.
   virtual Instruction* emitTrailingFence(IRBuilder<> &Builder,
                                          AtomicOrdering Ord, bool IsStore,
                                          bool IsLoad) const {
-    assert(!getInsertFencesForAtomic());
-    return nullptr;
+    if (!getInsertFencesForAtomic())
+      return nullptr;
+
+    if (isAtLeastAcquire(Ord))
+      return Builder.CreateFence(Ord);
+    else
+      return nullptr;
   }
+  /// @}
   /// Returns true if the given (atomic) store should be expanded by the
   /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input.
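The FIXME added above describes the classic "independent reads of independent writes" (IRIW) litmus test. For readers who want to make the comment's scenario concrete, here is a minimal self-contained C++11 sketch of the same four threads; it is not part of the patch, the variable names and thread layout follow the comment, and it relies on std::atomic's default seq_cst ordering, which is what forbids the outcome r1 == r3 == 1 && r2 == r4 == 0.

#include <atomic>
#include <cstdio>
#include <thread>

// IRIW litmus test from the FIXME above. With the default seq_cst accesses,
// the outcome r1 == r3 == 1 && r2 == r4 == 0 is forbidden. If the accesses
// were demoted to relaxed (LLVM's "monotonic"), no placement of fences
// between them could rule that outcome out, which is why the default
// implementation of emitLeadingFence/emitTrailingFence is marked unsound.
std::atomic<int> x(0), y(0);
int r1, r2, r3, r4;

int main() {
  std::thread t0([] { x.store(1); });                   // Thread 0
  std::thread t1([] { y.store(1); });                   // Thread 1
  std::thread t2([] { r1 = x.load(); r2 = y.load(); }); // Thread 2
  std::thread t3([] { r3 = y.load(); r4 = x.load(); }); // Thread 3
  t0.join(); t1.join(); t2.join(); t3.join();
  std::printf("r1=%d r2=%d r3=%d r4=%d\n", r1, r2, r3, r4);
  return 0;
}

A single run obviously cannot demonstrate the guarantee; the snippet only makes the comment's point tangible: per-access fences are not a substitute for seq_cst accesses.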
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3604,30 +3604,6 @@
   DAG.setRoot(StoreNode);
 }

-static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
-                                    SynchronizationScope Scope,
-                                    bool Before, SDLoc dl,
-                                    SelectionDAG &DAG,
-                                    const TargetLowering &TLI) {
-  // Fence, if necessary
-  if (Before) {
-    if (Order == AcquireRelease || Order == SequentiallyConsistent)
-      Order = Release;
-    else if (Order == Acquire || Order == Monotonic || Order == Unordered)
-      return Chain;
-  } else {
-    if (Order == AcquireRelease)
-      Order = Acquire;
-    else if (Order == Release || Order == Monotonic || Order == Unordered)
-      return Chain;
-  }
-  SDValue Ops[3];
-  Ops[0] = Chain;
-  Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
-  Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
-  return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops);
-}
-
 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
   SDLoc dl = getCurSDLoc();
   AtomicOrdering SuccessOrder = I.getSuccessOrdering();
@@ -3636,27 +3612,16 @@

   SDValue InChain = getRoot();

-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  if (TLI.getInsertFencesForAtomic())
-    InChain =
-        InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl, DAG, TLI);
-
   MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
   SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
   SDValue L = DAG.getAtomicCmpSwap(
       ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
       getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
       getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
-      0 /* Alignment */,
-      TLI.getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
-      TLI.getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope);
+      /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope);

   SDValue OutChain = L.getValue(2);

-  if (TLI.getInsertFencesForAtomic())
-    OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl,
-                                    DAG, TLI);
-
   setValue(&I, L);
   DAG.setRoot(OutChain);
 }
@@ -3683,22 +3648,17 @@

   SDValue InChain = getRoot();

-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  if (TLI.getInsertFencesForAtomic())
-    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, DAG, TLI);
-
-  SDValue L = DAG.getAtomic(
-      NT, dl, getValue(I.getValOperand()).getSimpleValueType(), InChain,
-      getValue(I.getPointerOperand()), getValue(I.getValOperand()),
-      I.getPointerOperand(), 0 /* Alignment */,
-      TLI.getInsertFencesForAtomic() ? Monotonic : Order, Scope);
+  SDValue L =
+    DAG.getAtomic(NT, dl,
+                  getValue(I.getValOperand()).getSimpleValueType(),
+                  InChain,
+                  getValue(I.getPointerOperand()),
+                  getValue(I.getValOperand()),
+                  I.getPointerOperand(),
+                  /* Alignment=*/ 0, Order, Scope);

   SDValue OutChain = L.getValue(1);

-  if (TLI.getInsertFencesForAtomic())
-    OutChain =
-        InsertFenceForAtomic(OutChain, Order, Scope, false, dl, DAG, TLI);
-
   setValue(&I, L);
   DAG.setRoot(OutChain);
 }
@@ -3736,16 +3696,13 @@
                             DAG.getEVTAlignment(VT));

   InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
-  SDValue L = DAG.getAtomic(
-      ISD::ATOMIC_LOAD, dl, VT, VT, InChain, getValue(I.getPointerOperand()),
-      MMO, TLI.getInsertFencesForAtomic() ? Monotonic : Order, Scope);
+  SDValue L =
+      DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
+                    getValue(I.getPointerOperand()), MMO,
+                    Order, Scope);

   SDValue OutChain = L.getValue(1);

-  if (TLI.getInsertFencesForAtomic())
-    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
-                                    DAG, TLI);
-
   setValue(&I, L);
   DAG.setRoot(OutChain);
 }
@@ -3764,17 +3721,13 @@
   if (I.getAlignment() < VT.getSizeInBits() / 8)
     report_fatal_error("Cannot generate unaligned atomic store");

-  if (TLI.getInsertFencesForAtomic())
-    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, DAG, TLI);
-
-  SDValue OutChain = DAG.getAtomic(
-      ISD::ATOMIC_STORE, dl, VT, InChain, getValue(I.getPointerOperand()),
-      getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(),
-      TLI.getInsertFencesForAtomic() ? Monotonic : Order, Scope);
-
-  if (TLI.getInsertFencesForAtomic())
-    OutChain =
-        InsertFenceForAtomic(OutChain, Order, Scope, false, dl, DAG, TLI);
+  SDValue OutChain =
+    DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
+                  InChain,
+                  getValue(I.getPointerOperand()),
+                  getValue(I.getValueOperand()),
+                  I.getPointerOperand(), I.getAlignment(),
+                  Order, Scope);

   DAG.setRoot(OutChain);
 }
Index: llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp
+++ llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp
@@ -178,6 +178,7 @@

 void MipsPassConfig::addIRPasses() {
   TargetPassConfig::addIRPasses();
+  addPass(createAtomicExpandPass(&getMipsTargetMachine()));
   if (getMipsSubtarget().os16())
     addPass(createMipsOs16(getMipsTargetMachine()));
   if (getMipsSubtarget().inMips16HardFloat())
Index: llvm/trunk/lib/Target/Sparc/SparcTargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/Sparc/SparcTargetMachine.cpp
+++ llvm/trunk/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -47,6 +47,7 @@
     return getTM<SparcTargetMachine>();
   }

+  void addIRPasses() override;
   bool addInstSelector() override;
   bool addPreEmitPass() override;
 };
@@ -56,6 +57,12 @@
   return new SparcPassConfig(this, PM);
 }

+void SparcPassConfig::addIRPasses() {
+  addPass(createAtomicExpandPass(&getSparcTargetMachine()));
+
+  TargetPassConfig::addIRPasses();
+}
+
 bool SparcPassConfig::addInstSelector() {
   addPass(createSparcISelDag(getSparcTargetMachine()));
   return false;
Index: llvm/trunk/lib/Target/XCore/XCoreTargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/XCore/XCoreTargetMachine.cpp
+++ llvm/trunk/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -41,6 +41,7 @@
     return getTM<XCoreTargetMachine>();
  }

+  void addIRPasses() override;
   bool addPreISel() override;
   bool addInstSelector() override;
   bool addPreEmitPass() override;
@@ -51,6 +52,12 @@
   return new XCorePassConfig(this, PM);
 }

+void XCorePassConfig::addIRPasses() {
+  addPass(createAtomicExpandPass(&getXCoreTargetMachine()));
+
+  TargetPassConfig::addIRPasses();
+}
+
 bool XCorePassConfig::addPreISel() {
   addPass(createXCoreLowerThreadLocalPass());
   return false;
Index: llvm/trunk/test/CodeGen/XCore/atomic.ll
===================================================================
--- llvm/trunk/test/CodeGen/XCore/atomic.ll
+++ llvm/trunk/test/CodeGen/XCore/atomic.ll
@@ -22,11 +22,10 @@
 ; CHECK-LABEL: atomicloadstore

 ; CHECK: ldw r[[R0:[0-9]+]], dp[pool]
-; CHECK-NEXT: #MEMBARRIER
- %0 = load atomic i32* bitcast (i64* @pool to i32*) acquire, align 4
-
 ; CHECK-NEXT: ldaw r[[R1:[0-9]+]], dp[pool]
+; CHECK-NEXT: #MEMBARRIER
 ; CHECK-NEXT: ldc r[[R2:[0-9]+]], 0
+ %0 = load atomic i32* bitcast (i64* @pool to i32*) acquire, align 4

 ; CHECK-NEXT: ld16s r3, r[[R1]][r[[R2]]]
 ; CHECK-NEXT: #MEMBARRIER
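Finally, a note on how backends are expected to use the emitLeadingFence/emitTrailingFence hooks documented in the TargetLowering.h hunk above: targets that return true from getInsertFencesForAtomic() should eventually override them to emit a target-specific intrinsic rather than rely on the (unsound) IR-fence default. The sketch below is purely illustrative and is not part of this patch; MyTargetLowering and Intrinsic::mytarget_membar are made-up names for a hypothetical backend, while Intrinsic::getDeclaration, IRBuilder<>::CreateCall, and isAtLeastRelease are the existing APIs the default implementation already uses.

// Illustrative only, not part of this patch. "MyTargetLowering" and
// "Intrinsic::mytarget_membar" are hypothetical names. The point is that a
// single call to a target intrinsic can stand in for an arbitrary fence
// sequence and be lowered later, as the updated documentation suggests.
Instruction *MyTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                AtomicOrdering Ord,
                                                bool IsStore,
                                                bool IsLoad) const {
  if (!getInsertFencesForAtomic())
    return nullptr;
  // Assume this target only needs a barrier before release-or-stronger
  // stores; acquire loads would be handled by emitTrailingFence.
  if (!(IsStore && isAtLeastRelease(Ord)))
    return nullptr;
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Function *Barrier = Intrinsic::getDeclaration(M, Intrinsic::mytarget_membar);
  return Builder.CreateCall(Barrier);
}

The matching emitTrailingFence override would perform the symmetric isAtLeastAcquire(Ord) check for loads. Until a target provides such an override, it gets the default plain IR fences, which is what produces the #MEMBARRIER lines checked in the XCore test above.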