Index: lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- lib/CodeGen/AtomicExpandPass.cpp
+++ lib/CodeGen/AtomicExpandPass.cpp
@@ -44,6 +44,8 @@
     bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
                                bool IsStore, bool IsLoad);
     bool expandAtomicLoad(LoadInst *LI);
+    bool expandAtomicLoadToLL(LoadInst *LI);
+    bool expandAtomicLoadToCmpXchg(LoadInst *LI);
     bool expandAtomicStore(StoreInst *SI);
     bool expandAtomicRMW(AtomicRMWInst *AI);
     bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
@@ -160,6 +162,15 @@
 }
 
 bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
+  if (TM->getSubtargetImpl()
+          ->getTargetLowering()
+          ->hasLoadLinkedStoreConditional())
+    return expandAtomicLoadToLL(LI);
+  else
+    return expandAtomicLoadToCmpXchg(LI);
+}
+
+bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
   auto TLI = TM->getSubtargetImpl()->getTargetLowering();
   IRBuilder<> Builder(LI);
 
@@ -174,6 +185,24 @@
   return true;
 }
 
+bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
+  IRBuilder<> Builder(LI);
+  AtomicOrdering Order = LI->getOrdering();
+  Value *Addr = LI->getPointerOperand();
+  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
+  Constant *DummyVal = Constant::getNullValue(Ty);
+
+  Value *Pair = Builder.CreateAtomicCmpXchg(
+      Addr, DummyVal, DummyVal, Order,
+      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
+  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
+
+  LI->replaceAllUsesWith(Loaded);
+  LI->eraseFromParent();
+
+  return true;
+}
+
 bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
   // This function is only called on atomic stores that are too large to be
   // atomic if implemented as a native store. So we replace them by an
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -17182,8 +17182,11 @@
   return needsCmpXchgNb(SI->getValueOperand()->getType());
 }
 
-bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *SI) const {
-  return false; // FIXME, currently these are expanded separately in this file.
+// Note: this turns large loads into lock cmpxchg8b/16b.
+// FIXME: On 32-bit x86, fild/movq might be faster than lock cmpxchg8b.
+bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+  auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
+  return needsCmpXchgNb(PTy->getElementType());
 }
 
 bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
@@ -17560,29 +17563,6 @@
   }
 }
 
-static void ReplaceATOMIC_LOAD(SDNode *Node,
-                               SmallVectorImpl<SDValue> &Results,
-                               SelectionDAG &DAG) {
-  SDLoc dl(Node);
-  EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
-
-  // Convert wide load -> cmpxchg8b/cmpxchg16b
-  // FIXME: On 32-bit, load -> fild or movq would be more efficient
-  //        (The only way to get a 16-byte load is cmpxchg16b)
-  // FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment.
-  SDValue Zero = DAG.getConstant(0, VT);
-  SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
-  SDValue Swap =
-      DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, VT, VTs,
-                           Node->getOperand(0), Node->getOperand(1), Zero, Zero,
-                           cast<AtomicSDNode>(Node)->getMemOperand(),
-                           cast<AtomicSDNode>(Node)->getOrdering(),
-                           cast<AtomicSDNode>(Node)->getOrdering(),
-                           cast<AtomicSDNode>(Node)->getSynchScope());
-  Results.push_back(Swap.getValue(0));
-  Results.push_back(Swap.getValue(2));
-}
-
 /// ReplaceNodeResults - Replace a node with an illegal result type
 /// with a new node built out of custom code.
 void X86TargetLowering::ReplaceNodeResults(SDNode *N,
@@ -17741,12 +17721,10 @@
   case ISD::ATOMIC_LOAD_MAX:
   case ISD::ATOMIC_LOAD_UMIN:
   case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_LOAD: {
     // Delegate to generic TypeLegalization. Situations we can really handle
     // should have already been dealt with by AtomicExpandPass.cpp.
     break;
-  case ISD::ATOMIC_LOAD: {
-    ReplaceATOMIC_LOAD(N, Results, DAG);
-    return;
   }
   case ISD::BITCAST: {
     assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
Index: test/CodeGen/X86/atomic-load-store-wide.ll
===================================================================
--- test/CodeGen/X86/atomic-load-store-wide.ll
+++ test/CodeGen/X86/atomic-load-store-wide.ll
@@ -4,16 +4,18 @@
 ; FIXME: The generated code can be substantially improved.
 
 define void @test1(i64* %ptr, i64 %val1) {
-; CHECK: test1
-; CHECK: cmpxchg8b
+; CHECK-LABEL: test1
+; CHECK: lock
+; CHECK-NEXT: cmpxchg8b
 ; CHECK-NEXT: jne
   store atomic i64 %val1, i64* %ptr seq_cst, align 8
   ret void
 }
 
 define i64 @test2(i64* %ptr) {
-; CHECK: test2
-; CHECK: cmpxchg8b
+; CHECK-LABEL: test2
+; CHECK: lock
+; CHECK-NEXT: cmpxchg8b
   %val = load atomic i64* %ptr seq_cst, align 8
   ret i64 %val
 }
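
For reference, a minimal sketch of the IR rewrite that the new expandAtomicLoadToCmpXchg path is expected to perform on the seq_cst i64 load in test2 above. This is not part of the patch: the %pair name and the @test2.expanded wrapper are made up for illustration, and the syntax assumes this revision's typed-pointer IR.

; Input: a wide atomic load that 32-bit x86 cannot lower as a plain load.
define i64 @test2(i64* %ptr) {
  %val = load atomic i64* %ptr seq_cst, align 8
  ret i64 %val
}

; After AtomicExpandPass: the load becomes a cmpxchg that compares memory
; against the dummy value 0 and, on a match, stores 0 back, so the in-memory
; value is unchanged either way; element 0 of the result pair is the old
; value, i.e. the load result. On i686 this selects to lock cmpxchg8b, which
; is what the updated test checks for.
define i64 @test2.expanded(i64* %ptr) {
  %pair = cmpxchg i64* %ptr, i64 0, i64 0 seq_cst seq_cst
  %loaded = extractvalue { i64, i1 } %pair, 0
  ret i64 %loaded
}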