Index: llvm/trunk/include/llvm/Target/TargetLowering.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h +++ llvm/trunk/include/llvm/Target/TargetLowering.h @@ -1097,6 +1097,14 @@ } /// @} + // Emits code that executes when the comparison result in the ll/sc + // expansion of a cmpxchg instruction is such that the store-conditional will + // not execute. This makes it possible to balance out the load-linked with + // a dedicated instruction, if desired. + // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would + // be unnecessarily held, except if clrex, inserted by this hook, is executed. + virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {} + /// Returns true if the given (atomic) store should be expanded by the /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input. virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const { Index: llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp =================================================================== --- llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp +++ llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp @@ -374,7 +374,7 @@ // %loaded = @load.linked(%addr) // %should_store = icmp eq %loaded, %desired // br i1 %should_store, label %cmpxchg.trystore, - // label %cmpxchg.failure + // label %cmpxchg.nostore // cmpxchg.trystore: // %stored = @store_conditional(%new, %addr) // %success = icmp eq i32 %stored, 0 @@ -382,6 +382,9 @@ // cmpxchg.success: // fence? // br label %cmpxchg.end + // cmpxchg.nostore: + // @load_linked_fail_balance()? + // br label %cmpxchg.failure // cmpxchg.failure: // fence? // br label %cmpxchg.end @@ -392,7 +395,8 @@ // [...] BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); - auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); + auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB); + auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB); auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); @@ -416,7 +420,7 @@ // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). - Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); + Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); Builder.SetInsertPoint(TryStoreBB); Value *StoreSuccess = TLI->emitStoreConditional( @@ -432,6 +436,13 @@ /*IsLoad=*/true); Builder.CreateBr(ExitBB); + Builder.SetInsertPoint(NoStoreBB); + // In the failing case, where we don't execute the store-conditional, the + // target might want to balance out the load-linked with a dedicated + // instruction (e.g., on ARM, clearing the exclusive monitor). + TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); + Builder.CreateBr(FailureBB); + Builder.SetInsertPoint(FailureBB); TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, /*IsLoad=*/true); Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h @@ -348,6 +348,8 @@ Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override; + void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override; + TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9682,6 +9682,13 @@ cast(Addr->getType())->getElementType()); } +void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance( + IRBuilder<> &Builder) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Builder.CreateCall( + llvm::Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex)); +} + Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const { Index: llvm/trunk/test/CodeGen/AArch64/arm64-atomic.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-atomic.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-atomic.ll @@ -2,13 +2,17 @@ define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 { ; CHECK-LABEL: val_compare_and_swap: -; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]: -; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0] +; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0 +; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: +; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]] ; CHECK-NEXT: cmp [[RESULT]], w1 -; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x0] -; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]] -; CHECK-NEXT: [[LABEL2]]: +; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] +; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] +; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: clrex +; CHECK-NEXT: [[EXITBB]]: %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val @@ -17,13 +21,16 @@ define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 { ; CHECK-LABEL: val_compare_and_swap_from_load: ; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2] -; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0] ; CHECK-NEXT: cmp [[RESULT]], w1 -; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0] -; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]] -; CHECK-NEXT: [[LABEL2]]: +; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] +; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: clrex +; CHECK-NEXT: [[EXITBB]]: %new = load i32, i32* %pnew %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire %val = extractvalue { i32, i1 } %pair, 0 @@ -32,13 +39,17 @@ define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 { ; CHECK-LABEL: val_compare_and_swap_rel: -; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]: -; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0] +; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0 +; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: +; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]] ; CHECK-NEXT: cmp [[RESULT]], w1 -; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x0] -; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]] -; CHECK-NEXT: [[LABEL2]]: +; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] +; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]] +; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] +; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: clrex +; CHECK-NEXT: [[EXITBB]]: %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val @@ -47,13 +58,16 @@ define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 { ; CHECK-LABEL: val_compare_and_swap_64: ; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0 -; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK-NEXT: ldxr [[RESULT:x[0-9]+]], [x[[ADDR]]] ; CHECK-NEXT: cmp [[RESULT]], x1 -; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], x2, [x[[ADDR]]] -; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]] -; CHECK-NEXT: [[LABEL2]]: +; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] +; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: clrex +; CHECK-NEXT: [[EXITBB]]: %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic %val = extractvalue { i64, i1 } %pair, 0 ret i64 %val @@ -61,13 +75,13 @@ define i32 @fetch_and_nand(i32* %p) #0 { ; CHECK-LABEL: fetch_and_nand: -; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK: ldxr w[[DEST_REG:[0-9]+]], [x0] ; CHECK: mvn [[TMP_REG:w[0-9]+]], w[[DEST_REG]] ; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], [[TMP_REG]], #0xfffffff8 ; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]] ; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] -; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]] ; CHECK: mov x0, x[[DEST_REG]] %val = atomicrmw nand i32* %p, i32 7 release ret i32 %val @@ -76,12 +90,12 @@ define i64 @fetch_and_nand_64(i64* %p) #0 { ; CHECK-LABEL: fetch_and_nand_64: ; CHECK: mov x[[ADDR:[0-9]+]], x0 -; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK: ldaxr x[[DEST_REG:[0-9]+]], [x[[ADDR]]] ; CHECK: mvn w[[TMP_REG:[0-9]+]], w[[DEST_REG]] ; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], x[[TMP_REG]], #0xfffffffffffffff8 ; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]] -; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]] %val = atomicrmw nand i64* %p, i64 7 acq_rel ret i64 %val @@ -90,12 +104,12 @@ define i32 @fetch_and_or(i32* %p) #0 { ; CHECK-LABEL: fetch_and_or: ; CHECK: movz [[OLDVAL_REG:w[0-9]+]], #0x5 -; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK: ldaxr w[[DEST_REG:[0-9]+]], [x0] ; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], [[OLDVAL_REG]] ; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]] ; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] -; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]] ; CHECK: mov x0, x[[DEST_REG]] %val = atomicrmw or i32* %p, i32 5 seq_cst ret i32 %val @@ -104,11 +118,11 @@ define i64 @fetch_and_or_64(i64* %p) #0 { ; CHECK: fetch_and_or_64: ; CHECK: mov x[[ADDR:[0-9]+]], x0 -; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x[[ADDR]]] ; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0x7 ; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]] -; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]] %val = atomicrmw or i64* %p, i64 7 monotonic ret i64 %val } Index: llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll +++ llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll @@ -893,6 +893,8 @@ ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; CHECK: stxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: [[GET_OUT]]: +; CHECK: clrex ; CHECK-NOT: dmb ; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] @@ -916,6 +918,8 @@ ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; CHECK: stlxrh [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: [[GET_OUT]]: +; CHECK: clrex ; CHECK-NOT: dmb ; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] @@ -927,21 +931,21 @@ %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic %old = extractvalue { i32, i1 } %pair, 0 +; CHECK: mov {{[xw]}}[[WANTED:[0-9]+]], {{[xw]}}0 + ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 ; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: ; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. -; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: cmp w[[OLD]], w[[WANTED]] ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: [[GET_OUT]]: +; CHECK: clrex ; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i32 %old } @@ -963,6 +967,8 @@ ; As above, w1 is a reasonable guess. ; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: [[GET_OUT]]: +; CHECK: clrex ; CHECK-NOT: dmb ; CHECK: str x[[OLD]],