diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2468,10 +2468,28 @@ // Fence instruction simplification Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) { - // Remove identical consecutive fences. - Instruction *Next = FI.getNextNonDebugInstruction(); - if (auto *NFI = dyn_cast(Next)) - if (FI.isIdenticalTo(NFI)) + auto *NFI = dyn_cast(FI.getNextNonDebugInstruction()); + // This check is solely here to handle arbitrary target-dependent syncscopes. + // TODO: Can remove if does not matter in practice. + if (NFI && FI.isIdenticalTo(NFI)) + return eraseInstFromFunction(FI); + + // Returns true if FI1 is identical or stronger fence than FI2. + auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) { + auto FI1SyncScope = FI1->getSyncScopeID(); + // Consider same scope, where scope is global or single-thread. + if (FI1SyncScope != FI2->getSyncScopeID() || + (FI1SyncScope != SyncScope::System && + FI1SyncScope != SyncScope::SingleThread)) + return false; + + return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering()); + }; + if (NFI && isIdenticalOrStrongerFence(NFI, &FI)) + return eraseInstFromFunction(FI); + + if (auto *PFI = dyn_cast_or_null(FI.getPrevNonDebugInstruction())) + if (isIdenticalOrStrongerFence(PFI, &FI)) return eraseInstFromFunction(FI); return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/consecutive-fences.ll b/llvm/test/Transforms/InstCombine/consecutive-fences.ll --- a/llvm/test/Transforms/InstCombine/consecutive-fences.ll +++ b/llvm/test/Transforms/InstCombine/consecutive-fences.ll @@ -4,7 +4,7 @@ ; CHECK-LABEL: define void @tinkywinky ; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: fence syncscope("singlethread") acquire +; CHECK-NEXT: fence syncscope("singlethread") acquire ; CHECK-NEXT: ret void ; CHECK-NEXT: } @@ -18,6 +18,17 @@ ret void } +; Arbitrary target dependent scope +; Is this transform really needed? +; CHECK-LABEL: test_target_dependent_scope +; CHECK-NEXT: fence syncscope("MSP430") acquire +; CHECK-NEXT: ret void +define void @test_target_dependent_scope() { + fence syncscope("MSP430") acquire + fence syncscope("MSP430") acquire + ret void +} + ; CHECK-LABEL: define void @dipsy ; CHECK-NEXT: fence seq_cst ; CHECK-NEXT: fence syncscope("singlethread") seq_cst @@ -31,9 +42,6 @@ } ; CHECK-LABEL: define void @patatino -; CHECK-NEXT: fence acquire -; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: fence acquire ; CHECK-NEXT: fence seq_cst ; CHECK-NEXT: ret void ; CHECK-NEXT: } @@ -46,6 +54,47 @@ ret void } +; CHECK-LABEL: define void @weaker_fence_1 +; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: ret void +define void @weaker_fence_1() { + fence seq_cst + fence release + fence seq_cst + ret void +} + +; CHECK-LABEL: define void @weaker_fence_2 +; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: ret void +define void @weaker_fence_2() { + fence seq_cst + fence release + fence seq_cst + fence acquire + ret void +} + +; Although acquire is a weaker ordering than seq_cst, it has a system scope, +; compare to singlethread scope in seq_cst. +; CHECK-LABEL: acquire_global_neg_test +; CHECK-NEXT: fence acquire +; CHECK-NEXT: fence syncscope("singlethread") seq_cst +define void @acquire_global_neg_test() { + fence acquire + fence acquire + fence syncscope("singlethread") seq_cst + ret void +} + +; CHECK-LABEL: acquire_single_thread_scope +; CHECK-NEXT: fence syncscope("singlethread") seq_cst +define void @acquire_single_thread_scope() { + fence syncscope("singlethread") acquire + fence syncscope("singlethread") seq_cst + ret void +} + ; CHECK-LABEL: define void @debug ; CHECK-NOT: fence ; CHECK: call void @llvm.dbg.value