diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2468,10 +2468,30 @@
 
 // Fence instruction simplification
 Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
+  auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction());
   // Remove identical consecutive fences.
-  Instruction *Next = FI.getNextNonDebugInstruction();
-  if (auto *NFI = dyn_cast<FenceInst>(Next))
-    if (FI.isIdenticalTo(NFI))
+  if (NFI && FI.isIdenticalTo(NFI))
+    return eraseInstFromFunction(FI);
+
+  // If the current instruction is a weaker ordering fence consecutive to a
+  // stronger fence of the same scope, then we can remove the weaker fence.
+  // We consider only system scope for now, instead of dealing with
+  // single-thread and target dependent scopes.
+  auto isSystemScopeFence = [](FenceInst *F) {
+    return F->getSyncScopeID() == SyncScope::System;
+  };
+  if (!isSystemScopeFence(&FI))
+    return nullptr;
+
+  // FI is redundant if the next non-debug instruction is a stronger fence.
+  if (NFI && isSystemScopeFence(NFI) &&
+      isStrongerThan(NFI->getOrdering(), FI.getOrdering()))
+    return eraseInstFromFunction(FI);
+
+  // Likewise if the previous non-debug instruction is a stronger fence.
+  if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction()))
+    if (isSystemScopeFence(PFI) &&
+        isStrongerThan(PFI->getOrdering(), FI.getOrdering()))
       return eraseInstFromFunction(FI);
   return nullptr;
 }
diff --git a/llvm/test/Transforms/InstCombine/consecutive-fences.ll b/llvm/test/Transforms/InstCombine/consecutive-fences.ll
--- a/llvm/test/Transforms/InstCombine/consecutive-fences.ll
+++ b/llvm/test/Transforms/InstCombine/consecutive-fences.ll
@@ -4,7 +4,7 @@
 
 ; CHECK-LABEL: define void @tinkywinky
 ; CHECK-NEXT: fence seq_cst
-; CHECK-NEXT: fence syncscope("singlethread") acquire
+; CHECK-NEXT: fence syncscope("singlethread") acquire
 ; CHECK-NEXT: ret void
 ; CHECK-NEXT: }
 
@@ -31,9 +31,6 @@
 }
 
 ; CHECK-LABEL: define void @patatino
-; CHECK-NEXT: fence acquire
-; CHECK-NEXT: fence seq_cst
-; CHECK-NEXT: fence acquire
 ; CHECK-NEXT: fence seq_cst
 ; CHECK-NEXT: ret void
 ; CHECK-NEXT: }
@@ -46,6 +43,49 @@
   ret void
 }
 
+; CHECK-LABEL: define void @weaker_fence_1
+; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: ret void
+define void @weaker_fence_1() {
+  fence seq_cst
+  fence release
+  fence seq_cst
+  ret void
+}
+
+; CHECK-LABEL: define void @weaker_fence_2
+; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: ret void
+define void @weaker_fence_2() {
+  fence seq_cst
+  fence release
+  fence seq_cst
+  fence acquire
+  ret void
+}
+
+; Although acquire is a weaker ordering than seq_cst, the acquire fence has
+; system scope whereas the seq_cst fence has singlethread scope, so it is kept.
+; CHECK-LABEL: acquire_global_neg_test
+; CHECK-NEXT: fence acquire
+; CHECK-NEXT: fence syncscope("singlethread") seq_cst
+define void @acquire_global_neg_test() {
+  fence acquire
+  fence acquire
+  fence syncscope("singlethread") seq_cst
+  ret void
+}
+
+; TODO: We could remove the fence acquire which is at same scope as seq_cst
+; CHECK-LABEL: acquire_single_thread_scope
+; CHECK-NEXT: fence syncscope("singlethread") acquire
+; CHECK-NEXT: fence syncscope("singlethread") seq_cst
+define void @acquire_single_thread_scope() {
+  fence syncscope("singlethread") acquire
+  fence syncscope("singlethread") seq_cst
+  ret void
+}
+
 ; CHECK-LABEL: define void @debug
 ; CHECK-NOT: fence
 ; CHECK: call void @llvm.dbg.value