diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp --- a/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/llvm/lib/Analysis/AliasAnalysis.cpp @@ -510,11 +510,14 @@ ModRefInfo AAResults::getModRefInfo(const FenceInst *S, const MemoryLocation &Loc, AAQueryInfo &AAQI) { - // All we know about a fence instruction is what we get from the ModRef - // mask: if Loc is a constant memory location, the fence definitely could - // not modify it. - if (Loc.Ptr) + // The ModRef mask tells us whether Loc is constant memory, which a fence + // cannot modify. If Loc is not captured before or at the fence, nothing + // outside this function can mutate it either, so the result is at most Ref. + if (Loc.Ptr) { + if (AAQI.CI->isNotCapturedBeforeOrAt(Loc.Ptr, S)) + return getModRefInfoMask(Loc) & ModRefInfo::Ref; return getModRefInfoMask(Loc); + } return ModRefInfo::ModRef; } diff --git a/llvm/test/Transforms/EarlyCSE/fence.ll b/llvm/test/Transforms/EarlyCSE/fence.ll --- a/llvm/test/Transforms/EarlyCSE/fence.ll +++ b/llvm/test/Transforms/EarlyCSE/fence.ll @@ -44,9 +44,9 @@ ; fence. Note that it would be legal to reorder '%a' after the fence ; and then remove '%a2'. The current implementation doesn't know how ; to do this, but if it learned, this test will need revised. -define i32 @test3(ptr noalias %addr.i, ptr noalias %otheraddr) { +define i32 @test3(ptr %addr.i) { ; CHECK-LABEL: define i32 @test3 -; CHECK-SAME: (ptr noalias [[ADDR_I:%.*]], ptr noalias [[OTHERADDR:%.*]]) { +; CHECK-SAME: (ptr [[ADDR_I:%.*]]) { ; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[ADDR_I]], align 4 ; CHECK-NEXT: fence acquire ; CHECK-NEXT: [[A2:%.*]] = load i32, ptr [[ADDR_I]], align 4 @@ -60,6 +60,22 @@ ret i32 %res } +; This can be optimized because another thread modifying %addr.i would be UB +; given that we've marked %addr.i as noalias. 
+define i32 @test3_noalias(ptr noalias %addr.i) { +; CHECK-LABEL: define i32 @test3_noalias +; CHECK-SAME: (ptr noalias [[ADDR_I:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[ADDR_I]], align 4 +; CHECK-NEXT: fence acquire +; CHECK-NEXT: ret i32 0 +; + %a = load i32, ptr %addr.i, align 4 + fence acquire + %a2 = load i32, ptr %addr.i, align 4 + %res = sub i32 %a, %a2 + ret i32 %res +} + ; We can not dead store eliminate accross the fence. We could in ; principal reorder the second store above the fence and then DSE either ; store, but this is beyond the simple last-store DSE which EarlyCSE diff --git a/llvm/test/Transforms/GVN/fence.ll b/llvm/test/Transforms/GVN/fence.ll --- a/llvm/test/Transforms/GVN/fence.ll +++ b/llvm/test/Transforms/GVN/fence.ll @@ -37,9 +37,9 @@ ; ordering property (though it is that too), but a liveness ; property. We expect to eventually see the value of store by ; another thread when spinning on that location. -define i32 @test3(ptr noalias %addr.i, ptr noalias %otheraddr) { +define i32 @test3(ptr %addr.i) { ; CHECK-LABEL: define i32 @test3 -; CHECK-SAME: (ptr noalias [[ADDR_I:%.*]], ptr noalias [[OTHERADDR:%.*]]) { +; CHECK-SAME: (ptr [[ADDR_I:%.*]]) { ; CHECK-NEXT: fence acquire ; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[ADDR_I]], align 4 ; CHECK-NEXT: fence acquire @@ -60,6 +60,23 @@ ret i32 %res } +; As opposed to @test3, this can be optimized away because another thread +; modifying %addr.i would be UB given that we've marked %addr.i as noalias. 
+define i32 @test3_noalias(ptr noalias %addr.i) { +; CHECK-LABEL: define i32 @test3_noalias +; CHECK-SAME: (ptr noalias [[ADDR_I:%.*]]) { +; CHECK-NEXT: fence acquire +; CHECK-NEXT: fence acquire +; CHECK-NEXT: ret i32 0 +; + fence acquire + %a = load i32, ptr %addr.i, align 4 + fence acquire + %a2 = load i32, ptr %addr.i, align 4 + %res = sub i32 %a, %a2 + ret i32 %res +} + ; We can forward the value forward the load ; across both the fences, because the load is from ; a constant memory location.