diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -807,6 +807,19 @@ return callCapturesBefore(I, MemoryLocation(P, Size), DT); } + ModRefInfo callKnownNoCapture(const Instruction *I, + const MemoryLocation &MemLoc, + DominatorTree *DT) { + AAQueryInfo AAQIP; + return callKnownNoCapture(I, MemLoc, DT, AAQIP); + } + + /// A convenience wrapper to synthesize a memory location. + ModRefInfo callKnownNoCapture(const Instruction *I, const Value *P, + LocationSize Size, DominatorTree *DT) { + return callKnownNoCapture(I, MemoryLocation(P, Size), DT); + } + /// @} //===--------------------------------------------------------------------===// /// \name Higher level methods for querying mod/ref information. @@ -871,6 +884,11 @@ ModRefInfo callCapturesBefore(const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT, AAQueryInfo &AAQIP); + ModRefInfo callKnownNoCapture(const Instruction *I, + const MemoryLocation &MemLoc, DominatorTree *DT, + AAQueryInfo &AAQIP); + ModRefInfo callCaptures(const CallBase *Call, const Value *Object, + DominatorTree &DT, AAQueryInfo &AAQIP); class Concept; diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp --- a/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/llvm/lib/Analysis/AliasAnalysis.cpp @@ -735,6 +735,30 @@ /* include Object */ true)) return ModRefInfo::ModRef; + return callCaptures(Call, Object, *DT, AAQI); +} + +ModRefInfo AAResults::callKnownNoCapture(const Instruction *I, + const MemoryLocation &MemLoc, + DominatorTree *DT, AAQueryInfo &AAQI) { + if (!DT) + return ModRefInfo::ModRef; + + const Value *Object = getUnderlyingObject(MemLoc.Ptr); + if (!isIdentifiedFunctionLocal(Object)) + return ModRefInfo::ModRef; + + const auto *Call = dyn_cast<CallBase>(I); + if (!Call || Call == Object) + return ModRefInfo::ModRef; + + return callCaptures(Call, Object, 
*DT, AAQI); +} + +/// Return information about whether a particular call site \p Call modifies +/// or reads the specified memory object \p Object. +ModRefInfo AAResults::callCaptures(const CallBase *Call, const Value *Object, + DominatorTree &DT, AAQueryInfo &AAQI) { unsigned ArgNo = 0; ModRefInfo R = ModRefInfo::NoModRef; bool IsMustAlias = true; diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1318,12 +1318,23 @@ return false; } } + ModRefInfo MR = BatchAA.getModRefInfo(UseInst, DefLoc); + // If necessary, perform additional analysis. + if (isRefSet(MR)) { + if (auto *CI = dyn_cast<CallBase>(UseInst)) { + auto *DefUO = getUnderlyingObject(DefLoc.Ptr); + if (DefUO && isa<AllocaInst>(DefUO) && + !MaybeCapturedBefore(DefUO, UseInst)) { + assert(!PointerMayBeCapturedBefore(DefLoc.Ptr, false, true, UseInst, + &DT, false, 0, &this->LI) && + "cached analysis disagrees with fresh " + "PointerMayBeCapturedBefore"); + MR = AA.callKnownNoCapture(UseInst, DefLoc, &DT); + } + } + } - // NOTE: For calls, the number of stores removed could be slightly improved - // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to - // be expensive compared to the benefits in practice. For now, avoid more - // expensive analysis to limit compile-time. 
- return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc)); + return isRefSet(MR); } /// Returns true if a dependency between \p Current and \p KillingDef is diff --git a/llvm/test/Transforms/DeadStoreElimination/captures-before-call.ll b/llvm/test/Transforms/DeadStoreElimination/captures-before-call.ll --- a/llvm/test/Transforms/DeadStoreElimination/captures-before-call.ll +++ b/llvm/test/Transforms/DeadStoreElimination/captures-before-call.ll @@ -11,7 +11,6 @@ ; CHECK-NEXT: [[V2:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 0, i32* [[V1]], align 4 ; CHECK-NEXT: call void @escape(i32* nonnull [[V1]]) -; CHECK-NEXT: store i32 55555, i32* [[V2]], align 4 ; CHECK-NEXT: [[CALL:%.*]] = call i32 @getval() ; CHECK-NEXT: store i32 [[CALL]], i32* [[V2]], align 4 ; CHECK-NEXT: call void @escape(i32* nonnull [[V2]]) @@ -46,7 +45,6 @@ define i32 @test_not_captured_before_call_same_bb(i32** %in.ptr) { ; CHECK-LABEL: @test_not_captured_before_call_same_bb( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 55, i32* [[A]], align 4 ; CHECK-NEXT: [[R:%.*]] = call i32 @getval() ; CHECK-NEXT: store i32 99, i32* [[A]], align 4 ; CHECK-NEXT: call void @escape_and_clobber(i32* [[A]]) @@ -63,7 +61,6 @@ define i32 @test_not_captured_before_call_same_bb_escape_unreachable_block(i32** %in.ptr) { ; CHECK-LABEL: @test_not_captured_before_call_same_bb_escape_unreachable_block( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 55, i32* [[A]], align 4 ; CHECK-NEXT: [[R:%.*]] = call i32 @getval() ; CHECK-NEXT: store i32 99, i32* [[A]], align 4 ; CHECK-NEXT: call void @escape_and_clobber(i32* [[A]]) @@ -108,7 +105,6 @@ define i32 @test_captured_after_call_same_bb_2_clobbered_later(i32** %in.ptr) { ; CHECK-LABEL: @test_captured_after_call_same_bb_2_clobbered_later( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 55, i32* [[A]], align 4 ; CHECK-NEXT: [[R:%.*]] = call i32 @getval() ; CHECK-NEXT: call void @escape_writeonly(i32* 
[[A]]) ; CHECK-NEXT: store i32 99, i32* [[A]], align 4 @@ -298,7 +294,6 @@ define i32 @test_not_captured_before_call_other_blocks_1(i32** %in.ptr, i1 %c.1) { ; CHECK-LABEL: @test_not_captured_before_call_other_blocks_1( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 55, i32* [[A]], align 4 ; CHECK-NEXT: [[R:%.*]] = call i32 @getval() ; CHECK-NEXT: store i32 99, i32* [[A]], align 4 ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] @@ -330,7 +325,6 @@ define i32 @test_not_captured_before_call_other_blocks_2(i32** %in.ptr, i1 %c.1) { ; CHECK-LABEL: @test_not_captured_before_call_other_blocks_2( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 55, i32* [[A]], align 4 ; CHECK-NEXT: [[R:%.*]] = call i32 @getval() ; CHECK-NEXT: store i32 99, i32* [[A]], align 4 ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] @@ -364,7 +358,6 @@ define i32 @test_not_captured_before_call_other_blocks_3(i32** %in.ptr, i1 %c.1) { ; CHECK-LABEL: @test_not_captured_before_call_other_blocks_3( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 55, i32* [[A]], align 4 ; CHECK-NEXT: [[R:%.*]] = call i32 @getval() ; CHECK-NEXT: store i32 99, i32* [[A]], align 4 ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] @@ -396,7 +389,6 @@ define i32 @test_not_captured_before_call_other_blocks_4(i32** %in.ptr, i1 %c.1) { ; CHECK-LABEL: @test_not_captured_before_call_other_blocks_4( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 55, i32* [[A]], align 4 ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: br label [[EXIT:%.*]] @@ -433,7 +425,6 @@ ; CHECK-LABEL: @test_not_captured_before_call_other_blocks_5( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 55, i32* [[A]], align 4 ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]] ; CHECK: then: ; 
CHECK-NEXT: [[R:%.*]] = call i32 @getval() @@ -466,7 +457,6 @@ ; CHECK-LABEL: @test_not_captured_before_call_other_blocks_6( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 55, i32* [[A]], align 4 ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]] ; CHECK: then: ; CHECK-NEXT: [[R:%.*]] = call i32 @getval() @@ -501,7 +491,6 @@ ; CHECK-LABEL: @test_not_captured_before_call_other_blocks_7( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 55, i32* [[A]], align 4 ; CHECK-NEXT: [[R:%.*]] = call i32 @getval() ; CHECK-NEXT: call void @escape_writeonly(i32* [[A]]) ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]] @@ -619,7 +608,6 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: [[A:%.*]] = alloca i64, align 8 ; CHECK-NEXT: [[EXT_A:%.*]] = bitcast i64* [[A]] to i8* -; CHECK-NEXT: store i64 0, i64* [[A]], align 8 ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[EXT_A]], i8* [[SRC:%.*]], i64 8, i1 false) ; CHECK-NEXT: store i64* [[A]], i64** [[ESCAPE:%.*]], align 8 diff --git a/llvm/test/Transforms/DeadStoreElimination/captures-before-load.ll b/llvm/test/Transforms/DeadStoreElimination/captures-before-load.ll --- a/llvm/test/Transforms/DeadStoreElimination/captures-before-load.ll +++ b/llvm/test/Transforms/DeadStoreElimination/captures-before-load.ll @@ -155,7 +155,6 @@ define i32 @test_not_captured_before_load_same_bb_clobber(i32** %in.ptr) { ; CHECK-LABEL: @test_not_captured_before_load_same_bb_clobber( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 55, i32* [[A]], align 4 ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: [[IN_LV_1:%.*]] = load i32*, i32** [[IN_PTR:%.*]], align 2 ; CHECK-NEXT: [[IN_LV_2:%.*]] = load i32, i32* [[IN_LV_1]], align 2