diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1075,13 +1075,16 @@ } MemoryAccess *UseAccess = WorkList[I]; - // Simply adding the users of MemoryPhi to the worklist is not enough, - // because we might miss read clobbers in different iterations of a loop, - // for example. - // TODO: Add support for phi translation to handle the loop case. - if (isa<MemoryPhi>(UseAccess)) - return false; + if (isa<MemoryPhi>(UseAccess)) { + // AliasAnalysis does not account for loops. Limit elimination to + // candidates for which we can guarantee they always store to the same + // memory location. + if (!isGuaranteedLoopInvariant(MaybeLoc->Ptr)) + return false; + PushMemUses(cast<MemoryPhi>(UseAccess)); + continue; + } // TODO: Checking for aliasing is expensive. Consider reducing the amount // of times this is called and/or caching it. Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst(); diff --git a/llvm/test/Transforms/DeadStoreElimination/multiblock-memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/multiblock-memintrinsics.ll --- a/llvm/test/Transforms/DeadStoreElimination/multiblock-memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/multiblock-memintrinsics.ll @@ -138,8 +138,6 @@ ; CHECK: bb1: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: ret void @@ -177,12 +175,8 @@ ; CHECK-NEXT: call void @readonly_use(i32* [[P]]) ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: -; CHECK-NEXT: 
[[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/DeadStoreElimination/phi-translation.ll b/llvm/test/Transforms/DeadStoreElimination/phi-translation.ll --- a/llvm/test/Transforms/DeadStoreElimination/phi-translation.ll +++ b/llvm/test/Transforms/DeadStoreElimination/phi-translation.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -dse -S %s | FileCheck %s -; TODO: Both the stores in %then and %else can be eliminated by translating %p +; Both the stores in %then and %else can be eliminated by translating %p ; through the phi. define void @memoryphi_translate_1(i1 %c) { ; CHECK-LABEL: @memoryphi_translate_1( @@ -10,10 +10,8 @@ ; CHECK-NEXT: [[A_2:%.*]] = alloca i8, align 1 ; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: store i8 0, i8* [[A_1]], align 1 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: -; CHECK-NEXT: store i8 9, i8* [[A_2]], align 1 ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[P:%.*]] = phi i8* [ [[A_1]], [[THEN]] ], [ [[A_2]], [[ELSE]] ] @@ -39,7 +37,7 @@ ret void } -; TODO: The store in %else can be eliminated by translating %p through the phi. +; The store in %else can be eliminated by translating %p through the phi. ; The store in %then cannot be eliminated, because %a.1 is read before the final ; store. define i8 @memoryphi_translate_2(i1 %c) { @@ -52,7 +50,6 @@ ; CHECK-NEXT: store i8 0, i8* [[A_1]], align 1 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: -; CHECK-NEXT: store i8 9, i8* [[A_2]], align 1 ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[P:%.*]] = phi i8* [ [[A_1]], [[THEN]] ], [ [[A_2]], [[ELSE]] ] @@ -80,7 +77,7 @@ ret i8 %l } -; TODO: The store in %then can be eliminated by translating %p through the phi. 
+; The store in %then can be eliminated by translating %p through the phi. ; The store in %else cannot be eliminated, because %a.2 is read before the final ; store. define i8 @memoryphi_translate_3(i1 %c) { @@ -90,7 +87,6 @@ ; CHECK-NEXT: [[A_2:%.*]] = alloca i8, align 1 ; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: store i8 0, i8* [[A_1]], align 1 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: ; CHECK-NEXT: store i8 9, i8* [[A_2]], align 1 @@ -166,11 +162,9 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[B:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[C:%.*]] = alloca i8, align 1 ; CHECK-NEXT: store i8 0, i8* [[A]], align 1 ; CHECK-NEXT: br i1 [[COND:%.*]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]] ; CHECK: cond.true: -; CHECK-NEXT: store i8 0, i8* [[C]], align 1 ; CHECK-NEXT: br label [[COND_END]] ; CHECK: cond.end: ; CHECK-NEXT: [[P:%.*]] = phi i8* [ [[B]], [[COND_TRUE]] ], [ [[A]], [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll --- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll @@ -225,12 +225,9 @@ define void @test4_non_local(i8 *%P, i1 %c) { ; CHECK-LABEL: @test4_non_local( -; CHECK-NEXT: [[A1:%.*]] = alloca [[TMP1:%.*]], align 8 -; CHECK-NEXT: [[A2:%.*]] = bitcast %1* [[A1]] to i8* -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A2]], i8* align 4 [[P:%.*]], i64 8, i1 false) ; CHECK-NEXT: br i1 [[C:%.*]], label [[CALL:%.*]], label [[EXIT:%.*]] ; CHECK: call: -; CHECK-NEXT: call void @test4a(i8* byval(i8) align 1 [[P]]) +; CHECK-NEXT: call void @test4a(i8* byval(i8) align 1 [[P:%.*]]) ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void