diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1459,6 +1459,8 @@ // accesses are executed before another access. DenseMap PostOrderNumbers; + DenseMap IOLs; + DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, const TargetLibraryInfo &TLI) : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {} @@ -1692,6 +1694,9 @@ } } + auto I = IOLs.find(DeadInst->getParent()); + if (I != IOLs.end()) + I->second.erase(DeadInst); // Remove it's operands for (Use &O : DeadInst->operands()) if (Instruction *OpI = dyn_cast(O)) { @@ -1846,7 +1851,10 @@ // Check if NI overwrites SI. int64_t InstWriteOffset, DepWriteOffset; - InstOverlapIntervalsTy IOL; + auto Iter = State.IOLs.insert( + std::make_pair( + NI->getParent(), InstOverlapIntervalsTy())); + auto &IOL = Iter.first->second; OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset, InstWriteOffset, NI, IOL, AA, &F); @@ -1861,6 +1869,10 @@ } } + if (EnablePartialOverwriteTracking) + for (auto &KV : State.IOLs) + MadeChange |= removePartiallyOverlappedStores(&AA, DL, KV.second); + return MadeChange; } } // end anonymous namespace diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; XFAIL: * ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s define void @write4to7(i32* nocapture %p) { diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; XFAIL: * ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll @@ -1,7 +1,6 @@ ; Test that the getelementptr generated when the dse pass determines that ; a memset can be shortened has the debugloc carried over from the memset. -; XFAIL: * ; RUN: opt -S -march=native -dse -enable-dse-memoryssa < %s| FileCheck %s ; CHECK: bitcast [5 x i64]* %{{[a-zA-Z_][a-zA-Z0-9_]*}} to i8*, !dbg ; CHECK-NEXT: %{{[0-9]+}} = getelementptr inbounds i8, i8* %0, i64 32, !dbg ![[DBG:[0-9]+]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll @@ -76,7 +76,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 ; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) ; CHECK-NEXT: br label [[FOR:%.*]] ; CHECK: for: ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll @@ -43,7 +43,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 ; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB3:%.*]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-overlap.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-overlap.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-overlap.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -dse -enable-dse-memoryssa %s -S | FileCheck %s + + +%struct.ham = type { [3 x double], [3 x double]} + +declare void @may_throw() +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) + +define void @overlap1(%struct.ham* %arg, i1 %cond) { +; CHECK-LABEL: @overlap1( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [[STRUCT_HAM:%.*]], %struct.ham* [[ARG:%.*]], i64 0, i32 0, i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 0, i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 0, i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 1, i64 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 1, i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 1, i32 0 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB7:%.*]], label [[BB8:%.*]] +; CHECK: bb7: +; CHECK-NEXT: br label [[BB9:%.*]] +; CHECK: bb8: +; CHECK-NEXT: br label [[BB9]] +; CHECK: bb9: +; CHECK-NEXT: store double 1.000000e+00, double* [[TMP2]], align 8 +; CHECK-NEXT: store double 2.000000e+00, double* [[TMP1]], align 8 +; CHECK-NEXT: store double 3.000000e+00, double* [[TMP]], align 8 +; CHECK-NEXT: store double 4.000000e+00, double* [[TMP5]], align 8 +; CHECK-NEXT: store double 5.000000e+00, double* [[TMP4]], align 8 +; CHECK-NEXT: store double 6.000000e+00, double* [[TMP3]], align 8 +; CHECK-NEXT: ret void +; +bb: + %tmp = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 0, i64 2 + %tmp1 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 0, i64 1 + %tmp2 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 0, i64 0 + %tmp3 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0,i32 1, i64 2 + %tmp4 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 1, i64 1 + %tmp5 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 1, i32 0 + %tmp6 = bitcast double* %tmp2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(48) %tmp6, i8 0, i64 48, i1 false) + br i1 %cond, label %bb7, label %bb8 + +bb7: ; preds = %bb + br label %bb9 + +bb8: ; preds = %bb + br label %bb9 + +bb9: ; preds = %bb8, %bb7 + store double 1.0, double* %tmp2, align 8 + store double 2.0, double* %tmp1, align 8 + store double 3.0, double* %tmp, align 8 + store double 4.0, double* %tmp5, align 8 + store double 5.0, double* %tmp4, align 8 + store double 6.0, double* %tmp3, align 8 + ret void +} + +define void @overlap2(%struct.ham* %arg, i1 %cond) { +; CHECK-LABEL: @overlap2( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [[STRUCT_HAM:%.*]], %struct.ham* [[ARG:%.*]], i64 0, i32 0, i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 0, i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 0, i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 1, i64 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 1, i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 1, i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[TMP2]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(48) [[TMP6]], i8 0, i64 48, i1 false) +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB7:%.*]], label [[BB8:%.*]] +; CHECK: bb7: +; CHECK-NEXT: call void @may_throw() +; CHECK-NEXT: br label [[BB9:%.*]] +; CHECK: bb8: +; CHECK-NEXT: br label [[BB9]] +; CHECK: bb9: +; CHECK-NEXT: store double 1.000000e+00, double* [[TMP2]], align 8 +; CHECK-NEXT: store double 2.000000e+00, double* [[TMP1]], align 8 +; CHECK-NEXT: store double 3.000000e+00, double* [[TMP]], align 8 +; CHECK-NEXT: store double 4.000000e+00, double* [[TMP5]], align 8 +; CHECK-NEXT: store double 5.000000e+00, double* [[TMP4]], align 8 +; CHECK-NEXT: store double 6.000000e+00, double* [[TMP3]], align 8 +; CHECK-NEXT: ret void +; +bb: + %tmp = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 0, i64 2 + %tmp1 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 0, i64 1 + %tmp2 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 0, i64 0 + %tmp3 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0,i32 1, i64 2 + %tmp4 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 1, i64 1 + %tmp5 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 1, i32 0 + %tmp6 = bitcast double* %tmp2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(48) %tmp6, i8 0, i64 48, i1 false) + br i1 %cond, label %bb7, label %bb8 + +bb7: ; preds = %bb + call void @may_throw() + br label %bb9 + +bb8: ; preds = %bb + br label %bb9 + +bb9: ; preds = %bb8, %bb7 + store double 1.0, double* %tmp2, align 8 + store double 2.0, double* %tmp1, align 8 + store double 3.0, double* %tmp, align 8 + store double 4.0, double* %tmp5, align 8 + store double 5.0, double* %tmp4, align 8 + store double 6.0, double* %tmp3, align 8 + ret void +} + +