diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1626,17 +1626,23 @@ // Check for any extra throws between SI and NI that block DSE. This only // checks extra maythrows (those that arn't MemoryDef's). MemoryDef that may // throw are handled during the walk from one def to the next. -bool mayThrowBetween(Instruction *SI, Instruction *NI, const Value *SILocUnd, - SmallSetVector &InvisibleToCaller, - SmallPtrSetImpl &ThrowingBlocks) { +bool mayThrowBetween( + Instruction *SI, Instruction *NI, const Value *SILocUnd, + SmallSetVector &InvisibleToCaller, + DenseMap> &ThrowingBlocks) { // First see if we can ignore it by using the fact that SI is an alloca/alloca // like object that is not visible to the caller during execution of the // function. if (SILocUnd && InvisibleToCaller.count(SILocUnd)) return false; - if (SI->getParent() == NI->getParent()) - return ThrowingBlocks.find(SI->getParent()) != ThrowingBlocks.end(); + if (SI->getParent() == NI->getParent()) { + auto BlockMap = ThrowingBlocks.find(SI->getParent()); + if (BlockMap == ThrowingBlocks.end()) + return false; + return BlockMap->second[SI] != + BlockMap->second[&*std::prev(NI->getIterator())]; + } return !ThrowingBlocks.empty(); } @@ -1692,8 +1698,12 @@ // Keep track of all of the objects that are invisible to the caller until the // function returns. SmallSetVector InvisibleToCaller; - // Keep track of blocks with throwing instructions. - SmallPtrSet ThrowingBlocks; + // Keep track of blocks with throwing instructions. For each block with + // throwing instructions, we number all instructions in the block, starting + // with 0 and incrementing the counter for throwing instructions. If 2 + // instructions have the same number there are no throwing instructions + // between them. + DenseMap> ThrowingBlocks; // Post-order numbers for basic blocks in the function. DenseMap PostOrderNumbers; @@ -1702,9 +1712,13 @@ // post-order numbers for basic blocks. for (BasicBlock *BB : post_order(&F)) { PostOrderNumbers[BB] = PO++; + + unsigned ThrowingCnt = 0; + DenseMap BlockMap; for (Instruction &I : *BB) { if (I.mayThrow()) - ThrowingBlocks.insert(I.getParent()); + ThrowingCnt++; + BlockMap[&I] = ThrowingCnt; auto *MD = dyn_cast_or_null(MSSA.getMemoryAccess(&I)); if (MD && hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) @@ -1718,6 +1732,10 @@ (isAllocLikeFn(&I, &TLI) && !PointerMayBeCaptured(&I, false, true))) InvisibleToCaller.insert(&I); } + + // Only add the mapping for blocks with any throwing instructions. + if (ThrowingCnt != 0) + ThrowingBlocks[BB] = BlockMap; } // Treat byval or inalloca arguments the same as Allocas, stores to them are // dead at the end of the function. diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -dse -enable-dse-partial-store-merging=false < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-memoryssa -enable-dse-partial-store-merging=false < %s | FileCheck %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-bgq-linux" @@ -252,6 +252,7 @@ ; CHECK-NEXT: [[BPTR:%.*]] = bitcast i32* [[PTR:%.*]] to i8* ; CHECK-NEXT: [[BPTR1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 1 ; CHECK-NEXT: [[BPTR2:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 2 +; CHECK-NEXT: [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3 ; CHECK-NEXT: [[WPTR:%.*]] = bitcast i8* [[BPTR]] to i16* ; CHECK-NEXT: [[WPTR1:%.*]] = bitcast i8* [[BPTR1]] to i16* ; CHECK-NEXT: [[WPTR2:%.*]] = bitcast i8* [[BPTR2]] to i16* diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -218,7 +218,7 @@ ; it could unwind define void @test34(i32* noalias %p) { ; CHECK-LABEL: @test34( -; CHECK-NEXT: store i32 1, i32* [[P]] +; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: call void @unknown_func() ; CHECK-NEXT: store i32 0, i32* [[P]] ; CHECK-NEXT: ret void @@ -229,12 +229,10 @@ ret void } -; TODO ; Remove redundant store even with an unwinding function in the same block define void @test35(i32* noalias %p) { ; CHECK-LABEL: @test35( ; CHECK-NEXT: call void @unknown_func() -; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: store i32 0, i32* [[P:%.*]] ; CHECK-NEXT: ret void ; @@ -384,6 +382,14 @@ ; NOCHECK-NEXT: call void @unknown_func() ; NOCHECK-NEXT: call void @free(i8* [[P2]]) ; NOCHECK-NEXT: ret void +; +; CHECK-LABEL: @test41( +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: store i32 1, i32* [[P]] +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: store i32 2, i32* [[P]] +; CHECK-NEXT: call void @free(i8* [[P2]]) +; CHECK-NEXT: ret void ; %P2 = bitcast i32* %P to i8* store i32 1, i32* %P @@ -400,6 +406,13 @@ ; NOCHECK-NEXT: store i32 2, i32* [[Q:%.*]] ; NOCHECK-NEXT: store i8 3, i8* [[P2]] ; NOCHECK-NEXT: ret void +; +; CHECK-LABEL: @test42( +; CHECK-NEXT: store i32 1, i32* [[P:%.*]] +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* +; CHECK-NEXT: store i32 2, i32* [[Q:%.*]] +; CHECK-NEXT: store i8 3, i8* [[P2]] +; CHECK-NEXT: ret void ; store i32 1, i32* %P %P2 = bitcast i32* %P to i8* @@ -415,6 +428,13 @@ ; NOCHECK-NEXT: store atomic i32 2, i32* [[Q:%.*]] unordered, align 4 ; NOCHECK-NEXT: store atomic i8 3, i8* [[P2]] unordered, align 4 ; NOCHECK-NEXT: ret void +; +; CHECK-LABEL: @test42a( +; CHECK-NEXT: store atomic i32 1, i32* [[P:%.*]] unordered, align 4 +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* +; CHECK-NEXT: store atomic i32 2, i32* [[Q:%.*]] unordered, align 4 +; CHECK-NEXT: store atomic i8 3, i8* [[P2]] unordered, align 4 +; CHECK-NEXT: ret void ; store atomic i32 1, i32* %P unordered, align 4 %P2 = bitcast i32* %P to i8*