diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1308,7 +1308,8 @@ MemoryAccess *Current = StartAccess; Instruction *KillingI = KillingDef->getMemoryInst(); bool StepAgain; - LLVM_DEBUG(dbgs() << " trying to get dominating access\n"); + LLVM_DEBUG(dbgs() << " trying to get dominating access for " + << *StartAccess << " and " << *DefLoc.Ptr << "\n"); // Find the next clobbering Mod access for DefLoc, starting at StartAccess. Optional CurrentLoc; @@ -1890,21 +1891,28 @@ unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit; unsigned PartialLimit = MemorySSAPartialStoreLimit; // Worklist of MemoryAccesses that may be killed by KillingDef. - SetVector ToCheck; + SmallPtrSet SeenAccesses; + SmallVector, 32> ToCheck; - if (SILocUnd) - ToCheck.insert(KillingDef->getDefiningAccess()); + if (SILocUnd) { + ToCheck.emplace_back(KillingDef->getDefiningAccess(), SILoc); + } bool Shortend = false; bool IsMemTerm = State.isMemTerminatorInst(SI); // Check if MemoryAccesses in the worklist are killed by KillingDef. for (unsigned I = 0; I < ToCheck.size(); I++) { - Current = ToCheck[I]; + MemoryAccess *Current; + MemoryLocation CurrentLoc; + std::tie(Current, CurrentLoc) = ToCheck[I]; + if (!SeenAccesses.insert(Current).second) + continue; if (State.SkipStores.count(Current)) continue; + const Value *SILocUnd = getUnderlyingObject(CurrentLoc.Ptr); Optional Next = State.getDomMemoryDef( - KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit, + KillingDef, Current, CurrentLoc, SILocUnd, ScanLimit, WalkerStepLimit, IsMemTerm, PartialLimit); if (!Next) { @@ -1916,24 +1924,38 @@ LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess); if (isa(EarlierAccess)) { LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n"); + BasicBlock *PhiBlock = EarlierAccess->getBlock(); + SmallPtrSet Predecessors(pred_begin(PhiBlock), + pred_end(PhiBlock)); for (Value *V : cast(EarlierAccess)->incoming_values()) { MemoryAccess *IncomingAccess = cast(V); BasicBlock *IncomingBlock = IncomingAccess->getBlock(); - BasicBlock *PhiBlock = EarlierAccess->getBlock(); // We only consider incoming MemoryAccesses that come before the // MemoryPhi. Otherwise we could discover candidates that do not // strictly dominate our starting def. if (State.PostOrderNumbers[IncomingBlock] > - State.PostOrderNumbers[PhiBlock]) - ToCheck.insert(IncomingAccess); + State.PostOrderNumbers[PhiBlock]) { + auto NewLoc = CurrentLoc; + PHITransAddr Addr(const_cast(CurrentLoc.Ptr), State.DL, + nullptr); + if (Addr.NeedsPHITranslationFromBlock(PhiBlock) && + Predecessors.count(IncomingBlock)) { + if (Addr.IsPotentiallyPHITranslatable() && + !Addr.PHITranslateValue(PhiBlock, IncomingBlock, &DT, + false) && + Addr.getAddr()) + NewLoc = CurrentLoc.getWithNewPtr(Addr.getAddr()); + } + ToCheck.emplace_back(IncomingAccess, NewLoc); + } } continue; } auto *NextDef = cast(EarlierAccess); Instruction *NI = NextDef->getMemoryInst(); LLVM_DEBUG(dbgs() << " (" << *NI << ")\n"); - ToCheck.insert(NextDef->getDefiningAccess()); + ToCheck.emplace_back(NextDef->getDefiningAccess(), CurrentLoc); NumGetDomMemoryDefPassed++; if (!DebugCounter::shouldExecute(MemorySSACounter)) @@ -1954,15 +1976,15 @@ // Check if NI overwrites SI. int64_t InstWriteOffset, DepWriteOffset; OverwriteResult OR = - isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset, - InstWriteOffset, State.BatchAA, &F); + isOverwrite(SI, NI, CurrentLoc, NILoc, State.DL, TLI, + DepWriteOffset, InstWriteOffset, State.BatchAA, &F); if (OR == OW_MaybePartial) { auto Iter = State.IOLs.insert( std::make_pair( NI->getParent(), InstOverlapIntervalsTy())); auto &IOL = Iter.first->second; - OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset, - NI, IOL); + OR = isPartialOverwrite(CurrentLoc, NILoc, DepWriteOffset, + InstWriteOffset, NI, IOL); } if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) { diff --git a/llvm/test/Transforms/DeadStoreElimination/phi-translation.ll b/llvm/test/Transforms/DeadStoreElimination/phi-translation.ll --- a/llvm/test/Transforms/DeadStoreElimination/phi-translation.ll +++ b/llvm/test/Transforms/DeadStoreElimination/phi-translation.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -dse -S %s | FileCheck %s -; TODO: Both the stores in %then and %else can be eliminated by translating %p +; Both the stores in %then and %else can be eliminated by translating %p ; through the phi. define void @memoryphi_translate_1(i1 %c) { ; CHECK-LABEL: @memoryphi_translate_1( @@ -10,10 +10,8 @@ ; CHECK-NEXT: [[A_2:%.*]] = alloca i8, align 1 ; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: store i8 0, i8* [[A_1]], align 1 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: -; CHECK-NEXT: store i8 9, i8* [[A_2]], align 1 ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[P:%.*]] = phi i8* [ [[A_1]], [[THEN]] ], [ [[A_2]], [[ELSE]] ] @@ -39,7 +37,7 @@ ret void } -; TODO: The store in %else can be eliminated by translating %p through the phi. +; The store in %else can be eliminated by translating %p through the phi. ; The store in %then cannot be eliminated, because %a.1 is read before the final ; store. define i8 @memoryphi_translate_2(i1 %c) { @@ -52,7 +50,6 @@ ; CHECK-NEXT: store i8 0, i8* [[A_1]], align 1 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: -; CHECK-NEXT: store i8 9, i8* [[A_2]], align 1 ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[P:%.*]] = phi i8* [ [[A_1]], [[THEN]] ], [ [[A_2]], [[ELSE]] ] @@ -80,7 +77,7 @@ ret i8 %l } -; TODO: The store in %then can be eliminated by translating %p through the phi. +; The store in %then can be eliminated by translating %p through the phi. ; The store in %else cannot be eliminated, because %a.2 is read before the final ; store. define i8 @memoryphi_translate_3(i1 %c) { @@ -90,7 +87,6 @@ ; CHECK-NEXT: [[A_2:%.*]] = alloca i8, align 1 ; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: store i8 0, i8* [[A_1]], align 1 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: ; CHECK-NEXT: store i8 9, i8* [[A_2]], align 1 @@ -160,14 +156,13 @@ ret i8 %l } -; TODO: The store in %entry can be removed by translating %p through the phi. +; The store in %entry can be removed by translating %p through the phi. define void @memoryphi_translate_5(i1 %cond) { ; CHECK-LABEL: @memoryphi_translate_5( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[B:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[C:%.*]] = alloca i8, align 1 -; CHECK-NEXT: store i8 0, i8* [[A]], align 1 ; CHECK-NEXT: br i1 [[COND:%.*]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]] ; CHECK: cond.true: ; CHECK-NEXT: store i8 0, i8* [[C]], align 1 @@ -241,8 +236,6 @@ ; CHECK: else: ; CHECK-NEXT: call void @fn() ; CHECK-NEXT: [[BC:%.*]] = bitcast i8* undef to i16* -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[BC]], i64 2 -; CHECK-NEXT: store i16 8, i16* [[GEP_1]], align 2 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[P:%.*]] = phi i16* [ [[PTR:%.*]], [[THEN]] ], [ [[BC]], [[ELSE]] ]