diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -95,6 +95,7 @@ STATISTIC(NumRemainingStores, "Number of stores remaining after DSE"); STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); STATISTIC(NumFastStores, "Number of stores deleted"); +STATISTIC(NumSunkStores, "Number of sunk stores"); STATISTIC(NumFastOther, "Number of other instrs removed"); STATISTIC(NumCompletePartials, "Number of stores dead by later partials"); STATISTIC(NumModifiedStores, "Number of stores modified"); @@ -230,7 +231,18 @@ OW_Unknown }; -} // end anonymous namespace +// Represent a dead access candidate that we can delete or sink it. +struct DeadAccessCandidate { + MemoryAccess *MaybeDeadAccess; + + // Only successor that MaybeDeadAccess live in. nullptr if + // MaybeDeadAccess is post-dominated by its KillingDefs. + BasicBlock *AliveSucc; + + bool hasAliveSucc() { return AliveSucc != nullptr; } +}; + +} // end anonymous namespace. /// Check if two instruction are masked stores that completely /// overwrite one another. More specifically, \p KillingI has to @@ -782,6 +794,10 @@ /// Keep track of instructions (partly) overlapping with killing MemoryDefs per /// basic block. MapVector IOLs; + + /// Keep track of instructions that is only alive in one successor. + MapVector SinkCandidates; + // Check if there are root nodes that are terminated by UnreachableInst. // Those roots pessimize post-dominance queries. If there are such roots, // fall back to CFG scan starting from all non-unreachable roots. @@ -1228,7 +1244,7 @@ // there is no such MemoryDef, return None. The returned value may not // (completely) overwrite \p KillingLoc. Currently we bail out when we // encounter an aliasing MemoryUse (read). - Optional + Optional getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess, const MemoryLocation &KillingLoc, const Value *KillingUndObj, unsigned &ScanLimit, unsigned &WalkerStepLimit, @@ -1286,7 +1302,7 @@ // caller is responsible for traversing them. if (isa(Current)) { LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n"); - return Current; + return {{Current, nullptr}}; } // Below, check if CurrentDef is a valid candidate to be eliminated by @@ -1546,9 +1562,33 @@ // there is a path from MaybeDeadAccess to an exit not going through a // killing block. if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) { - if (!AnyUnreachableExit) - return None; + if (!AnyUnreachableExit) { + if (IsMemTerm) + return None; + + // It does not make sense to sink if has only one successor. + if (succ_size(MaybeDeadAccess->getBlock()) == 1) + return None; + + BasicBlock *SuccToSinkTo = nullptr; + for (auto *Succ : successors(MaybeDeadAccess->getBlock())) { + if (all_of(KillingDefs, [Succ](Instruction *KillingDef) { + return KillingDef->getParent() != Succ; + })) { + if (SuccToSinkTo) + return None; + + // If the successor to sink has other predecessors, then sink + // to it will cause mis-compilation. + if (pred_size(Succ) > 1) + return None; + + SuccToSinkTo = Succ; + } + } + return {{MaybeDeadAccess, SuccToSinkTo}}; + } // Fall back to CFG scan starting at all non-unreachable roots if not // all paths to the exit go through CommonPred. CommonPred = nullptr; @@ -1556,7 +1596,7 @@ // If CommonPred itself is in the set of killing blocks, we're done. if (KillingBlocks.count(CommonPred)) - return {MaybeDeadAccess}; + return {{MaybeDeadAccess, nullptr}}; SetVector WorkList; // If CommonPred is null, there are multiple exits from the function. @@ -1596,7 +1636,30 @@ // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is // potentially dead. - return {MaybeDeadAccess}; + return {{MaybeDeadAccess, nullptr}}; + } + + bool sinkInstructions() { + bool MadeChange = false; + MemorySSAUpdater Updater(&MSSA); + for (auto Candidate : SinkCandidates) { + Instruction *ToSink = Candidate.first; + BasicBlock *SuccToSinkTo = Candidate.second; + MemoryAccess *DeadAccess = MSSA.getMemoryAccess(ToSink); + assert(!isa(DeadAccess) && "Can't sink MemoryPhi"); + MemoryAccess *DeadAccessDefining = + cast(DeadAccess)->getDefiningAccess(); + + if (auto *Def = dyn_cast(DeadAccess)) + SkipStores.insert(Def); + Updater.removeMemoryAccess(DeadAccess); + ToSink->moveBefore(SuccToSinkTo->getFirstNonPHI()); + Updater.createMemoryAccessInBB(ToSink, DeadAccessDefining, + ToSink->getParent(), MemorySSA::Beginning); + ++NumSunkStores; + MadeChange = true; + } + return MadeChange; } // Delete dead memory defs @@ -1634,6 +1697,11 @@ auto I = IOLs.find(DeadInst->getParent()); if (I != IOLs.end()) I->second.erase(DeadInst); + + // Deleting is more beneficial than sinking. + if (SinkCandidates.count(DeadInst)) + SinkCandidates.erase(DeadInst); + // Remove its operands for (Use &O : DeadInst->operands()) if (Instruction *OpI = dyn_cast(O)) { @@ -2013,16 +2081,16 @@ if (State.SkipStores.count(Current)) continue; - Optional MaybeDeadAccess = State.getDomMemoryDef( + Optional DeadCandidate = State.getDomMemoryDef( KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit, WalkerStepLimit, IsMemTerm, PartialLimit); - if (!MaybeDeadAccess) { + if (!DeadCandidate) { LLVM_DEBUG(dbgs() << " finished walk\n"); continue; } - MemoryAccess *DeadAccess = *MaybeDeadAccess; + MemoryAccess *DeadAccess = DeadCandidate->MaybeDeadAccess; LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess); if (isa(DeadAccess)) { LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n"); @@ -2066,48 +2134,57 @@ int64_t DeadOffset = 0; OverwriteResult OR = State.isOverwrite( KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset); - if (OR == OW_MaybePartial) { - auto Iter = State.IOLs.insert( - std::make_pair( - DeadI->getParent(), InstOverlapIntervalsTy())); - auto &IOL = Iter.first->second; - OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset, - DeadOffset, DeadI, IOL); - } + if (!DeadCandidate->hasAliveSucc()) { + if (OR == OW_MaybePartial) { + auto Iter = State.IOLs.insert( + std::make_pair( + DeadI->getParent(), InstOverlapIntervalsTy())); + auto &IOL = Iter.first->second; + OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset, + DeadOffset, DeadI, IOL); + } - if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) { - auto *DeadSI = dyn_cast(DeadI); - auto *KillingSI = dyn_cast(KillingI); - // We are re-using tryToMergePartialOverlappingStores, which requires - // DeadSI to dominate DeadSI. - // TODO: implement tryToMergeParialOverlappingStores using MemorySSA. - if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) { - if (Constant *Merged = tryToMergePartialOverlappingStores( - KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL, - State.BatchAA, &DT)) { - - // Update stored value of earlier store to merged constant. - DeadSI->setOperand(0, Merged); - ++NumModifiedStores; - MadeChange = true; - - Shortend = true; - // Remove killing store and remove any outstanding overlap - // intervals for the updated store. - State.deleteDeadInstruction(KillingSI); - auto I = State.IOLs.find(DeadSI->getParent()); - if (I != State.IOLs.end()) - I->second.erase(DeadSI); - break; + if (EnablePartialStoreMerging && + OR == OW_PartialEarlierWithFullLater) { + auto *DeadSI = dyn_cast(DeadI); + auto *KillingSI = dyn_cast(KillingI); + // We are re-using tryToMergePartialOverlappingStores, which + // requires DeadSI to dominate DeadSI. + // TODO: implement tryToMergeParialOverlappingStores using + // MemorySSA. + if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) { + if (Constant *Merged = tryToMergePartialOverlappingStores( + KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL, + State.BatchAA, &DT)) { + + // Update stored value of earlier store to merged constant. + DeadSI->setOperand(0, Merged); + ++NumModifiedStores; + MadeChange = true; + + Shortend = true; + // Remove killing store and remove any outstanding overlap + // intervals for the updated store. + State.deleteDeadInstruction(KillingSI); + auto I = State.IOLs.find(DeadSI->getParent()); + if (I != State.IOLs.end()) + I->second.erase(DeadSI); + break; + } } } - } - if (OR == OW_Complete) { - LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI - << "\n KILLER: " << *KillingI << '\n'); - State.deleteDeadInstruction(DeadI); - ++NumFastStores; + if (OR == OW_Complete) { + LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI + << "\n KILLER: " << *KillingI << '\n'); + State.deleteDeadInstruction(DeadI); + ++NumFastStores; + MadeChange = true; + } + } else if (OR == OW_Complete) { + // The MaybeDeadAccess is live in only one successor, in which case, + // we can sink the instruction of MaybeDeadAccess to this successor. + State.SinkCandidates.insert({DeadI, DeadCandidate->AliveSucc}); MadeChange = true; } } @@ -2139,6 +2216,7 @@ MadeChange |= State.eliminateRedundantStoresOfExistingValues(); MadeChange |= State.eliminateDeadWritesAtEndOfFunction(); + MadeChange |= State.sinkInstructions(); return MadeChange; } } // end anonymous namespace diff --git a/llvm/test/Transforms/DeadStoreElimination/multiblock-multipath.ll b/llvm/test/Transforms/DeadStoreElimination/multiblock-multipath.ll --- a/llvm/test/Transforms/DeadStoreElimination/multiblock-multipath.ll +++ b/llvm/test/Transforms/DeadStoreElimination/multiblock-multipath.ll @@ -109,13 +109,13 @@ ; entry->bb2->bb5. define void @accessible_after_return_4(ptr noalias %P, i1 %c1) { ; CHECK-LABEL: @accessible_after_return_4( -; CHECK-NEXT: store i32 1, ptr [[P:%.*]], align 4 ; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 0, ptr [[P]], align 4 +; CHECK-NEXT: store i32 0, ptr [[P:%.*]], align 4 ; CHECK-NEXT: call void @use(ptr [[P]]) ; CHECK-NEXT: br label [[BB5:%.*]] ; CHECK: bb2: +; CHECK-NEXT: store i32 1, ptr [[P]], align 4 ; CHECK-NEXT: br label [[BB5]] ; CHECK: bb5: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/DeadStoreElimination/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/multiblock-simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/multiblock-simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/multiblock-simple.ll @@ -28,9 +28,9 @@ define void @test3(ptr noalias %P) { ; CHECK-LABEL: @test3( -; CHECK-NEXT: store i32 0, ptr [[P:%.*]], align 4 ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: +; CHECK-NEXT: store i32 0, ptr [[P:%.*]], align 4 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: store i32 1, ptr [[P]], align 4 @@ -148,12 +148,12 @@ define void @test10(ptr %P) { ; CHECK-LABEL: @test10( -; CHECK-NEXT: store i32 0, ptr [[P:%.*]], align 4 ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 1, ptr [[P]], align 4 +; CHECK-NEXT: store i32 1, ptr [[P:%.*]], align 4 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: +; CHECK-NEXT: store i32 0, ptr [[P]], align 4 ; CHECK-NEXT: ret void ; CHECK: bb3: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/DeadStoreElimination/multiblock-sink.ll b/llvm/test/Transforms/DeadStoreElimination/multiblock-sink.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/multiblock-sink.ll @@ -0,0 +1,130 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -dse -S | FileCheck %s + +define void @dont_sink_if_has_only_one_succ(ptr noalias %P) { +; CHECK-LABEL: @dont_sink_if_has_only_one_succ( +; CHECK-NEXT: store i32 0, ptr [[P:%.*]], align 4 +; CHECK-NEXT: br label [[BB0:%.*]] +; CHECK: bb0: +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i32 1, ptr [[P]], align 4 +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; + store i32 0, ptr %P + br label %bb0 +bb0: + br i1 true, label %bb1, label %bb2 + +bb1: + br label %bb3 + +bb2: + store i32 1, ptr %P + br label %bb3 + +bb3: + ret void +} + +define void @shouldnt_sink_since_theres_a_full_overlap(ptr %ptr, i1 %c) { +; CHECK-LABEL: @shouldnt_sink_since_theres_a_full_overlap( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BPTRM1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 -1 +; CHECK-NEXT: [[BPTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3 +; CHECK-NEXT: store i32 1234, ptr [[BPTRM1]], align 1 +; CHECK-NEXT: store i64 5678, ptr [[BPTR3]], align 1 +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i64 1, ptr [[PTR]], align 4 +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; +entry: + store i64 0, ptr %ptr + %bptrm1 = getelementptr inbounds i8, ptr %ptr, i64 -1 + %bptr3 = getelementptr inbounds i8, ptr %ptr, i64 3 + store i32 1234, ptr %bptrm1, align 1 + store i64 5678, ptr %bptr3, align 1 + br i1 %c, label %bb1, label %bb2 + +bb1: + br label %bb3 + +bb2: + store i64 1, ptr %ptr + br label %bb3 + +bb3: + ret void +} + +define void @dont_sink_if_succ_to_sink_will_reach_other_succs(ptr noalias %P) { +; CHECK-LABEL: @dont_sink_if_succ_to_sink_will_reach_other_succs( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 0, ptr [[P:%.*]], align 4 +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i32 1, ptr [[P]], align 4 +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: ret void +; +entry: + store i32 0, ptr %P + br i1 true, label %bb1, label %bb2 + +bb1: + store i32 1, ptr %P + br label %bb2 + +bb2: + ret void +} + +define i32 @successor_of_each_other(i1 %c, ptr %p) { +; CHECK-LABEL: @successor_of_each_other( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P:%.*]], align 4 +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB0:%.*]], label [[BB0_0:%.*]] +; CHECK: bb0: +; CHECK-NEXT: br i1 [[C]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb0.0: +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: store i32 [[V]], ptr [[P]], align 4 +; CHECK-NEXT: br i1 [[C]], label [[BB3:%.*]], label [[BB0]] +; CHECK: bb2: +; CHECK-NEXT: store i32 0, ptr [[P]], align 4 +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 0 +; +entry: + %v = load i32, ptr %p, align 4 + br i1 %c, label %bb0, label %bb0.0 + +bb0: + store i32 0, ptr %p + br i1 %c, label %bb1, label %bb2 + +bb0.0: + br label %bb1 + +bb1: + store i32 %v, ptr %p, align 4 + br i1 %c, label %bb3, label %bb0 + +bb2: + br label %bb3 + +bb3: + ret i32 0 +} diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/mul-ov.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/mul-ov.ll --- a/llvm/test/Transforms/PhaseOrdering/AArch64/mul-ov.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/mul-ov.ll @@ -8,7 +8,6 @@ ; CHECK-LABEL: @__muloti4( ; CHECK-NEXT: Entry: ; CHECK-NEXT: [[DOTFR:%.*]] = freeze i128 [[TMP1:%.*]] -; CHECK-NEXT: store i32 0, i32* [[TMP2:%.*]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 [[TMP0:%.*]], i128 [[DOTFR]]) ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i128 [[TMP0]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i128 [[DOTFR]], -170141183460469231731687303715884105728 @@ -18,9 +17,10 @@ ; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i128, i1 } [[MUL]], 1 ; CHECK-NEXT: br i1 [[MUL_OV]], label [[THEN7]], label [[BLOCK9:%.*]] ; CHECK: Then7: -; CHECK-NEXT: store i32 1, i32* [[TMP2]], align 4 ; CHECK-NEXT: br label [[BLOCK9]] ; CHECK: Block9: +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ 1, [[THEN7]] ], [ 0, [[ELSE2]] ] +; CHECK-NEXT: store i32 [[STOREMERGE]], i32* [[TMP2:%.*]], align 4 ; CHECK-NEXT: [[MUL_VAL:%.*]] = extractvalue { i128, i1 } [[MUL]], 0 ; CHECK-NEXT: ret i128 [[MUL_VAL]] ;