Index: llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -40,10 +40,12 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -355,125 +357,6 @@
   return OW_Complete;
 }
 
-/// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI
-/// instruction) completely overwrites a store to the 'Earlier' location
-/// (by \p EarlierI instruction).
-/// Return OW_MaybePartial if \p Later does not completely overwrite
-/// \p Earlier, but they both write to the same underlying object. In that
-/// case, use isPartialOverwrite to check if \p Later partially overwrites
-/// \p Earlier. Returns 'OW_Unknown' if nothing can be determined.
-static OverwriteResult
-isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
-            const MemoryLocation &Later, const MemoryLocation &Earlier,
-            const DataLayout &DL, const TargetLibraryInfo &TLI,
-            int64_t &EarlierOff, int64_t &LaterOff, BatchAAResults &AA,
-            const Function *F) {
-  // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
-  // get imprecise values here, though (except for unknown sizes).
-  if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
-    // In case no constant size is known, try to compare the IR values for the
-    // number of bytes written and check if they match.
-    const auto *LaterMemI = dyn_cast<AnyMemIntrinsic>(LaterI);
-    const auto *EarlierMemI = dyn_cast<AnyMemIntrinsic>(EarlierI);
-    if (LaterMemI && EarlierMemI) {
-      const Value *LaterV = LaterMemI->getLength();
-      const Value *EarlierV = EarlierMemI->getLength();
-      if (LaterV == EarlierV && AA.isMustAlias(Earlier, Later))
-        return OW_Complete;
-    }
-
-    // Masked stores have imprecise locations, but we can reason about them
-    // to some extent.
-    return isMaskedStoreOverwrite(LaterI, EarlierI, AA);
-  }
-
-  const uint64_t LaterSize = Later.Size.getValue();
-  const uint64_t EarlierSize = Earlier.Size.getValue();
-
-  // Query the alias information
-  AliasResult AAR = AA.alias(Later, Earlier);
-
-  // If the start pointers are the same, we just have to compare sizes to see if
-  // the later store was larger than the earlier store.
-  if (AAR == AliasResult::MustAlias) {
-    // Make sure that the Later size is >= the Earlier size.
-    if (LaterSize >= EarlierSize)
-      return OW_Complete;
-  }
-
-  // If we hit a partial alias we may have a full overwrite
-  if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
-    int32_t Off = AAR.getOffset();
-    if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize)
-      return OW_Complete;
-  }
-
-  // Check to see if the later store is to the entire object (either a global,
-  // an alloca, or a byval/inalloca argument). If so, then it clearly
-  // overwrites any other store to the same object.
-  const Value *P1 = Earlier.Ptr->stripPointerCasts();
-  const Value *P2 = Later.Ptr->stripPointerCasts();
-  const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
-
-  // If we can't resolve the same pointers to the same object, then we can't
-  // analyze them at all.
-  if (UO1 != UO2)
-    return OW_Unknown;
-
-  // If the "Later" store is to a recognizable object, get its size.
-  uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, F);
-  if (ObjectSize != MemoryLocation::UnknownSize)
-    if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
-      return OW_Complete;
-
-  // Okay, we have stores to two completely different pointers. Try to
-  // decompose the pointer into a "base + constant_offset" form. If the base
-  // pointers are equal, then we can reason about the two stores.
-  EarlierOff = 0;
-  LaterOff = 0;
-  const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
-  const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
-
-  // If the base pointers still differ, we have two completely different stores.
-  if (BP1 != BP2)
-    return OW_Unknown;
-
-  // The later access completely overlaps the earlier store if and only if
-  // both start and end of the earlier one is "inside" the later one:
-  //    |<->|--earlier--|<->|
-  //    |-------later-------|
-  // Accesses may overlap if and only if start of one of them is "inside"
-  // another one:
-  //    |<->|--earlier--|<----->|
-  //    |-------later-------|
-  //           OR
-  //    |----- earlier -----|
-  //    |<->|---later---|<----->|
-  //
-  // We have to be careful here as *Off is signed while *.Size is unsigned.
-
-  // Check if the earlier access starts "not before" the later one.
-  if (EarlierOff >= LaterOff) {
-    // If the earlier access ends "not after" the later access then the earlier
-    // one is completely overwritten by the later one.
-    if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
-      return OW_Complete;
-    // If start of the earlier access is "before" end of the later access then
-    // accesses overlap.
-    else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
-      return OW_MaybePartial;
-  }
-  // If start of the later access is "before" end of the earlier access then
-  // accesses overlap.
-  else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
-    return OW_MaybePartial;
-  }
-
-  // Can reach here only if accesses are known not to overlap. There is no
-  // dedicated code to indicate no overlap so signal "unknown".
-  return OW_Unknown;
-}
-
 /// Return 'OW_Complete' if a store to the 'Later' location completely
 /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
 /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
@@ -972,6 +855,11 @@
   PostDominatorTree &PDT;
   const TargetLibraryInfo &TLI;
   const DataLayout &DL;
+  const LoopInfo &LI;
+
+  // Whether the function contains any irreducible control flow, useful for
+  // accurately detecting loops.
+  bool ContainsIrreducibleLoops;
 
   // All MemoryDefs that potentially could kill other MemDefs.
   SmallVector<MemoryDef *, 64> MemDefs;
@@ -995,14 +883,15 @@
   DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
 
   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
-           PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
+           PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
+           const LoopInfo &LI)
       : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
-        DL(F.getParent()->getDataLayout()) {}
+        DL(F.getParent()->getDataLayout()), LI(LI) {}
 
   static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                       DominatorTree &DT, PostDominatorTree &PDT,
-                      const TargetLibraryInfo &TLI) {
-    DSEState State(F, AA, MSSA, DT, PDT, TLI);
+                      const TargetLibraryInfo &TLI, const LoopInfo &LI) {
+    DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
     // Collect blocks with throwing instructions not modeled in MemorySSA and
     // alloc-like objects.
     unsigned PO = 0;
@@ -1030,9 +919,135 @@
         State.InvisibleToCallerAfterRet.insert({&AI, true});
       }
 
+    // Collect whether there is any irreducible control flow in the function.
+    State.ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
+
     return State;
   }
 
+  /// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI
+  /// instruction) completely overwrites a store to the 'Earlier' location
+  /// (by \p EarlierI instruction).
+  /// Return OW_MaybePartial if \p Later does not completely overwrite
+  /// \p Earlier, but they both write to the same underlying object. In that
+  /// case, use isPartialOverwrite to check if \p Later partially overwrites
+  /// \p Earlier. Returns 'OW_Unknown' if nothing can be determined.
+  OverwriteResult
+  isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
+              const MemoryLocation &Later, const MemoryLocation &Earlier,
+              int64_t &EarlierOff, int64_t &LaterOff) {
+    // AliasAnalysis does not account for loops. Limit overwrite checks to
+    // candidates for which we can guarantee they always store to the same
+    // memory location and are not located in different loops.
+    if (!IsGuaranteedLoopInvariant(EarlierI, LaterI, Later))
+      return OW_Unknown;
+
+    // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
+    // get imprecise values here, though (except for unknown sizes).
+    if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
+      // In case no constant size is known, try to compare the IR values for the
+      // number of bytes written and check if they match.
+      const auto *LaterMemI = dyn_cast<AnyMemIntrinsic>(LaterI);
+      const auto *EarlierMemI = dyn_cast<AnyMemIntrinsic>(EarlierI);
+      if (LaterMemI && EarlierMemI) {
+        const Value *LaterV = LaterMemI->getLength();
+        const Value *EarlierV = EarlierMemI->getLength();
+        if (LaterV == EarlierV && AA.isMustAlias(Earlier, Later))
+          return OW_Complete;
+      }
+
+      // Masked stores have imprecise locations, but we can reason about them
+      // to some extent.
+      return isMaskedStoreOverwrite(LaterI, EarlierI, BatchAA);
+    }
+
+    const uint64_t LaterSize = Later.Size.getValue();
+    const uint64_t EarlierSize = Earlier.Size.getValue();
+
+    // Query the alias information
+    AliasResult AAR = BatchAA.alias(Later, Earlier);
+
+    // If the start pointers are the same, we just have to compare sizes to see if
+    // the later store was larger than the earlier store.
+    if (AAR == AliasResult::MustAlias) {
+      // Make sure that the Later size is >= the Earlier size.
+      if (LaterSize >= EarlierSize)
+        return OW_Complete;
+    }
+
+    // If we hit a partial alias we may have a full overwrite
+    if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
+      int32_t Off = AAR.getOffset();
+      if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize)
+        return OW_Complete;
+    }
+
+    // Check to see if the later store is to the entire object (either a global,
+    // an alloca, or a byval/inalloca argument). If so, then it clearly
+    // overwrites any other store to the same object.
+    const Value *P1 = Earlier.Ptr->stripPointerCasts();
+    const Value *P2 = Later.Ptr->stripPointerCasts();
+    const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
+
+    // If we can't resolve the same pointers to the same object, then we can't
+    // analyze them at all.
+    if (UO1 != UO2)
+      return OW_Unknown;
+
+    // If the "Later" store is to a recognizable object, get its size.
+    uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, &F);
+    if (ObjectSize != MemoryLocation::UnknownSize)
+      if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
+        return OW_Complete;
+
+    // Okay, we have stores to two completely different pointers. Try to
+    // decompose the pointer into a "base + constant_offset" form. If the base
+    // pointers are equal, then we can reason about the two stores.
+    EarlierOff = 0;
+    LaterOff = 0;
+    const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
+    const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
+
+    // If the base pointers still differ, we have two completely different stores.
+    if (BP1 != BP2)
+      return OW_Unknown;
+
+    // The later access completely overlaps the earlier store if and only if
+    // both start and end of the earlier one is "inside" the later one:
+    //    |<->|--earlier--|<->|
+    //    |-------later-------|
+    // Accesses may overlap if and only if start of one of them is "inside"
+    // another one:
+    //    |<->|--earlier--|<----->|
+    //    |-------later-------|
+    //           OR
+    //    |----- earlier -----|
+    //    |<->|---later---|<----->|
+    //
+    // We have to be careful here as *Off is signed while *.Size is unsigned.
+
+    // Check if the earlier access starts "not before" the later one.
+    if (EarlierOff >= LaterOff) {
+      // If the earlier access ends "not after" the later access then the earlier
+      // one is completely overwritten by the later one.
+      if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
+        return OW_Complete;
+      // If start of the earlier access is "before" end of the later access then
+      // accesses overlap.
+      else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
+        return OW_MaybePartial;
+    }
+    // If start of the later access is "before" end of the earlier access then
+    // accesses overlap.
+    else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
+      return OW_MaybePartial;
+    }
+
+    // Can reach here only if accesses are known not to overlap. There is no
+    // dedicated code to indicate no overlap so signal "unknown".
+    return OW_Unknown;
+  }
+
   bool isInvisibleToCallerAfterRet(const Value *V) {
     if (isa<AllocaInst>(V))
       return true;
@@ -1120,8 +1135,8 @@
 
     int64_t InstWriteOffset, DepWriteOffset;
     if (auto CC = getLocForWriteEx(UseInst))
-      return isOverwrite(UseInst, DefInst, *CC, DefLoc, DL, TLI, DepWriteOffset,
-                         InstWriteOffset, BatchAA, &F) == OW_Complete;
+      return isOverwrite(UseInst, DefInst, *CC, DefLoc, DepWriteOffset,
+                         InstWriteOffset) == OW_Complete;
     return false;
   }
 
@@ -1224,9 +1239,8 @@
       return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
     }
     int64_t InstWriteOffset, DepWriteOffset;
-    return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DL, TLI,
-                       DepWriteOffset, InstWriteOffset, BatchAA,
-                       &F) == OW_Complete;
+    return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DepWriteOffset,
+                       InstWriteOffset) == OW_Complete;
   }
 
   // Returns true if \p Use may read from \p DefLoc.
@@ -1256,8 +1270,21 @@
   /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
   /// loop. In particular, this guarantees that it only references a single
   /// MemoryLocation during execution of the containing function.
-  bool IsGuaranteedLoopInvariant(Value *Ptr) {
-    auto IsGuaranteedLoopInvariantBase = [this](Value *Ptr) {
+  bool IsGuaranteedLoopInvariant(const Instruction *Current,
+                                 const Instruction *KillingDef,
+                                 MemoryLocation CurrentLoc) {
+    // Limit elimination to candidates for which we can guarantee they always
+    // store to the same memory location and are not located in different
+    // loops. Also be careful with irreducible control flow, which can create
+    // cycles without appearing as loops.
+    if (Current->getParent() == KillingDef->getParent())
+      return true;
+    const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
+    if (!ContainsIrreducibleLoops && CurrentLI &&
+        CurrentLI == LI.getLoopFor(KillingDef->getParent()))
+      return true;
+
+    auto IsGuaranteedLoopInvariantBase = [this](const Value *Ptr) {
       Ptr = Ptr->stripPointerCasts();
       if (auto *I = dyn_cast<Instruction>(Ptr)) {
         if (isa<AllocaInst>(Ptr))
          return true;
@@ -1271,7 +1298,7 @@
       return true;
     };
 
-    Ptr = Ptr->stripPointerCasts();
+    const Value *Ptr = CurrentLoc.Ptr->stripPointerCasts();
     if (auto *I = dyn_cast<Instruction>(Ptr)) {
       if (I->getParent() == &I->getFunction()->getEntryBlock()) {
         return true;
      }
@@ -1402,9 +1429,9 @@
 
         // AliasAnalysis does not account for loops. Limit elimination to
         // candidates for which we can guarantee they always store to the same
-        // memory location and not multiple locations in a loop.
-        if (Current->getBlock() != KillingDef->getBlock() &&
-            !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
+        // memory location and are not located in different loops.
+        if (!IsGuaranteedLoopInvariant(CurrentI, KillingI, *CurrentLoc)) {
+          LLVM_DEBUG(dbgs() << "  ... not guaranteed loop invariant\n");
           StepAgain = true;
           Current = CurrentDef->getDefiningAccess();
           WalkerStepLimit -= 1;
@@ -1422,8 +1449,8 @@
           continue;
         } else {
           int64_t InstWriteOffset, DepWriteOffset;
-          auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
-                                DepWriteOffset, InstWriteOffset, BatchAA, &F);
+          auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc,
+                                DepWriteOffset, InstWriteOffset);
 
           // If Current does not write to the same object as KillingDef, check
           // the next candidate.
          if (OR == OW_Unknown) {
@@ -1840,12 +1867,13 @@
   }
 };
 
-bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
-                         DominatorTree &DT, PostDominatorTree &PDT,
-                         const TargetLibraryInfo &TLI) {
+static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
+                                DominatorTree &DT, PostDominatorTree &PDT,
+                                const TargetLibraryInfo &TLI,
+                                const LoopInfo &LI) {
   bool MadeChange = false;
 
-  DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI);
+  DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, LI);
   // For each store:
   for (unsigned I = 0; I < State.MemDefs.size(); I++) {
     MemoryDef *KillingDef = State.MemDefs[I];
@@ -1940,9 +1968,8 @@
         } else {
           // Check if NI overwrites SI.
           int64_t InstWriteOffset, DepWriteOffset;
-          OverwriteResult OR =
-              isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset,
-                          InstWriteOffset, State.BatchAA, &F);
+          OverwriteResult OR = State.isOverwrite(SI, NI, SILoc, NILoc,
+                                                 DepWriteOffset, InstWriteOffset);
           if (OR == OW_MaybePartial) {
             auto Iter = State.IOLs.insert(
                 std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
@@ -2019,8 +2046,9 @@
   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
   PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
 
-  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI);
+  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
 
 #ifdef LLVM_ENABLE_STATS
   if (AreStatisticsEnabled())
@@ -2035,6 +2063,7 @@
   PA.preserveSet<CFGAnalyses>();
   PA.preserve<GlobalsAA>();
   PA.preserve<MemorySSAAnalysis>();
+  PA.preserve<LoopAnalysis>();
   return PA;
 }
 
@@ -2060,8 +2089,9 @@
     MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
     PostDominatorTree &PDT =
        getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
 
-    bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI);
+    bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
 
 #ifdef LLVM_ENABLE_STATS
     if (AreStatisticsEnabled())
@@ -2083,6 +2113,8 @@
     AU.addRequired<MemorySSAWrapperPass>();
     AU.addPreserved<PostDominatorTreeWrapperPass>();
     AU.addPreserved<MemorySSAWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addPreserved<LoopInfoWrapperPass>();
   }
 };
 
@@ -2099,6 +2131,7 @@
 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
                     false)
Index: llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
+++ llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
@@ -515,8 +515,8 @@
 ; GCN-O2-NEXT: Function Alias Analysis Results
 ; GCN-O2-NEXT: Memory SSA
 ; GCN-O2-NEXT: MemCpy Optimization
-; GCN-O2-NEXT: Dead Store Elimination
 ; GCN-O2-NEXT: Natural Loop Information
+; GCN-O2-NEXT: Dead Store Elimination
 ; GCN-O2-NEXT: Canonicalize natural loops
 ; GCN-O2-NEXT: LCSSA Verifier
 ; GCN-O2-NEXT: Loop-Closed SSA Form Pass
@@ -874,8 +874,8 @@
 ; GCN-O3-NEXT: Function Alias Analysis Results
 ; GCN-O3-NEXT: Memory SSA
 ; GCN-O3-NEXT: MemCpy Optimization
-; GCN-O3-NEXT: Dead Store Elimination
 ; GCN-O3-NEXT: Natural Loop Information
+; GCN-O3-NEXT: Dead Store Elimination
 ; GCN-O3-NEXT: Canonicalize natural loops
 ; GCN-O3-NEXT: LCSSA Verifier
 ; GCN-O3-NEXT: Loop-Closed SSA Form Pass
Index: llvm/test/Other/opt-O2-pipeline.ll
===================================================================
--- llvm/test/Other/opt-O2-pipeline.ll
+++ llvm/test/Other/opt-O2-pipeline.ll
@@ -162,8 +162,8 @@
 ; CHECK-NEXT: Function Alias Analysis Results
 ; CHECK-NEXT: Memory SSA
 ; CHECK-NEXT: MemCpy Optimization
-; CHECK-NEXT: Dead Store Elimination
 ; CHECK-NEXT: Natural Loop Information
+; CHECK-NEXT: Dead Store Elimination
 ; CHECK-NEXT: Canonicalize natural loops
 ; CHECK-NEXT: LCSSA Verifier
 ; CHECK-NEXT: Loop-Closed SSA Form Pass
Index: llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
===================================================================
--- llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
+++ llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
@@ -167,8 +167,8 @@
 ; CHECK-NEXT: Function Alias Analysis Results
 ; CHECK-NEXT: Memory SSA
 ; CHECK-NEXT: MemCpy Optimization
-; CHECK-NEXT: Dead Store Elimination
 ; CHECK-NEXT: Natural Loop Information
+; CHECK-NEXT: Dead Store Elimination
 ; CHECK-NEXT: Canonicalize natural loops
 ; CHECK-NEXT: LCSSA Verifier
 ; CHECK-NEXT: Loop-Closed SSA Form Pass
Index: llvm/test/Other/opt-O3-pipeline.ll
===================================================================
--- llvm/test/Other/opt-O3-pipeline.ll
+++ llvm/test/Other/opt-O3-pipeline.ll
@@ -168,8 +168,8 @@
 ; CHECK-NEXT: Function Alias Analysis Results
 ; CHECK-NEXT: Memory SSA
 ; CHECK-NEXT: MemCpy Optimization
-; CHECK-NEXT: Dead Store Elimination
 ; CHECK-NEXT: Natural Loop Information
+; CHECK-NEXT: Dead Store Elimination
 ; CHECK-NEXT: Canonicalize natural loops
 ; CHECK-NEXT: LCSSA Verifier
 ; CHECK-NEXT: Loop-Closed SSA Form Pass
Index: llvm/test/Other/opt-Os-pipeline.ll
===================================================================
--- llvm/test/Other/opt-Os-pipeline.ll
+++ llvm/test/Other/opt-Os-pipeline.ll
@@ -148,8 +148,8 @@
 ; CHECK-NEXT: Function Alias Analysis Results
 ; CHECK-NEXT: Memory SSA
 ; CHECK-NEXT: MemCpy Optimization
-; CHECK-NEXT: Dead Store Elimination
 ; CHECK-NEXT: Natural Loop Information
+; CHECK-NEXT: Dead Store Elimination
 ; CHECK-NEXT: Canonicalize natural loops
 ; CHECK-NEXT: LCSSA Verifier
 ; CHECK-NEXT: Loop-Closed SSA Form Pass
Index: llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll
===================================================================
--- llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll
+++ llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll
@@ -111,7 +111,6 @@
 ; CHECK:       for.body4.lr.ph:
 ; CHECK-NEXT:    [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]]
-; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]]
 ; CHECK-NEXT:    br label [[FOR_BODY4:%.*]]
 ; CHECK:       for.body4:
@@ -356,3 +355,347 @@
   store i16 1, i16* %arrayidx2, align 1
   ret i16 0
 }
+
+; Similar to above, but with an irreducible loop. The stores should not be removed.
+define i16 @irreducible(i1 %c) {
+; CHECK-LABEL: @irreducible(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[A:%.*]], label [[B:%.*]]
+; CHECK:       A:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[B]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
+; CHECK-NEXT:    br label [[B]]
+; CHECK:       B:
+; CHECK-NEXT:    [[J_0:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[I_0]], [[A]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[J_0]]
+; CHECK-NEXT:    store i16 2, i16* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[J_0]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i16 [[J_0]], 4
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[A]]
+; CHECK:       exit:
+; CHECK-NEXT:    store i16 1, i16* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  br i1 %c, label %A, label %B
+
+A:
+  %i.0 = phi i16 [ 0, %entry ], [ %inc, %B ]
+  %arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
+  br label %B
+
+B:
+  %j.0 = phi i16 [ 0, %entry ], [ %i.0, %A ]
+  %arrayidx = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %j.0
+  store i16 2, i16* %arrayidx, align 1
+  %inc = add nuw nsw i16 %j.0, 1
+  %exitcond = icmp eq i16 %j.0, 4
+  br i1 %exitcond, label %exit, label %A
+
+exit:
+  store i16 1, i16* %arrayidx, align 1
+  ret i16 0
+}
+
+; An irreducible loop inside another loop.
+define i16 @irreducible_nested() {
+; CHECK-LABEL: @irreducible_nested(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[OUTER:%.*]]
+; CHECK:       outer:
+; CHECK-NEXT:    [[X:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INCX:%.*]], [[OUTERL:%.*]] ]
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i16 [[X]], 2
+; CHECK-NEXT:    br i1 [[C]], label [[A:%.*]], label [[B:%.*]]
+; CHECK:       A:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i16 [ 0, [[OUTER]] ], [ [[INC:%.*]], [[B]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
+; CHECK-NEXT:    br label [[B]]
+; CHECK:       B:
+; CHECK-NEXT:    [[J_0:%.*]] = phi i16 [ 0, [[OUTER]] ], [ [[I_0]], [[A]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[J_0]]
+; CHECK-NEXT:    store i16 2, i16* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[J_0]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i16 [[J_0]], 4
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[OUTERL]], label [[A]]
+; CHECK:       outerl:
+; CHECK-NEXT:    store i16 1, i16* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[INCX]] = add nuw nsw i16 [[X]], 1
+; CHECK-NEXT:    [[EXITCONDX:%.*]] = icmp eq i16 [[X]], 4
+; CHECK-NEXT:    br i1 [[EXITCONDX]], label [[END:%.*]], label [[OUTER]]
+; CHECK:       end:
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  br label %outer
+
+outer:
+  %x = phi i16 [ 0, %entry ], [ %incx, %outerl ]
+  %c = icmp sgt i16 %x, 2
+  br i1 %c, label %A, label %B
+
+A:
+  %i.0 = phi i16 [ 0, %outer ], [ %inc, %B ]
+  %arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
+  br label %B
+
+B:
+  %j.0 = phi i16 [ 0, %outer ], [ %i.0, %A ]
+  %arrayidx = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %j.0
+  store i16 2, i16* %arrayidx, align 1
+  %inc = add nuw nsw i16 %j.0, 1
+  %exitcond = icmp eq i16 %j.0, 4
+  br i1 %exitcond, label %outerl, label %A
+
+outerl:
+  store i16 1, i16* %arrayidx, align 1
+  %incx = add nuw nsw i16 %x, 1
+  %exitcondx = icmp eq i16 %x, 4
+  br i1 %exitcondx, label %end, label %outer
+
+end:
+  ret i16 0
+}
+
+define i16 @multi_overwrite(i1 %cond) {
+; CHECK-LABEL: @multi_overwrite(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[IF_END2:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
+; CHECK-NEXT:    store i16 2, i16* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[DO_STORE:%.*]], label [[IF_END2]]
+; CHECK:       do.store:
+; CHECK-NEXT:    store i16 3, i16* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    br label [[IF_END2]]
+; CHECK:       if.end2:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[I_0]], 1
+; CHECK-NEXT:    br label [[DO_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    store i16 1, i16* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  br label %do.body
+
+do.body:
+  %i.0 = phi i16 [ 0, %entry ], [ %inc, %if.end2 ]
+  %arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
+  store i16 2, i16* %arrayidx2, align 1
+  %exitcond = icmp eq i16 %i.0, 4
+  br i1 %exitcond, label %exit, label %if.end
+
+if.end:
+  br i1 %cond, label %do.store, label %if.end2
+
+do.store:
+  store i16 3, i16* %arrayidx2, align 1
+  br label %if.end2
+
+if.end2:
+  %inc = add nuw nsw i16 %i.0, 1
+  br label %do.body
+
+exit:
+  store i16 1, i16* %arrayidx2, align 1
+  ret i16 0
+}
+
+define void @test(i8* noalias %data1, i8* %data2, i16* %data3, i32 %i1)
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[I1:%.*]], 0
+; CHECK-NEXT:    br label [[PH0:%.*]]
+; CHECK:       ph0:
+; CHECK-NEXT:    br label [[HEADER0:%.*]]
+; CHECK:       header0:
+; CHECK-NEXT:    [[P1:%.*]] = phi i32 [ 0, [[PH0]] ], [ [[PN1:%.*]], [[END1:%.*]] ]
+; CHECK-NEXT:    [[PN1]] = add i32 [[P1]], 1
+; CHECK-NEXT:    [[PC1:%.*]] = icmp slt i32 [[PN1]], 5
+; CHECK-NEXT:    [[V2:%.*]] = getelementptr [10 x i16], [10 x i16]* @x, i32 0, i32 [[P1]]
+; CHECK-NEXT:    store i16 1, i16* [[V2]], align 2
+; CHECK-NEXT:    br i1 [[C]], label [[THEN1:%.*]], label [[ELSE1:%.*]]
+; CHECK:       then1:
+; CHECK-NEXT:    store i16 2, i16* [[V2]], align 2
+; CHECK-NEXT:    br label [[END1]]
+; CHECK:       else1:
+; CHECK-NEXT:    br label [[END1]]
+; CHECK:       end1:
+; CHECK-NEXT:    br i1 [[PC1]], label [[HEADER0]], label [[END0:%.*]]
+; CHECK:       end0:
+; CHECK-NEXT:    br label [[HEADER2:%.*]]
+; CHECK:       header2:
+; CHECK-NEXT:    [[P3:%.*]] = phi i32 [ 0, [[END0]] ], [ [[PN3:%.*]], [[HEADER2]] ]
+; CHECK-NEXT:    [[PN3]] = add i32 [[P3]], 1
+; CHECK-NEXT:    [[PC3:%.*]] = icmp slt i32 [[PN3]], 5
+; CHECK-NEXT:    store i16 4, i16* [[V2]], align 2
+; CHECK-NEXT:    br i1 [[PC3]], label [[HEADER2]], label [[END2:%.*]]
+; CHECK:       end2:
+; CHECK-NEXT:    ret void
+;
+{
+  %c = icmp eq i32 %i1, 0
+  br label %ph0
+ph0:
+  br label %header0
+header0:
+  %p1 = phi i32 [0, %ph0], [%pn1, %end1]
+  %pn1 = add i32 %p1, 1
+  %pc1 = icmp slt i32 %pn1, 5
+  %v2 = getelementptr [10 x i16], [10 x i16]* @x, i32 0, i32 %p1
+  store i16 1, i16* %v2
+  br i1 %c, label %then1, label %else1
+then1:
+  store i16 2, i16* %v2
+  br label %end1
+else1:
+  br label %end1
+end1:
+  br i1 %pc1, label %header0, label %end0
+end0:
+  br label %header2
+header2:
+  %p3 = phi i32 [0, %end0], [%pn3, %header2]
+  %pn3 = add i32 %p3, 1
+  %pc3 = icmp slt i32 %pn3, 5
+  store i16 4, i16* %v2
+  br i1 %pc3, label %header2, label %end2
+end2:
+  ret void
+}
+
+; Similar to above, but with multiple partial overlaps
+define i16 @partial_override_fromloop(i1 %c, i32 %i) {
+; CHECK-LABEL: @partial_override_fromloop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[IF_END2:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
+; CHECK-NEXT:    store i16 2, i16* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[DO_STORE:%.*]], label [[IF_END2]]
+; CHECK:       do.store:
+; CHECK-NEXT:    store i16 3, i16* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    br label [[IF_END2]]
+; CHECK:       if.end2:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[I_0]], 1
+; CHECK-NEXT:    br label [[DO_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[BC:%.*]] = bitcast i16* [[ARRAYIDX2]] to i8*
+; CHECK-NEXT:    [[BC2:%.*]] = getelementptr inbounds i8, i8* [[BC]], i32 1
+; CHECK-NEXT:    store i8 10, i8* [[BC]], align 1
+; CHECK-NEXT:    store i8 11, i8* [[BC2]], align 1
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  br label %do.body
+
+do.body:
+  %i.0 = phi i16 [ 0, %entry ], [ %inc, %if.end2 ]
+  %arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
+  store i16 2, i16* %arrayidx2, align 1
+  %exitcond = icmp eq i16 %i.0, 4
+  br i1 %exitcond, label %exit, label %if.end
+
+if.end:
+  br i1 %c, label %do.store, label %if.end2
+
+do.store:
+  store i16 3, i16* %arrayidx2, align 1
+  br label %if.end2
+
+if.end2:
+  %inc = add nuw nsw i16 %i.0, 1
+  br label %do.body
+
+exit:
+  %bc = bitcast i16* %arrayidx2 to i8*
+  %bc2 = getelementptr inbounds i8, i8* %bc, i32 1
+  store i8 10, i8* %bc, align 1
+  store i8 11, i8* %bc2, align 1
+  ret i16 0
+}
+
+
+define i16 @partial_override_overloop(i1 %c, i32 %i) {
+; CHECK-LABEL: @partial_override_overloop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i32 [[I:%.*]]
+; CHECK-NEXT:    store i16 1, i16* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
+; CHECK-NEXT:    store i16 2, i16* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[I_0]], 1
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[DO_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[BC:%.*]] = bitcast i16* [[ARRAYIDX]] to i8*
+; CHECK-NEXT:    [[BC2:%.*]] = getelementptr inbounds i8, i8* [[BC]], i32 1
+; CHECK-NEXT:    store i8 10, i8* [[BC]], align 1
+; CHECK-NEXT:    store i8 11, i8* [[BC2]], align 1
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  %arrayidx = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i32 %i
+  store i16 1, i16* %arrayidx, align 1
+  br label %do.body
+
+do.body:
+  %i.0 = phi i16 [ 0, %entry ], [ %inc, %do.body ]
+  %arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
+  store i16 2, i16* %arrayidx2, align 1
+  %exitcond = icmp eq i16 %i.0, 4
+  %inc = add nuw nsw i16 %i.0, 1
+  br i1 %exitcond, label %exit, label %do.body
+
+exit:
+  %bc = bitcast i16* %arrayidx to i8*
+  %bc2 = getelementptr inbounds i8, i8* %bc, i32 1
+  store i8 10, i8* %bc, align 1
+  store i8 11, i8* %bc2, align 1
+  ret i16 0
+}
+
+define i16 @partial_override_multi(i1 %c, i32 %i) {
+; CHECK-LABEL: @partial_override_multi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
+; CHECK-NEXT:    store i16 10, i16* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[BC:%.*]] = bitcast i16* [[ARRAYIDX2]] to i8*
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[I_0]], 1
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[DO_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[BC2:%.*]] = getelementptr inbounds i8, i8* [[BC]], i32 1
+; CHECK-NEXT:    store i8 11, i8* [[BC2]], align 1
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  br label %do.body
+
+do.body:
+  %i.0 = phi i16 [ 0, %entry ], [ %inc, %do.body ]
+  %arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
+  store i16 2, i16* %arrayidx2, align 1
+  %bc = bitcast i16* %arrayidx2 to i8*
+  store i8 10, i8* %bc, align 1
+  %exitcond = icmp eq i16 %i.0, 4
+  %inc = add nuw nsw i16 %i.0, 1
+  br i1 %exitcond, label %exit, label %do.body
+
+exit:
+  %bc2 = getelementptr inbounds i8, i8* %bc, i32 1
+  store i8 11, i8* %bc2, align 1
+  ret i16 0
+}
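
Editor's note: the following standalone C++ program is an illustrative sketch, not part of the patch. It mirrors tests such as @irreducible and @multi_overwrite above at the source level, to show the miscompile the new IsGuaranteedLoopInvariant guard prevents: the store inside the loop targets a different array element on each iteration, so it must not be treated as killed by the later store even though both go through the "same" pointer SSA value.

    #include <cstdio>

    int main() {
      short x[10] = {0};
      short *p = nullptr;           // plays the role of %arrayidx in the tests
      for (short i = 0; i <= 4; ++i) {
        p = &x[i];                  // points at a different element each iteration
        *p = 2;                     // must NOT be eliminated by DSE
      }
      *p = 1;                       // the post-loop store only overwrites x[4]
      for (int i = 0; i < 5; ++i)
        std::printf("%d ", x[i]);   // expected: 2 2 2 2 1
      return 0;
    }

If DSE treated "*p = 2" as dead because the must-aliasing "*p = 1" follows it, x[0..3] would stay zero. The patch rules this out by only comparing stores that are in the same basic block, in the same loop (when the function has no irreducible control flow), or whose address is provably loop invariant.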