Index: include/llvm/Transforms/Scalar/JumpThreading.h =================================================================== --- include/llvm/Transforms/Scalar/JumpThreading.h +++ include/llvm/Transforms/Scalar/JumpThreading.h @@ -59,9 +59,11 @@ class JumpThreadingPass : public PassInfoMixin { TargetLibraryInfo *TLI; LazyValueInfo *LVI; + LoopInfo *LI; std::unique_ptr BFI; std::unique_ptr BPI; bool HasProfileData = false; + Function *Fn; #ifdef NDEBUG SmallPtrSet LoopHeaders; #else @@ -95,7 +97,8 @@ // Glue for old PM. bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_, - bool HasProfileData_, std::unique_ptr BFI_, + LoopInfo *LI_, bool HasProfileData_, + std::unique_ptr BFI_, std::unique_ptr BPI_); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); Index: include/llvm/Transforms/Utils/Local.h =================================================================== --- include/llvm/Transforms/Utils/Local.h +++ include/llvm/Transforms/Utils/Local.h @@ -35,6 +35,7 @@ class DbgValueInst; class StoreInst; class LoadInst; +class LoopInfo; class Value; class PHINode; class AllocaInst; @@ -325,7 +326,8 @@ /// Remove all blocks that can not be reached from the function's entry. /// /// Returns true if any basic block was removed. -bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr); +bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr, + LoopInfo *LI = nullptr); /// Combine the metadata of two instructions so that K can replace J /// Index: lib/Analysis/LazyValueInfo.cpp =================================================================== --- lib/Analysis/LazyValueInfo.cpp +++ lib/Analysis/LazyValueInfo.cpp @@ -923,6 +923,28 @@ BasicBlock *PhiBB = PN->getIncomingBlock(i); Value *PhiVal = PN->getIncomingValue(i); LVILatticeVal EdgeResult; + + if (PhiVal == PN) + // Identity: we cannot contribute to our own range. + continue; + + if (Result.isConstant() || Result.isConstantRange()) { + // Can we determine this is a monotonically increasing increment? + ConstantInt *Inc; + bool NSW = match(PhiVal, m_NSWAdd(m_Specific(PN), m_ConstantInt(Inc))); + bool NUW = + !NSW && match(PhiVal, m_NUWAdd(m_Specific(PN), m_ConstantInt(Inc))); + if ((NSW || NUW) && !Inc->isNegative()) { + APInt Start = Result.isConstant() + ? cast(Result.getConstant())->getValue() + : Result.getConstantRange().getLower(); + APInt End = NSW ? APInt::getSignedMaxValue(Start.getBitWidth()) + : APInt::getMaxValue(Start.getBitWidth()); + Result = LVILatticeVal::getRange(ConstantRange(Start, End)); + continue; + } + } + // Note that we can provide PN as the context value to getEdgeValue, even // though the results will be cached, because PN is the value being used as // the cache key in the caller. Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -15,6 +15,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/GlobalsModRef.h" @@ -121,15 +122,15 @@ return false; auto TLI = &getAnalysis().getTLI(); auto LVI = &getAnalysis().getLVI(); + LoopInfo LI{DominatorTree(F)}; std::unique_ptr BFI; std::unique_ptr BPI; bool HasProfileData = F.getEntryCount().hasValue(); if (HasProfileData) { - LoopInfo LI{DominatorTree(F)}; BPI.reset(new BranchProbabilityInfo(F, LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - return Impl.runImpl(F, TLI, LVI, HasProfileData, std::move(BFI), + return Impl.runImpl(F, TLI, LVI, &LI, HasProfileData, std::move(BFI), std::move(BPI)); } @@ -140,14 +141,14 @@ auto &LVI = AM.getResult(F); std::unique_ptr BFI; std::unique_ptr BPI; + LoopInfo LI{DominatorTree(F)}; bool HasProfileData = F.getEntryCount().hasValue(); if (HasProfileData) { - LoopInfo LI{DominatorTree(F)}; BPI.reset(new BranchProbabilityInfo(F, LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = - runImpl(F, &TLI, &LVI, HasProfileData, std::move(BFI), std::move(BPI)); + bool Changed = runImpl(F, &TLI, &LVI, &LI, HasProfileData, std::move(BFI), + std::move(BPI)); // FIXME: We need to invalidate LVI to avoid PR28400. Is there a better // solution? @@ -161,13 +162,16 @@ } bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, - LazyValueInfo *LVI_, bool HasProfileData_, + LazyValueInfo *LVI_, LoopInfo *LI_, + bool HasProfileData_, std::unique_ptr BFI_, std::unique_ptr BPI_) { DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TLI = TLI_; LVI = LVI_; + LI = LI_; + Fn = &F; BFI.reset(); BPI.reset(); // When profile data is available, we need to update edge weights after @@ -186,7 +190,7 @@ // back edges. This works for normal cases but not for unreachable blocks as // they may have cycle with no back edge. bool EverChanged = false; - EverChanged |= removeUnreachableBlocks(F, LVI); + EverChanged |= removeUnreachableBlocks(F, LVI, LI); FindLoopHeaders(F); @@ -210,6 +214,7 @@ LoopHeaders.erase(BB); LVI->eraseBlock(BB); DeleteDeadBlock(BB); + LI->removeBlock(BB); Changed = true; continue; } @@ -237,11 +242,17 @@ // awesome, but it allows us to use AssertingVH to prevent nasty // dangling pointer issues within LazyValueInfo. LVI->eraseBlock(BB); + Loop *L = LI->getLoopFor(BB); + if (L) + LI->removeBlock(BB); if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) { Changed = true; // If we deleted BB and BB was the header of a loop, then the // successor is now the header of the loop. BB = Succ; + } else { + if (L) + L->addBasicBlockToLoop(BB, *LI); } if (ErasedFromLoopHeaders) @@ -720,6 +731,7 @@ LoopHeaders.insert(BB); LVI->eraseBlock(SinglePred); + LI->removeBlock(SinglePred); MergeBasicBlockIntoOnlyPred(BB); return true; @@ -1182,11 +1194,6 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, ConstantPreference Preference, Instruction *CxtI) { - // If threading this would thread across a loop header, don't even try to - // thread the edge. - if (LoopHeaders.count(BB)) - return false; - PredValueInfoTy PredValues; if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI)) return false; @@ -1258,29 +1265,44 @@ if (MostPopularDest == MultipleDestSentinel) MostPopularDest = FindMostPopularDest(BB, PredToDestList); - // Now that we know what the most popular destination is, factor all - // predecessors that will jump to it into a single predecessor. - SmallVector PredsToFactor; - for (const auto &PredToDest : PredToDestList) - if (PredToDest.second == MostPopularDest) { - BasicBlock *Pred = PredToDest.first; - - // This predecessor may be a switch or something else that has multiple - // edges to the block. Factor each of these edges by listing them - // according to # occurrences in PredsToFactor. - for (BasicBlock *Succ : successors(Pred)) - if (Succ == BB) - PredsToFactor.push_back(Pred); - } - // If the threadable edges are branching on an undefined value, we get to pick // the destination that these predecessors should get to. - if (!MostPopularDest) + bool BranchingOnUndefValue = false; + SetVector Dests; + if (!MostPopularDest) { + BranchingOnUndefValue = true; MostPopularDest = BB->getTerminator()-> getSuccessor(GetBestDestForJumpOnUndef(BB)); + Dests.insert(MostPopularDest); + } else { + Dests.insert(MostPopularDest); + for (auto &KV : PredToDestList) + Dests.insert(KV.second); + } + + for (auto *Dest : Dests) { + // Now that we know what the most popular destination is, factor all + // predecessors that will jump to it into a single predecessor. + SmallVector PredsToFactor; + for (const auto &PredToDest : PredToDestList) { + if (PredToDest.second == Dest || + (BranchingOnUndefValue && PredToDest.second == nullptr)) { + BasicBlock *Pred = PredToDest.first; + + // This predecessor may be a switch or something else that has multiple + // edges to the block. Factor each of these edges by listing them + // according to # occurrences in PredsToFactor. + for (BasicBlock *Succ : successors(Pred)) + if (Succ == BB) + PredsToFactor.push_back(Pred); + } + } - // Ok, try to thread it! - return ThreadEdge(BB, PredsToFactor, MostPopularDest); + // Ok, try to thread it! + if (ThreadEdge(BB, PredsToFactor, Dest)) + return true; + } + return false; } /// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on @@ -1457,7 +1479,14 @@ // If threading this would thread across a loop header, don't thread the edge. // See the comments above FindLoopHeaders for justifications and caveats. - if (LoopHeaders.count(BB)) { + // + // As a special case we allow threading a branch through a loop header to a + // loop exit. + auto *L = LI->getLoopFor(BB); + bool ThreadingOverLoopHeader = false; + if (LoopHeaders.count(BB) && L && L->getExitBlock() == SuccBB) { + ThreadingOverLoopHeader = true; + } else if (LoopHeaders.count(BB)) { DEBUG(dbgs() << " Not threading across loop header BB '" << BB->getName() << "' to dest BB '" << SuccBB->getName() << "' - it might create an irreducible loop!\n"); @@ -1499,6 +1528,25 @@ BB->getParent(), BB); NewBB->moveAfter(PredBB); + if (ThreadingOverLoopHeader) { + // Because we're threading to a loop exit, both BB and NewBB will + // now be outside the loop. + if (auto *L = LI->getLoopFor(PredBB)) { + auto *Parent = L->getParentLoop(); + if (Parent) { + L->removeBlockFromLoop(PredBB); + Parent->addBasicBlockToLoop(PredBB, *LI); + Parent->addBasicBlockToLoop(NewBB, *LI); + } else { + LI->removeBlock(PredBB); + LI->removeBlock(NewBB); + } + } + } else { + if (auto *L = LI->getLoopFor(BB)) + L->addBasicBlockToLoop(NewBB, *LI); + } + // Set the block frequency of NewBB. if (HasProfileData) { auto NewBBFreq = @@ -1611,7 +1659,7 @@ for (auto Pred : Preds) PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB); - BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix); + BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix, nullptr, LI); // Set the block frequency of the newly created PredBB, which is the sum of // frequencies of Preds. @@ -1948,11 +1996,19 @@ // The select is now dead. SI->eraseFromParent(); + if (Loop *L = LI->getLoopFor(BB)) + L->addBasicBlockToLoop(NewBB, *LI); + // Update any other PHI nodes in BB. for (BasicBlock::iterator BI = BB->begin(); PHINode *Phi = dyn_cast(BI); ++BI) if (Phi != CondLHS) Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB); + + // We have just utterly destroyed LVI's internal model by adding new + // edges. This is a heavy hammer, but when a hammer is the only tool + // you have... + LVI->releaseMemory(); return true; } } @@ -2004,8 +2060,8 @@ if (HasConst) { // Expand the select. - TerminatorInst *Term = - SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); + TerminatorInst *Term = SplitBlockAndInsertIfThen( + SI->getCondition(), SI, false, nullptr, nullptr, LI); PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); NewPN->addIncoming(SI->getTrueValue(), Term->getParent()); NewPN->addIncoming(SI->getFalseValue(), BB); Index: lib/Transforms/Utils/BasicBlockUtils.cpp =================================================================== --- lib/Transforms/Utils/BasicBlockUtils.cpp +++ lib/Transforms/Utils/BasicBlockUtils.cpp @@ -647,8 +647,10 @@ if (LI) { Loop *L = LI->getLoopFor(Head); - L->addBasicBlockToLoop(ThenBlock, *LI); - L->addBasicBlockToLoop(Tail, *LI); + if (L) { + L->addBasicBlockToLoop(ThenBlock, *LI); + L->addBasicBlockToLoop(Tail, *LI); + } } return CheckTerm; Index: lib/Transforms/Utils/Local.cpp =================================================================== --- lib/Transforms/Utils/Local.cpp +++ lib/Transforms/Utils/Local.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -1586,10 +1587,11 @@ TI->eraseFromParent(); } -/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// removeUnreachableBlocks - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. -bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { +bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, + LoopInfo *LI) { SmallPtrSet Reachable; bool Changed = markAliveBlocks(F, Reachable); @@ -1611,6 +1613,8 @@ Successor->removePredecessor(&*BB); if (LVI) LVI->eraseBlock(&*BB); + if (LI) + LI->removeBlock(&*BB); BB->dropAllReferences(); } Index: test/Transforms/JumpThreading/phi-known.ll =================================================================== --- test/Transforms/JumpThreading/phi-known.ll +++ test/Transforms/JumpThreading/phi-known.ll @@ -44,9 +44,8 @@ ret void } -; If the inputs don't branch the same way, we can't rewrite -; Well, we could unroll this loop exactly twice, but that's -; a different transform. +; Because we know %p1 != null at the end of loop, we can thread +; entry to exit, removing the loop entirely. define void @test_mixed(i8* %p) { ; CHECK-LABEL: @test_mixed entry: @@ -55,8 +54,7 @@ loop: ; CHECK-LABEL: loop: ; CHECK-NEXT: phi -; CHECK-NEXT: %cmp1 = icmp -; CHECK-NEXT: br i1 %cmp1 +; CHECK-NOT: icmp %p1 = phi i8* [%p, %entry], [%p1, %loop] %cmp1 = icmp ne i8* %p1, null br i1 %cmp1, label %exit, label %loop @@ -64,3 +62,4 @@ ret void } +declare void @g() Index: test/Transforms/JumpThreading/singleblock-loop.ll =================================================================== --- /dev/null +++ test/Transforms/JumpThreading/singleblock-loop.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -jump-threading | FileCheck %s + +; %i.0 can only take the value 2. If it gets incremented to 3, the loop will exit. +; jump-threading must unfold the select and thread the 'true' arc directly to the +; loop exit. +define void @f() { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %while.body +; CHECK: while.body: +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 2, %entry ], [ [[I_0:%.*]], %while.body ] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @g() +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[CALL]], 0 +; CHECK-NEXT: br i1 [[CMP3]], label %return, label %while.body +; CHECK: return: +; CHECK-NEXT: ret void +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %i.0 = phi i32 [ 2, %entry ], [ %add.i.0, %while.body ] + %cmp = icmp slt i32 %i.0, 3 + br i1 %cmp, label %while.body, label %return + +while.body: ; preds = %while.cond + %add = add nsw i32 %i.0, 1 + %call = call i32 @g() + %cmp3 = icmp eq i32 %call, 0 + %add.i.0 = select i1 %cmp3, i32 %add, i32 %i.0 + br label %while.cond + +return: + ret void +} + +declare i32 @g()