diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1588,6 +1588,19 @@ /// SCEVUnknowns and thus don't use this mechanism. ConstantRange getRangeForUnknownRecurrence(const SCEVUnknown *U); + /// Return true and fill \p SCC with elements of PNINode-composed strongly + /// connected component that contains \p Phi. Here SCC is a maximum by + /// inclusion subgraph composed of Phis that transitively use one another as + /// inputs. Otherwise, return false and conservatively put \p Phi into \p SCC + /// as the only element of its strongly connected component. + bool collectSCC(const PHINode *Phi, + SmallVectorImpl &SCC) const; + + /// Sharpen range of entire SCEVUnknown Phi strongly connected component that + /// includes \p Phi. On output, \p ConservativeResult is the sharpened range. + void sharpenPhiSCCRange(const PHINode *Phi, ConstantRange &ConservativeResult, + ScalarEvolution::RangeSignHint SignHint); + /// We know that there is no SCEV for the specified value. Analyze the /// expression. const SCEV *createSCEV(Value *V); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -145,6 +145,8 @@ "Number of loops without predictable loop counts"); STATISTIC(NumBruteForceTripCountsComputed, "Number of loops with trip counts computed by force"); +STATISTIC(NumFoundPhiSCCs, + "Number of found Phi-composed strongly connected components"); static cl::opt MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, @@ -231,6 +233,12 @@ cl::desc("Use more powerful methods of sharpening expression ranges. May " "be costly in terms of compile time")); +static cl::opt MaxPhiSCCAnalysisSize( + "scalar-evolution-max-scc-analysis-depth", cl::Hidden, + cl::desc("Maximum amount of nodes to process while searching SCEVUnknown " + "Phi strongly connected components"), + cl::init(8)); + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -6566,29 +6574,130 @@ RangeType); // A range of Phi is a subset of union of all ranges of its input. - if (const PHINode *Phi = dyn_cast(U->getValue())) { - // Make sure that we do not run over cycled Phis. - if (PendingPhiRanges.insert(Phi).second) { - ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false); - for (auto &Op : Phi->operands()) { - auto OpRange = getRangeRef(getSCEV(Op), SignHint); - RangeFromOps = RangeFromOps.unionWith(OpRange); - // No point to continue if we already have a full set. - if (RangeFromOps.isFullSet()) - break; + if (const PHINode *Phi = dyn_cast(U->getValue())) + if (!PendingPhiRanges.count(Phi)) + sharpenPhiSCCRange(Phi, ConservativeResult, SignHint); + + return setRange(U, SignHint, std::move(ConservativeResult)); + } + + return setRange(S, SignHint, std::move(ConservativeResult)); +} + +bool ScalarEvolution::collectSCC(const PHINode *Phi, + SmallVectorImpl &SCC) const { + assert(SCC.empty() && "Precondition: SCC should be empty."); + auto Bail = [&]() { + SCC.clear(); + SCC.push_back(Phi); + return false; + }; + SmallPtrSet Reachable; + { + // First, find all PHI nodes that are reachable from Phi. + SmallVector Worklist; + Reachable.insert(Phi); + Worklist.push_back(Phi); + while (!Worklist.empty()) { + if (Reachable.size() > MaxPhiSCCAnalysisSize) + // Too many nodes to process. Assume that SCC is composed of Phi alone. + return Bail(); + auto *Curr = Worklist.pop_back_val(); + for (auto &Op : Curr->operands()) { + if (auto *PhiOp = dyn_cast(&*Op)) { + if (PendingPhiRanges.count(PhiOp)) + // Do not want to deal with this situation, so conservatively bail. + return Bail(); + if (Reachable.insert(PhiOp).second) + Worklist.push_back(PhiOp); } - ConservativeResult = - ConservativeResult.intersectWith(RangeFromOps, RangeType); - bool Erased = PendingPhiRanges.erase(Phi); - assert(Erased && "Failed to erase Phi properly?"); - (void) Erased; } } + } + { + // Out of reachable nodes, find those from which Phi is also reachable. This + // defines a SCC. + SmallVector Worklist; + SmallPtrSet SCCSet; + SCCSet.insert(Phi); + SCC.push_back(Phi); + Worklist.push_back(Phi); + while (!Worklist.empty()) { + auto *Curr = Worklist.pop_back_val(); + for (auto *User : Curr->users()) + if (auto *PN = dyn_cast(User)) + if (Reachable.count(PN) && SCCSet.insert(PN).second) { + Worklist.push_back(PN); + SCC.push_back(PN); + } + } + } + return true; +} - return setRange(U, SignHint, std::move(ConservativeResult)); +void +ScalarEvolution::sharpenPhiSCCRange(const PHINode *Phi, + ConstantRange &ConservativeResult, + ScalarEvolution::RangeSignHint SignHint) { + // Collect strongly connected component (further on - SCC ) composed of Phis. + // Analyze all values that are incoming to this SCC (we call them roots). + // All SCC elements have range that is not wider than union of ranges of + // roots. + SmallVector SCC; + if (collectSCC(Phi, SCC)) + ++NumFoundPhiSCCs; + + // Collect roots: inputs of SCC nodes that come from outside of SCC. + SmallPtrSet Roots; + const SmallPtrSet SCCSet(SCC.begin(), SCC.end()); + for (auto *PN : SCC) + for (auto &Op : PN->operands()) { + auto *PhiInput = dyn_cast(Op); + if (!PhiInput || !SCCSet.count(PhiInput)) + Roots.insert(Op); + } + + // Mark SCC elements as pending to avoid infinite recursion if there is a + // cyclic dependency through some instruction that is not a PHI. + for (auto *PN : SCC) { + bool Inserted = PendingPhiRanges.insert(PN).second; + assert(Inserted && "PHI is already pending?"); + (void)Inserted; + } + + auto BitWidth = ConservativeResult.getBitWidth(); + ConstantRange RangeFromRoots(BitWidth, /*isFullSet=*/false); + for (auto *Root : Roots) { + auto OpRange = getRangeRef(getSCEV(Root), SignHint); + RangeFromRoots = RangeFromRoots.unionWith(OpRange); + // No point to continue if we already have a full set. + if (RangeFromRoots.isFullSet()) + break; } + ConstantRange::PreferredRangeType RangeType = + SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? ConstantRange::Unsigned + : ConstantRange::Signed; + ConservativeResult = + ConservativeResult.intersectWith(RangeFromRoots, RangeType); - return setRange(S, SignHint, std::move(ConservativeResult)); + DenseMap &Cache = + SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges + : SignedRanges; + // Entire SCC has the same range. + for (auto *PN : SCC) { + bool Erased = PendingPhiRanges.erase(PN); + assert(Erased && "Failed to erase Phi properly?"); + (void)Erased; + auto *PNSCEV = getSCEV(const_cast(PN)); + auto I = Cache.find(PNSCEV); + if (I == Cache.end()) + setRange(PNSCEV, SignHint, ConservativeResult); + else { + auto SharpenedRange = + I->second.intersectWith(ConservativeResult, RangeType); + setRange(PNSCEV, SignHint, SharpenedRange); + } + } } // Given a StartRange, Step and MaxBECount for an expression compute a range of diff --git a/llvm/test/Analysis/ScalarEvolution/cycled_phis.ll b/llvm/test/Analysis/ScalarEvolution/cycled_phis.ll --- a/llvm/test/Analysis/ScalarEvolution/cycled_phis.ll +++ b/llvm/test/Analysis/ScalarEvolution/cycled_phis.ll @@ -3,14 +3,13 @@ declare i1 @cond() -; FIXME: Range of phi_1 and phi_2 here can be sharpened to [10, 21). define void @test_01() { ; CHECK-LABEL: 'test_01' ; CHECK-NEXT: Classifying expressions for: @test_01 ; CHECK-NEXT: %phi_1 = phi i32 [ 10, %entry ], [ %phi_2, %loop ] -; CHECK-NEXT: --> %phi_1 U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } +; CHECK-NEXT: --> %phi_1 U: [10,21) S: [10,21) Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %phi_2 = phi i32 [ 20, %entry ], [ %phi_1, %loop ] -; CHECK-NEXT: --> %phi_2 U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } +; CHECK-NEXT: --> %phi_2 U: [10,21) S: [10,21) Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %cond = call i1 @cond() ; CHECK-NEXT: --> %cond U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @test_01 @@ -38,15 +37,15 @@ ; CHECK-NEXT: %start = load i32, i32* %p, align 4, !range !0 ; CHECK-NEXT: --> %start U: [0,1000) S: [0,1000) ; CHECK-NEXT: %outer_phi = phi i32 [ %start, %entry ], [ %inner_lcssa, %outer_backedge ] -; CHECK-NEXT: --> %outer_phi U: full-set S: full-set Exits: <> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant } +; CHECK-NEXT: --> %outer_phi U: [0,3000) S: [0,3000) Exits: <> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant } ; CHECK-NEXT: %inner_phi = phi i32 [ %outer_phi, %outer_loop ], [ %inner_load, %inner_loop ] -; CHECK-NEXT: --> %inner_phi U: full-set S: full-set Exits: <> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant } +; CHECK-NEXT: --> %inner_phi U: [0,3000) S: [0,3000) Exits: <> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant } ; CHECK-NEXT: %inner_load = load i32, i32* %q, align 4, !range !1 ; CHECK-NEXT: --> %inner_load U: [2000,3000) S: [2000,3000) Exits: <> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant } ; CHECK-NEXT: %inner_cond = call i1 @cond() ; CHECK-NEXT: --> %inner_cond U: full-set S: full-set Exits: <> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant } ; CHECK-NEXT: %inner_lcssa = phi i32 [ %inner_phi, %inner_loop ] -; CHECK-NEXT: --> %inner_lcssa U: full-set S: full-set Exits: <> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant } +; CHECK-NEXT: --> %inner_lcssa U: [0,3000) S: [0,3000) Exits: <> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant } ; CHECK-NEXT: %outer_cond = call i1 @cond() ; CHECK-NEXT: --> %outer_cond U: full-set S: full-set Exits: <> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @test_02 @@ -80,7 +79,6 @@ ret void } -; FIXME: All phis should have range [0, 3000) define void @test_03(i32* %p, i32* %q) { ; CHECK-LABEL: 'test_03' ; CHECK-NEXT: Classifying expressions for: @test_03 @@ -89,15 +87,15 @@ ; CHECK-NEXT: %start_2 = load i32, i32* %q, align 4, !range !1 ; CHECK-NEXT: --> %start_2 U: [2000,3000) S: [2000,3000) ; CHECK-NEXT: %outer_phi = phi i32 [ %start_1, %entry ], [ %inner_lcssa, %outer_backedge ] -; CHECK-NEXT: --> %outer_phi U: full-set S: full-set Exits: <> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant } +; CHECK-NEXT: --> %outer_phi U: [0,3000) S: [0,3000) Exits: <> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant } ; CHECK-NEXT: %inner_phi_1 = phi i32 [ %outer_phi, %outer_loop ], [ %inner_phi_2, %inner_loop ] -; CHECK-NEXT: --> %inner_phi_1 U: full-set S: full-set Exits: <> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant } +; CHECK-NEXT: --> %inner_phi_1 U: [0,3000) S: [0,3000) Exits: <> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant } ; CHECK-NEXT: %inner_phi_2 = phi i32 [ %start_2, %outer_loop ], [ %inner_phi_1, %inner_loop ] -; CHECK-NEXT: --> %inner_phi_2 U: full-set S: full-set Exits: <> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant } +; CHECK-NEXT: --> %inner_phi_2 U: [0,3000) S: [0,3000) Exits: <> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant } ; CHECK-NEXT: %inner_cond = call i1 @cond() ; CHECK-NEXT: --> %inner_cond U: full-set S: full-set Exits: <> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant } ; CHECK-NEXT: %inner_lcssa = phi i32 [ %inner_phi_1, %inner_loop ] -; CHECK-NEXT: --> %inner_lcssa U: full-set S: full-set Exits: <> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant } +; CHECK-NEXT: --> %inner_lcssa U: [0,3000) S: [0,3000) Exits: <> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant } ; CHECK-NEXT: %outer_cond = call i1 @cond() ; CHECK-NEXT: --> %outer_cond U: full-set S: full-set Exits: <> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @test_03 diff --git a/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll b/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll --- a/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll +++ b/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll @@ -30,8 +30,6 @@ } define void @merge_values_with_ranges_looped(i32 *%a_len_ptr, i32 *%b_len_ptr) { -; TODO: We could be much smarter here. So far we just make sure that we do not -; go into infinite loop analyzing these Phis. ; CHECK-LABEL: 'merge_values_with_ranges_looped' ; CHECK-NEXT: Classifying expressions for: @merge_values_with_ranges_looped ; CHECK-NEXT: %len_a = load i32, i32* %a_len_ptr, align 4, !range !0 @@ -39,9 +37,9 @@ ; CHECK-NEXT: %len_b = load i32, i32* %b_len_ptr, align 4, !range !0 ; CHECK-NEXT: --> %len_b U: [0,2147483647) S: [0,2147483647) ; CHECK-NEXT: %p1 = phi i32 [ %len_a, %entry ], [ %p2, %loop ] -; CHECK-NEXT: --> %p1 U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } +; CHECK-NEXT: --> %p1 U: [0,2147483647) S: [0,2147483647) Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %p2 = phi i32 [ %len_b, %entry ], [ %p1, %loop ] -; CHECK-NEXT: --> %p2 U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } +; CHECK-NEXT: --> %p2 U: [0,2147483647) S: [0,2147483647) Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,100) S: [0,100) Exits: 99 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i32 %iv, 1