Index: llvm/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/lib/Analysis/ScalarEvolution.cpp +++ llvm/lib/Analysis/ScalarEvolution.cpp @@ -233,6 +233,12 @@ cl::desc("Use more powerful methods of sharpening expression ranges. May " "be costly in terms of compile time")); +static cl::opt MaxPhiSCCAnalysisSize( + "scalar-evolution-max-scc-analysis-depth", cl::Hidden, + cl::desc("Maximum amount of nodes to process while searching SCEVUnknown " + "Phi strongly connected components"), + cl::init(8)); + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -6241,21 +6247,93 @@ // A range of Phi is a subset of union of all ranges of its input. if (const PHINode *Phi = dyn_cast(U->getValue())) { - // Make sure that we do not run over cycled Phis. - if (PendingPhiRanges.insert(Phi).second) { - ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false); - for (auto &Op : Phi->operands()) { - auto OpRange = getRangeRef(getSCEV(Op), SignHint); - RangeFromOps = RangeFromOps.unionWith(OpRange); + if (!PendingPhiRanges.count(Phi)) { + // Collect strongly connected component (further on - SCC ) composed of + // Phis. Analyze all values that are incoming to this SCC (we call them + // roots). All SCC elements have range that is not wider than union of + // ranges of roots. + SmallVector Worklist; + SmallPtrSet Reachable; + Reachable.insert(Phi); + Worklist.push_back(Phi); + // First, find all PHI nodes that are reachable from Phi. + while (!Worklist.empty()) { + if (Reachable.size() > MaxPhiSCCAnalysisSize) { + // Too many nodes to process. Assume that SCC is composed of Phi + // alone. + Reachable.clear(); + break; + } + auto *Curr = Worklist.pop_back_val(); + for (auto &Op : Curr->operands()) { + if (auto *PhiOp = dyn_cast(&*Op)) { + if (PendingPhiRanges.count(PhiOp)) + continue; + if (Reachable.insert(PhiOp).second) + Worklist.push_back(PhiOp); + } + } + } + + SmallPtrSet SCC; + SCC.insert(Phi); + Worklist.push_back(Phi); + // Out of reachable nodes, find those from which Phi is also reachable. + // This defines a SCC. + while (!Worklist.empty()) { + auto *Curr = Worklist.pop_back_val(); + for (auto *User : Curr->users()) { + auto *PN = dyn_cast(User); + if (PN && Reachable.count(PN) && SCC.insert(PN).second) + Worklist.push_back(PN); + } + } + Reachable.clear(); + + // Collect roots: inputs of SCC nodes that come from outside of SCC. + SmallPtrSet Roots; + for (auto *PN : SCC) + for (auto &Op : PN->operands()) { + auto *PhiInput = dyn_cast(Op); + if (!PhiInput || !SCC.count(PhiInput)) + Roots.insert(Op); + } + + // Mark SCC elements as pending to avoid infinite recursion if there is + // a cyclic dependency through some instruction that is not a PHI. + for (auto *PN : SCC) { + bool Inserted = PendingPhiRanges.insert(PN).second; + assert(Inserted && "PHI is already pending?"); + (void)Inserted; + } + + ConstantRange RangeFromRoots(BitWidth, /*isFullSet=*/false); + for (auto *Root : Roots) { + auto OpRange = getRangeRef(getSCEV(Root), SignHint); + RangeFromRoots = RangeFromRoots.unionWith(OpRange); // No point to continue if we already have a full set. - if (RangeFromOps.isFullSet()) + if (RangeFromRoots.isFullSet()) break; } ConservativeResult = - ConservativeResult.intersectWith(RangeFromOps, RangeType); - bool Erased = PendingPhiRanges.erase(Phi); - assert(Erased && "Failed to erase Phi properly?"); - (void) Erased; + ConservativeResult.intersectWith(RangeFromRoots, RangeType); + + // Entire SCC has the same range. + for (auto *PN : SCC) { + bool Erased = PendingPhiRanges.erase(PN); + assert(Erased && "Failed to erase Phi properly?"); + (void)Erased; + auto *PNSCEV = getSCEV(const_cast(PN)); + DenseMap::iterator I = + Cache.find(PNSCEV); + if (I == Cache.end()) + setRange(PNSCEV, SignHint, ConservativeResult); + else { + auto SharpenedRange = + I->second.intersectWith(ConservativeResult, RangeType); + setRange(PNSCEV, SignHint, SharpenedRange); + } + } } } Index: llvm/test/Analysis/ScalarEvolution/cycled_phis.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/cycled_phis.ll +++ llvm/test/Analysis/ScalarEvolution/cycled_phis.ll @@ -3,14 +3,13 @@ declare i1 @cond() -; FIXME: Range of phi_1 and phi_2 here can be sharpened to [10, 21). define void @test_01() { ; CHECK-LABEL: 'test_01' ; CHECK-NEXT: Classifying expressions for: @test_01 ; CHECK-NEXT: %phi_1 = phi i32 [ 10, %entry ], [ %phi_2, %loop ] -; CHECK-NEXT: --> %phi_1 U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } +; CHECK-NEXT: --> %phi_1 U: [10,21) S: [10,21) Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %phi_2 = phi i32 [ 20, %entry ], [ %phi_1, %loop ] -; CHECK-NEXT: --> %phi_2 U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } +; CHECK-NEXT: --> %phi_2 U: [10,21) S: [10,21) Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %cond = call i1 @cond() ; CHECK-NEXT: --> %cond U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @test_01 Index: llvm/test/Analysis/ScalarEvolution/unknown_phis.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/unknown_phis.ll +++ llvm/test/Analysis/ScalarEvolution/unknown_phis.ll @@ -30,8 +30,6 @@ } define void @merge_values_with_ranges_looped(i32 *%a_len_ptr, i32 *%b_len_ptr) { -; TODO: We could be much smarter here. So far we just make sure that we do not -; go into infinite loop analyzing these Phis. ; CHECK-LABEL: 'merge_values_with_ranges_looped' ; CHECK-NEXT: Classifying expressions for: @merge_values_with_ranges_looped ; CHECK-NEXT: %len_a = load i32, i32* %a_len_ptr, align 4, !range !0 @@ -39,9 +37,9 @@ ; CHECK-NEXT: %len_b = load i32, i32* %b_len_ptr, align 4, !range !0 ; CHECK-NEXT: --> %len_b U: [0,2147483647) S: [0,2147483647) ; CHECK-NEXT: %p1 = phi i32 [ %len_a, %entry ], [ %p2, %loop ] -; CHECK-NEXT: --> %p1 U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } +; CHECK-NEXT: --> %p1 U: [0,2147483647) S: [0,2147483647) Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %p2 = phi i32 [ %len_b, %entry ], [ %p1, %loop ] -; CHECK-NEXT: --> %p2 U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } +; CHECK-NEXT: --> %p2 U: [0,2147483647) S: [0,2147483647) Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,100) S: [0,100) Exits: 99 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i32 %iv, 1