Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -2178,6 +2178,81 @@ return Flags; } +/// Determine if the SCEV depends on a SCEVUnknown of an instruction which is +/// dominated by the header of loop L, and this SCEVUnknown can potentially use +/// or be a Phi node of another loop dominated by L, thus, this Phi may change +/// in scope of the loop L. +static bool mayUseVariantPhiOfDominatedLoop(const SCEV *S, const Loop *L, + DominatorTree &DT, LoopInfo &LI) { + struct FindDominatedSCEVUnknown { + bool Found = false; + const Loop *L; + DominatorTree &DT; + LoopInfo &LI; + + FindDominatedSCEVUnknown(const Loop *L, DominatorTree &DT, LoopInfo &LI) + : L(L), DT(DT), LI(LI) {} + + bool checkSCEVUnknown(const SCEVUnknown *SU) { + if (auto *I = dyn_cast(SU->getValue())) { + if (DT.dominates(L->getHeader(), I->getParent())) { + if (auto *UL = LI.getLoopFor(I->getParent())) + assert(UL != L && !L->contains(UL) && + "Invariant SCEVUnknown within the loop?"); + // Now we have a SCEVUnknown which is dominated by our loop. We know + // nothing about its ops and pessimistically assume that it may use + // (or be) a Phi from a loop which lies between the loop and this + // SCEVUnknown. We should reject it if such Phi exists. + auto *LH = DT[L->getHeader()]; + for (auto *BN = DT[I->getParent()]; BN != LH; BN = BN->getIDom()) { + auto *BB = BN->getBlock(); + if (auto *UL = LI.getLoopFor(BB)) + if (UL->getHeader() == BB && isa(BB->begin())) { + // We have found a loop header with Phis between them. We cannot + // be sure that our SCEVUnknown does not use it, so + // coservatively mark it as found. + Found = true; + break; + } + } + } else + assert(DT.dominates(I->getParent(), L->getHeader()) && + "No dominance relationship between SCEV and loop?"); + } + return false; + } + + bool follow(const SCEV *S) { + switch (static_cast(S->getSCEVType())) { + case scConstant: + return false; + case scAddRecExpr: + case scTruncate: + case scZeroExtend: + case scSignExtend: + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: + case scUDivExpr: + return true; + case scUnknown: + return checkSCEVUnknown(cast(S)); + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + } + return false; + } + + bool isDone() { return Found; } + }; + + FindDominatedSCEVUnknown FSU(L, DT, LI); + SCEVTraversal ST(FSU); + ST.visitAll(S); + return FSU.Found; +} + /// Get a canonical add expression, or something simpler if possible. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags, @@ -2459,11 +2534,17 @@ const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (isLoopInvariant(Ops[i], AddRecLoop)) { - LIOps.push_back(Ops[i]); - Ops.erase(Ops.begin()+i); - --i; --e; - } + if (isLoopInvariant(Ops[i], AddRecLoop)) + // Among our 'invariants' we could occasionally have a Phi of a loop + // which is dominated by our loop, but this Phi was so complex that + // SCEV failed to build an AddRecExpr for it. We do not have a right to + // fold such Phi because we expect that AddRec is a recurrency of the + // bottom-most loop where such folding remains correct. + if (!mayUseVariantPhiOfDominatedLoop(Ops[i], AddRecLoop, DT, LI)) { + LIOps.push_back(Ops[i]); + Ops.erase(Ops.begin()+i); + --i; --e; + } // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { @@ -2734,11 +2815,17 @@ const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (isLoopInvariant(Ops[i], AddRecLoop)) { - LIOps.push_back(Ops[i]); - Ops.erase(Ops.begin()+i); - --i; --e; - } + if (isLoopInvariant(Ops[i], AddRecLoop)) + // Among our 'invariants' we could occasionally have a Phi of a loop + // which is dominated by our loop, but this Phi was so complex that + // SCEV failed to build an AddRecExpr for it. We do not have a right to + // fold such Phi because we expect that AddRec is a recurrency of the + // bottom-most loop where such folding remains correct. + if (!mayUseVariantPhiOfDominatedLoop(Ops[i], AddRecLoop, DT, LI)) { + LIOps.push_back(Ops[i]); + Ops.erase(Ops.begin()+i); + --i; --e; + } // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { @@ -4368,7 +4455,7 @@ llvm_unreachable("switch should be fully covered!"); } - bool isDone() { return TraversalDone; } + bool isDone() const { return TraversalDone; } }; CheckAvailable CA(L, BB, DT); Index: test/Analysis/ScalarEvolution/different-loops-recs.ll =================================================================== --- test/Analysis/ScalarEvolution/different-loops-recs.ll +++ test/Analysis/ScalarEvolution/different-loops-recs.ll @@ -220,7 +220,8 @@ ; Mix of previous use cases that demonstrates %s3 can be incorrectly treated as ; a recurrence of loop1 because of operands order if we pick recurrencies in an -; incorrect order. +; incorrect order. It also shows that we cannot safely fold v1 (SCEVUnknown) +; because we cannot prove for sure that it doesn't use Phis of loop 2. define void @test_03(i32 %a, i32 %b, i32 %c, i32* %p) { @@ -228,9 +229,9 @@ ; CHECK: %v1 = load i32, i32* %p ; CHECK-NEXT: --> %v1 ; CHECK: %s1 = add i32 %phi1, %v1 -; CHECK-NEXT: --> {(%a + %v1),+,1}<%loop1> +; CHECK-NEXT: --> ({%a,+,1}<%loop1> + %v1) ; CHECK: %s2 = add i32 %s1, %b -; CHECK-NEXT: --> {(%a + %b + %v1),+,1}<%loop1> +; CHECK-NEXT: --> ({(%a + %b),+,1}<%loop1> + %v1) ; CHECK: %s3 = add i32 %s2, %phi2 ; CHECK-NEXT: --> ({{{{}}((2 * %a) + %b),+,1}<%loop1>,+,2}<%loop2> + %v1) @@ -452,3 +453,60 @@ %s6 = add i32 %phi3, %phi2 ret void } + +; Make sure that a complicated Phi does not get folded with rec's start value +; of a loop which is above. +define void @test_08() { + +; CHECK-LABEL: Classifying expressions for: @test_08 +; CHECK: %tmp11 = add i64 %iv.2.2, %iv.2.1 +; CHECK-NEXT: --> ({0,+,-1}<%loop_2> + %iv.2.1) +; CHECK: %tmp12 = trunc i64 %tmp11 to i32 +; CHECK-NEXT: --> (trunc i64 ({0,+,-1}<%loop_2> + %iv.2.1) to i32) +; CHECK: %tmp14 = mul i32 %tmp12, %tmp7 +; CHECK-NEXT: --> ((trunc i64 ({0,+,-1}<%loop_2> + %iv.2.1) to i32) * {-1,+,-1}<%loop_1>) +; CHECK: %tmp16 = mul i64 %iv.2.1, %iv.1.1 +; CHECK-NEXT: --> ({2,+,1}<%loop_1> * %iv.2.1) + +entry: + br label %loop_1 + +loop_1: + %iv.1.1 = phi i64 [ 2, %entry ], [ %iv.1.1.next, %loop_1_back_branch ] + %iv.1.2 = phi i32 [ -1, %entry ], [ %iv.1.2.next, %loop_1_back_branch ] + br label %loop_1_exit + +dead: + br label %loop_1_exit + +loop_1_exit: + %tmp5 = icmp sgt i64 %iv.1.1, 2 + br i1 %tmp5, label %loop_2_preheader, label %loop_1_back_branch + +loop_1_back_branch: + %iv.1.1.next = add nuw nsw i64 %iv.1.1, 1 + %iv.1.2.next = add nsw i32 %iv.1.2, 1 + br label %loop_1 + +loop_2_preheader: + %tmp6 = sub i64 1, %iv.1.1 + %tmp7 = trunc i64 %tmp6 to i32 + br label %loop_2 + +loop_2: + %iv.2.1 = phi i64 [ 0, %loop_2_preheader ], [ %tmp16, %loop_2 ] + %iv.2.2 = phi i64 [ 0, %loop_2_preheader ], [ %iv.2.2.next, %loop_2 ] + %iv.2.3 = phi i64 [ 2, %loop_2_preheader ], [ %iv.2.3.next, %loop_2 ] + %tmp11 = add i64 %iv.2.2, %iv.2.1 + %tmp12 = trunc i64 %tmp11 to i32 + %tmp14 = mul i32 %tmp12, %tmp7 + %tmp16 = mul i64 %iv.2.1, %iv.1.1 + %iv.2.3.next = add nuw nsw i64 %iv.2.3, 1 + %iv.2.2.next = add nsw i64 %iv.2.2, -1 + %tmp17 = icmp slt i64 %iv.2.3.next, %iv.1.1 + br i1 %tmp17, label %loop_2, label %exit + +exit: + %tmp10 = add i32 %iv.1.2, 3 + ret void +}