Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -2178,6 +2178,48 @@ return Flags; } +/// Determine if the SCEV depends on a SCEVUnknown for a loop Phi, and the loop +/// of this Phi is dominated by the loop L. +static bool isComplexRecBelowLoop(const SCEV *S, const Loop *L, + DominatorTree &DT, LoopInfo &LI) { + switch (static_cast<SCEVTypes>(S->getSCEVType())) { + case scConstant: + case scAddRecExpr: + return false; + case scTruncate: + case scZeroExtend: + case scSignExtend: + return isComplexRecBelowLoop(cast<SCEVCastExpr>(S)->getOperand(), L, + DT, LI); + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) + if (isComplexRecBelowLoop(Op, L, DT, LI)) + return true; + return false; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S); + return isComplexRecBelowLoop(UDiv->getLHS(), L, DT, LI) || + isComplexRecBelowLoop(UDiv->getRHS(), L, DT, LI); + } + case scUnknown: { + auto *SU = dyn_cast<SCEVUnknown>(S); + if (auto *Phi = dyn_cast<PHINode>(SU->getValue())) + if (auto *PL = LI.getLoopFor(Phi->getParent())) + if (Phi->getParent() == PL->getHeader()) + if (DT.dominates(L->getHeader(), PL->getHeader())) + return true; + return false; + } + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + } + return false; +} + /// Get a canonical add expression, or something simpler if possible. 
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, SCEV::NoWrapFlags Flags, @@ -2459,11 +2501,17 @@ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (isLoopInvariant(Ops[i], AddRecLoop)) { - LIOps.push_back(Ops[i]); - Ops.erase(Ops.begin()+i); - --i; --e; - } + if (isLoopInvariant(Ops[i], AddRecLoop)) + // Among our 'invariants' we could occasionally have a Phi of a loop + // which is dominated by our loop, but this Phi was so complex that + // SCEV failed to build an AddRecExpr for it. We do not have a right to + // fold such Phi because we expect that AddRec is a recurrency of the + // bottom-most loop where such folding remains correct. + if (!isComplexRecBelowLoop(Ops[i], AddRecLoop, DT, LI)) { + LIOps.push_back(Ops[i]); + Ops.erase(Ops.begin()+i); + --i; --e; + } // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { @@ -2734,11 +2782,17 @@ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (isLoopInvariant(Ops[i], AddRecLoop)) { - LIOps.push_back(Ops[i]); - Ops.erase(Ops.begin()+i); - --i; --e; - } + if (isLoopInvariant(Ops[i], AddRecLoop)) + // Among our 'invariants' we could occasionally have a Phi of a loop + // which is dominated by our loop, but this Phi was so complex that + // SCEV failed to build an AddRecExpr for it. We do not have a right to + // fold such Phi because we expect that AddRec is a recurrency of the + // bottom-most loop where such folding remains correct. + if (!isComplexRecBelowLoop(Ops[i], AddRecLoop, DT, LI)) { + LIOps.push_back(Ops[i]); + Ops.erase(Ops.begin()+i); + --i; --e; + } // If we found some loop invariants, fold them into the recurrence. 
if (!LIOps.empty()) { Index: test/Analysis/ScalarEvolution/different-loops-recs.ll =================================================================== --- test/Analysis/ScalarEvolution/different-loops-recs.ll +++ test/Analysis/ScalarEvolution/different-loops-recs.ll @@ -452,3 +452,60 @@ %s6 = add i32 %phi3, %phi2 ret void } + +; Make sure that a complicated Phi does not get folded with rec's start value +; of a loop which is above. +define void @test_08() { + +; CHECK-LABEL: Classifying expressions for: @test_08 +; CHECK: %tmp11 = add i64 %iv.2.2, %iv.2.1 +; CHECK-NEXT: --> ({0,+,-1}<%loop_2> + %iv.2.1) +; CHECK: %tmp12 = trunc i64 %tmp11 to i32 +; CHECK-NEXT: --> (trunc i64 ({0,+,-1}<%loop_2> + %iv.2.1) to i32) +; CHECK: %tmp14 = mul i32 %tmp12, %tmp7 +; CHECK-NEXT: --> ((trunc i64 ({0,+,-1}<%loop_2> + %iv.2.1) to i32) * {-1,+,-1}<%loop_1>) +; CHECK: %tmp16 = mul i64 %iv.2.1, %iv.1.1 +; CHECK-NEXT: --> ({2,+,1}<%loop_1> * %iv.2.1) + +entry: + br label %loop_1 + +loop_1: + %iv.1.1 = phi i64 [ 2, %entry ], [ %iv.1.1.next, %loop_1_back_branch ] + %iv.1.2 = phi i32 [ -1, %entry ], [ %iv.1.2.next, %loop_1_back_branch ] + br label %loop_1_exit + +dead: + br label %loop_1_exit + +loop_1_exit: + %tmp5 = icmp sgt i64 %iv.1.1, 2 + br i1 %tmp5, label %loop_2_preheader, label %loop_1_back_branch + +loop_1_back_branch: + %iv.1.1.next = add nuw nsw i64 %iv.1.1, 1 + %iv.1.2.next = add nsw i32 %iv.1.2, 1 + br label %loop_1 + +loop_2_preheader: + %tmp6 = sub i64 1, %iv.1.1 + %tmp7 = trunc i64 %tmp6 to i32 + br label %loop_2 + +loop_2: + %iv.2.1 = phi i64 [ 0, %loop_2_preheader ], [ %tmp16, %loop_2 ] + %iv.2.2 = phi i64 [ 0, %loop_2_preheader ], [ %iv.2.2.next, %loop_2 ] + %iv.2.3 = phi i64 [ 2, %loop_2_preheader ], [ %iv.2.3.next, %loop_2 ] + %tmp11 = add i64 %iv.2.2, %iv.2.1 + %tmp12 = trunc i64 %tmp11 to i32 + %tmp14 = mul i32 %tmp12, %tmp7 + %tmp16 = mul i64 %iv.2.1, %iv.1.1 + %iv.2.3.next = add nuw nsw i64 %iv.2.3, 1 + %iv.2.2.next = add nsw i64 %iv.2.2, -1 + %tmp17 = icmp slt 
i64 %iv.2.3.next, %iv.1.1 + br i1 %tmp17, label %loop_2, label %exit + +exit: + %tmp10 = add i32 %iv.1.2, 3 + ret void +}