Index: llvm/lib/Transforms/Scalar/LoopFlatten.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -157,6 +157,7 @@ // Find increment and limit from the compare Increment = nullptr; + Value *LatchValue = InductionPHI->getIncomingValueForBlock(Latch); if (match(Compare->getOperand(0), m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) { Increment = dyn_cast(Compare->getOperand(0)); @@ -166,6 +167,18 @@ m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) { Increment = dyn_cast(Compare->getOperand(1)); Limit = Compare->getOperand(0); + } else if (match(LatchValue, + m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) { + // The compare may have been altered by another transformation (e.g icmp ult + // %inc, limit -> icmp ult %j, limit-1), where limit is a constant. In this + // case the increment is obtained from the InductionPHI and the limit is the + // RHS of the compare + 1. + assert(Compare->getOperand(0) == InductionPHI); + Increment = dyn_cast(LatchValue); + ConstantInt *RHS = dyn_cast(Compare->getOperand(1)); + ConstantInt *One = ConstantInt::get(RHS->getType(), 1, true); + Limit = ConstantInt::get(Compare->getContext(), + RHS->getValue() + One->getValue()); } if (!Increment || Increment->hasNUsesOrMore(3)) { LLVM_DEBUG(dbgs() << "Cound not find valid increment\n"); @@ -185,7 +198,10 @@ auto *CI = dyn_cast( InductionPHI->getIncomingValueForBlock(L->getLoopPreheader())); - if (!CI || !CI->isZero()) { + if (!CI) { + LLVM_DEBUG(dbgs() << "PHI value is not zero \n"); + return false; + } else if (!CI->isZero()) { LLVM_DEBUG(dbgs() << "PHI value is not zero: "; CI->dump()); return false; } @@ -379,7 +395,14 @@ m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)), m_Value(MatchedItCount))); - if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerLimit) { + // The use is in the compare which is also the condition of the inner + // branch. In this case the compare has been altered by another + // transformation (e.g icmp ult %inc, limit -> icmp ult %j, limit-1), where + // limit is a constant. Ignore this use as the compare gets removed later + // anyway. + if (U == FI.InnerBranch->getCondition()) + continue; + else if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerLimit) { LLVM_DEBUG(dbgs() << "Use is optimisable\n"); ValidOuterPHIUses.insert(MatchedMul); FI.LinearIVUses.insert(U); Index: llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll =================================================================== --- llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll +++ llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll @@ -341,6 +341,109 @@ ret i32 10 } +; test_10, test_11 and test_12 are for the case when the inner limit is a +; constant (e.g. 20), then InstCombine makes the transformation: +; icmp ult i32 %inc, 20 -> icmp ult i32 %j, 20-step. + +; test_10: The step is not 1. +define i32 @test_10(i32* nocapture %A) { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: + %i.017 = phi i32 [ 0, %entry ], [ %inc, %for.cond.cleanup3 ] + %mul = mul i32 %i.017, 20 + br label %for.body4 + +for.body4: + %j.016 = phi i32 [ 0, %for.cond1.preheader ], [ %add5, %for.body4 ] + %add = add i32 %j.016, %mul + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add + store i32 30, i32* %arrayidx, align 4 + %add5 = add nuw nsw i32 %j.016, 2 + %cmp2 = icmp ult i32 %j.016, 18 + br i1 %cmp2, label %for.body4, label %for.cond.cleanup3 + +for.cond.cleanup3: + %inc = add i32 %i.017, 1 + %cmp = icmp ult i32 %inc, 11 + br i1 %cmp, label %for.cond1.preheader, label %for.cond.cleanup + +for.cond.cleanup: + %0 = load i32, i32* %A, align 4 + ret i32 %0 +} + +; test_11: The inner inducation variable is used in a compare which +; isn't the condition of the inner branch. +define i32 @test_11(i32* nocapture %A) { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: + %i.020 = phi i32 [ 0, %entry ], [ %inc7, %for.cond.cleanup3 ] + %mul = mul i32 %i.020, 20 + br label %for.body4 + +for.body4: + %j.019 = phi i32 [ 0, %for.cond1.preheader ], [ %inc, %for.body4 ] + %cmp5 = icmp ult i32 %j.019, 5 + %cond = select i1 %cmp5, i32 30, i32 15 + %add = add i32 %j.019, %mul + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add + store i32 %cond, i32* %arrayidx, align 4 + %inc = add nuw nsw i32 %j.019, 1 + %cmp2 = icmp ult i32 %j.019, 19 + br i1 %cmp2, label %for.body4, label %for.cond.cleanup3 + +for.cond.cleanup3: + %inc7 = add i32 %i.020, 1 + %cmp = icmp ult i32 %inc7, 11 + br i1 %cmp, label %for.cond1.preheader, label %for.cond.cleanup + +for.cond.cleanup: + %0 = load i32, i32* %A, align 4 + ret i32 %0 +} + +; test_12: Incoming phi node value for preheader is a variable +define i32 @test_12(i32* %A) { +entry: + br label %while.cond1.preheader + +while.cond1.preheader: + %j.017 = phi i32 [ 0, %entry ], [ %j.1, %while.end ] + %i.016 = phi i32 [ 0, %entry ], [ %inc4, %while.end ] + %mul = mul i32 %i.016, 20 + %cmp214 = icmp ult i32 %j.017, 20 + br i1 %cmp214, label %while.body3.preheader, label %while.end + +while.body3.preheader: + br label %while.body3 + +while.body3: + %j.115 = phi i32 [ %inc, %while.body3 ], [ %j.017, %while.body3.preheader ] + %add = add i32 %j.115, %mul + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add + store i32 30, i32* %arrayidx, align 4 + %inc = add nuw nsw i32 %j.115, 1 + %cmp2 = icmp ult i32 %j.115, 19 + br i1 %cmp2, label %while.body3, label %while.end.loopexit + +while.end.loopexit: + %inc.lcssa = phi i32 [ %inc, %while.body3 ] + br label %while.end + +while.end: + %j.1 = phi i32 [ %j.017, %while.cond1.preheader], [ %inc.lcssa, %while.end.loopexit ] + %inc4 = add i32 %i.016, 1 + %cmp = icmp ult i32 %inc4, 11 + br i1 %cmp, label %while.cond1.preheader, label %while.end5 + +while.end5: + %0 = load i32, i32* %A, align 4 + ret i32 %0 +} ; Outer loop conditional phi define i32 @e() { Index: llvm/test/Transforms/LoopFlatten/loop-flatten.ll =================================================================== --- llvm/test/Transforms/LoopFlatten/loop-flatten.ll +++ llvm/test/Transforms/LoopFlatten/loop-flatten.ll @@ -586,6 +586,59 @@ ret i32 10 } +; When the inner loop limit is a defined integer (e.g. 20) and the step +; is 1, InstCombine causes the transformation: +; icmp ult i32 %inc, 20 -> icmp ult i32 %j, 19. +; This is an 'unoptimizable' use of the inner induction variable %j but +; we should still flatten the loop as this compare instruction is +; removed later anyway. +define i32 @test9(i32* nocapture %A) { +entry: + br label %for.cond1.preheader +; CHECK-LABEL: test9 +; CHECK: entry: +; CHECK: %flatten.tripcount = mul i32 20, 11 +; CHECK: br label %for.cond1.preheader + +for.cond1.preheader: + %i.017 = phi i32 [ 0, %entry ], [ %inc6, %for.cond.cleanup3 ] + %mul = mul i32 %i.017, 20 + br label %for.body4 +; CHECK: for.cond1.preheader: +; CHECK: %i.017 = phi i32 [ 0, %entry ], [ %inc6, %for.cond.cleanup3 ] +; CHECK: %mul = mul i32 %i.017, 20 +; CHECK: br label %for.body4 + +for.cond.cleanup3: + %inc6 = add i32 %i.017, 1 + %cmp = icmp ult i32 %inc6, 11 + br i1 %cmp, label %for.cond1.preheader, label %for.cond.cleanup +; CHECK: for.cond.cleanup3: +; CHECK: %inc6 = add i32 %i.017, 1 +; CHECK: %cmp = icmp ult i32 %inc6, %flatten.tripcount +; CHECK: br i1 %cmp, label %for.cond1.preheader, label %for.cond.cleanup + +for.body4: + %j.016 = phi i32 [ 0, %for.cond1.preheader ], [ %inc, %for.body4 ] + %add = add i32 %j.016, %mul + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add + store i32 30, i32* %arrayidx, align 4 + %inc = add nuw nsw i32 %j.016, 1 + %cmp2 = icmp ult i32 %j.016, 19 + br i1 %cmp2, label %for.body4, label %for.cond.cleanup3 +; CHECK: for.body4 +; CHECK: %j.016 = phi i32 [ 0, %for.cond1.preheader ] +; CHECK: %add = add i32 %j.016, %mul +; CHECK: %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.017 +; CHECK: store i32 30, i32* %arrayidx, align 4 +; CHECK: %inc = add nuw nsw i32 %j.016, 1 +; CHECK: %cmp2 = icmp ult i32 %j.016, 19 +; CHECK: br label %for.cond.cleanup3 + +for.cond.cleanup: + %0 = load i32, i32* %A, align 4 + ret i32 %0 +} declare i32 @func(i32)