Index: include/llvm/Analysis/ScalarEvolutionExpressions.h
===================================================================
--- include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -345,6 +345,12 @@
     /// iteration number.
     const SCEV *evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const;

+    /// Return the minimum bitwidth of the iteration expr to compute this
+    /// AddRec at the given iteration without overflow. For affine AddRecs it's
+    /// the same as the AddRec type width; for non-affine AddRecs it might be
+    /// wider than the AddRec type width.
+    unsigned minIterationWidthForEvaluateAtIteration(ScalarEvolution &SE) const;
+
     /// Return the number of iterations of this loop that produce
     /// values in the specified constant range. Another way of
     /// looking at this is that it returns the first iteration number
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -1108,6 +1108,16 @@
 //                      Simple SCEV method implementations
 //===----------------------------------------------------------------------===//

+static unsigned MinIterationWidthForBinomialCoefficient(unsigned K,
+                                                        Type *ResultTy,
+                                                        ScalarEvolution &SE) {
+  unsigned W = SE.getTypeSizeInBits(ResultTy);
+  unsigned T = 1;
+  for (unsigned i = 3; i <= K; ++i)
+    T += APInt(W, i).countTrailingZeros();
+  return W + T;
+}
+
 /// Compute BC(It, K). The result has width W. Assume, K > 0.
 static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
                                        ScalarEvolution &SE,
@@ -1188,6 +1198,9 @@

   // We need at least W + T bits for the multiplication step
   unsigned CalculationBits = W + T;
+  assert(MinIterationWidthForBinomialCoefficient(K, ResultTy, SE) ==
+             CalculationBits &&
+         "must be the same!");

   // Calculate 2^T, at width T+W.
   APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);
@@ -1243,6 +1256,14 @@
   return Result;
 }

+unsigned SCEVAddRecExpr::minIterationWidthForEvaluateAtIteration(
+    ScalarEvolution &SE) const {
+  if (isAffine())
+    return SE.getTypeSizeInBits(getType());
+  return MinIterationWidthForBinomialCoefficient(getNumOperands() - 1,
+                                                 getType(), SE);
+}
+
 //===----------------------------------------------------------------------===//
 //                 SCEV Expression folder implementations
 //===----------------------------------------------------------------------===//
Index: lib/Analysis/ScalarEvolutionExpander.cpp
===================================================================
--- lib/Analysis/ScalarEvolutionExpander.cpp
+++ lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1476,30 +1476,15 @@
   Type *Ty = SE.getEffectiveSCEVType(S->getType());
   const Loop *L = S->getLoop();
+
+  unsigned MinCanonicalIVWidth = S->minIterationWidthForEvaluateAtIteration(SE);

   // First check for an existing canonical IV in a suitable type.
   PHINode *CanonicalIV = nullptr;
   if (PHINode *PN = L->getCanonicalInductionVariable())
-    if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
+    if (SE.getTypeSizeInBits(PN->getType()) >= MinCanonicalIVWidth)
       CanonicalIV = PN;

-  // Rewrite an AddRec in terms of the canonical induction variable, if
-  // its type is more narrow.
-  if (CanonicalIV &&
-      SE.getTypeSizeInBits(CanonicalIV->getType()) >
-      SE.getTypeSizeInBits(Ty)) {
-    SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
-    for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
-      NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
-    Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
-                                       S->getNoWrapFlags(SCEV::FlagNW)));
-    BasicBlock::iterator NewInsertPt =
-        findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock());
-    V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
-                      &*NewInsertPt);
-    return V;
-  }
-
   // {X,+,F} --> X + {0,+,F}
   if (!S->getStart()->isZero()) {
     SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
@@ -1540,12 +1525,14 @@
     // specified loop.
     BasicBlock *Header = L->getHeader();
     pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
-    CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
-                                  &Header->front());
+    auto *CanonicalIVType =
+        IntegerType::get(SE.getContext(), MinCanonicalIVWidth);
+    CanonicalIV = PHINode::Create(CanonicalIVType, std::distance(HPB, HPE),
+                                  "indvar", &Header->front());
     rememberInstruction(CanonicalIV);

     SmallSet<BasicBlock *, 4> PredSeen;
-    Constant *One = ConstantInt::get(Ty, 1);
+    Constant *One = ConstantInt::get(CanonicalIVType, 1);
     for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
       BasicBlock *HP = *HPI;
       if (!PredSeen.insert(HP).second) {
@@ -1565,17 +1552,20 @@
         rememberInstruction(Add);
         CanonicalIV->addIncoming(Add, HP);
       } else {
-        CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
+        CanonicalIV->addIncoming(Constant::getNullValue(CanonicalIVType), HP);
       }
     }
   }

-  // {0,+,1} --> Insert a canonical induction variable into the loop!
+  // {0,+,1} --> Return the canonical induction variable
   if (S->isAffine() && S->getOperand(1)->isOne()) {
-    assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
-           "IVs with types different from the canonical IV should "
-           "already have been handled!");
-    return CanonicalIV;
+    if (Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()))
+      return CanonicalIV;
+
+    assert(SE.getTypeSizeInBits(CanonicalIV->getType()) >=
+               SE.getTypeSizeInBits(Ty) &&
+           "Canonical IV can't be narrower than the expr!");
+    return expand(SE.getTruncateExpr(SE.getUnknown(CanonicalIV), Ty));
   }

   // {0,+,F} --> {0,+,1} * F
@@ -1595,13 +1585,7 @@
   // into this folder.
   const SCEV *IH = SE.getUnknown(CanonicalIV);   // Get I as a "symbolic" SCEV.

-  // Promote S up to the canonical IV type, if the cast is foldable.
-  const SCEV *NewS = S;
-  const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
-  if (isa<SCEVAddRecExpr>(Ext))
-    NewS = Ext;
-
-  const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
+  const SCEV *V = cast<SCEVAddRecExpr>(S)->evaluateAtIteration(IH, SE);
   //cerr << "Evaluated: " << *this << "\n     to: " << *V << "\n";

   // Truncate the result down to the original type, if needed.
Index: test/Analysis/ScalarEvolution/scev-expand-canonical-iv-type.ll
===================================================================
--- /dev/null
+++ test/Analysis/ScalarEvolution/scev-expand-canonical-iv-type.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; This test verifies that the SCEVExpander emits a canonical IV of a proper type
+; for non-linear addrec expansion. In this test case loop-vectorize expands an
+; i8 {0,+,2,+,1} addrec in the outer loop. The expression to compute the addrec
+; at a given iteration involves division by 2 (via shr). In order to compute
+; this expression without overflow, a canonical IV of the wider type i9 should be
+; used.
+;
+; See also:
+; SCEVAddRecExpr::minIterationWidthForEvaluateAtIteration,
+; ScalarEvolutionsTest::SCEVExpandInsertCanonicalIV.
+
+define i32 @test(i8* %p) {
+; CHECK-LABEL: @test
+bb:
+  br label %outer_loop
+
+outer_loop:
+; Check that the canonical IV inserted by the expander is of i9 type
+; CHECK: outer_loop:
+; CHECK: %indvar = phi i9 [ %indvar.next, %outer_loop_cont ], [ 0, %bb ]
+  %tmp4 = phi i32 [ 0, %bb ], [ %tmp8, %outer_loop_cont ]
+  %tmp5 = phi i32 [ 0, %bb ], [ %tmp13, %outer_loop_cont ]
+  %tmp7 = phi i32 [ 1, %bb ], [ %tmp16, %outer_loop_cont ]
+  %tmp8 = add i32 %tmp7, %tmp4
+  %tmp9 = trunc i32 %tmp8 to i8
+  %tmp10 = load i8, i8* %p, align 8
+  br label %inner_loop
+
+inner_loop:
+  %tmp19 = phi i8 [ %tmp10, %outer_loop ], [ %tmp23, %inner_loop ]
+  %tmp20 = phi i32 [ %tmp5, %outer_loop ], [ %tmp22, %inner_loop ]
+  %tmp21 = phi i32 [ 1, %outer_loop ], [ %tmp24, %inner_loop ]
+  %tmp22 = add i32 %tmp20, 1
+  %tmp23 = add i8 %tmp19, %tmp9
+  %tmp24 = add nuw nsw i32 %tmp21, 1
+  %tmp25 = icmp ugt i32 %tmp21, 75
+  br i1 %tmp25, label %outer_loop_cont, label %inner_loop
+
+outer_loop_cont:
+  %tmp12 = phi i8 [ %tmp19, %inner_loop ]
+  %tmp13 = phi i32 [ %tmp22, %inner_loop ]
+  %tmp14 = phi i8 [ %tmp23, %inner_loop ]
+  store i8 %tmp14, i8* %p, align 8
+  %tmp15 = sext i8 %tmp12 to i32
+  %tmp16 = add nuw nsw i32 %tmp7, 1
+  %tmp17 = icmp ugt i32 %tmp7, 256
+  br i1 %tmp17, label %exit, label %outer_loop
+
+exit:
+  %tmp2 = phi i32 [ %tmp15, %outer_loop_cont ]
+  ret i32 %tmp2
+}
\ No newline at end of file
Index: test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
===================================================================
--- test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
+++ test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
@@ -28,12 +28,11 @@
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[M]], -1
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[K:%.*]] to i64
 ; CHECK-NEXT:    br label [[FOR_BODY3_LR_PH_US:%.*]]
 ; CHECK:       for.end.us:
 ; CHECK-NEXT:    [[ARRAYIDX9_US:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV33:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access !0
-; CHECK-NEXT:    [[ADD10_US:%.*]] = add nsw i32 [[TMP4]], 3
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access !0
+; CHECK-NEXT:    [[ADD10_US:%.*]] = add nsw i32 [[TMP3]], 3
 ; CHECK-NEXT:    store i32 [[ADD10_US]], i32* [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access !0
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT34:%.*]] = add i64 [[INDVARS_IV33]], 1
 ; CHECK-NEXT:    [[LFTR_WIDEIV35:%.*]] = trunc i64 [[INDVARS_IV_NEXT34]] to i32
@@ -41,12 +40,12 @@
 ; CHECK-NEXT:    br i1 [[EXITCOND36]], label [[FOR_END15_LOOPEXIT:%.*]], label [[FOR_BODY3_LR_PH_US]], !llvm.loop !2
 ; CHECK:       for.body3.us:
 ; CHECK-NEXT:    [[INDVARS_IV29:%.*]] = phi i64 [ [[BC_RESUME_VAL:%.*]], [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT30:%.*]], [[FOR_BODY3_US:%.*]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[INDVARS_IV29]] to i32
-; CHECK-NEXT:    [[ADD4_US:%.*]] = add i32 [[ADD_US:%.*]], [[TMP5]]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDVARS_IV29]] to i32
+; CHECK-NEXT:    [[ADD4_US:%.*]] = add i32 [[ADD_US:%.*]], [[TMP4]]
 ; CHECK-NEXT:    [[IDXPROM_US:%.*]] = sext i32 [[ADD4_US]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM_US]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_US]], align 4, !llvm.mem.parallel_loop_access !0
-; CHECK-NEXT:    [[ADD5_US:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_US]], align 4, !llvm.mem.parallel_loop_access !0
+; CHECK-NEXT:    [[ADD5_US:%.*]] = add nsw i32 [[TMP5]], 1
 ; CHECK-NEXT:    store i32 [[ADD5_US]], i32* [[ARRAYIDX7_US:%.*]], align 4, !llvm.mem.parallel_loop_access !0
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT30]] = add i64 [[INDVARS_IV29]], 1
 ; CHECK-NEXT:    [[LFTR_WIDEIV31:%.*]] = trunc i64 [[INDVARS_IV_NEXT30]] to i32
@@ -54,10 +53,10 @@
 ; CHECK-NEXT:    br i1 [[EXITCOND32]], label [[FOR_END_US:%.*]], label [[FOR_BODY3_US]], !llvm.loop !3
 ; CHECK:       for.body3.lr.ph.us:
 ; CHECK-NEXT:    [[INDVARS_IV33]] = phi i64 [ [[INDVARS_IV_NEXT34]], [[FOR_END_US]] ], [ 0, [[FOR_BODY3_LR_PH_US_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP3]], [[INDVARS_IV33]]
-; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
-; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[INDVARS_IV33]] to i32
-; CHECK-NEXT:    [[ADD_US]] = add i32 [[TMP9]], [[K]]
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[INDVARS_IV33]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[K:%.*]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[INDVARS_IV33]] to i32
+; CHECK-NEXT:    [[ADD_US]] = add i32 [[TMP8]], [[K]]
 ; CHECK-NEXT:    [[ARRAYIDX7_US]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV33]]
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_SCEVCHECK:%.*]]
@@ -65,40 +64,40 @@
 ; CHECK-NEXT:    [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP0]])
 ; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
-; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP8]], [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP11:%.*]] = sub i32 [[TMP8]], [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp slt i32 [[TMP10]], [[TMP8]]
-; CHECK-NEXT:    [[TMP14:%.*]] = select i1 false, i1 [[TMP12]], i1 [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]]
-; CHECK-NEXT:    [[TMP16:%.*]] = or i1 false, [[TMP15]]
-; CHECK-NEXT:    br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP7]], [[MUL_RESULT]]
+; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 [[TMP7]], [[MUL_RESULT]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP7]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp slt i32 [[TMP9]], [[TMP7]]
+; CHECK-NEXT:    [[TMP13:%.*]] = select i1 false, i1 [[TMP11]], i1 [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW]]
+; CHECK-NEXT:    [[TMP15:%.*]] = or i1 false, [[TMP14]]
+; CHECK-NEXT:    br i1 [[TMP15]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP17:%.*]] = trunc i64 [[INDEX]] to i32
-; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP17]], 0
-; CHECK-NEXT:    [[TMP19:%.*]] = add i32 [[ADD_US]], [[TMP18]]
-; CHECK-NEXT:    [[TMP20:%.*]] = sext i32 [[TMP19]] to i64
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]]
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 0
-; CHECK-NEXT:    [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP23]], align 4
-; CHECK-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x i32> [[TMP24]], i32 0
+; CHECK-NEXT:    [[TMP16:%.*]] = trunc i64 [[INDEX]] to i32
+; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP16]], 0
+; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[ADD_US]], [[TMP17]]
+; CHECK-NEXT:    [[TMP19:%.*]] = sext i32 [[TMP18]] to i64
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0
+; CHECK-NEXT:    [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP23:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i32> [[TMP23]], i32 0
+; CHECK-NEXT:    store i32 [[TMP24]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
+; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x i32> [[TMP23]], i32 1
 ; CHECK-NEXT:    store i32 [[TMP25]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
-; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 1
+; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i32> [[TMP23]], i32 2
 ; CHECK-NEXT:    store i32 [[TMP26]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
-; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2
+; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i32> [[TMP23]], i32 3
 ; CHECK-NEXT:    store i32 [[TMP27]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
-; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3
-; CHECK-NEXT:    store i32 [[TMP28]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
+; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END_US]], label [[SCALAR_PH]]
Index: unittests/Analysis/ScalarEvolutionTest.cpp
===================================================================
--- unittests/Analysis/ScalarEvolutionTest.cpp
+++ unittests/Analysis/ScalarEvolutionTest.cpp
@@ -1444,5 +1444,234 @@
   EXPECT_EQ(S2S->getExpressionSize(), 5u);
 }

+/// This test verifies that the SCEVExpander emits a canonical IV of a proper
+/// type while expanding non-linear addrecs.
+///
+/// See also:
+/// SCEVAddRecExpr::minIterationWidthForEvaluateAtIteration
+/// scev-expand-canonical-iv-type.ll
+TEST_F(ScalarEvolutionsTest, SCEVExpandInsertCanonicalIV) {
+  LLVMContext C;
+  SMDiagnostic Err;
+
+  // Expand the addrec produced by GetAddRec into a loop without a canonical
+  // IV. SCEVExpander will produce one to expand the addrec. Check that the
+  // type of the inserted IV is equal to ExpectedCanonicalIVWidth.
+  auto TestNoCanonicalIV = [&](
+      std::function<const SCEV *(ScalarEvolution & SE, Loop * L)> GetAddRec,
+      unsigned ExpectedCanonicalIVWidth) {
+    std::unique_ptr<Module> M =
+        parseAssemblyString("define i32 @test(i32 %limit) { "
+                            "entry: "
+                            "  br label %loop "
+                            "loop: "
+                            "  %i = phi i32 [ 1, %entry ], [ %i.inc, %loop ] "
+                            "  %i.inc = add nsw i32 %i, 1 "
+                            "  %cont = icmp slt i32 %i.inc, %limit "
+                            "  br i1 %cont, label %loop, label %exit "
+                            "exit: "
+                            "  ret i32 %i.inc "
+                            "}",
+                            Err, C);
+
+    assert(M && "Could not parse module?");
+    assert(!verifyModule(*M) && "Must have been well formed!");
+
+    runWithSE(*M, "test", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+      auto &I = GetInstByName(F, "i");
+      auto *Loop = LI.getLoopFor(I.getParent());
+      EXPECT_FALSE(Loop->getCanonicalInductionVariable());
+
+      auto *AR = GetAddRec(SE, Loop);
+      SCEVExpander Exp(SE, M->getDataLayout(), "expander");
+      auto *InsertAt = I.getNextNode();
+      Exp.expandCodeFor(AR, nullptr, InsertAt);
+      PHINode *CanonicalIV = Loop->getCanonicalInductionVariable();
+      unsigned CanonicalIVBitWidth =
+          cast<IntegerType>(CanonicalIV->getType())->getBitWidth();
+      EXPECT_EQ(CanonicalIVBitWidth, ExpectedCanonicalIVWidth);
+    });
+  };
+
+  // Expand the addrec produced by GetAddRec into a loop with a canonical IV
+  // which is narrower than ExpectedCanonicalIVWidth.
+  // SCEVExpander will produce a canonical IV of a wider type to expand the
+  // addrec. Check that the type of the inserted IV is equal to
+  // ExpectedCanonicalIVWidth.
+  auto TestNarrowCanonicalIV = [&](
+      std::function<const SCEV *(ScalarEvolution & SE, Loop * L)> GetAddRec,
+      unsigned ExpectedCanonicalIVWidth) {
+    std::unique_ptr<Module> M = parseAssemblyString(
+        "define i32 @test(i32 %limit) { "
+        "entry: "
+        "  br label %loop "
+        "loop: "
+        "  %i = phi i32 [ 1, %entry ], [ %i.inc, %loop ] "
+        "  %canonical.iv = phi i8 [ 0, %entry ], [ %canonical.iv.inc, %loop ] "
+        "  %i.inc = add nsw i32 %i, 1 "
+        "  %canonical.iv.inc = add i8 %canonical.iv, 1 "
+        "  %cont = icmp slt i32 %i.inc, %limit "
+        "  br i1 %cont, label %loop, label %exit "
+        "exit: "
+        "  ret i32 %i.inc "
+        "}",
+        Err, C);
+
+    assert(M && "Could not parse module?");
+    assert(!verifyModule(*M) && "Must have been well formed!");
+
+    runWithSE(*M, "test", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+      auto &I = GetInstByName(F, "i");
+
+      auto *LoopHeaderBB = I.getParent();
+      auto *Loop = LI.getLoopFor(LoopHeaderBB);
+      PHINode *CanonicalIV = Loop->getCanonicalInductionVariable();
+      EXPECT_EQ(CanonicalIV, &GetInstByName(F, "canonical.iv"));
+
+      unsigned CanonicalIVBitWidth =
+          cast<IntegerType>(CanonicalIV->getType())->getBitWidth();
+      EXPECT_LT(CanonicalIVBitWidth, ExpectedCanonicalIVWidth);
+
+      auto *AR = GetAddRec(SE, Loop);
+      SCEVExpander Exp(SE, M->getDataLayout(), "expander");
+      auto *InsertAt = I.getNextNode();
+      Exp.expandCodeFor(AR, nullptr, InsertAt);
+
+      // Loop over all of the PHI nodes, looking for the new canonical indvar.
+      PHINode *NewCanonicalIV = nullptr;
+      for (BasicBlock::iterator i = LoopHeaderBB->begin(); isa<PHINode>(i);
+           ++i) {
+        PHINode *PN = cast<PHINode>(i);
+        if (PN == &I || PN == CanonicalIV)
+          continue;
+        EXPECT_FALSE(NewCanonicalIV);
+        NewCanonicalIV = PN;
+      }
+
+      // Check that NewCanonicalIV is a canonical IV, i.e. {0,+,1}
+      BasicBlock *Incoming = nullptr, *Backedge = nullptr;
+      EXPECT_TRUE(Loop->getIncomingAndBackEdge(Incoming, Backedge));
+      auto *Start = NewCanonicalIV->getIncomingValueForBlock(Incoming);
+      EXPECT_TRUE(isa<ConstantInt>(Start));
+      EXPECT_TRUE(dyn_cast<ConstantInt>(Start)->isZero());
+      auto *Next = NewCanonicalIV->getIncomingValueForBlock(Backedge);
+      EXPECT_TRUE(isa<BinaryOperator>(Next));
+      auto *NextBinOp = dyn_cast<BinaryOperator>(Next);
+      EXPECT_EQ(NextBinOp->getOpcode(), Instruction::Add);
+      EXPECT_EQ(NextBinOp->getOperand(0), NewCanonicalIV);
+      auto *Step = NextBinOp->getOperand(1);
+      EXPECT_TRUE(isa<ConstantInt>(Step));
+      EXPECT_TRUE(dyn_cast<ConstantInt>(Step)->isOne());
+
+      unsigned NewCanonicalIVBitWidth =
+          cast<IntegerType>(NewCanonicalIV->getType())->getBitWidth();
+      EXPECT_EQ(NewCanonicalIVBitWidth, ExpectedCanonicalIVWidth);
+    });
+  };
+
+  // Expand the addrec produced by GetAddRec into a loop with a canonical IV
+  // of width ExpectedCanonicalIVWidth.
+  // To expand the addrec, SCEVExpander should use the existing canonical IV.
+  auto TestMatchingCanonicalIV = [&](
+      std::function<const SCEV *(ScalarEvolution & SE, Loop * L)> GetAddRec,
+      unsigned ExpectedCanonicalIVWidth) {
+    auto ExpectedCanonicalIVTypeStr =
+        "i" + std::to_string(ExpectedCanonicalIVWidth);
+    std::unique_ptr<Module> M = parseAssemblyString(
+        "define i32 @test(i32 %limit) { "
+        "entry: "
+        "  br label %loop "
+        "loop: "
+        "  %i = phi i32 [ 1, %entry ], [ %i.inc, %loop ] "
+        "  %canonical.iv = phi " + ExpectedCanonicalIVTypeStr +
+            " [ 0, %entry ], [ %canonical.iv.inc, %loop ] "
+        "  %i.inc = add nsw i32 %i, 1 "
+        "  %canonical.iv.inc = add " + ExpectedCanonicalIVTypeStr +
+            " %canonical.iv, 1 "
+        "  %cont = icmp slt i32 %i.inc, %limit "
+        "  br i1 %cont, label %loop, label %exit "
+        "exit: "
+        "  ret i32 %i.inc "
+        "}",
+        Err, C);
+
+    assert(M && "Could not parse module?");
+    assert(!verifyModule(*M) && "Must have been well formed!");
+
+    runWithSE(*M, "test", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+      auto &I = GetInstByName(F, "i");
+      auto &CanonicalIV = GetInstByName(F, "canonical.iv");
+
+      auto *LoopHeaderBB = I.getParent();
+      auto *Loop = LI.getLoopFor(LoopHeaderBB);
+      EXPECT_EQ(&CanonicalIV, Loop->getCanonicalInductionVariable());
+      unsigned CanonicalIVBitWidth =
+          cast<IntegerType>(CanonicalIV.getType())->getBitWidth();
+      EXPECT_EQ(CanonicalIVBitWidth, ExpectedCanonicalIVWidth);
+
+      auto *AR = GetAddRec(SE, Loop);
+      SCEVExpander Exp(SE, M->getDataLayout(), "expander");
+      auto *InsertAt = I.getNextNode();
+      Exp.expandCodeFor(AR, nullptr, InsertAt);
+
+      // Loop over all of the PHI nodes, checking whether a new canonical
+      // indvar was introduced.
+      PHINode *NewCanonicalIV = nullptr;
+      for (BasicBlock::iterator i = LoopHeaderBB->begin(); isa<PHINode>(i);
+           ++i) {
+        PHINode *PN = cast<PHINode>(i);
+        if (PN == &I || PN == &CanonicalIV)
+          continue;
+        NewCanonicalIV = PN;
+      }
+      EXPECT_FALSE(NewCanonicalIV);
+    });
+  };
+
+  unsigned ARBitwidth = 16;
+  Type *ARType = IntegerType::get(C, ARBitwidth);
+
+  // Expand {5,+,1}, expect ARBitwidth canonical IV width
+  auto GetAR2 = [&](ScalarEvolution &SE, Loop *L) -> const SCEV * {
+    return SE.getAddRecExpr(SE.getConstant(APInt(ARBitwidth, 5)),
+                            SE.getOne(ARType), L, SCEV::FlagAnyWrap);
+  };
+  TestNoCanonicalIV(GetAR2, ARBitwidth);
+  TestNarrowCanonicalIV(GetAR2, ARBitwidth);
+  TestMatchingCanonicalIV(GetAR2, ARBitwidth);
+
+  // Expand {5,+,1,+,1}, expect ARBitwidth+1 canonical IV width
+  auto GetAR3 = [&](ScalarEvolution &SE, Loop *L) -> const SCEV * {
+    SmallVector<const SCEV *, 3> Ops = {SE.getConstant(APInt(ARBitwidth, 5)),
+                                        SE.getOne(ARType), SE.getOne(ARType)};
+    return SE.getAddRecExpr(Ops, L, SCEV::FlagAnyWrap);
+  };
+  TestNoCanonicalIV(GetAR3, ARBitwidth + 1);
+  TestNarrowCanonicalIV(GetAR3, ARBitwidth + 1);
+  TestMatchingCanonicalIV(GetAR3, ARBitwidth + 1);
+
+  // Expand {5,+,1,+,1,+,1}, expect ARBitwidth+1 canonical IV width
+  auto GetAR4 = [&](ScalarEvolution &SE, Loop *L) -> const SCEV * {
+    SmallVector<const SCEV *, 4> Ops = {SE.getConstant(APInt(ARBitwidth, 5)),
+                                        SE.getOne(ARType), SE.getOne(ARType),
+                                        SE.getOne(ARType)};
+    return SE.getAddRecExpr(Ops, L, SCEV::FlagAnyWrap);
+  };
+  TestNoCanonicalIV(GetAR4, ARBitwidth + 1);
+  TestNarrowCanonicalIV(GetAR4, ARBitwidth + 1);
+  TestMatchingCanonicalIV(GetAR4, ARBitwidth + 1);
+
+  // Expand {5,+,1,+,1,+,1,+,1}, expect ARBitwidth+3 canonical IV width
+  auto GetAR5 = [&](ScalarEvolution &SE, Loop *L) -> const SCEV * {
+    SmallVector<const SCEV *, 5> Ops = {SE.getConstant(APInt(ARBitwidth, 5)),
+                                        SE.getOne(ARType), SE.getOne(ARType),
+                                        SE.getOne(ARType), SE.getOne(ARType)};
+    return SE.getAddRecExpr(Ops, L, SCEV::FlagAnyWrap);
+  };
+  TestNoCanonicalIV(GetAR5, ARBitwidth + 3);
+  TestNarrowCanonicalIV(GetAR5, ARBitwidth + 3);
+  TestMatchingCanonicalIV(GetAR5, ARBitwidth + 3);
+}
+
 } // end anonymous namespace
 } // end namespace llvm
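
Aside (illustrative only, not part of the patch): a minimal standalone sketch of the width computation performed by MinIterationWidthForBinomialCoefficient, checked against the expectations encoded in the unittest above. The helper name minIterationWidthSketch and the use of __builtin_ctz are assumptions of this sketch, not code from the patch.

// A degree-K addrec of width W needs W + T bits, where T is 1 plus the sum of
// countTrailingZeros(i) for i in [3, K], mirroring the function in the patch.
#include <cassert>

static unsigned minIterationWidthSketch(unsigned K, unsigned W) {
  unsigned T = 1;
  for (unsigned i = 3; i <= K; ++i)
    T += __builtin_ctz(i); // plays the role of APInt::countTrailingZeros()
  return W + T;
}

int main() {
  // Matches the unittest: for a 16-bit addrec, {5,+,1,+,1} (K = 2) and
  // {5,+,1,+,1,+,1} (K = 3) need 17 bits; {5,+,1,+,1,+,1,+,1} (K = 4) needs 19.
  assert(minIterationWidthSketch(2, 16) == 17);
  assert(minIterationWidthSketch(3, 16) == 17);
  assert(minIterationWidthSketch(4, 16) == 19);
  return 0;
}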