Index: include/llvm/Analysis/ScalarEvolutionExpander.h
===================================================================
--- include/llvm/Analysis/ScalarEvolutionExpander.h
+++ include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -77,9 +77,13 @@
   /// Phis that complete an IV chain. Reuse
   DenseSet<AssertingVH<PHINode>> ChainedPhis;
 
-  /// When true, expressions are expanded in "canonical" form. In particular,
-  /// addrecs are expanded as arithmetic based on a canonical induction
-  /// variable. When false, expression are expanded in a more literal form.
+  /// When true, SCEVExpander tries to expand expressions in "canonical" form.
+  /// When false, expressions are expanded in a more literal form.
+  ///
+  /// In "canonical" form addrecs are expanded as arithmetic based on a
+  /// canonical induction variable. Note that CanonicalMode doesn't guarantee
+  /// that all expressions are expanded in "canonical" form. For some
+  /// expressions, literal mode can be preferred.
   bool CanonicalMode;
 
   /// When invoked from LSR, the expander is in "strength reduction" mode. The
Index: include/llvm/Analysis/ScalarEvolutionExpressions.h
===================================================================
--- include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -343,8 +343,18 @@
 
     /// Return the value of this chain of recurrences at the specified
     /// iteration number.
+    ///
+    /// Note that for the resulting expression to be correct, the iteration
+    /// number can't be narrower than what
+    /// minIterationWidthForEvaluateAtIteration returns.
    const SCEV *evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const;
 
+    /// Return the minimum bitwidth of the iteration expression needed to
+    /// compute this AddRec at the given iteration without overflow. For
+    /// affine AddRecs it's the same as the AddRec type width; for non-affine
+    /// ones it might be wider than the AddRec type width.
+    unsigned minIterationWidthForEvaluateAtIteration(ScalarEvolution &SE) const;
+
     /// Return the number of iterations of this loop that produce
     /// values in the specified constant range. Another way of
     /// looking at this is that it returns the first iteration number
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -1108,6 +1108,16 @@
 // Simple SCEV method implementations
 //===----------------------------------------------------------------------===//
 
+static unsigned MinIterationWidthForBinomialCoefficient(unsigned K,
+                                                        Type *ResultTy,
+                                                        ScalarEvolution &SE) {
+  unsigned W = SE.getTypeSizeInBits(ResultTy);
+  unsigned T = 1;
+  for (unsigned i = 3; i <= K; ++i)
+    T += APInt(W, i).countTrailingZeros();
+  return W + T;
+}
+
 /// Compute BC(It, K). The result has width W. Assume, K > 0.
 static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
                                        ScalarEvolution &SE,
@@ -1188,6 +1198,9 @@
 
   // We need at least W + T bits for the multiplication step
   unsigned CalculationBits = W + T;
+  assert(MinIterationWidthForBinomialCoefficient(K, ResultTy, SE) ==
+             CalculationBits &&
+         "must be the same!");
 
   // Calculate 2^T, at width T+W.
   APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);
@@ -1243,6 +1256,14 @@
   return Result;
 }
 
+unsigned SCEVAddRecExpr::minIterationWidthForEvaluateAtIteration(
+    ScalarEvolution &SE) const {
+  if (isAffine())
+    return SE.getTypeSizeInBits(getType());
+  return MinIterationWidthForBinomialCoefficient(getNumOperands() - 1,
+                                                 getType(), SE);
+}
+
 //===----------------------------------------------------------------------===//
 // SCEV Expression folder implementations
 //===----------------------------------------------------------------------===//
Index: lib/Analysis/ScalarEvolutionExpander.cpp
===================================================================
--- lib/Analysis/ScalarEvolutionExpander.cpp
+++ lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1472,7 +1472,19 @@
 }
 
 Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
-  if (!CanonicalMode) return expandAddRecExprLiterally(S);
+  // In canonical mode we compute the addrec as an expression of a canonical
+  // IV using evaluateAtIteration and expand the resulting SCEV expression.
+  // This way we avoid introducing new IVs to carry the computation of the
+  // addrec throughout the loop.
+  //
+  // For non-affine addrecs evaluateAtIteration might need a canonical IV of a
+  // type wider than the addrec itself (see SCEVAddRecExpr::
+  // minIterationWidthForEvaluateAtIteration). Emitting a canonical IV of the
+  // proper type might produce non-legal types; for example, expanding an i64
+  // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this, just
+  // fall back to non-canonical mode for non-affine addrecs.
+  if (!CanonicalMode || !S->isAffine())
+    return expandAddRecExprLiterally(S);
 
   Type *Ty = SE.getEffectiveSCEVType(S->getType());
   const Loop *L = S->getLoop();
@@ -1594,6 +1606,12 @@
   // simplify the expression without having to build a bunch of special code
   // into this folder.
   const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
+  // evaluateAtIteration for non-affine addrecs needs a canonical IV of a
+  // wider type. For such addrecs we fall back to non-canonical (literal)
+  // expansion mode.
+  assert(SE.getTypeSizeInBits(IH->getType()) >=
+             S->minIterationWidthForEvaluateAtIteration(SE) &&
+         "Can't use this canonical IV for evaluateAtIteration");
 
   // Promote S up to the canonical IV type, if the cast is foldable.
   const SCEV *NewS = S;
Index: test/Analysis/ScalarEvolution/scev-expander-non-affine.ll
===================================================================
--- /dev/null
+++ test/Analysis/ScalarEvolution/scev-expander-non-affine.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; This test verifies that SCEVExpander correctly expands non-affine addrecs in
+; "CanonicalMode". Expanding non-affine addrecs in canonical mode requires a
+; canonical IV of a type wider than the type of the addrec itself. See the
+; SCEVAddRecExpr::minIterationWidthForEvaluateAtIteration comment.
+; Currently, SCEVExpander just falls back to literal mode for non-affine
+; addrecs.
+;
+; In this test case loop-vectorize expands an i8 {0,+,2,+,1} addrec in the
+; outer loop. The test checks that after the transform outer_loop has an IV
+; literally representing the addrec.
+
+define i32 @test(i8* %p) {
+; CHECK-LABEL: @test
+bb:
+  br label %outer_loop
+
+outer_loop:
+; CHECK: outer_loop:
+; CHECK: %induction.iv = phi i8 [ %induction.iv.next, %outer_loop_cont ], [ 1, %bb ]
+  %tmp4 = phi i32 [ 0, %bb ], [ %tmp8, %outer_loop_cont ]
+  %tmp5 = phi i32 [ 0, %bb ], [ %tmp13, %outer_loop_cont ]
+  %tmp7 = phi i32 [ 1, %bb ], [ %tmp16, %outer_loop_cont ]
+  %tmp8 = add i32 %tmp7, %tmp4
+  %tmp9 = trunc i32 %tmp8 to i8
+  %tmp10 = load i8, i8* %p, align 8
+  br label %inner_loop
+
+inner_loop:
+  %tmp19 = phi i8 [ %tmp10, %outer_loop ], [ %tmp23, %inner_loop ]
+  %tmp20 = phi i32 [ %tmp5, %outer_loop ], [ %tmp22, %inner_loop ]
+  %tmp21 = phi i32 [ 1, %outer_loop ], [ %tmp24, %inner_loop ]
+  %tmp22 = add i32 %tmp20, 1
+  %tmp23 = add i8 %tmp19, %tmp9
+  %tmp24 = add nuw nsw i32 %tmp21, 1
+  %tmp25 = icmp ugt i32 %tmp21, 75
+  br i1 %tmp25, label %outer_loop_cont, label %inner_loop
+
+outer_loop_cont:
+; CHECK: outer_loop_cont:
+; CHECK: %indvar.next = add i8 %indvar, 1
+  %tmp12 = phi i8 [ %tmp19, %inner_loop ]
+  %tmp13 = phi i32 [ %tmp22, %inner_loop ]
+  %tmp14 = phi i8 [ %tmp23, %inner_loop ]
+  store i8 %tmp14, i8* %p, align 8
+  %tmp15 = sext i8 %tmp12 to i32
+  %tmp16 = add nuw nsw i32 %tmp7, 1
+  %tmp17 = icmp ugt i32 %tmp7, 256
+  br i1 %tmp17, label %exit, label %outer_loop
+
+exit:
+  %tmp2 = phi i32 [ %tmp15, %outer_loop_cont ]
+  ret i32 %tmp2
+}
\ No newline at end of file
Index: unittests/Analysis/ScalarEvolutionTest.cpp
===================================================================
--- unittests/Analysis/ScalarEvolutionTest.cpp
+++ unittests/Analysis/ScalarEvolutionTest.cpp
@@ -1637,5 +1637,189 @@
   TestMatchingCanonicalIV(GetAR2, ARBitWidth);
 }
 
+// Test expansion of non-affine addrecs in CanonicalMode.
+// Expanding non-affine addrecs in canonical mode requires a canonical IV of a
+// type wider than the type of the addrec itself. See the SCEVAddRecExpr::
+// minIterationWidthForEvaluateAtIteration comment. Currently, SCEVExpander
+// just falls back to literal mode for non-affine addrecs.
+TEST_F(ScalarEvolutionsTest, SCEVExpandNonAffineAddRec) {
+  LLVMContext C;
+  SMDiagnostic Err;
+
+  // Expand the addrec produced by GetAddRec into a loop without a canonical
+  // IV.
+  auto TestNoCanonicalIV = [&](std::function<const SCEVAddRecExpr *(
+                                   ScalarEvolution &SE, Loop *L)>
+                                   GetAddRec) {
+    std::unique_ptr<Module> M =
+        parseAssemblyString("define i32 @test(i32 %limit) { "
+                            "entry: "
+                            "  br label %loop "
+                            "loop: "
+                            "  %i = phi i32 [ 1, %entry ], [ %i.inc, %loop ] "
+                            "  %i.inc = add nsw i32 %i, 1 "
+                            "  %cont = icmp slt i32 %i.inc, %limit "
+                            "  br i1 %cont, label %loop, label %exit "
+                            "exit: "
+                            "  ret i32 %i.inc "
+                            "}",
+                            Err, C);
+
+    assert(M && "Could not parse module?");
+    assert(!verifyModule(*M) && "Must have been well formed!");
+
+    runWithSE(*M, "test", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+      auto &I = GetInstByName(F, "i");
+      auto *Loop = LI.getLoopFor(I.getParent());
+      EXPECT_FALSE(Loop->getCanonicalInductionVariable());
+
+      auto *AR = GetAddRec(SE, Loop);
+      EXPECT_FALSE(AR->isAffine());
+
+      SCEVExpander Exp(SE, M->getDataLayout(), "expander");
+      auto *InsertAt = I.getNextNode();
+      Value *V = Exp.expandCodeFor(AR, nullptr, InsertAt);
+      auto *ExpandedAR = SE.getSCEV(V);
+      // Check that the expansion happened literally.
+      EXPECT_EQ(AR, ExpandedAR);
+    });
+  };
+
+  // Expand the addrec produced by GetAddRec into a loop with a canonical IV
+  // which is narrower than the addrec type.
+  auto TestNarrowCanonicalIV = [&](std::function<const SCEVAddRecExpr *(
+                                       ScalarEvolution &SE, Loop *L)>
+                                       GetAddRec) {
+    std::unique_ptr<Module> M = parseAssemblyString(
+        "define i32 @test(i32 %limit) { "
+        "entry: "
+        "  br label %loop "
+        "loop: "
+        "  %i = phi i32 [ 1, %entry ], [ %i.inc, %loop ] "
+        "  %canonical.iv = phi i8 [ 0, %entry ], [ %canonical.iv.inc, %loop ] "
+        "  %i.inc = add nsw i32 %i, 1 "
+        "  %canonical.iv.inc = add i8 %canonical.iv, 1 "
+        "  %cont = icmp slt i32 %i.inc, %limit "
+        "  br i1 %cont, label %loop, label %exit "
+        "exit: "
+        "  ret i32 %i.inc "
+        "}",
+        Err, C);
+
+    assert(M && "Could not parse module?");
+    assert(!verifyModule(*M) && "Must have been well formed!");
+
+    runWithSE(*M, "test", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+      auto &I = GetInstByName(F, "i");
+
+      auto *LoopHeaderBB = I.getParent();
+      auto *Loop = LI.getLoopFor(LoopHeaderBB);
+      PHINode *CanonicalIV = Loop->getCanonicalInductionVariable();
+      EXPECT_EQ(CanonicalIV, &GetInstByName(F, "canonical.iv"));
+
+      auto *AR = GetAddRec(SE, Loop);
+      EXPECT_FALSE(AR->isAffine());
+
+      unsigned ExpectedCanonicalIVWidth = SE.getTypeSizeInBits(AR->getType());
+      unsigned CanonicalIVBitWidth =
+          cast<IntegerType>(CanonicalIV->getType())->getBitWidth();
+      EXPECT_LT(CanonicalIVBitWidth, ExpectedCanonicalIVWidth);
+
+      SCEVExpander Exp(SE, M->getDataLayout(), "expander");
+      auto *InsertAt = I.getNextNode();
+      Value *V = Exp.expandCodeFor(AR, nullptr, InsertAt);
+      auto *ExpandedAR = SE.getSCEV(V);
+      // Check that the expansion happened literally.
+      EXPECT_EQ(AR, ExpandedAR);
+    });
+  };
+
+  // Expand the addrec produced by GetAddRec into a loop with a canonical IV
+  // of the addrec width.
+  auto TestMatchingCanonicalIV = [&](std::function<const SCEVAddRecExpr *(
+                                         ScalarEvolution &SE, Loop *L)>
+                                         GetAddRec,
+                                     unsigned ARBitWidth) {
+    auto ARBitWidthTypeStr = "i" + std::to_string(ARBitWidth);
+    std::unique_ptr<Module> M = parseAssemblyString(
+        "define i32 @test(i32 %limit) { "
+        "entry: "
+        "  br label %loop "
+        "loop: "
+        "  %i = phi i32 [ 1, %entry ], [ %i.inc, %loop ] "
+        "  %canonical.iv = phi " + ARBitWidthTypeStr +
+        " [ 0, %entry ], [ %canonical.iv.inc, %loop ] "
+        "  %i.inc = add nsw i32 %i, 1 "
+        "  %canonical.iv.inc = add " + ARBitWidthTypeStr +
+        " %canonical.iv, 1 "
+        "  %cont = icmp slt i32 %i.inc, %limit "
+        "  br i1 %cont, label %loop, label %exit "
+        "exit: "
+        "  ret i32 %i.inc "
+        "}",
+        Err, C);
+
+    assert(M && "Could not parse module?");
+    assert(!verifyModule(*M) && "Must have been well formed!");
+
+    runWithSE(*M, "test", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+      auto &I = GetInstByName(F, "i");
+      auto &CanonicalIV = GetInstByName(F, "canonical.iv");
+
+      auto *LoopHeaderBB = I.getParent();
+      auto *Loop = LI.getLoopFor(LoopHeaderBB);
+      EXPECT_EQ(&CanonicalIV, Loop->getCanonicalInductionVariable());
+      unsigned CanonicalIVBitWidth =
+          cast<IntegerType>(CanonicalIV.getType())->getBitWidth();
+
+      auto *AR = GetAddRec(SE, Loop);
+      EXPECT_FALSE(AR->isAffine());
+      EXPECT_EQ(ARBitWidth, SE.getTypeSizeInBits(AR->getType()));
+      EXPECT_EQ(CanonicalIVBitWidth, ARBitWidth);
+
+      SCEVExpander Exp(SE, M->getDataLayout(), "expander");
+      auto *InsertAt = I.getNextNode();
+      Value *V = Exp.expandCodeFor(AR, nullptr, InsertAt);
+      auto *ExpandedAR = SE.getSCEV(V);
+      // Check that the expansion happened literally.
+      EXPECT_EQ(AR, ExpandedAR);
+    });
+  };
+
+  unsigned ARBitWidth = 16;
+  Type *ARType = IntegerType::get(C, ARBitWidth);
+
+  // Expand {5,+,1,+,1}
+  auto GetAR3 = [&](ScalarEvolution &SE, Loop *L) -> const SCEVAddRecExpr * {
+    SmallVector<const SCEV *, 3> Ops = {SE.getConstant(APInt(ARBitWidth, 5)),
+                                        SE.getOne(ARType), SE.getOne(ARType)};
+    return cast<SCEVAddRecExpr>(SE.getAddRecExpr(Ops, L, SCEV::FlagAnyWrap));
+  };
+  TestNoCanonicalIV(GetAR3);
+  TestNarrowCanonicalIV(GetAR3);
+  TestMatchingCanonicalIV(GetAR3, ARBitWidth);
+
+  // Expand {5,+,1,+,1,+,1}
+  auto GetAR4 = [&](ScalarEvolution &SE, Loop *L) -> const SCEVAddRecExpr * {
+    SmallVector<const SCEV *, 4> Ops = {SE.getConstant(APInt(ARBitWidth, 5)),
+                                        SE.getOne(ARType), SE.getOne(ARType),
+                                        SE.getOne(ARType)};
+    return cast<SCEVAddRecExpr>(SE.getAddRecExpr(Ops, L, SCEV::FlagAnyWrap));
+  };
+  TestNoCanonicalIV(GetAR4);
+  TestNarrowCanonicalIV(GetAR4);
+  TestMatchingCanonicalIV(GetAR4, ARBitWidth);
+
+  // Expand {5,+,1,+,1,+,1,+,1}
+  auto GetAR5 = [&](ScalarEvolution &SE, Loop *L) -> const SCEVAddRecExpr * {
+    SmallVector<const SCEV *, 5> Ops = {SE.getConstant(APInt(ARBitWidth, 5)),
+                                        SE.getOne(ARType), SE.getOne(ARType),
+                                        SE.getOne(ARType), SE.getOne(ARType)};
+    return cast<SCEVAddRecExpr>(SE.getAddRecExpr(Ops, L, SCEV::FlagAnyWrap));
+  };
+  TestNoCanonicalIV(GetAR5);
+  TestNarrowCanonicalIV(GetAR5);
+  TestMatchingCanonicalIV(GetAR5, ARBitWidth);
+}
+
 } // end anonymous namespace
 } // end namespace llvm
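
A small standalone sketch (not part of the patch): it recomputes the W + T width formula of MinIterationWidthForBinomialCoefficient with plain integers so the i65 example from the visitAddRecExpr comment can be checked by hand. The helper names (trailingZeros, minIterationWidth) are made up for illustration and only assume the same trailing-zero accumulation the patch uses.

#include <iostream>

// Count trailing zero bits of V; V is assumed to be non-zero.
static unsigned trailingZeros(unsigned V) {
  unsigned T = 0;
  while ((V & 1u) == 0) {
    V >>= 1;
    ++T;
  }
  return T;
}

// Same arithmetic as MinIterationWidthForBinomialCoefficient: the result
// width W plus T, where T starts at 1 and accumulates the trailing zeros of
// 3..K (roughly the power of two dividing K!).
static unsigned minIterationWidth(unsigned K, unsigned W) {
  unsigned T = 1;
  for (unsigned i = 3; i <= K; ++i)
    T += trailingZeros(i);
  return W + T;
}

int main() {
  // An i64 {0,+,2,+,1} addrec has K = 2 (three operands), so evaluating it
  // needs a 65-bit iteration expression -- the i65 canonical IV mentioned in
  // the visitAddRecExpr comment.
  std::cout << minIterationWidth(2, 64) << '\n'; // prints 65
  // A 4th-order i64 addrec needs 64 + 1 + 0 + 2 = 67 bits.
  std::cout << minIterationWidth(4, 64) << '\n'; // prints 67
  return 0;
}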