Index: llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -77,9 +77,13 @@
   /// Phis that complete an IV chain. Reuse
   DenseSet<AssertingVH<PHINode>> ChainedPhis;
 
-  /// When true, expressions are expanded in "canonical" form. In particular,
-  /// addrecs are expanded as arithmetic based on a canonical induction
-  /// variable. When false, expression are expanded in a more literal form.
+  /// When true, SCEVExpander tries to expand expressions in "canonical" form.
+  /// When false, expressions are expanded in a more literal form.
+  ///
+  /// In "canonical" form addrecs are expanded as arithmetic based on a
+  /// canonical induction variable. Note that CanonicalMode doesn't guarantee
+  /// that all expressions are expanded in "canonical" form. For some
+  /// expressions, literal mode can be preferred.
   bool CanonicalMode;
 
   /// When invoked from LSR, the expander is in "strength reduction" mode. The
Index: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp
===================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp
+++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1486,7 +1486,18 @@
 }
 
 Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
-  if (!CanonicalMode) return expandAddRecExprLiterally(S);
+  // In canonical mode we compute the addrec as an expression of a canonical IV
+  // using evaluateAtIteration and expand the resulting SCEV expression. This
+  // way we avoid introducing new IVs to carry on the computation of the addrec
+  // throughout the loop.
+  //
+  // For nested addrecs evaluateAtIteration might need a canonical IV of a
+  // type wider than the addrec itself. Emitting a canonical IV of the
+  // proper type might produce non-legal types, for example expanding an i64
+  // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this, just
+  // fall back to non-canonical mode for nested addrecs.
+  if (!CanonicalMode || (S->getNumOperands() > 2))
+    return expandAddRecExprLiterally(S);
 
   Type *Ty = SE.getEffectiveSCEVType(S->getType());
   const Loop *L = S->getLoop();
Index: llvm/trunk/unittests/Analysis/ScalarEvolutionTest.cpp
===================================================================
--- llvm/trunk/unittests/Analysis/ScalarEvolutionTest.cpp
+++ llvm/trunk/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -1745,4 +1745,186 @@
   });
 }
 
+// Test expansion of nested addrecs in CanonicalMode.
+// Expanding nested addrecs in canonical mode requires a canonical IV of a
+// type wider than the type of the addrec itself. Currently, SCEVExpander
+// just falls back to literal mode for nested addrecs.
+TEST_F(ScalarEvolutionsTest, SCEVExpandNonAffineAddRec) {
+  LLVMContext C;
+  SMDiagnostic Err;
+
+  // Expand the addrec produced by GetAddRec into a loop without a canonical
+  // IV.
+  auto TestNoCanonicalIV =
+      [&](std::function<const SCEVAddRecExpr *(ScalarEvolution &, Loop *)>
+              GetAddRec) {
+    std::unique_ptr<Module> M =
+        parseAssemblyString("define i32 @test(i32 %limit) { "
+                            "entry: "
+                            " br label %loop "
+                            "loop: "
+                            " %i = phi i32 [ 1, %entry ], [ %i.inc, %loop ] "
+                            " %i.inc = add nsw i32 %i, 1 "
+                            " %cont = icmp slt i32 %i.inc, %limit "
+                            " br i1 %cont, label %loop, label %exit "
+                            "exit: "
+                            " ret i32 %i.inc "
+                            "}",
+                            Err, C);
+
+    assert(M && "Could not parse module?");
+    assert(!verifyModule(*M) && "Must have been well formed!");
+
+    runWithSE(*M, "test", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+      auto &I = GetInstByName(F, "i");
+      auto *Loop = LI.getLoopFor(I.getParent());
+      EXPECT_FALSE(Loop->getCanonicalInductionVariable());
+
+      auto *AR = GetAddRec(SE, Loop);
+      EXPECT_FALSE(AR->isAffine());
+
+      SCEVExpander Exp(SE, M->getDataLayout(), "expander");
+      auto *InsertAt = I.getNextNode();
+      Value *V = Exp.expandCodeFor(AR, nullptr, InsertAt);
+      auto *ExpandedAR = SE.getSCEV(V);
+      // Check that the expansion happened literally.
+      EXPECT_EQ(AR, ExpandedAR);
+    });
+  };
+
+  // Expand the addrec produced by GetAddRec into a loop with a canonical IV
+  // which is narrower than the addrec type.
+  auto TestNarrowCanonicalIV = [&](
+      std::function<const SCEVAddRecExpr *(ScalarEvolution &, Loop *)>
+          GetAddRec) {
+    std::unique_ptr<Module> M = parseAssemblyString(
+        "define i32 @test(i32 %limit) { "
+        "entry: "
+        " br label %loop "
+        "loop: "
+        " %i = phi i32 [ 1, %entry ], [ %i.inc, %loop ] "
+        " %canonical.iv = phi i8 [ 0, %entry ], [ %canonical.iv.inc, %loop ] "
+        " %i.inc = add nsw i32 %i, 1 "
+        " %canonical.iv.inc = add i8 %canonical.iv, 1 "
+        " %cont = icmp slt i32 %i.inc, %limit "
+        " br i1 %cont, label %loop, label %exit "
+        "exit: "
+        " ret i32 %i.inc "
+        "}",
+        Err, C);
+
+    assert(M && "Could not parse module?");
+    assert(!verifyModule(*M) && "Must have been well formed!");
+
+    runWithSE(*M, "test", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+      auto &I = GetInstByName(F, "i");
+
+      auto *LoopHeaderBB = I.getParent();
+      auto *Loop = LI.getLoopFor(LoopHeaderBB);
+      PHINode *CanonicalIV = Loop->getCanonicalInductionVariable();
+      EXPECT_EQ(CanonicalIV, &GetInstByName(F, "canonical.iv"));
+
+      auto *AR = GetAddRec(SE, Loop);
+      EXPECT_FALSE(AR->isAffine());
+
+      unsigned ExpectedCanonicalIVWidth = SE.getTypeSizeInBits(AR->getType());
+      unsigned CanonicalIVBitWidth =
+          cast<IntegerType>(CanonicalIV->getType())->getBitWidth();
+      EXPECT_LT(CanonicalIVBitWidth, ExpectedCanonicalIVWidth);
+
+      SCEVExpander Exp(SE, M->getDataLayout(), "expander");
+      auto *InsertAt = I.getNextNode();
+      Value *V = Exp.expandCodeFor(AR, nullptr, InsertAt);
+      auto *ExpandedAR = SE.getSCEV(V);
+      // Check that the expansion happened literally.
+      EXPECT_EQ(AR, ExpandedAR);
+    });
+  };
+
+  // Expand the addrec produced by GetAddRec into a loop with a canonical IV
+  // of addrec width.
+  auto TestMatchingCanonicalIV = [&](
+      std::function<const SCEVAddRecExpr *(ScalarEvolution &, Loop *)>
+          GetAddRec,
+      unsigned ARBitWidth) {
+    auto ARBitWidthTypeStr = "i" + std::to_string(ARBitWidth);
+    std::unique_ptr<Module> M = parseAssemblyString(
+        "define i32 @test(i32 %limit) { "
+        "entry: "
+        " br label %loop "
+        "loop: "
+        " %i = phi i32 [ 1, %entry ], [ %i.inc, %loop ] "
+        " %canonical.iv = phi " + ARBitWidthTypeStr +
+        " [ 0, %entry ], [ %canonical.iv.inc, %loop ] "
+        " %i.inc = add nsw i32 %i, 1 "
+        " %canonical.iv.inc = add " + ARBitWidthTypeStr +
+        " %canonical.iv, 1 "
+        " %cont = icmp slt i32 %i.inc, %limit "
+        " br i1 %cont, label %loop, label %exit "
+        "exit: "
+        " ret i32 %i.inc "
+        "}",
+        Err, C);
+
+    assert(M && "Could not parse module?");
+    assert(!verifyModule(*M) && "Must have been well formed!");
+
+    runWithSE(*M, "test", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+      auto &I = GetInstByName(F, "i");
+      auto &CanonicalIV = GetInstByName(F, "canonical.iv");
+
+      auto *LoopHeaderBB = I.getParent();
+      auto *Loop = LI.getLoopFor(LoopHeaderBB);
+      EXPECT_EQ(&CanonicalIV, Loop->getCanonicalInductionVariable());
+      unsigned CanonicalIVBitWidth =
+          cast<IntegerType>(CanonicalIV.getType())->getBitWidth();
+
+      auto *AR = GetAddRec(SE, Loop);
+      EXPECT_FALSE(AR->isAffine());
+      EXPECT_EQ(ARBitWidth, SE.getTypeSizeInBits(AR->getType()));
+      EXPECT_EQ(CanonicalIVBitWidth, ARBitWidth);
+
+      SCEVExpander Exp(SE, M->getDataLayout(), "expander");
+      auto *InsertAt = I.getNextNode();
+      Value *V = Exp.expandCodeFor(AR, nullptr, InsertAt);
+      auto *ExpandedAR = SE.getSCEV(V);
+      // Check that the expansion happened literally.
+      EXPECT_EQ(AR, ExpandedAR);
+    });
+  };
+
+  unsigned ARBitWidth = 16;
+  Type *ARType = IntegerType::get(C, ARBitWidth);
+
+  // Expand {5,+,1,+,1}
+  auto GetAR3 = [&](ScalarEvolution &SE, Loop *L) -> const SCEVAddRecExpr * {
+    SmallVector<const SCEV *, 3> Ops = {SE.getConstant(APInt(ARBitWidth, 5)),
+                                        SE.getOne(ARType), SE.getOne(ARType)};
+    return cast<SCEVAddRecExpr>(SE.getAddRecExpr(Ops, L, SCEV::FlagAnyWrap));
+  };
+  TestNoCanonicalIV(GetAR3);
+  TestNarrowCanonicalIV(GetAR3);
+  TestMatchingCanonicalIV(GetAR3, ARBitWidth);
+
+  // Expand {5,+,1,+,1,+,1}
+  auto GetAR4 = [&](ScalarEvolution &SE, Loop *L) -> const SCEVAddRecExpr * {
+    SmallVector<const SCEV *, 4> Ops = {SE.getConstant(APInt(ARBitWidth, 5)),
+                                        SE.getOne(ARType), SE.getOne(ARType),
+                                        SE.getOne(ARType)};
+    return cast<SCEVAddRecExpr>(SE.getAddRecExpr(Ops, L, SCEV::FlagAnyWrap));
+  };
+  TestNoCanonicalIV(GetAR4);
+  TestNarrowCanonicalIV(GetAR4);
+  TestMatchingCanonicalIV(GetAR4, ARBitWidth);
+
+  // Expand {5,+,1,+,1,+,1,+,1}
+  auto GetAR5 = [&](ScalarEvolution &SE, Loop *L) -> const SCEVAddRecExpr * {
+    SmallVector<const SCEV *, 5> Ops = {SE.getConstant(APInt(ARBitWidth, 5)),
+                                        SE.getOne(ARType), SE.getOne(ARType),
+                                        SE.getOne(ARType), SE.getOne(ARType)};
+    return cast<SCEVAddRecExpr>(SE.getAddRecExpr(Ops, L, SCEV::FlagAnyWrap));
+  };
+  TestNoCanonicalIV(GetAR5);
+  TestNarrowCanonicalIV(GetAR5);
+  TestMatchingCanonicalIV(GetAR5, ARBitWidth);
+}
+
 } // end namespace llvm
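Side note (not part of the patch): the visitAddRecExpr comment above refers to evaluateAtIteration, which rewrites a nested addrec {C0,+,C1,+,...,+,Ck} at iteration n as the closed form sum_i Ci * binomial(n, i); literal expansion instead materializes one IV per coefficient and bumps each IV by the next one every iteration. The standalone sketch below is only an illustration of that equivalence for the {5,+,1,+,1}-style addrecs used in the tests. The helper names evalClosedForm and evalByRecurrence are made up for this sketch, and plain uint64_t stands in for APInt, so the width/overflow issue that motivates the i65 example in the patch is deliberately ignored here.

// Illustrative only -- not LLVM code.
#include <cassert>
#include <cstdint>
#include <vector>

// Value of {C[0],+,C[1],+,...,+,C[k]} at iteration N: sum_i C[i] * C(N, i).
uint64_t evalClosedForm(const std::vector<uint64_t> &C, uint64_t N) {
  uint64_t Result = 0, Binom = 1; // Binom = C(N, 0)
  for (size_t K = 0; K < C.size(); ++K) {
    Result += C[K] * Binom;
    Binom = Binom * (N - K) / (K + 1); // advance to C(N, K + 1)
  }
  return Result;
}

// The same value computed the way a literal expansion does: one IV per
// coefficient, each bumped by the next-level IV on every loop iteration.
uint64_t evalByRecurrence(std::vector<uint64_t> IV, uint64_t N) {
  for (uint64_t It = 0; It != N; ++It)
    for (size_t K = 0; K + 1 < IV.size(); ++K)
      IV[K] += IV[K + 1];
  return IV[0];
}

int main() {
  // {5,+,1,+,1} at iteration n is 5 + n + n*(n-1)/2; deeper nests likewise.
  for (auto &AR : {std::vector<uint64_t>{5, 1, 1},
                   std::vector<uint64_t>{5, 1, 1, 1},
                   std::vector<uint64_t>{5, 1, 1, 1, 1}})
    for (uint64_t N = 0; N != 10; ++N)
      assert(evalClosedForm(AR, N) == evalByRecurrence(AR, N));
  return 0;
}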