Index: llvm/trunk/include/llvm/Transforms/Utils/SimplifyIndVar.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Utils/SimplifyIndVar.h +++ llvm/trunk/include/llvm/Transforms/Utils/SimplifyIndVar.h @@ -26,6 +26,7 @@ class LoopInfo; class PHINode; class ScalarEvolution; +class SCEVExpander; /// Interface for visiting interesting IV users that are recognized but not /// simplified by this utility. @@ -47,7 +48,7 @@ /// by using ScalarEvolution to analyze the IV's recurrence. bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl &Dead, - IVVisitor *V = nullptr); + SCEVExpander &Rewriter, IVVisitor *V = nullptr); /// SimplifyLoopIVs - Simplify users of induction variables within this /// loop. This does not actually change or add IVs. Index: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1759,7 +1759,8 @@ // Information about sign/zero extensions of CurrIV. IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); - Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, &Visitor); + Changed |= + simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor); if (Visitor.WI.WidestNativeType) { WideIVs.push_back(Visitor.WI); Index: llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp +++ llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -19,7 +19,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" @@ -55,15 +55,17 @@ LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; - + SCEVExpander &Rewriter; SmallVectorImpl &DeadInsts; bool Changed; public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl &Dead) - : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) { + LoopInfo *LI, SCEVExpander &Rewriter, + SmallVectorImpl &Dead) + : L(Loop), LI(LI), SE(SE), DT(DT), Rewriter(Rewriter), DeadInsts(Dead), + Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -77,7 +79,7 @@ Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); - bool foldConstantSCEV(Instruction *UseInst); + bool replaceIVUserWithLoopInvariant(Instruction *UseInst); bool eliminateOverflowIntrinsic(CallInst *CI); bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); @@ -536,31 +538,34 @@ return false; } -/// Replace the UseInst with a constant if possible -bool SimplifyIndvar::foldConstantSCEV(Instruction *I) { +static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) { + if (auto *BB = L->getLoopPreheader()) + return BB->getTerminator(); + + return Hint; +} + +/// Replace the UseInst with a constant if possible. +bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { if (!SE->isSCEVable(I->getType())) return false; // Get the symbolic expression for this instruction. const SCEV *S = SE->getSCEV(I); - const Loop *L = LI->getLoopFor(I->getParent()); - S = SE->getSCEVAtScope(S, L); - auto *C = dyn_cast(S); - - if (!C) + if (!SE->isLoopInvariant(S, L)) return false; - Constant *V = C->getValue(); - // The SCEV will have a different type than the instruction if the instruction - // has a pointer type. Skip the replacement - // TODO: Replace ConstantInt Zero by ConstantPointerNull - if (V->getType() != I->getType()) + // Do not generate something ridiculous even if S is loop invariant. + if (Rewriter.isHighCostExpansion(S, L, I)) return false; - I->replaceAllUsesWith(V); - DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I << " with constant: " << *C - << '\n'); + auto *IP = GetLoopInvariantInsertPosition(L, I); + auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP); + + I->replaceAllUsesWith(Invariant); + DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I + << " with loop invariant: " << *S << '\n'); ++NumFoldedUser; Changed = true; DeadInsts.emplace_back(I); @@ -702,7 +707,7 @@ /// Add all uses of Def to the current IV's worklist. static void pushIVUsers( - Instruction *Def, + Instruction *Def, Loop *L, SmallPtrSet &Simplified, SmallVectorImpl< std::pair > &SimpleIVUsers) { @@ -713,8 +718,19 @@ // Also ensure unique worklist users. // If Def is a LoopPhi, it may not be in the Simplified set, so check for // self edges first. - if (UI != Def && Simplified.insert(UI).second) - SimpleIVUsers.push_back(std::make_pair(UI, Def)); + if (UI == Def) + continue; + + // Only change the current Loop, do not change the other parts (e.g. other + // Loops). + if (!L->contains(UI)) + continue; + + // Do not push the same instruction more than once. + if (!Simplified.insert(UI).second) + continue; + + SimpleIVUsers.push_back(std::make_pair(UI, Def)); } } @@ -764,7 +780,7 @@ // Push users of the current LoopPhi. In rare cases, pushIVUsers may be // called multiple times for the same LoopPhi. This is the proper thing to // do for loop header phis that use each other. - pushIVUsers(CurrIV, Simplified, SimpleIVUsers); + pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers); while (!SimpleIVUsers.empty()) { std::pair UseOper = @@ -774,8 +790,9 @@ // Bypass back edges to avoid extra work. if (UseInst == CurrIV) continue; - // Try to replace UseInst with a constant before any other simplifications - if (foldConstantSCEV(UseInst)) + // Try to replace UseInst with a loop invariant before any other + // simplifications. + if (replaceIVUserWithLoopInvariant(UseInst)) continue; Instruction *IVOperand = UseOper.second; @@ -791,7 +808,7 @@ continue; if (eliminateIVUser(UseOper.first, IVOperand)) { - pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); continue; } @@ -801,7 +818,7 @@ (isa(BO) && strengthenRightShift(BO, IVOperand))) { // re-queue uses of the now modified binary operator and fall // through to the checks that remain. - pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); } } @@ -811,7 +828,7 @@ continue; } if (isSimpleIVUser(UseOper.first, L, SE)) { - pushIVUsers(UseOper.first, Simplified, SimpleIVUsers); + pushIVUsers(UseOper.first, L, Simplified, SimpleIVUsers); } } } @@ -824,8 +841,9 @@ /// by using ScalarEvolution to analyze the IV's recurrence. bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl &Dead, - IVVisitor *V) { - SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead); + SCEVExpander &Rewriter, IVVisitor *V) { + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Rewriter, + Dead); SIV.simplifyUsers(CurrIV, V); return SIV.hasChanged(); } @@ -834,9 +852,13 @@ /// loop. This does not actually change or add IVs. bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl &Dead) { + SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars"); +#ifndef NDEBUG + Rewriter.setDebugType(DEBUG_TYPE); +#endif bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { - Changed |= simplifyUsersOfIV(cast(I), SE, DT, LI, Dead); + Changed |= simplifyUsersOfIV(cast(I), SE, DT, LI, Dead, Rewriter); } return Changed; } Index: llvm/trunk/test/Transforms/IndVarSimplify/constant-fold-1.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/constant-fold-1.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/constant-fold-1.ll @@ -1,39 +0,0 @@ -; RUN: opt < %s -indvars -S | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define void @test(i64* %arg) unnamed_addr align 2 { -bb: - switch i64 undef, label %bb1 [ - ] - -bb1: ; preds = %bb - br label %bb2 - -bb2: ; preds = %bb6, %bb1 - %tmp = phi i64* [%arg, %bb1 ], [ %tmp7, %bb6 ] - switch i2 undef, label %bb6 [ - i2 1, label %bb5 - i2 -2, label %bb3 - i2 -1, label %bb3 - ] - -bb3: ; preds = %bb2, %bb2 - %tmp4 = call fastcc i32* @wobble(i64* nonnull %tmp, i32* null) - %tmp5 = load i32, i32* %tmp4 , align 8 - br label %bb6 - -bb5: ; preds = %bb2 - unreachable - -bb6: ; preds = %bb3, %bb2 - %tmp7 = load i64*, i64** undef, align 8 - br label %bb2 -} - -declare i32* @wobble(i64*, i32* returned) - -; Should not fail when SCEV is fold to ConstantPointerNull -; CHECK-LABEL: void @test -; CHECK: load i32, i32* %{{[a-zA-Z$._0-9]+}} Index: llvm/trunk/test/Transforms/IndVarSimplify/replace-iv-with-loop-invariant.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/replace-iv-with-loop-invariant.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/replace-iv-with-loop-invariant.ll @@ -0,0 +1,88 @@ +; RUN: opt < %s -indvars -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@G = external global i32 + +define void @test0(i64* %arg) { +bb: + br label %bb2 + +bb2: + %tmp = phi i64* [%arg, %bb ], [ %tmp7, %bb2 ] + %tmp4 = call i32* @wobble(i64* nonnull %tmp, i32* null) + %tmp5 = load i32, i32* %tmp4, align 8 + %tmp7 = load i64*, i64** undef, align 8 + br label %bb2 +} + +; CHECK-LABEL: void @test0 +; CHECK: load i32, i32* null + +define void @test1(i64* %arg) { +bb: + br label %bb2 + +bb2: + %tmp = phi i64* [%arg, %bb ], [ %tmp7, %bb2 ] + %tmp4 = call i32* @wobble(i64* nonnull %tmp, i32* inttoptr (i64 4 to i32*)) + %tmp5 = load i32, i32* %tmp4 + %tmp7 = load i64*, i64** undef, align 8 + br label %bb2 +} + +; CHECK-LABEL: void @test1 +; CHECK: load i32, i32* inttoptr (i64 4 to i32*) + +define void @test2(i64* %arg) { +bb: + br label %bb2 + +bb2: + %tmp = phi i64* [%arg, %bb ], [ %tmp7, %bb2 ] + %tmp4 = call i32* @wobble(i64* nonnull %tmp, i32* @G) + %tmp5 = load i32, i32* %tmp4 + %tmp7 = load i64*, i64** undef, align 8 + br label %bb2 +} + +; CHECK-LABEL: void @test2 +; CHECK: load i32, i32* @G + + +define void @test3(i64* %arg, i32* %loop.invariant) { +bb: + br label %bb2 + +bb2: + %tmp = phi i64* [%arg, %bb ], [ %tmp7, %bb2 ] + %tmp4 = call i32* @wobble(i64* nonnull %tmp, i32* %loop.invariant) + %tmp5 = load i32, i32* %tmp4 + %tmp7 = load i64*, i64** undef, align 8 + br label %bb2 +} + +; CHECK-LABEL: void @test3 +; CHECK: load i32, i32* %loop.invariant + +define void @test4(i64* %arg, i32* %loop.invariant, i64 %N) { +bb: + br label %bb2 + +bb2: + %tmp = phi i64* [%arg, %bb ], [ %tmp7, %bb2 ] + %mul = mul nsw i64 %N, 64 + %ptr = getelementptr inbounds i32, i32* %loop.invariant, i64 %mul + %tmp4 = call i32* @wobble(i64* nonnull %tmp, i32* %ptr) + %tmp5 = load i32, i32* %tmp4 + %tmp7 = load i64*, i64** undef, align 8 + br label %bb2 +} + +; CHECK-LABEL: void @test4 +; CHECK: [[P:%[a-zA-Z$._0-9]+]] = getelementptr i32, i32* %loop.invariant +; CHECK: phi +; CHECK: load i32, i32* [[P]] + +declare i32* @wobble(i64*, i32* returned)