Index: include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- include/llvm/Analysis/ScalarEvolution.h
+++ include/llvm/Analysis/ScalarEvolution.h
@@ -32,6 +32,7 @@
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/DataTypes.h"
 #include <map>
+#include <set>

 namespace llvm {
   class APInt;
@@ -245,6 +246,22 @@
     /// counts and things.
     SCEVCouldNotCompute CouldNotCompute;

+    /// HasRecMapType - The typedef for HasRecMap.
+    ///
+    typedef DenseMap<const SCEV *, bool> HasRecMapType;
+
+    /// HasRecMap -- This is a cache to store the analysis result about whether
+    /// a SCEV contains any scAddRecExpr.
+    HasRecMapType HasRecMap;
+
+    /// ExprValueMapType - The typedef for ExprValueMap.
+    ///
+    typedef DenseMap<const SCEV *, std::set<WeakVH>> ExprValueMapType;
+
+    /// ExprValueMap -- This map records the original value from which
+    /// the SCEV expr is generated from.
+    ExprValueMapType ExprValueMap;
+
     /// ValueExprMapType - The typedef for ValueExprMap.
     ///
     typedef DenseMap<SCEVCallbackVH, const SCEV *, DenseMapInfo<Value *> >
@@ -625,6 +642,15 @@
     /// this is the pointer-sized integer type.
     Type *getEffectiveSCEVType(Type *Ty) const;

+    /// containsAddRecurrence - Return true if the SCEV is a scAddRecExpr or
+    /// it contains scAddRecExpr. The result will be cached in HasRecMap.
+    ///
+    bool containsAddRecurrence(const SCEV *S);
+
+    /// getSCEVValue - Return the WeakVH set from which the SCEV expr is
+    /// generated.
+    std::set<WeakVH> *getSCEVValue(const SCEV *S);
+
     /// getSCEV - Return a SCEV expression for the full generality of the
     /// specified expression.
     const SCEV *getSCEV(Value *V);
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -3311,6 +3311,48 @@
   return !F.FindOne;
 }

+namespace {
+// Helper class working with SCEVTraversal to figure out if a SCEV contains
+// a sub SCEV of scAddRecExpr type. FindAddRecurrence::FindOne is set
+// iff such a sub scAddRecExpr type SCEV is found.
+struct FindAddRecurrence {
+  bool FindOne;
+  FindAddRecurrence() : FindOne(false) {}
+
+  bool follow(const SCEV *S) {
+    switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+    case scAddRecExpr:
+      FindOne = true;
+    case scConstant:
+    case scUnknown:
+    case scCouldNotCompute:
+      return false;
+    default:
+      return true;
+    }
+  }
+  bool isDone() const { return FindOne; }
+};
+}
+
+bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
+  HasRecMapType::iterator I = HasRecMap.find_as(S);
+  if (I != HasRecMap.end())
+    return I->second;
+
+  FindAddRecurrence F;
+  SCEVTraversal<FindAddRecurrence> ST(F);
+  ST.visitAll(S);
+  HasRecMap.insert(std::make_pair(S, F.FindOne));
+  return F.FindOne;
+}
+
+/// getSCEVValue - Return the Value set from S.
+std::set<WeakVH> *ScalarEvolution::getSCEVValue(const SCEV *S) {
+  ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
+  return (SI == ExprValueMap.end()) ? nullptr : &SI->second;
+}
+
 /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
 /// expression and create a new one.
 const SCEV *ScalarEvolution::getSCEV(Value *V) {
@@ -3321,6 +3363,8 @@
     S = createSCEV(V);
     ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
   }
+
+  ExprValueMap[S].insert(WeakVH(V));
   return S;
 }
@@ -8296,7 +8340,9 @@
     U->~SCEVUnknown();
   FirstUnknown = nullptr;

+  ExprValueMap.clear();
   ValueExprMap.clear();
+  HasRecMap.clear();

   // Free any extra memory created for ExitNotTakenInfo in the unlikely event
   // that a loop had multiple computable exits.
@@ -8651,6 +8697,8 @@
   BlockDispositions.erase(S);
   UnsignedRanges.erase(S);
   SignedRanges.erase(S);
+  ExprValueMap.erase(S);
+  HasRecMap.erase(S);
   for (DenseMap<const Loop *, BackedgeTakenInfo>::iterator I =
          BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
Index: lib/Analysis/ScalarEvolutionExpander.cpp
===================================================================
--- lib/Analysis/ScalarEvolutionExpander.cpp
+++ lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1602,6 +1602,12 @@
   return V;
 }

+// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
+// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
+// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
+// literally, to prevent LSR transformed SCEV from being reverted. Otherwise,
+// the expansion will try to reuse Value from ExprValueMap, and only when it
+// fails, expand the SCEV literally.
 Value *SCEVExpander::expand(const SCEV *S) {
   // Compute an insertion point for this SCEV object. Hoist the instructions
   // as far out in the loop nest as possible.
@@ -1642,7 +1648,25 @@
   Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);

   // Expand the expression into instructions.
-  Value *V = visit(S);
+  std::set<WeakVH> *Set = SE.getSCEVValue(S);
+  Value *V = nullptr;
+  // If the expansion is in LSRMode, and the SCEV contains any sub scAddRecExpr
+  // type SCEV, it will be expanded literally, to prevent LSR transformed SCEV
+  // from being reverted.
+  if (!(LSRMode && SE.containsAddRecurrence(S))) {
+    if (Set) {
+      // Choose a Value from the set which dominates the insertPt.
+      for (auto const &Ent : *Set) {
+        if (Ent && isa<Instruction>(Ent) && S->getType() == Ent->getType() &&
+            SE.DT->dominates(cast<Instruction>(Ent), InsertPt)) {
+          V = Ent;
+          break;
+        }
+      }
+    }
+  }
+  if (!V)
+    V = visit(S);

   // Remember the expanded value for this SCEV at this location.
 //
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2595,6 +2595,10 @@
   // adding one to the backedge-taken count will not overflow.
   BasicBlock *NewVectorPH =
       VectorPH->splitBasicBlock(VectorPH->getTerminator(), "overflow.checked");
+  // Update dominator tree immediately if the generated block is a
+  // LoopBypassBlock because SCEV expansions to generate loop bypass
+  // checks may query it before the current func is finished.
+  DT->addNewBlock(NewVectorPH, VectorPH);
   if (ParentLoop)
     ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
   ReplaceInstWithInst(
@@ -2635,6 +2639,7 @@
       BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero");
   NewVectorPH =
       VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
+  DT->addNewBlock(NewVectorPH, VectorPH);
   if (ParentLoop)
     ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
   LoopBypassBlocks.push_back(VectorPH);
@@ -2655,6 +2660,7 @@
     VectorPH->setName("vector.stridecheck");
     NewVectorPH =
         VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
+    DT->addNewBlock(NewVectorPH, VectorPH);
     if (ParentLoop)
       ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
     LoopBypassBlocks.push_back(VectorPH);
@@ -2680,6 +2686,7 @@
     VectorPH->setName("vector.memcheck");
     NewVectorPH =
         VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
+    DT->addNewBlock(NewVectorPH, VectorPH);
     if (ParentLoop)
       ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
     LoopBypassBlocks.push_back(VectorPH);
@@ -3697,10 +3704,6 @@
   assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
          "Entry does not dominate exit.");

-  for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
-    DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
-  DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
-
   // Due to if predication of stores we might create a sequence of "if(pred)
   // a[i] = ...;  " blocks.
   for (unsigned i = 0, e = LoopVectorBody.size(); i != e; ++i) {
Index: test/Analysis/ScalarEvolution/scev-expander-existing-value.ll
===================================================================
--- test/Analysis/ScalarEvolution/scev-expander-existing-value.ll
+++ test/Analysis/ScalarEvolution/scev-expander-existing-value.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S |FileCheck %s
+
+; SCEV expansion uses existing value when the SCEV has no AddRec expr.
+; CHECK: select
+; CHECK-NOT: select
+
+@a = common global [1000 x i16] zeroinitializer, align 16
+
+define i32 @foo(i32 %x, i32 %y) {
+entry:
+  %cmp = icmp slt i32 %x, %y
+  %cond = select i1 %cmp, i32 %x, i32 %y
+  %cmp1.10 = icmp sgt i32 %cond, 0
+  br i1 %cmp1.10, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %tmp = sext i32 %cond to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %total.011 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds [1000 x i16], [1000 x i16]* @a, i64 0, i64 %indvars.iv
+  %tmp1 = load i16, i16* %arrayidx, align 2
+  %conv = sext i16 %tmp1 to i32
+  %add = add nsw i32 %conv, %total.011
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp1 = icmp slt i64 %indvars.iv.next, %tmp
+  br i1 %cmp1, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %total.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %for.end.loopexit ]
+  ret i32 %total.0.lcssa
+}
Index: test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
===================================================================
--- test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
+++ test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
@@ -8,7 +8,6 @@
 ; -- The loop following the load should only use a single add-literation
 ;    instruction.
 ; CHECK: vldr
-; CHECK: adds r{{[0-9]+.*}}#1
 ; CHECK-NOT: adds
 ; CHECK: subsections_via_symbols
Index: test/Transforms/IRCE/decrementing-loop.ll
===================================================================
--- test/Transforms/IRCE/decrementing-loop.ll
+++ test/Transforms/IRCE/decrementing-loop.ll
@@ -28,7 +28,6 @@
   ret void

 ; CHECK: loop.preheader:
-; CHECK: [[indvar_start:[^ ]+]] = add i32 %n, -1
 ; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len
 ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
 ; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]]
Index: test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
===================================================================
--- test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
+++ test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
@@ -11,7 +11,7 @@
   br i1 %cmp1, label %for.body, label %for.end

 ; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %0
+; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %idx.trunc

 ; CHECK: for.body:
 ; CHECK: phi i8 addrspace(2)*
@@ -43,7 +43,7 @@
   br i1 %cmp1, label %for.body, label %for.end

 ; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %0
+; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %idx.trunc

 ; CHECK: for.body:
 ; CHECK: phi i8 addrspace(3)*
Index: test/Transforms/IndVarSimplify/udiv.ll
===================================================================
--- test/Transforms/IndVarSimplify/udiv.ll
+++ test/Transforms/IndVarSimplify/udiv.ll
@@ -127,12 +127,12 @@

 declare i32 @printf(i8* nocapture, ...) nounwind

-; IndVars shouldn't be afraid to emit a udiv here, since there's a udiv in
-; the original code.
+; IndVars doesn't emit a udiv in for.body.preheader since SCEVExpander::expand will
+; find out there's already a udiv in the original code.

 ; CHECK-LABEL: @foo(
 ; CHECK: for.body.preheader:
-; CHECK-NEXT: udiv
+; CHECK-NOT: udiv

 define void @foo(double* %p, i64 %n) nounwind {
 entry:
Index: test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
===================================================================
--- test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
+++ test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
@@ -32,15 +32,9 @@

 ; CHECK-LABEL: @test1(

-; First check that we move the sub into the preheader, it doesn't have to be
-; executed if %cmp4 == false
-; CHECK: for.body.preheader:
-; CHECK: sub i32 %data_len, %sample
-; CHECK: br label %for.body
-
-; Second, check that we turn the IV test into an eq.
+; check that we turn the IV test into an eq.
 ; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %0
+; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %sub
 ; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit

 }
Index: test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
===================================================================
--- test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -4,12 +4,14 @@

 ; LSR should properly handle the post-inc offset when folding the
 ; non-IV operand of an icmp into the IV.
-; CHECK: [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
-; CHECK: [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1
-; CHECK: [[r3:%[a-z0-9]+]] = shl i64 [[r2]], 1
+; CHECK: [[r1:%[a-z0-9\.]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK: [[r2:%[a-z0-9\.]+]] = lshr exact i64 [[r1]], 1
+; CHECK: [[r3:%[a-z0-9\.]+]] = bitcast i64 [[r2]] to i64
+; CHECK: for.body.lr.ph:
+; CHECK: [[r4:%[a-z0-9]+]] = shl i64 [[r3]], 1
 ; CHECK: br label %for.body
 ; CHECK: for.body:
-; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r3]], %for.body.lr.ph ]
+; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r4]], %for.body.lr.ph ]
 ; CHECK: %lsr.iv.next = add i64 %lsr.iv2, -2
 ; CHECK: %lsr.iv.next3 = inttoptr i64 %lsr.iv.next to i16*
 ; CHECK: %cmp27 = icmp eq i16* %lsr.iv.next3, null