Index: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h +++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h @@ -23,6 +23,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Function.h" @@ -382,6 +383,22 @@ /// This SCEV is used to represent unknown trip counts and things. std::unique_ptr CouldNotCompute; + /// HasRecMapType - The typedef for HasRecMap. + /// + typedef DenseMap HasRecMapType; + + /// HasRecMap -- This is a cache to record whether a SCEV contains + /// any scAddRecExpr. + HasRecMapType HasRecMap; + + /// ExprValueMapType - The typedef for ExprValueMap. + /// + typedef DenseMap> ExprValueMapType; + + /// ExprValueMap -- This map records the original values from which + /// the SCEV expr is generated from. + ExprValueMapType ExprValueMap; + /// The typedef for ValueExprMap. /// typedef DenseMap > @@ -821,6 +838,18 @@ /// return true. For pointer types, this is the pointer-sized integer type. Type *getEffectiveSCEVType(Type *Ty) const; + /// containsAddRecurrence - Return true if the SCEV is a scAddRecExpr or + /// it contains scAddRecExpr. The result will be cached in HasRecMap. + /// + bool containsAddRecurrence(const SCEV *S); + + /// getSCEVValues - Return the Value set from which the SCEV expr is + /// generated. + SetVector *getSCEVValues(const SCEV *S); + + /// eraseValueFromMap - Erase Value from ValueExprMap and ExprValueMap. + void eraseValueFromMap(Value *V); + /// Return a SCEV expression for the full generality of the specified /// expression. const SCEV *getSCEV(Value *V); Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp @@ -115,6 +115,10 @@ static cl::opt VerifySCEV("verify-scev", cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); +static cl::opt + VerifySCEVMap("verify-scev-maps", + cl::desc("Verify no dangling value in ScalarEvolution's" + "ExprValueMap (slow)")); //===----------------------------------------------------------------------===// // SCEV class definitions @@ -3310,6 +3314,73 @@ return !F.FindOne; } +namespace { +// Helper class working with SCEVTraversal to figure out if a SCEV contains +// a sub SCEV of scAddRecExpr type. FindInvalidSCEVUnknown::FoundOne is set +// iff if such sub scAddRecExpr type SCEV is found. +struct FindAddRecurrence { + bool FoundOne; + FindAddRecurrence() : FoundOne(false) {} + + bool follow(const SCEV *S) { + switch (static_cast(S->getSCEVType())) { + case scAddRecExpr: + FoundOne = true; + case scConstant: + case scUnknown: + case scCouldNotCompute: + return false; + default: + return true; + } + } + bool isDone() const { return FoundOne; } +}; +} + +bool ScalarEvolution::containsAddRecurrence(const SCEV *S) { + HasRecMapType::iterator I = HasRecMap.find_as(S); + if (I != HasRecMap.end()) + return I->second; + + FindAddRecurrence F; + SCEVTraversal ST(F); + ST.visitAll(S); + HasRecMap.insert(std::make_pair(S, F.FoundOne)); + return F.FoundOne; +} + +/// getSCEVValues - Return the Value set from S. +SetVector *ScalarEvolution::getSCEVValues(const SCEV *S) { + ExprValueMapType::iterator SI = ExprValueMap.find_as(S); + if (SI == ExprValueMap.end()) + return nullptr; +#ifndef NDEBUG + if (VerifySCEVMap) { + // Check there is no dangling Value in the set returned. + for (const auto &VE : SI->second) + assert(ValueExprMap.count(VE)); + } +#endif + return &SI->second; +} + +/// eraseValueFromMap - Erase Value from ValueExprMap and ExprValueMap. +/// If ValueExprMap.erase(V) is not used together with forgetMemoizedResults(S), +/// eraseValueFromMap should be used instead to ensure whenever V->S is removed +/// from ValueExprMap, V is also removed from the set of ExprValueMap[S]. +void ScalarEvolution::eraseValueFromMap(Value *V) { + ValueExprMapType::iterator I = ValueExprMap.find_as(V); + if (I != ValueExprMap.end()) { + const SCEV *S = I->second; + SetVector *SV = getSCEVValues(S); + // Remove V from the set of ExprValueMap[S] + if (SV) + SV->remove(V); + ValueExprMap.erase(V); + } +} + /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the /// expression and create a new one. const SCEV *ScalarEvolution::getSCEV(Value *V) { @@ -3318,7 +3389,13 @@ const SCEV *S = getExistingSCEV(V); if (S == nullptr) { S = createSCEV(V); - ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); + // During PHI resolution, it is possible to create two SCEVs for the same + // V, so it is needed to double check whether V->S is inserted into + // ValueExprMap before insert S->V into ExprValueMap. + std::pair Pair = + ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); + if (Pair.second) + ExprValueMap[S].insert(V); } return S; } @@ -3331,6 +3408,7 @@ const SCEV *S = I->second; if (checkValidity(S)) return S; + forgetMemoizedResults(S); ValueExprMap.erase(I); } return nullptr; @@ -8967,7 +9045,7 @@ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); if (PHINode *PN = dyn_cast(getValPtr())) SE->ConstantEvolutionLoopExitValue.erase(PN); - SE->ValueExprMap.erase(getValPtr()); + SE->eraseValueFromMap(getValPtr()); // this now dangles! } @@ -8990,13 +9068,13 @@ continue; if (PHINode *PN = dyn_cast(U)) SE->ConstantEvolutionLoopExitValue.erase(PN); - SE->ValueExprMap.erase(U); + SE->eraseValueFromMap(U); Worklist.insert(Worklist.end(), U->user_begin(), U->user_end()); } // Delete the Old value. if (PHINode *PN = dyn_cast(Old)) SE->ConstantEvolutionLoopExitValue.erase(PN); - SE->ValueExprMap.erase(Old); + SE->eraseValueFromMap(Old); // this now dangles! } @@ -9046,7 +9124,9 @@ } FirstUnknown = nullptr; + ExprValueMap.clear(); ValueExprMap.clear(); + HasRecMap.clear(); // Free any extra memory created for ExitNotTakenInfo in the unlikely event // that a loop had multiple computable exits. @@ -9375,6 +9455,8 @@ BlockDispositions.erase(S); UnsignedRanges.erase(S); SignedRanges.erase(S); + ExprValueMap.erase(S); + HasRecMap.erase(S); for (DenseMap::iterator I = BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) { Index: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1600,6 +1600,12 @@ return V; } +// The expansion of SCEV will either reuse a previous Value in ExprValueMap, +// or expand the SCEV literally. Specifically, if the expansion is in LSRMode, +// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded +// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise, +// the expansion will try to reuse Value from ExprValueMap, and only when it +// fails, expand the SCEV literally. Value *SCEVExpander::expand(const SCEV *S) { // Compute an insertion point for this SCEV object. Hoist the instructions // as far out in the loop nest as possible. @@ -1639,7 +1645,25 @@ Builder.SetInsertPoint(InsertPt); // Expand the expression into instructions. - Value *V = visit(S); + SetVector *Set = SE.getSCEVValues(S); + Value *V = nullptr; + // If the expansion is in LSRMode, and the SCEV contains any sub scAddRecExpr + // type SCEV, it will be expanded literally, to prevent LSR's transformed SCEV + // from being reverted. + if (!(LSRMode && SE.containsAddRecurrence(S))) { + if (Set) { + // Choose a Value from the set which dominates the insertPt. + for (auto const &Ent : *Set) { + if (Ent && isa(Ent) && S->getType() == Ent->getType() && + SE.DT.dominates(cast(Ent), InsertPt)) { + V = Ent; + break; + } + } + } + } + if (!V) + V = visit(S); // Remember the expanded value for this SCEV at this location. // Index: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2718,6 +2718,10 @@ BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "min.iters.checked"); + // Update dominator tree immediately if the generated block is a + // LoopBypassBlock because SCEV expansions to generate loop bypass + // checks may query it before the current function is finished. + DT->addNewBlock(NewBB, BB); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); ReplaceInstWithInst(BB->getTerminator(), @@ -2740,6 +2744,10 @@ // adding one to the backedge-taken count will not overflow. BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); + // Update dominator tree immediately if the generated block is a + // LoopBypassBlock because SCEV expansions to generate loop bypass + // checks may query it before the current function is finished. + DT->addNewBlock(NewBB, BB); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); ReplaceInstWithInst(BB->getTerminator(), @@ -2765,6 +2773,10 @@ // Create a new block containing the stride check. BB->setName("vector.scevcheck"); auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); + // Update dominator tree immediately if the generated block is a + // LoopBypassBlock because SCEV expansions to generate loop bypass + // checks may query it before the current function is finished. + DT->addNewBlock(NewBB, BB); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); ReplaceInstWithInst(BB->getTerminator(), @@ -2790,6 +2802,10 @@ // Create a new block containing the memory check. BB->setName("vector.memcheck"); auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); + // Update dominator tree immediately if the generated block is a + // LoopBypassBlock because SCEV expansions to generate loop bypass + // checks may query it before the current function is finished. + DT->addNewBlock(NewBB, BB); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); ReplaceInstWithInst(BB->getTerminator(), @@ -3957,10 +3973,6 @@ assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) && "Entry does not dominate exit."); - for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) - DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]); - DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back()); - // We don't predicate stores by this point, so the vector body should be a // single loop. assert(LoopVectorBody.size() == 1 && "Expected single block loop!"); Index: llvm/trunk/test/Analysis/ScalarEvolution/scev-expander-existing-value.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/scev-expander-existing-value.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/scev-expander-existing-value.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -verify-scev-maps -S |FileCheck %s + +; SCEV expansion uses existing value when the SCEV has no AddRec expr. +; CHECK: select +; CHECK-NOT: select + +@a = common global [1000 x i16] zeroinitializer, align 16 + +define i32 @foo(i32 %x, i32 %y) { +entry: + %cmp = icmp slt i32 %x, %y + %cond = select i1 %cmp, i32 %x, i32 %y + %cmp1.10 = icmp sgt i32 %cond, 0 + br i1 %cmp1.10, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %tmp = sext i32 %cond to i64 + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %total.011 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds [1000 x i16], [1000 x i16]* @a, i64 0, i64 %indvars.iv + %tmp1 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %tmp1 to i32 + %add = add nsw i32 %conv, %total.011 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %cmp1 = icmp slt i64 %indvars.iv.next, %tmp + br i1 %cmp1, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %total.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %for.end.loopexit ] + ret i32 %total.0.lcssa +} Index: llvm/trunk/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll +++ llvm/trunk/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll @@ -8,7 +8,6 @@ ; -- The loop following the load should only use a single add-literation ; instruction. ; CHECK: vldr -; CHECK: adds r{{[0-9]+.*}}#1 ; CHECK-NOT: adds ; CHECK: subsections_via_symbols Index: llvm/trunk/test/Transforms/IRCE/decrementing-loop.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/decrementing-loop.ll +++ llvm/trunk/test/Transforms/IRCE/decrementing-loop.ll @@ -28,7 +28,6 @@ ret void ; CHECK: loop.preheader: -; CHECK: [[indvar_start:[^ ]+]] = add i32 %n, -1 ; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n ; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]] Index: llvm/trunk/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll @@ -11,7 +11,7 @@ br i1 %cmp1, label %for.body, label %for.end ; Make sure the added GEP has the right index type -; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %0 +; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %idx.trunc ; CHECK: for.body: ; CHECK: phi i8 addrspace(2)* @@ -43,7 +43,7 @@ br i1 %cmp1, label %for.body, label %for.end ; Make sure the added GEP has the right index type -; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %0 +; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %idx.trunc ; CHECK: for.body: ; CHECK: phi i8 addrspace(3)* Index: llvm/trunk/test/Transforms/IndVarSimplify/pr24783.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/pr24783.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/pr24783.ll @@ -6,9 +6,6 @@ define void @f(i32* %end.s, i8** %loc, i32 %p) { ; CHECK-LABEL: @f( entry: -; CHECK: [[P_SEXT:%[0-9a-z]+]] = sext i32 %p to i64 -; CHECK: [[END:%[0-9a-z]+]] = getelementptr i32, i32* %end.s, i64 [[P_SEXT]] - %end = getelementptr inbounds i32, i32* %end.s, i32 %p %init = bitcast i32* %end.s to i8* br label %while.body.i @@ -22,7 +19,7 @@ loop.exit: ; CHECK: loop.exit: -; CHECK: [[END_BCASTED:%[a-z0-9]+]] = bitcast i32* %scevgep to i8* +; CHECK: [[END_BCASTED:%[a-z0-9]+]] = bitcast i32* %end to i8* ; CHECK: store i8* [[END_BCASTED]], i8** %loc %ptr.inc.lcssa = phi i8* [ %ptr.inc, %while.body.i ] store i8* %ptr.inc.lcssa, i8** %loc Index: llvm/trunk/test/Transforms/IndVarSimplify/udiv.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/udiv.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/udiv.ll @@ -127,12 +127,12 @@ declare i32 @printf(i8* nocapture, ...) nounwind -; IndVars shouldn't be afraid to emit a udiv here, since there's a udiv in -; the original code. +; IndVars doesn't emit a udiv in for.body.preheader since SCEVExpander::expand will +; find out there's already a udiv in the original code. ; CHECK-LABEL: @foo( ; CHECK: for.body.preheader: -; CHECK-NEXT: udiv +; CHECK-NOT: udiv define void @foo(double* %p, i64 %n) nounwind { entry: Index: llvm/trunk/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll @@ -32,15 +32,9 @@ ; CHECK-LABEL: @test1( -; First check that we move the sub into the preheader, it doesn't have to be -; executed if %cmp4 == false -; CHECK: for.body.preheader: -; CHECK: sub i32 %data_len, %sample -; CHECK: br label %for.body - -; Second, check that we turn the IV test into an eq. +; check that we turn the IV test into an eq. ; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32 -; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %0 +; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %sub ; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit } Index: llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll =================================================================== --- llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll +++ llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll @@ -4,8 +4,9 @@ ; LSR should properly handle the post-inc offset when folding the ; non-IV operand of an icmp into the IV. -; CHECK: [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast -; CHECK: [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1 +; CHECK: [[r1:%[a-z0-9\.]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast +; CHECK: [[r2:%[a-z0-9\.]+]] = lshr exact i64 [[r1]], 1 +; CHECK: for.body.lr.ph: ; CHECK: [[r3:%[a-z0-9]+]] = shl i64 [[r2]], 1 ; CHECK: br label %for.body ; CHECK: for.body: