Index: include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- include/llvm/Analysis/ScalarEvolution.h
+++ include/llvm/Analysis/ScalarEvolution.h
@@ -245,6 +245,22 @@
     /// counts and things.
     SCEVCouldNotCompute CouldNotCompute;
 
+    /// HasRecMapType - The typedef for HasRecMap.
+    ///
+    typedef DenseMap<const SCEV *, bool> HasRecMapType;
+
+    /// HasRecMap -- This is a cache to store the analysis result about
+    /// whether a SCEV contains any scAddRecExpr.
+    HasRecMapType HasRecMap;
+
+    /// ExprValueMapType - The typedef for ExprValueMap.
+    ///
+    typedef DenseMap<const SCEV *, std::vector<WeakVH>> ExprValueMapType;
+
+    /// ExprValueMap -- This map records the original Values from which
+    /// a SCEV expr was generated.
+    ExprValueMapType ExprValueMap;
+
     /// ValueExprMapType - The typedef for ValueExprMap.
     ///
     typedef DenseMap<SCEVCallbackVH, const SCEV *, DenseMapInfo<Value *> >
@@ -625,6 +641,15 @@
     /// this is the pointer-sized integer type.
     Type *getEffectiveSCEVType(Type *Ty) const;
 
+    /// hasAnyRec - Return true if the SCEV is a scAddRecExpr or contains
+    /// a scAddRecExpr. The result is cached in HasRecMap.
+    ///
+    bool hasAnyRec(const SCEV *S);
+
+    /// getSCEVValue - Return the vector of WeakVHs recording the Values
+    /// from which the SCEV expr was generated.
+    std::vector<WeakVH> *getSCEVValue(const SCEV *S);
+
     /// getSCEV - Return a SCEV expression for the full generality of the
     /// specified expression.
     const SCEV *getSCEV(Value *V);
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -3311,6 +3311,69 @@
   return !F.FindOne;
 }
 
+bool ScalarEvolution::hasAnyRec(const SCEV *S) {
+  HasRecMapType::iterator I = HasRecMap.find_as(S);
+  if (I != HasRecMap.end())
+    return I->second;
+
+  bool AnyRec = false;
+  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+  case scConstant: {
+    AnyRec = false;
+    break;
+  }
+  case scTruncate: {
+    const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(S);
+    AnyRec = hasAnyRec(Trunc->getOperand());
+    break;
+  }
+  case scZeroExtend: {
+    const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(S);
+    AnyRec = hasAnyRec(ZExt->getOperand());
+    break;
+  }
+  case scSignExtend: {
+    const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(S);
+    AnyRec = hasAnyRec(SExt->getOperand());
+    break;
+  }
+  case scAddRecExpr: {
+    AnyRec = true;
+    break;
+  }
+  case scAddExpr:
+  case scMulExpr:
+  case scUMaxExpr:
+  case scSMaxExpr: {
+    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+         I != E; ++I)
+      AnyRec = AnyRec || hasAnyRec(*I);
+    break;
+  }
+  case scUDivExpr: {
+    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+    AnyRec = hasAnyRec(UDiv->getLHS()) || hasAnyRec(UDiv->getRHS());
+    break;
+  }
+  case scUnknown:
+  case scCouldNotCompute: {
+    AnyRec = false;
+    break;
+  }
+  default:
+    llvm_unreachable("Unknown SCEV kind!");
+  }
+  HasRecMap.insert(std::make_pair(S, AnyRec));
+  return AnyRec;
+}
+
+/// getSCEVValue - Return the vector of Values from which S was generated.
+std::vector<WeakVH> *ScalarEvolution::getSCEVValue(const SCEV *S) {
+  ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
+  return (SI == ExprValueMap.end()) ? nullptr : &SI->second;
+}
+
 /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
 /// expression and create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
@@ -3321,6 +3384,16 @@
     S = createSCEV(V);
     ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
   }
+
+  ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
+  if (SI == ExprValueMap.end()) {
+    std::vector<WeakVH> Vec;
+    Vec.push_back(WeakVH(V));
+    ExprValueMap.insert(std::make_pair(S, Vec));
+  } else {
+    std::vector<WeakVH> &Vec = SI->second;
+    Vec.push_back(WeakVH(V));
+  }
   return S;
 }
 
@@ -8296,7 +8369,9 @@
     U->~SCEVUnknown();
   FirstUnknown = nullptr;
 
+  ExprValueMap.clear();
   ValueExprMap.clear();
+  HasRecMap.clear();
 
   // Free any extra memory created for ExitNotTakenInfo in the unlikely event
   // that a loop had multiple computable exits.
@@ -8651,6 +8726,8 @@
   BlockDispositions.erase(S);
   UnsignedRanges.erase(S);
   SignedRanges.erase(S);
+  ExprValueMap.erase(S);
+  HasRecMap.erase(S);
 
   for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
          BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
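
Note on the design: hasAnyRec is memoized structural recursion over the
expression DAG, which matters because SCEVs are uniqued and heavily shared, so
an uncached walk could revisit the same subexpression many times. Outside the
patch, the pattern reduces to the following minimal standalone sketch (Expr,
containsRec, and the cache type are hypothetical stand-ins, not LLVM API):

// Standalone sketch of the memoized containment check in hasAnyRec().
#include <unordered_map>
#include <vector>

struct Expr {
  enum Kind { Constant, AddRec, Add, Mul, UDiv } K;
  std::vector<const Expr *> Ops; // operands; empty for leaf nodes
};

bool containsRec(const Expr *E,
                 std::unordered_map<const Expr *, bool> &Cache) {
  auto It = Cache.find(E);
  if (It != Cache.end())
    return It->second;           // cache hit: constant-time answer
  bool Found = (E->K == Expr::AddRec);
  for (const Expr *Op : E->Ops)  // otherwise recurse into the operands
    if (!Found)
      Found = containsRec(Op, Cache);
  Cache[E] = Found;              // memoize so shared subtrees are walked once
  return Found;
}

Because results are inserted after the operand walk, every later query on a
shared subtree is a single hash lookup.
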
Index: lib/Analysis/ScalarEvolutionExpander.cpp
===================================================================
--- lib/Analysis/ScalarEvolutionExpander.cpp
+++ lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1642,7 +1642,20 @@
   Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
 
   // Expand the expression into instructions.
-  Value *V = visit(S);
+  std::vector<WeakVH> *Vec = SE.getSCEVValue(S);
+  Value *V = nullptr;
+  if (Vec && !SE.hasAnyRec(S)) {
+    // Choose a Value from the vector that dominates the InsertPt.
+    for (auto const &ent : *Vec) {
+      if (ent && isa<Instruction>(ent) && S->getType() == ent->getType() &&
+          SE.DT->dominates(cast<Instruction>(ent), InsertPt)) {
+        V = ent;
+        break;
+      }
+    }
+  }
+  if (!V)
+    V = visit(S);
 
   // Remember the expanded value for this SCEV at this location.
   //
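
The reuse condition in expand() above has three legs: the WeakVH must still be
non-null (the original instruction may have been deleted), the value's type
must match the expression's type, and the cached definition must dominate the
insertion point; SCEVs containing AddRecs are excluded because their expansion
is loop-context-sensitive. A standalone sketch of just the selection loop,
under assumed types (Inst, pickReusable, and the Dominates callback are
hypothetical stand-ins, not LLVM API):

// Sketch of the value-reuse scan performed before falling back to visit(S).
#include <functional>
#include <vector>

struct Type {};
struct Inst { const Type *Ty = nullptr; };

// Return the first cached instruction that is still live, has the required
// type, and dominates the insertion point; nullptr means "expand anew".
const Inst *pickReusable(
    const std::vector<const Inst *> &CachedVals, const Type *WantedTy,
    const Inst *InsertPt,
    const std::function<bool(const Inst *, const Inst *)> &Dominates) {
  for (const Inst *V : CachedVals) {
    if (!V)
      continue;                  // WeakVH-style slot: the value was deleted
    if (V->Ty == WantedTy && Dominates(V, InsertPt))
      return V;                  // safe to reuse this existing definition
  }
  return nullptr;
}
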
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2595,6 +2595,11 @@
   // adding one to the backedge-taken count will not overflow.
   BasicBlock *NewVectorPH =
       VectorPH->splitBasicBlock(VectorPH->getTerminator(), "overflow.checked");
+  // Update the dominator tree immediately if the generated block is a
+  // loop bypass block, because SCEV expansion of later bypass checks
+  // may query the dominator tree before the current function has been
+  // fully constructed.
+  DT->addNewBlock(NewVectorPH, VectorPH);
   if (ParentLoop)
     ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
   ReplaceInstWithInst(
@@ -2635,6 +2640,7 @@
       BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero");
   NewVectorPH =
       VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
+  DT->addNewBlock(NewVectorPH, VectorPH);
   if (ParentLoop)
     ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
   LoopBypassBlocks.push_back(VectorPH);
@@ -2655,6 +2661,7 @@
     VectorPH->setName("vector.stridecheck");
     NewVectorPH =
         VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
+    DT->addNewBlock(NewVectorPH, VectorPH);
     if (ParentLoop)
       ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
     LoopBypassBlocks.push_back(VectorPH);
@@ -2680,6 +2687,7 @@
     VectorPH->setName("vector.memcheck");
     NewVectorPH =
         VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
+    DT->addNewBlock(NewVectorPH, VectorPH);
     if (ParentLoop)
       ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
     LoopBypassBlocks.push_back(VectorPH);
@@ -3697,10 +3705,6 @@
   assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
          "Entry does not dominate exit.");
 
-  for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
-    DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
-  DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
-
   // Due to if predication of stores we might create a sequence of "if(pred)
   // a[i] = ...;  " blocks.
   for (unsigned i = 0, e = LoopVectorBody.size(); i != e; ++i) {
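
The correctness argument for these hunks is local: splitBasicBlock leaves the
original block as the sole predecessor of the new block, so the new block's
immediate dominator is known at the split site and the tree can be updated
eagerly instead of in bulk at the end (the loop removed from the @@ -3697
hunk). As a sketch, the repeated pattern could be factored into a helper like
the following (splitAndRegister is a hypothetical name; the BasicBlock and
DominatorTree calls are real LLVM API):

#include "llvm/ADT/Twine.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

// Split BB before its terminator and register the new block with the
// dominator tree immediately, so SCEV expansion performed between this
// split and the end of vector-loop construction sees an up-to-date tree.
static BasicBlock *splitAndRegister(BasicBlock *BB, const Twine &Name,
                                    DominatorTree *DT) {
  BasicBlock *New = BB->splitBasicBlock(BB->getTerminator(), Name);
  // BB is New's only predecessor after the split, hence its idom.
  DT->addNewBlock(New, BB);
  return New;
}
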
Index: test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
===================================================================
--- test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
+++ test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
@@ -8,7 +8,6 @@
 ; -- The loop following the load should only use a single add-literation
 ;    instruction.
 
 ; CHECK: vldr
-; CHECK: adds r{{[0-9]+.*}}#1
 ; CHECK-NOT: adds
 ; CHECK: subsections_via_symbols
Index: test/Transforms/IRCE/decrementing-loop.ll
===================================================================
--- test/Transforms/IRCE/decrementing-loop.ll
+++ test/Transforms/IRCE/decrementing-loop.ll
@@ -28,7 +28,6 @@
   ret void
 
 ; CHECK: loop.preheader:
-; CHECK: [[indvar_start:[^ ]+]] = add i32 %n, -1
 ; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len
 ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
 ; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]]
Index: test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
===================================================================
--- test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
+++ test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
@@ -11,7 +11,7 @@
   br i1 %cmp1, label %for.body, label %for.end
 
 ; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %0
+; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %idx.trunc
 
 ; CHECK: for.body:
 ; CHECK: phi i8 addrspace(2)*
@@ -43,7 +43,7 @@
   br i1 %cmp1, label %for.body, label %for.end
 
 ; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %0
+; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %idx.trunc
 
 ; CHECK: for.body:
 ; CHECK: phi i8 addrspace(3)*
Index: test/Transforms/IndVarSimplify/udiv.ll
===================================================================
--- test/Transforms/IndVarSimplify/udiv.ll
+++ test/Transforms/IndVarSimplify/udiv.ll
@@ -127,12 +127,12 @@
 declare i32 @printf(i8* nocapture, ...) nounwind
 
-; IndVars shouldn't be afraid to emit a udiv here, since there's a udiv in
-; the original code.
+; IndVars no longer emits a udiv in for.body.preheader: SCEVExpander::expand
+; now reuses the udiv already present in the original code.
 
 ; CHECK-LABEL: @foo(
 ; CHECK: for.body.preheader:
-; CHECK-NEXT: udiv
+; CHECK-NOT: udiv
 
 define void @foo(double* %p, i64 %n) nounwind {
 entry:
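
Of the test updates, udiv.ll shows the new behavior most directly: when the
source already computes the quotient that SCEV expansion needs, the cached
value is reused rather than re-emitted. A hypothetical reduced example in the
same spirit (illustrative only, not the actual test body):

define void @f(i64 %n) {
entry:
  %div = udiv i64 %n, 2                ; quotient already present in the IR
  %guard = icmp eq i64 %div, 0
  br i1 %guard, label %exit, label %preheader

preheader:
  ; before this patch the expander emitted a fresh "%0 = udiv i64 %n, 2"
  ; here; now ExprValueMap maps the SCEV (%n /u 2) back to %div, which
  ; dominates this block and is reused instead.
  br label %body

body:
  %i = phi i64 [ 0, %preheader ], [ %i.next, %body ]
  %i.next = add nuw i64 %i, 1
  %done = icmp eq i64 %i.next, %div
  br i1 %done, label %exit, label %body

exit:
  ret void
}
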
Index: test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
===================================================================
--- test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
+++ test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
@@ -32,15 +32,9 @@
 
 ; CHECK-LABEL: @test1(
 
-; First check that we move the sub into the preheader, it doesn't have to be
-; executed if %cmp4 == false
-; CHECK: for.body.preheader:
-; CHECK:   sub i32 %data_len, %sample
-; CHECK:   br label %for.body
-
-; Second, check that we turn the IV test into an eq.
+; Check that we turn the IV test into an eq.
 ; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %0
+; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %sub
 ; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
 
 }
Index: test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
===================================================================
--- test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -4,8 +4,9 @@
 ; LSR should properly handle the post-inc offset when folding the
 ; non-IV operand of an icmp into the IV.
 
-; CHECK: [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
-; CHECK: [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1
+; CHECK: [[r1:%[a-z0-9\.]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK: [[r2:%[a-z0-9\.]+]] = lshr exact i64 [[r1]], 1
+; CHECK: for.body.lr.ph:
 ; CHECK: [[r3:%[a-z0-9]+]] = shl i64 [[r2]], 1
 ; CHECK: br label %for.body
 ; CHECK: for.body: