Index: include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- include/llvm/Analysis/ScalarEvolution.h
+++ include/llvm/Analysis/ScalarEvolution.h
@@ -100,8 +100,8 @@
     FlagNSW     = (1 << 2),   // No signed wrap.
     NoWrapMask  = (1 << 3) -1
   };
 
-  explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy) :
-    FastID(ID), SCEVType(SCEVTy), SubclassData(0) {}
+  explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy)
+      : FastID(ID), SCEVType(SCEVTy), SubclassData(0) {}
 
   unsigned getSCEVType() const { return SCEVType; }
@@ -245,6 +245,25 @@
   /// counts and things.
   SCEVCouldNotCompute CouldNotCompute;
 
+  /// HasRecMapType - The typedef for HasRecMap.
+  ///
+  typedef DenseMap<const SCEV *, bool> HasRecMapType;
+
+  /// HasRecMap -- This is a cache to store the analysis result about
+  /// whether a SCEV contains any scAddRecExpr.
+  HasRecMapType HasRecMap;
+
+  /// ExprValueMapType - The typedef for ExprValueMap.
+  ///
+  typedef DenseMap<const SCEV *, Value *> ExprValueMapType;
+
+  /// ExprValueMap -- This map records the original value from which
+  /// the SCEV expr is generated. To keep the mapping entries valid, the
+  /// entry for a SCEV is removed whenever the SCEV is removed from
+  /// UniqueSCEVs. The mapping entry S->V is invalid when V->S is not
+  /// present in ValueExprMap (getSCEVValue will not use it).
+  ExprValueMapType ExprValueMap;
+
   /// ValueExprMapType - The typedef for ValueExprMap.
   ///
   typedef DenseMap<SCEVCallbackVH, const SCEV *, DenseMapInfo<Value *> >
     ValueExprMapType;
@@ -625,6 +644,18 @@
   /// this is the pointer-sized integer type.
   Type *getEffectiveSCEVType(Type *Ty) const;
 
+  /// hasAnyRec - Return true if the SCEV is a scAddRecExpr or it
+  /// contains a scAddRecExpr. The result is cached in HasRecMap.
+  ///
+  bool hasAnyRec(const SCEV *S);
+
+  /// getSCEVValue - Return the Value from which the SCEV expr was generated.
+  Value *getSCEVValue(const SCEV *S);
+
+  /// eraseExprValueMap - Erase the entries V->S and S->V from ValueExprMap
+  /// and ExprValueMap.
+  void eraseExprValueMap(Value *V);
+
   /// getSCEV - Return a SCEV expression for the full generality of the
   /// specified expression.
   const SCEV *getSCEV(Value *V);
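
The ExprValueMap comment above encodes the central invariant of this patch:
only ValueExprMap uses SCEVCallbackVH, so a reverse entry S->V can go stale
and must be cross-checked against the forward entry V->S before it is
trusted. Below is a minimal standalone sketch of that lookup/erase
discipline, using plain std::string keys instead of SCEV*/Value* -- this is
illustrative only, not LLVM code, and TwoWayCache/ForwardMap/ReverseMap are
invented names:

  #include <string>
  #include <unordered_map>

  // Toy model of the ValueExprMap/ExprValueMap pair: only the forward map
  // is authoritative; the reverse map is a cache that may hold stale
  // entries.
  struct TwoWayCache {
    std::unordered_map<std::string, std::string> ForwardMap; // V -> S
    std::unordered_map<std::string, std::string> ReverseMap; // S -> V

    // Mirrors getSCEVValue: hand out V for S only while V->S is still
    // present, so a stale reverse entry is never returned.
    const std::string *lookup(const std::string &S) const {
      auto SI = ReverseMap.find(S);
      if (SI == ReverseMap.end())
        return nullptr;
      auto VI = ForwardMap.find(SI->second);
      if (VI == ForwardMap.end() || VI->second != S)
        return nullptr;
      return &SI->second;
    }

    // Mirrors eraseExprValueMap: drop V->S, and drop S->V only when it
    // still points back at V (S may have been remapped to another value).
    void erase(const std::string &V) {
      auto VI = ForwardMap.find(V);
      if (VI == ForwardMap.end())
        return;
      auto SI = ReverseMap.find(VI->second);
      if (SI != ReverseMap.end() && SI->second == V)
        ReverseMap.erase(SI);
      ForwardMap.erase(VI);
    }
  };
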
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -3311,6 +3311,93 @@
   return !F.FindOne;
 }
 
+bool ScalarEvolution::hasAnyRec(const SCEV *S) {
+  HasRecMapType::iterator I = HasRecMap.find_as(S);
+  if (I != HasRecMap.end())
+    return I->second;
+
+  bool AnyRec = false;
+  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+  case scConstant: {
+    AnyRec = false;
+    break;
+  }
+  case scTruncate: {
+    const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(S);
+    AnyRec = hasAnyRec(Trunc->getOperand());
+    break;
+  }
+  case scZeroExtend: {
+    const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(S);
+    AnyRec = hasAnyRec(ZExt->getOperand());
+    break;
+  }
+  case scSignExtend: {
+    const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(S);
+    AnyRec = hasAnyRec(SExt->getOperand());
+    break;
+  }
+  case scAddRecExpr: {
+    AnyRec = true;
+    break;
+  }
+  case scAddExpr:
+  case scMulExpr:
+  case scUMaxExpr:
+  case scSMaxExpr: {
+    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+         I != E; ++I)
+      AnyRec = AnyRec || hasAnyRec(*I);
+    break;
+  }
+  case scUDivExpr: {
+    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+    AnyRec = hasAnyRec(UDiv->getLHS()) || hasAnyRec(UDiv->getRHS());
+    break;
+  }
+  case scUnknown:
+  case scCouldNotCompute: {
+    AnyRec = false;
+    break;
+  }
+  default:
+    llvm_unreachable("Unknown SCEV kind!");
+  }
+  HasRecMap.insert(std::make_pair(S, AnyRec));
+  return AnyRec;
+}
+
+/// getSCEVValue - Return the Value V mapped from S, but only when the
+/// mapping V->S is present in ValueExprMap. This ensures the Value V in
+/// the S->V entry of ExprValueMap is still valid, because ValueExprMap
+/// uses SCEVCallbackVH.
+Value *ScalarEvolution::getSCEVValue(const SCEV *S) {
+  ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
+  if (SI == ExprValueMap.end())
+    return nullptr;
+  ValueExprMapType::iterator VI = ValueExprMap.find_as(SI->second);
+  if (VI == ValueExprMap.end())
+    return nullptr;
+  if (VI->second != S)
+    return nullptr;
+  return SI->second;
+}
+
+/// eraseExprValueMap - Erase the V->S entry from ValueExprMap. ExprValueMap
+/// may contain an S->V' entry; if V' equals V, remove S->V' as well.
+void ScalarEvolution::eraseExprValueMap(Value *V) {
+  ValueExprMapType::iterator I = ValueExprMap.find_as(V);
+  if (I == ValueExprMap.end())
+    return;
+  // Look up V's SCEV before erasing V->S: getSCEVValue only trusts the
+  // S->V entry while V->S is still present.
+  Value *VS = getSCEVValue(I->second);
+  if (VS == V)
+    ExprValueMap.erase(I->second);
+  ValueExprMap.erase(I);
+}
+
 /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
 /// expression and create a new one.
 const SCEV *ScalarEvolution::getSCEV(Value *V) {
@@ -3319,6 +3406,7 @@
   const SCEV *S = getExistingSCEV(V);
   if (S == nullptr) {
     S = createSCEV(V);
+    ExprValueMap.insert(std::make_pair(S, V));
     ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
   }
   return S;
@@ -3332,7 +3420,7 @@
     const SCEV *S = I->second;
     if (checkValidity(S))
       return S;
-    ValueExprMap.erase(I);
+    eraseExprValueMap(V);
   }
   return nullptr;
 }
@@ -3583,7 +3671,7 @@
         !isa<SCEVUnknown>(Old) ||
         (I != PN && Old == SymName)) {
       forgetMemoizedResults(Old);
-      ValueExprMap.erase(It);
+      eraseExprValueMap(static_cast<Value *>(I));
     }
   }
@@ -3622,6 +3710,7 @@
   const SCEV *SymbolicName = getUnknown(PN);
   assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
          "PHI node already processed?");
+  ExprValueMap.insert(std::make_pair(SymbolicName, PN));
   ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
 
   // Using this symbolic name for the PHI, analyze the value coming around
@@ -3701,6 +3790,7 @@
         // to be symbolic.  We now need to go back and purge all of the
         // entries for the scalars that use the symbolic expression.
         ForgetSymbolicName(PN, SymbolicName);
+        ExprValueMap[PHISCEV] = PN;
         ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
         return PHISCEV;
       }
@@ -3729,6 +3819,7 @@
         // to be symbolic.  We now need to go back and purge all of the
         // entries for the scalars that use the symbolic expression.
         ForgetSymbolicName(PN, SymbolicName);
+        ExprValueMap[PHISCEV] = PN;
        ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
         return PHISCEV;
       }
@@ -4737,7 +4828,7 @@
       // own when it gets to that point.
       if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
         forgetMemoizedResults(Old);
-        ValueExprMap.erase(It);
+        eraseExprValueMap(static_cast<Value *>(I));
       }
       if (PHINode *PN = dyn_cast<PHINode>(I))
         ConstantEvolutionLoopExitValue.erase(PN);
@@ -4781,7 +4872,7 @@
       ValueExprMap.find_as(static_cast<Value *>(I));
     if (It != ValueExprMap.end()) {
       forgetMemoizedResults(It->second);
-      ValueExprMap.erase(It);
+      eraseExprValueMap(static_cast<Value *>(I));
       if (PHINode *PN = dyn_cast<PHINode>(I))
         ConstantEvolutionLoopExitValue.erase(PN);
     }
@@ -4816,7 +4907,7 @@
       ValueExprMap.find_as(static_cast<Value *>(I));
     if (It != ValueExprMap.end()) {
       forgetMemoizedResults(It->second);
-      ValueExprMap.erase(It);
+      eraseExprValueMap(static_cast<Value *>(I));
       if (PHINode *PN = dyn_cast<PHINode>(I))
         ConstantEvolutionLoopExitValue.erase(PN);
     }
@@ -8234,7 +8325,7 @@
   assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
   if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
     SE->ConstantEvolutionLoopExitValue.erase(PN);
-  SE->ValueExprMap.erase(getValPtr());
+  SE->eraseExprValueMap(getValPtr());
   // this now dangles!
 }
@@ -8257,13 +8348,13 @@
       continue;
     if (PHINode *PN = dyn_cast<PHINode>(U))
       SE->ConstantEvolutionLoopExitValue.erase(PN);
-    SE->ValueExprMap.erase(U);
+    SE->eraseExprValueMap(U);
     Worklist.insert(Worklist.end(), U->user_begin(), U->user_end());
   }
   // Delete the Old value.
   if (PHINode *PN = dyn_cast<PHINode>(Old))
     SE->ConstantEvolutionLoopExitValue.erase(PN);
-  SE->ValueExprMap.erase(Old);
+  SE->eraseExprValueMap(Old);
   // this now dangles!
 }
@@ -8296,7 +8387,9 @@
     U->~SCEVUnknown();
   FirstUnknown = nullptr;
 
+  ExprValueMap.clear();
   ValueExprMap.clear();
+  HasRecMap.clear();
 
   // Free any extra memory created for ExitNotTakenInfo in the unlikely event
   // that a loop had multiple computable exits.
@@ -8651,6 +8744,8 @@
   BlockDispositions.erase(S);
   UnsignedRanges.erase(S);
   SignedRanges.erase(S);
+  ExprValueMap.erase(S);
+  HasRecMap.erase(S);
 
   for (DenseMap<const Loop *, BackedgeTakenInfo>::iterator I =
          BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
Index: lib/Analysis/ScalarEvolutionExpander.cpp
===================================================================
--- lib/Analysis/ScalarEvolutionExpander.cpp
+++ lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1642,7 +1642,11 @@
   Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
 
   // Expand the expression into instructions.
-  Value *V = visit(S);
+  Value *V = SE.getSCEVValue(S);
+  if (V == nullptr || !isa<Instruction>(V) || SE.hasAnyRec(S) ||
+      S->getType() != V->getType() ||
+      !SE.DT->dominates(cast<Instruction>(V), InsertPt))
+    V = visit(S);
 
   // Remember the expanded value for this SCEV at this location.
   //
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2595,6 +2595,11 @@
   // adding one to the backedge-taken count will not overflow.
   BasicBlock *NewVectorPH =
       VectorPH->splitBasicBlock(VectorPH->getTerminator(), "overflow.checked");
+  // Update the dominator tree immediately whenever the generated block is a
+  // LoopBypassBlock, because the SCEV expansions used to generate the loop
+  // bypass checks may query the dominator tree before the current
+  // function's construction is finished.
+  DT->addNewBlock(NewVectorPH, VectorPH);
   if (ParentLoop)
     ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
   ReplaceInstWithInst(
@@ -2635,6 +2640,7 @@
       BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero");
   NewVectorPH =
       VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
+  DT->addNewBlock(NewVectorPH, VectorPH);
   if (ParentLoop)
     ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
   LoopBypassBlocks.push_back(VectorPH);
@@ -2655,6 +2661,7 @@
     VectorPH->setName("vector.stridecheck");
     NewVectorPH =
         VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
+    DT->addNewBlock(NewVectorPH, VectorPH);
     if (ParentLoop)
       ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
     LoopBypassBlocks.push_back(VectorPH);
@@ -2680,6 +2687,7 @@
     VectorPH->setName("vector.memcheck");
     NewVectorPH =
        VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
+    DT->addNewBlock(NewVectorPH, VectorPH);
     if (ParentLoop)
       ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
     LoopBypassBlocks.push_back(VectorPH);
@@ -3697,10 +3705,6 @@
   assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
          "Entry does not dominate exit.");
 
-  for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
-    DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
-  DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
-
   // Due to if predication of stores we might create a sequence of "if(pred)
   // a[i] = ...;  " blocks.
   for (unsigned i = 0, e = LoopVectorBody.size(); i != e; ++i) {
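
The ScalarEvolutionExpander.cpp hunk above gates value reuse on four
conditions. As a reading aid, here is the same guard factored into a
hypothetical standalone predicate (canReuseExistingValue is not part of the
patch); it assumes the hasAnyRec/getSCEVValue interface added by this patch
and the same SE.DT access the hunk itself performs:

  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/IR/Dominators.h"
  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  // Reuse V for S only if: V exists and is an Instruction we can reason
  // about positionally, S contains no AddRec (an AddRec's value differs per
  // loop iteration, so a cached value may be wrong at InsertPt), the types
  // match, and V's definition dominates the insertion point.
  static bool canReuseExistingValue(ScalarEvolution &SE, const SCEV *S,
                                    Value *V, Instruction *InsertPt) {
    return V != nullptr && isa<Instruction>(V) && !SE.hasAnyRec(S) &&
           S->getType() == V->getType() &&
           SE.DT->dominates(cast<Instruction>(V), InsertPt);
  }

The dominance requirement is also why the LoopVectorize.cpp hunks above keep
the dominator tree up to date as each bypass block is split, instead of
rebuilding it at the end: the expander may ask dominance questions mid-way
through loop construction.
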
Index: test/Analysis/ScalarEvolution/scev-expander-existing-value.ll
===================================================================
--- test/Analysis/ScalarEvolution/scev-expander-existing-value.ll
+++ test/Analysis/ScalarEvolution/scev-expander-existing-value.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+; SCEV expansion uses existing value when the SCEV has no AddRec expr.
+; CHECK: select
+; CHECK-NOT: select
+
+@a = common global [1000 x i16] zeroinitializer, align 16
+
+define i32 @foo(i32 %x, i32 %y) {
+entry:
+  %cmp = icmp slt i32 %x, %y
+  %cond = select i1 %cmp, i32 %x, i32 %y
+  %cmp1.10 = icmp sgt i32 %cond, 0
+  br i1 %cmp1.10, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %tmp = sext i32 %cond to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %total.011 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds [1000 x i16], [1000 x i16]* @a, i64 0, i64 %indvars.iv
+  %tmp1 = load i16, i16* %arrayidx, align 2
+  %conv = sext i16 %tmp1 to i32
+  %add = add nsw i32 %conv, %total.011
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp1 = icmp slt i64 %indvars.iv.next, %tmp
+  br i1 %cmp1, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %total.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %for.end.loopexit ]
+  ret i32 %total.0.lcssa
+}
Index: test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
===================================================================
--- test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
+++ test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
@@ -8,7 +8,6 @@
 ; -- The loop following the load should only use a single add-literation
 ;    instruction.
 ; CHECK: vldr
-; CHECK: adds r{{[0-9]+.*}}#1
 ; CHECK-NOT: adds
 ; CHECK: subsections_via_symbols
Index: test/Transforms/IRCE/decrementing-loop.ll
===================================================================
--- test/Transforms/IRCE/decrementing-loop.ll
+++ test/Transforms/IRCE/decrementing-loop.ll
@@ -28,7 +28,6 @@
   ret void
 
 ; CHECK: loop.preheader:
-; CHECK: [[indvar_start:[^ ]+]] = add i32 %n, -1
 ; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len
 ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
 ; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]]
Index: test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
===================================================================
--- test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
+++ test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
@@ -11,7 +11,7 @@
   br i1 %cmp1, label %for.body, label %for.end
 
 ; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %0
+; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %idx.trunc
 
 ; CHECK: for.body:
 ; CHECK: phi i8 addrspace(2)*
@@ -43,7 +43,7 @@
   br i1 %cmp1, label %for.body, label %for.end
 
 ; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %0
+; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %idx.trunc
 
 ; CHECK: for.body:
 ; CHECK: phi i8 addrspace(3)*
Index: test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
===================================================================
--- test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
+++ test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
@@ -32,15 +32,9 @@
 
 ; CHECK-LABEL: @test1(
 
-; First check that we move the sub into the preheader, it doesn't have to be
-; executed if %cmp4 == false
-; CHECK: for.body.preheader:
-; CHECK: sub i32 %data_len, %sample
-; CHECK: br label %for.body
-
-; Second, check that we turn the IV test into an eq.
+; Check that we turn the IV test into an eq.
 ; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %0
+; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %sub
 ; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
 
 }
Index: test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
===================================================================
--- test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -4,8 +4,9 @@
 ; LSR should properly handle the post-inc offset when folding the
 ; non-IV operand of an icmp into the IV.
 
-; CHECK: [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
-; CHECK: [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1
+; CHECK: [[r1:%[a-z0-9\.]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK: [[r2:%[a-z0-9\.]+]] = lshr exact i64 [[r1]], 1
+; CHECK: for.body.lr.ph:
 ; CHECK: [[r3:%[a-z0-9]+]] = shl i64 [[r2]], 1
 ; CHECK: br label %for.body
 ; CHECK: for.body:
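
A closing note on hasAnyRec: SCEV expressions form a DAG with heavy node
sharing, so the HasRecMap cache is what keeps repeated containment queries
linear rather than exponential. Below is a minimal standalone sketch of the
same memoized walk over a toy expression type -- Expr is invented for the
example and stands in for SCEV, not LLVM code:

  #include <unordered_map>
  #include <vector>

  struct Expr {
    bool IsAddRec = false;         // stands in for scAddRecExpr
    std::vector<const Expr *> Ops; // operands, possibly shared between exprs
  };

  static bool hasAnyRec(const Expr *E,
                        std::unordered_map<const Expr *, bool> &Cache) {
    auto It = Cache.find(E);
    if (It != Cache.end())
      return It->second;           // cache hit: reuse the earlier answer
    bool Any = E->IsAddRec;
    for (const Expr *Op : E->Ops)
      Any = Any || hasAnyRec(Op, Cache); // short-circuits, as in the patch
    Cache.insert({E, Any});
    return Any;
  }

As in the patch, every node the walk actually visits gets a cache entry, so
later queries over shared subexpressions return in constant time.
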