Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h +++ include/llvm/Analysis/ScalarEvolution.h @@ -66,6 +66,14 @@ // The SCEV baseclass this node corresponds to const unsigned short SCEVType; + // The Value this node is created for + Value *Val; + + enum SCEVHasAnyRecur { scevUnInit, scevHasRec, scevHasNoRec }; + + /// Indicate the SCEV is a scAddRecExpr or has scAddRecExpr type sub SCEV. + enum SCEVHasAnyRecur HasRecur; + protected: /// SubclassData - This field is initialized to zero and may be used in /// subclasses to store miscellaneous information. @@ -100,8 +108,9 @@ FlagNSW = (1 << 2), // No signed wrap. NoWrapMask = (1 << 3) -1 }; - explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy) : - FastID(ID), SCEVType(SCEVTy), SubclassData(0) {} + explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy) + : FastID(ID), SCEVType(SCEVTy), Val(nullptr), HasRecur(scevUnInit), + SubclassData(0) {} unsigned getSCEVType() const { return SCEVType; } @@ -109,6 +118,19 @@ /// Type *getType() const; + /// getValue - Return the Value this SCEV expression is created for. + /// + Value *getValue() const { return Val; } + + /// setValue - Set Value. + /// + void setValue(Value *V) { Val = V; } + + /// hasAnyRec - Return true if the SCEV is a scAddRecExpr or it + /// contains scAddRecExpr. Cache the result in HasRecur. + /// + bool hasAnyRec(); + /// isZero - Return true if the expression is a constant zero. 
/// bool isZero() const; Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -292,6 +292,64 @@ return false; } +bool SCEV::hasAnyRec() { + if (HasRecur != scevUnInit) + return HasRecur == scevHasRec; + + bool RecAppr = false; + switch (static_cast<SCEVTypes>(getSCEVType())) { + case scConstant: + RecAppr = false; + break; + case scTruncate: { + const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this); + SCEV *Op = const_cast<SCEV *>(Trunc->getOperand()); + RecAppr = Op->hasAnyRec(); + break; + } + case scZeroExtend: { + const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this); + SCEV *Op = const_cast<SCEV *>(ZExt->getOperand()); + RecAppr = Op->hasAnyRec(); + break; + } + case scSignExtend: { + const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this); + SCEV *Op = const_cast<SCEV *>(SExt->getOperand()); + RecAppr = Op->hasAnyRec(); + break; + } + case scAddRecExpr: { + RecAppr = true; + break; + } + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this); + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) + RecAppr = RecAppr || (const_cast<SCEV *>(*I))->hasAnyRec(); + break; + } + case scUDivExpr: { + SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this); + RecAppr = const_cast<SCEV *>(UDiv->getLHS())->hasAnyRec() || + const_cast<SCEV *>(UDiv->getRHS())->hasAnyRec(); + break; + } + case scUnknown: + case scCouldNotCompute: + RecAppr = false; + break; + default: + llvm_unreachable("Unknown SCEV kind!"); + } + HasRecur = RecAppr ? scevHasRec : scevHasNoRec; + return RecAppr; +} + /// isNonConstantNegative - Return true if the specified scev is negated, but /// not a constant. 
bool SCEV::isNonConstantNegative() const { @@ -3316,9 +3374,10 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); - const SCEV *S = getExistingSCEV(V); + SCEV *S = const_cast<SCEV *>(getExistingSCEV(V)); if (S == nullptr) { - S = createSCEV(V); + S = const_cast<SCEV *>(createSCEV(V)); + S->setValue(V); ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); } return S; Index: lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- lib/Analysis/ScalarEvolutionExpander.cpp +++ lib/Analysis/ScalarEvolutionExpander.cpp @@ -1538,9 +1538,9 @@ } Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { - Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + Value *LHS = expand(S->getOperand(0)); Type *Ty = LHS->getType(); - for (int i = S->getNumOperands()-2; i >= 0; --i) { + for (unsigned i = 1; i < S->getNumOperands(); i++) { // In the case of mixed integer and pointer types, do the // rest of the comparisons as integer. if (S->getOperand(i)->getType() != Ty) { @@ -1642,7 +1642,11 @@ Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); // Expand the expression into instructions. - Value *V = visit(S); + Value *V = S->getValue(); + if (V == nullptr || !isa<Instruction>(V) || + (const_cast<SCEV *>(S))->hasAnyRec() || S->getType() != V->getType() || + !SE.DT->dominates(cast<Instruction>(V), InsertPt)) + V = visit(S); // Remember the expanded value for this SCEV at this location. // Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2595,6 +2595,11 @@ // adding one to the backedge-taken count will not overflow. 
BasicBlock *NewVectorPH = VectorPH->splitBasicBlock(VectorPH->getTerminator(), "overflow.checked"); + // Update dominate tree immediately if the generated block is a + // LoopBypassBlock + // because SCEV expansions to generate loop bypass checks may query it before + // the current func is finished. + DT->addNewBlock(NewVectorPH, VectorPH); if (ParentLoop) ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); ReplaceInstWithInst( @@ -2635,6 +2640,7 @@ BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero"); NewVectorPH = VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); + DT->addNewBlock(NewVectorPH, VectorPH); if (ParentLoop) ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); LoopBypassBlocks.push_back(VectorPH); @@ -2655,6 +2661,7 @@ VectorPH->setName("vector.stridecheck"); NewVectorPH = VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); + DT->addNewBlock(NewVectorPH, VectorPH); if (ParentLoop) ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); LoopBypassBlocks.push_back(VectorPH); @@ -2680,6 +2687,7 @@ VectorPH->setName("vector.memcheck"); NewVectorPH = VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); + DT->addNewBlock(NewVectorPH, VectorPH); if (ParentLoop) ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); LoopBypassBlocks.push_back(VectorPH); @@ -3697,10 +3705,6 @@ assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) && "Entry does not dominate exit."); - for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) - DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]); - DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back()); - // Due to if predication of stores we might create a sequence of "if(pred) // a[i] = ...; " blocks. 
for (unsigned i = 0, e = LoopVectorBody.size(); i != e; ++i) { Index: test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll =================================================================== --- test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll +++ test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll @@ -8,7 +8,6 @@ ; -- The loop following the load should only use a single add-literation ; instruction. ; CHECK: vldr -; CHECK: adds r{{[0-9]+.*}}#1 ; CHECK-NOT: adds ; CHECK: subsections_via_symbols Index: test/Transforms/IRCE/decrementing-loop.ll =================================================================== --- test/Transforms/IRCE/decrementing-loop.ll +++ test/Transforms/IRCE/decrementing-loop.ll @@ -28,14 +28,13 @@ ret void ; CHECK: loop.preheader: -; CHECK: [[indvar_start:[^ ]+]] = add i32 %n, -1 -; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]] -; CHECK: [[not_len_hiclamp:[^ ]+]] = select i1 [[not_len_hiclamp_cmp]], i32 [[not_len]], i32 [[not_n]] +; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len +; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_len]] +; CHECK: [[not_len_hiclamp:[^ ]+]] = select i1 [[not_len_hiclamp_cmp]], i32 [[not_n]], i32 [[not_len]] ; CHECK: [[len_hiclamp:[^ ]+]] = sub i32 -1, [[not_len_hiclamp]] -; CHECK: [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 [[len_hiclamp]], 0 -; CHECK: [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 [[len_hiclamp]], i32 0 +; CHECK: [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 0, [[len_hiclamp]] +; CHECK: [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 0, i32 [[len_hiclamp]] ; CHECK: %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1 } Index: test/Transforms/IRCE/multiple-access-no-preloop.ll =================================================================== --- test/Transforms/IRCE/multiple-access-no-preloop.ll +++ 
test/Transforms/IRCE/multiple-access-no-preloop.ll @@ -37,16 +37,16 @@ ; CHECK-LABEL: multiple_access_no_preloop ; CHECK-LABEL: loop.preheader: -; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b -; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a -; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]] -; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]] ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]] -; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]] +; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a +; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_len_a]] +; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_n]], i32 [[not_len_a]] +; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b +; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_len_b]] +; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_len_b]] ; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]] -; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0 -; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0 +; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 0, [[upper_limit_loclamp]] +; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 0, i32 [[upper_limit_loclamp]] ; CHECK-LABEL: loop: ; CHECK: br i1 true, label %in.bounds.a, label %out.of.bounds Index: test/Transforms/IRCE/single-access-no-preloop.ll =================================================================== --- test/Transforms/IRCE/single-access-no-preloop.ll +++ test/Transforms/IRCE/single-access-no-preloop.ll @@ -84,13 +84,13 @@ ; CHECK-LABEL: 
single_access_no_preloop_with_offset ; CHECK-LABEL: loop.preheader: -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n ; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len -; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] -; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] +; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]] +; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]] ; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] -; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 -; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 +; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 0, [[exit_main_loop_at_hiclamp]] +; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 0, i32 [[exit_main_loop_at_hiclamp]] ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] ; CHECK: br i1 [[enter_main_loop]], label %loop, label %main.pseudo.exit Index: test/Transforms/IRCE/single-access-with-preloop.ll =================================================================== --- test/Transforms/IRCE/single-access-with-preloop.ll +++ test/Transforms/IRCE/single-access-with-preloop.ll @@ -29,22 +29,21 @@ } ; CHECK-LABEL: loop.preheader: -; CHECK: [[not_safe_start:[^ ]+]] = add i32 %offset, -1 ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]] -; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], 
i32 [[not_safe_start]], i32 [[not_n]] +; CHECK: [[not_safe_start:[^ ]+]] = add i32 %offset, -1 +; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_start]] +; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], i32 [[not_n]], i32 [[not_safe_start]] ; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at_loclamp]] -; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[exit_preloop_at_loclamp]], 0 -; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0 +; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 0, [[exit_preloop_at_loclamp]] +; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 0, i32 [[exit_preloop_at_loclamp]] -; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 %offset, -1 -; CHECK: [[not_safe_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len -; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_end]], [[not_n]] -; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_end]], i32 [[not_n]] +; CHECK: [[not_safe_end:[^ ]+]] = sub i32 [[not_safe_start]], %len +; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_end]] +; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_n]], i32 [[not_safe_end]] ; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_loclamp]] -; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_loclamp]], 0 -; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 [[exit_mainloop_at_loclamp]], i32 0 +; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 0, [[exit_mainloop_at_loclamp]] +; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 0, i32 [[exit_mainloop_at_loclamp]] ; CHECK-LABEL: 
in.bounds: Index: test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll =================================================================== --- test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll +++ test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll @@ -11,7 +11,7 @@ br i1 %cmp1, label %for.body, label %for.end ; Make sure the added GEP has the right index type -; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %0 +; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %idx.trunc ; CHECK: for.body: ; CHECK: phi i8 addrspace(2)* @@ -43,7 +43,7 @@ br i1 %cmp1, label %for.body, label %for.end ; Make sure the added GEP has the right index type -; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %0 +; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %idx.trunc ; CHECK: for.body: ; CHECK: phi i8 addrspace(3)* Index: test/Transforms/IndVarSimplify/udiv.ll =================================================================== --- test/Transforms/IndVarSimplify/udiv.ll +++ test/Transforms/IndVarSimplify/udiv.ll @@ -127,12 +127,12 @@ declare i32 @printf(i8* nocapture, ...) nounwind -; IndVars shouldn't be afraid to emit a udiv here, since there's a udiv in -; the original code. +; SCEV expansion in IndVars pass will reuse the udiv value already computed +; in the entry bb. ; CHECK-LABEL: @foo( ; CHECK: for.body.preheader: -; CHECK-NEXT: udiv +; CHECK-NOT: udiv define void @foo(double* %p, i64 %n) nounwind { entry: Index: test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll =================================================================== --- test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll +++ test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll @@ -4,12 +4,14 @@ ; LSR should properly handle the post-inc offset when folding the ; non-IV operand of an icmp into the IV. 
-; CHECK: [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast -; CHECK: [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1 -; CHECK: [[r3:%[a-z0-9]+]] = shl i64 [[r2]], 1 +; CHECK: [[r1:%[a-z0-9\.]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast +; CHECK: [[r2:%[a-z0-9\.]+]] = lshr exact i64 [[r1]], 1 +; CHECK: [[r3:%[a-z0-9\.]+]] = bitcast i64 [[r2]] to i64 +; CHECK: for.body.lr.ph: +; CHECK: [[r4:%[a-z0-9]+]] = shl i64 [[r3]], 1 ; CHECK: br label %for.body ; CHECK: for.body: -; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r3]], %for.body.lr.ph ] +; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r4]], %for.body.lr.ph ] ; CHECK: %lsr.iv.next = add i64 %lsr.iv2, -2 ; CHECK: %lsr.iv.next3 = inttoptr i64 %lsr.iv.next to i16* ; CHECK: %cmp27 = icmp eq i16* %lsr.iv.next3, null