Index: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h +++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h @@ -598,6 +598,8 @@ /// \p IndexExprs The expressions for the indices. const SCEV *getGEPExpr(GEPOperator *GEP, const SmallVectorImpl &IndexExprs); + const SCEV *getMinMaxExpr(unsigned Kind, + SmallVectorImpl &Operands); const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS); const SCEV *getSMaxExpr(SmallVectorImpl &Operands); const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS); Index: llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h +++ llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -367,6 +367,10 @@ Value *visitUMaxExpr(const SCEVUMaxExpr *S); + Value *visitSMinExpr(const SCEVSMinExpr *S); + + Value *visitUMinExpr(const SCEVUMinExpr *S); + Value *visitUnknown(const SCEVUnknown *S) { return S->getValue(); } Index: llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpressions.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -39,7 +39,7 @@ // These should be ordered in terms of increasing complexity to make the // folders simpler. scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr, - scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, + scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, scUMinExpr, scSMinExpr, scUnknown, scCouldNotCompute }; @@ -190,10 +190,9 @@ /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const SCEV *S) { - return S->getSCEVType() == scAddExpr || - S->getSCEVType() == scMulExpr || - S->getSCEVType() == scSMaxExpr || - S->getSCEVType() == scUMaxExpr || + return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr || + S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr || + S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr || S->getSCEVType() == scAddRecExpr; } }; @@ -208,10 +207,9 @@ public: /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const SCEV *S) { - return S->getSCEVType() == scAddExpr || - S->getSCEVType() == scMulExpr || - S->getSCEVType() == scSMaxExpr || - S->getSCEVType() == scUMaxExpr; + return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr || + S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr || + S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr; } /// Set flags for a non-recurrence without clearing previously set flags. @@ -366,18 +364,54 @@ } }; - /// This class represents a signed maximum selection. - class SCEVSMaxExpr : public SCEVCommutativeExpr { + /// This node is the base class min/max selections. + class SCEVMinMaxExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; - SCEVSMaxExpr(const FoldingSetNodeIDRef ID, - const SCEV *const *O, size_t N) - : SCEVCommutativeExpr(ID, scSMaxExpr, O, N) { - // Max never overflows. + static bool isMinMaxType(enum SCEVTypes T) { + return T == scSMaxExpr || T == scUMaxExpr || T == scSMinExpr || + T == scUMinExpr; + } + + protected: + /// Note: Constructing subclasses via this constructor is allowed + SCEVMinMaxExpr(const FoldingSetNodeIDRef ID, enum SCEVTypes T, + const SCEV *const *O, size_t N) + : SCEVCommutativeExpr(ID, T, O, N) { + assert(isMinMaxType(T)); + // Min and max never overflow setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)); } public: + static bool classof(const SCEV *S) { + return isMinMaxType(static_cast(S->getSCEVType())); + } + + static enum SCEVTypes negate(enum SCEVTypes T) { + switch (T) { + case scSMaxExpr: + return scSMinExpr; + case scSMinExpr: + return scSMaxExpr; + case scUMaxExpr: + return scUMinExpr; + case scUMinExpr: + return scUMaxExpr; + default: + llvm_unreachable("Not a min or max SCEV type!"); + } + } + }; + + /// This class represents a signed maximum selection. + class SCEVSMaxExpr : public SCEVMinMaxExpr { + friend class ScalarEvolution; + + SCEVSMaxExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N) + : SCEVMinMaxExpr(ID, scSMaxExpr, O, N) {} + + public: /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const SCEV *S) { return S->getSCEVType() == scSMaxExpr; @@ -385,15 +419,11 @@ }; /// This class represents an unsigned maximum selection. - class SCEVUMaxExpr : public SCEVCommutativeExpr { + class SCEVUMaxExpr : public SCEVMinMaxExpr { friend class ScalarEvolution; - SCEVUMaxExpr(const FoldingSetNodeIDRef ID, - const SCEV *const *O, size_t N) - : SCEVCommutativeExpr(ID, scUMaxExpr, O, N) { - // Max never overflows. - setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)); - } + SCEVUMaxExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N) + : SCEVMinMaxExpr(ID, scUMaxExpr, O, N) {} public: /// Methods for support type inquiry through isa, cast, and dyn_cast: @@ -402,6 +432,34 @@ } }; + /// This class represents a signed minimum selection. + class SCEVSMinExpr : public SCEVMinMaxExpr { + friend class ScalarEvolution; + + SCEVSMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N) + : SCEVMinMaxExpr(ID, scSMinExpr, O, N) {} + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scSMinExpr; + } + }; + + /// This class represents an unsigned minimum selection. + class SCEVUMinExpr : public SCEVMinMaxExpr { + friend class ScalarEvolution; + + SCEVUMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N) + : SCEVMinMaxExpr(ID, scUMinExpr, O, N) {} + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scUMinExpr; + } + }; + /// This means that we are dealing with an entirely unknown SCEV /// value, and only represent it as its LLVM Value. This is the /// "bottom" value for the analysis. @@ -474,6 +532,10 @@ return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S); case scUMaxExpr: return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S); + case scSMinExpr: + return ((SC *)this)->visitSMinExpr((const SCEVSMinExpr *)S); + case scUMinExpr: + return ((SC *)this)->visitUMinExpr((const SCEVUMinExpr *)S); case scUnknown: return ((SC*)this)->visitUnknown((const SCEVUnknown*)S); case scCouldNotCompute: @@ -527,6 +589,8 @@ case scMulExpr: case scSMaxExpr: case scUMaxExpr: + case scSMinExpr: + case scUMinExpr: case scAddRecExpr: for (const auto *Op : cast(S)->operands()) push(Op); @@ -689,6 +753,26 @@ return !Changed ? Expr : SE.getUMaxExpr(Operands); } + const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) { + SmallVector Operands; + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC *)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr : SE.getSMinExpr(Operands); + } + + const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) { + SmallVector Operands; + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC *)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr : SE.getUMinExpr(Operands); + } + const SCEV *visitUnknown(const SCEVUnknown *Expr) { return Expr; } Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp @@ -277,7 +277,9 @@ case scAddExpr: case scMulExpr: case scUMaxExpr: - case scSMaxExpr: { + case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: { const SCEVNAryExpr *NAry = cast(this); const char *OpStr = nullptr; switch (NAry->getSCEVType()) { @@ -285,6 +287,12 @@ case scMulExpr: OpStr = " * "; break; case scUMaxExpr: OpStr = " umax "; break; case scSMaxExpr: OpStr = " smax "; break; + case scUMinExpr: + OpStr = " umin "; + break; + case scSMinExpr: + OpStr = " smin "; + break; } OS << "("; for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); @@ -353,6 +361,8 @@ case scMulExpr: case scUMaxExpr: case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: return cast(this)->getType(); case scAddExpr: return cast(this)->getType(); @@ -717,7 +727,9 @@ case scAddExpr: case scMulExpr: case scSMaxExpr: - case scUMaxExpr: { + case scUMaxExpr: + case scSMinExpr: + case scUMinExpr: { const SCEVNAryExpr *LC = cast(LHS); const SCEVNAryExpr *RC = cast(RHS); @@ -927,6 +939,8 @@ void visitUDivExpr(const SCEVUDivExpr *Numerator) {} void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} + void visitSMinExpr(const SCEVSMinExpr *Numerator) {} + void visitUMinExpr(const SCEVUMinExpr *Numerator) {} void visitUnknown(const SCEVUnknown *Numerator) {} void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} @@ -3516,12 +3530,6 @@ return getAddExpr(BaseExpr, TotalOffset, Wrap); } -const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, - const SCEV *RHS) { - SmallVector Ops = {LHS, RHS}; - return getSMaxExpr(Ops); -} - std::tuple ScalarEvolution::findExistingSCEVInCache(int SCEVType, ArrayRef Ops) { @@ -3534,22 +3542,25 @@ UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP); } -const SCEV * -ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { - assert(!Ops.empty() && "Cannot get empty smax!"); +const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind, + SmallVectorImpl &Ops) { + assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && - "SCEVSMaxExpr operand types don't match!"); + "Operand types don't match!"); #endif + bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr; + bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr; + // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); - // Check if we have created the same SMax expression before. - if (const SCEV *S = std::get<0>(findExistingSCEVInCache(scSMaxExpr, Ops))) { + // Check if we have created the same expression before. + if (const SCEV *S = std::get<0>(findExistingSCEVInCache(Kind, Ops))) { return S; } @@ -3558,190 +3569,127 @@ if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { ++Idx; assert(Idx < Ops.size()); + auto FoldOp = [&](const APInt &LHS, const APInt &RHS) { + if (Kind == scSMaxExpr) + return APIntOps::smax(LHS, RHS); + else if (Kind == scSMinExpr) + return APIntOps::smin(LHS, RHS); + else if (Kind == scUMaxExpr) + return APIntOps::umax(LHS, RHS); + else if (Kind == scUMinExpr) + return APIntOps::umin(LHS, RHS); + llvm_unreachable("Unknown SCEV min/max opcode"); + }; + while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! ConstantInt *Fold = ConstantInt::get( - getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt())); + getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt())); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; LHSC = cast(Ops[0]); } - // If we are left with a constant minimum-int, strip it off. - if (cast(Ops[0])->getValue()->isMinValue(true)) { + bool IsMinV = LHSC->getValue()->isMinValue(IsSigned); + bool IsMaxV = LHSC->getValue()->isMaxValue(IsSigned); + + if (IsMax ? IsMinV : IsMaxV) { + // If we are left with a constant minimum(/maximum)-int, strip it off. Ops.erase(Ops.begin()); --Idx; - } else if (cast(Ops[0])->getValue()->isMaxValue(true)) { - // If we have an smax with a constant maximum-int, it will always be - // maximum-int. - return Ops[0]; + } else if (IsMax ? IsMaxV : IsMinV) { + // If we have a max(/min) with a constant maximum(/minimum)-int, + // it will always be the extremum. + return LHSC; } if (Ops.size() == 1) return Ops[0]; } - // Find the first SMax - while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr) + // Find the first operation of the same kind + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind) ++Idx; - // Check to see if one of the operands is an SMax. If so, expand its operands - // onto our operand list, and recurse to simplify. + // Check to see if one of the operands is of the same kind. If so, expand its + // operands onto our operand list, and recurse to simplify. if (Idx < Ops.size()) { - bool DeletedSMax = false; - while (const SCEVSMaxExpr *SMax = dyn_cast(Ops[Idx])) { + bool DeletedAny = false; + while (Ops[Idx]->getSCEVType() == Kind) { + const SCEVMinMaxExpr *SMME = cast(Ops[Idx]); Ops.erase(Ops.begin()+Idx); - Ops.append(SMax->op_begin(), SMax->op_end()); - DeletedSMax = true; + Ops.append(SMME->op_begin(), SMME->op_end()); + DeletedAny = true; } - if (DeletedSMax) - return getSMaxExpr(Ops); + if (DeletedAny) + return getMinMaxExpr(Kind, Ops); } // Okay, check to see if the same value occurs in the operand list twice. If // so, delete one. Since we sorted the list, these values are required to // be adjacent. - for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) - // X smax Y smax Y --> X smax Y - // X smax Y --> X, if X is always greater than Y - if (Ops[i] == Ops[i + 1] || isKnownViaNonRecursiveReasoning( - ICmpInst::ICMP_SGE, Ops[i], Ops[i + 1])) { - Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); - --i; --e; - } else if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, Ops[i], + llvm::CmpInst::Predicate GEPred = + IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; + llvm::CmpInst::Predicate LEPred = + IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; + llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred; + llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred : GEPred; + for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) { + if (Ops[i] == Ops[i + 1] || + isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) { + // X op Y op Y --> X op Y + // X op Y --> X, if we know X, Y are ordered appropriately + Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2); + --i; + --e; + } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i], Ops[i + 1])) { - Ops.erase(Ops.begin()+i, Ops.begin()+i+1); - --i; --e; + // X op Y --> Y, if we know X, Y are ordered appropriately + Ops.erase(Ops.begin() + i, Ops.begin() + i + 1); + --i; + --e; } + } if (Ops.size() == 1) return Ops[0]; assert(!Ops.empty() && "Reduced smax down to nothing!"); - // Okay, it looks like we really DO need an smax expr. Check to see if we + // Okay, it looks like we really DO need an expr. Check to see if we // already have one, otherwise create a new one. const SCEV *ExistingSCEV; FoldingSetNodeID ID; void *IP; - std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(scSMaxExpr, Ops); + std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(Kind, Ops); if (ExistingSCEV) return ExistingSCEV; const SCEV **O = SCEVAllocator.Allocate(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); - SCEV *S = - new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size()); + SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr( + ID.Intern(SCEVAllocator), static_cast(Kind), O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } -const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, - const SCEV *RHS) { +const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) { SmallVector Ops = {LHS, RHS}; - return getUMaxExpr(Ops); + return getSMaxExpr(Ops); } -const SCEV * -ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { - assert(!Ops.empty() && "Cannot get empty umax!"); - if (Ops.size() == 1) return Ops[0]; -#ifndef NDEBUG - Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); - for (unsigned i = 1, e = Ops.size(); i != e; ++i) - assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && - "SCEVUMaxExpr operand types don't match!"); -#endif - - // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, &LI, DT); - - // Check if we have created the same UMax expression before. - if (const SCEV *S = std::get<0>(findExistingSCEVInCache(scUMaxExpr, Ops))) { - return S; - } - - // If there are any constants, fold them together. - unsigned Idx = 0; - if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { - ++Idx; - assert(Idx < Ops.size()); - while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { - // We found two constants, fold them together! - ConstantInt *Fold = ConstantInt::get( - getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt())); - Ops[0] = getConstant(Fold); - Ops.erase(Ops.begin()+1); // Erase the folded element - if (Ops.size() == 1) return Ops[0]; - LHSC = cast(Ops[0]); - } - - // If we are left with a constant minimum-int, strip it off. - if (cast(Ops[0])->getValue()->isMinValue(false)) { - Ops.erase(Ops.begin()); - --Idx; - } else if (cast(Ops[0])->getValue()->isMaxValue(false)) { - // If we have an umax with a constant maximum-int, it will always be - // maximum-int. - return Ops[0]; - } - - if (Ops.size() == 1) return Ops[0]; - } - - // Find the first UMax - while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr) - ++Idx; - - // Check to see if one of the operands is a UMax. If so, expand its operands - // onto our operand list, and recurse to simplify. - if (Idx < Ops.size()) { - bool DeletedUMax = false; - while (const SCEVUMaxExpr *UMax = dyn_cast(Ops[Idx])) { - Ops.erase(Ops.begin()+Idx); - Ops.append(UMax->op_begin(), UMax->op_end()); - DeletedUMax = true; - } - - if (DeletedUMax) - return getUMaxExpr(Ops); - } - - // Okay, check to see if the same value occurs in the operand list twice. If - // so, delete one. Since we sorted the list, these values are required to - // be adjacent. - for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) - // X umax Y umax Y --> X umax Y - // X umax Y --> X, if X is always greater than Y - if (Ops[i] == Ops[i + 1] || isKnownViaNonRecursiveReasoning( - ICmpInst::ICMP_UGE, Ops[i], Ops[i + 1])) { - Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2); - --i; --e; - } else if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, Ops[i], - Ops[i + 1])) { - Ops.erase(Ops.begin() + i, Ops.begin() + i + 1); - --i; --e; - } - - if (Ops.size() == 1) return Ops[0]; +const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { + return getMinMaxExpr(scSMaxExpr, Ops); +} - assert(!Ops.empty() && "Reduced umax down to nothing!"); +const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) { + SmallVector Ops = {LHS, RHS}; + return getUMaxExpr(Ops); +} - // Okay, it looks like we really DO need a umax expr. Check to see if we - // already have one, otherwise create a new one. - const SCEV *ExistingSCEV; - FoldingSetNodeID ID; - void *IP; - std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(scUMaxExpr, Ops); - if (ExistingSCEV) - return ExistingSCEV; - const SCEV **O = SCEVAllocator.Allocate(Ops.size()); - std::uninitialized_copy(Ops.begin(), Ops.end(), O); - SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), - O, Ops.size()); - UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); - return S; +const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { + return getMinMaxExpr(scUMaxExpr, Ops); } const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, @@ -3751,11 +3699,7 @@ } const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl &Ops) { - // ~smax(~x, ~y, ~z) == smin(x, y, z). - SmallVector NotOps; - for (auto *S : Ops) - NotOps.push_back(getNotSCEV(S)); - return getNotSCEV(getSMaxExpr(NotOps)); + return getMinMaxExpr(scSMinExpr, Ops); } const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, @@ -3765,16 +3709,7 @@ } const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl &Ops) { - assert(!Ops.empty() && "At least one operand must be!"); - // Trivial case. - if (Ops.size() == 1) - return Ops[0]; - - // ~umax(~x, ~y, ~z) == umin(x, y, z). - SmallVector NotOps; - for (auto *S : Ops) - NotOps.push_back(getNotSCEV(S)); - return getNotSCEV(getUMaxExpr(NotOps)); + return getMinMaxExpr(scUMinExpr, Ops); } const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { @@ -4016,12 +3951,45 @@ V, getConstant(cast(Constant::getAllOnesValue(Ty))), Flags); } +/// If Expr computes ~A, return A else return nullptr +static const SCEV *MatchNotExpr(const SCEV *Expr) { + const SCEVAddExpr *Add = dyn_cast(Expr); + if (!Add || Add->getNumOperands() != 2 || + !Add->getOperand(0)->isAllOnesValue()) + return nullptr; + + const SCEVMulExpr *AddRHS = dyn_cast(Add->getOperand(1)); + if (!AddRHS || AddRHS->getNumOperands() != 2 || + !AddRHS->getOperand(0)->isAllOnesValue()) + return nullptr; + + return AddRHS->getOperand(1); +} + /// Return a SCEV corresponding to ~V = -1-V const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { if (const SCEVConstant *VC = dyn_cast(V)) return getConstant( cast(ConstantExpr::getNot(VC->getValue()))); + // Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y) + if (const SCEVMinMaxExpr *MME = dyn_cast(V)) { + auto MatchMinMaxNegation = [&](const SCEVMinMaxExpr *MME) { + SmallVector MatchedOperands; + for (const SCEV *Operand : MME->operands()) { + const SCEV *Matched = MatchNotExpr(Operand); + if (!Matched) + return (const SCEV *)nullptr; + MatchedOperands.push_back(Matched); + } + return getMinMaxExpr( + SCEVMinMaxExpr::negate(static_cast(MME->getSCEVType())), + MatchedOperands); + }; + if (const SCEV *Replaced = MatchMinMaxNegation(MME)) + return Replaced; + } + Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); const SCEV *AllOnes = @@ -5210,6 +5178,8 @@ switch (S->getSCEVType()) { case scConstant: case scTruncate: case scZeroExtend: case scSignExtend: case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: // These expressions are available if their operand(s) is/are. return true; @@ -8115,7 +8085,9 @@ } case scSMaxExpr: case scUMaxExpr: - break; // TODO: smax, umax. + case scSMinExpr: + case scUMinExpr: + break; // TODO: smax, umax, smin, umax. } return nullptr; } @@ -8243,10 +8215,8 @@ return getAddExpr(NewOps); if (isa(Comm)) return getMulExpr(NewOps); - if (isa(Comm)) - return getSMaxExpr(NewOps); - if (isa(Comm)) - return getUMaxExpr(NewOps); + if (isa(Comm)) + return getMinMaxExpr(Comm->getSCEVType(), NewOps); llvm_unreachable("Unknown commutative SCEV type!"); } } @@ -10087,41 +10057,15 @@ getNotSCEV(FoundLHS)); } -/// If Expr computes ~A, return A else return nullptr -static const SCEV *MatchNotExpr(const SCEV *Expr) { - const SCEVAddExpr *Add = dyn_cast(Expr); - if (!Add || Add->getNumOperands() != 2 || - !Add->getOperand(0)->isAllOnesValue()) - return nullptr; - - const SCEVMulExpr *AddRHS = dyn_cast(Add->getOperand(1)); - if (!AddRHS || AddRHS->getNumOperands() != 2 || - !AddRHS->getOperand(0)->isAllOnesValue()) - return nullptr; - - return AddRHS->getOperand(1); -} - -/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values? -template -static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr, - const SCEV *Candidate) { - const MaxExprType *MaxExpr = dyn_cast(MaybeMaxExpr); - if (!MaxExpr) return false; - - return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end(); -} - -/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values? -template -static bool IsMinConsistingOf(ScalarEvolution &SE, - const SCEV *MaybeMinExpr, - const SCEV *Candidate) { - const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr); - if (!MaybeMaxExpr) +/// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values? +template +static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr, + const SCEV *Candidate) { + const MinMaxExprType *MinMaxExpr = dyn_cast(MaybeMinMaxExpr); + if (!MinMaxExpr) return false; - return IsMaxConsistingOf(MaybeMaxExpr, SE.getNotSCEV(Candidate)); + return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end(); } static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE, @@ -10170,20 +10114,20 @@ LLVM_FALLTHROUGH; case ICmpInst::ICMP_SLE: return - // min(A, ...) <= A - IsMinConsistingOf(SE, LHS, RHS) || - // A <= max(A, ...) - IsMaxConsistingOf(RHS, LHS); + // min(A, ...) <= A + IsMinMaxConsistingOf(LHS, RHS) || + // A <= max(A, ...) + IsMinMaxConsistingOf(RHS, LHS); case ICmpInst::ICMP_UGE: std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ICmpInst::ICMP_ULE: return - // min(A, ...) <= A - IsMinConsistingOf(SE, LHS, RHS) || - // A <= max(A, ...) - IsMaxConsistingOf(RHS, LHS); + // min(A, ...) <= A + IsMinMaxConsistingOf(LHS, RHS) || + // A <= max(A, ...) + IsMinMaxConsistingOf(RHS, LHS); } llvm_unreachable("covered switch fell through?!"); @@ -11651,7 +11595,9 @@ case scAddExpr: case scMulExpr: case scUMaxExpr: - case scSMaxExpr: { + case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: { bool HasVarying = false; for (auto *Op : cast(S)->operands()) { LoopDisposition D = getLoopDisposition(Op, L); @@ -11738,7 +11684,9 @@ case scAddExpr: case scMulExpr: case scUMaxExpr: - case scSMaxExpr: { + case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: { const SCEVNAryExpr *NAry = cast(S); bool Proper = true; for (const SCEV *NAryOp : NAry->operands()) { Index: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1633,7 +1633,8 @@ for (int i = S->getNumOperands()-2; i >= 0; --i) { // In the case of mixed integer and pointer types, do the // rest of the comparisons as integer. - if (S->getOperand(i)->getType() != Ty) { + Type *OpTy = S->getOperand(i)->getType(); + if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { Ty = SE.getEffectiveSCEVType(Ty); LHS = InsertNoopCastOfTo(LHS, Ty); } @@ -1657,7 +1658,8 @@ for (int i = S->getNumOperands()-2; i >= 0; --i) { // In the case of mixed integer and pointer types, do the // rest of the comparisons as integer. - if (S->getOperand(i)->getType() != Ty) { + Type *OpTy = S->getOperand(i)->getType(); + if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { Ty = SE.getEffectiveSCEVType(Ty); LHS = InsertNoopCastOfTo(LHS, Ty); } @@ -1675,6 +1677,56 @@ return LHS; } +Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { + Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); + Type *Ty = LHS->getType(); + for (int i = S->getNumOperands() - 2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. + Type *OpTy = S->getOperand(i)->getType(); + if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); + rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); + rememberInstruction(Sel); + LHS = Sel; + } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); + return LHS; +} + +Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { + Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); + Type *Ty = LHS->getType(); + for (int i = S->getNumOperands() - 2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. + Type *OpTy = S->getOperand(i)->getType(); + if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpULT(LHS, RHS); + rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin"); + rememberInstruction(Sel); + LHS = Sel; + } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); + return LHS; +} + Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, Instruction *IP) { setInsertPoint(IP); @@ -1778,7 +1830,7 @@ // IndVarSimplify sometimes sets the insertion point at the block start, even // when there are PHIs at that point. We must correct for this. - if (isa(*InsertPt)) + if (isa(*InsertPt)) InsertPt = &*InsertPt->getParent()->getFirstInsertionPt(); // Check to see if we already expanded this here. @@ -2110,7 +2162,7 @@ // HowManyLessThans uses a Max expression whenever the loop is not guarded by // the exit condition. - if (isa(S) || isa(S)) + if (isa(S)) return true; // Recurse past nary expressions, which commonly occur in the Index: llvm/trunk/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll =================================================================== --- llvm/trunk/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll +++ llvm/trunk/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll @@ -0,0 +1,50 @@ +; RUN: opt -loop-versioning -S < %s | FileCheck %s + +; NB: addrspaces 10-13 are non-integral +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" + +%jl_value_t = type opaque +%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 } + +define void @"japi1_permutedims!_33509"(%jl_value_t addrspace(10)**) { +; CHECK: [[CMP:%[^ ]*]] = icmp ult double addrspace(13)* [[A:%[^ ]*]], [[B:%[^ ]*]] +; CHECK: [[SELECT:%[^ ]*]] = select i1 %18, double addrspace(13)* [[A]], double addrspace(13)* [[B]] +top: + %1 = alloca [3 x i64], align 8 + %2 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8 + %3 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, i64 1 + %4 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %3, align 8 + %5 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 0 + store i64 1, i64* %5, align 8 + %6 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 1 + %7 = load i64, i64* inttoptr (i64 24 to i64*), align 8 + %8 = addrspacecast %jl_value_t addrspace(10)* %4 to %jl_value_t addrspace(11)* + %9 = bitcast %jl_value_t addrspace(11)* %8 to double addrspace(13)* addrspace(11)* + %10 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %9, align 8 + %11 = addrspacecast %jl_value_t addrspace(10)* %2 to %jl_value_t addrspace(11)* + %12 = bitcast %jl_value_t addrspace(11)* %11 to double addrspace(13)* addrspace(11)* + %13 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %12, align 8 + %14 = load i64, i64* %6, align 8 + br label %L74 + +L74: + %value_phi20 = phi i64 [ 1, %top ], [ %22, %L74 ] + %value_phi21 = phi i64 [ 1, %top ], [ %23, %L74 ] + %value_phi22 = phi i64 [ 1, %top ], [ %25, %L74 ] + %15 = add i64 %value_phi21, -1 + %16 = getelementptr inbounds double, double addrspace(13)* %10, i64 %15 + %17 = bitcast double addrspace(13)* %16 to i64 addrspace(13)* + %18 = load i64, i64 addrspace(13)* %17, align 8 + %19 = add i64 %value_phi20, -1 + %20 = getelementptr inbounds double, double addrspace(13)* %13, i64 %19 + %21 = bitcast double addrspace(13)* %20 to i64 addrspace(13)* + store i64 %18, i64 addrspace(13)* %21, align 8 + %22 = add i64 %value_phi20, 1 + %23 = add i64 %14, %value_phi21 + %24 = icmp eq i64 %value_phi22, %7 + %25 = add i64 %value_phi22, 1 + br i1 %24, label %L94, label %L74 + +L94: + ret void +} Index: llvm/trunk/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll =================================================================== --- llvm/trunk/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll +++ llvm/trunk/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll @@ -58,7 +58,7 @@ ; Here it is not obvious what the limits are, since 'step' could be negative. -; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a))))) +; CHECK: Low: ((60000 + %a) umin (60000 + (-40000 * %step) + %a)) ; CHECK: High: (4 + ((60000 + %a) umax (60000 + (-40000 * %step) + %a))) define void @g(i64 %step) { Index: llvm/trunk/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll @@ -22,5 +22,5 @@ ret i32 %j.0.lcssa } -; CHECK: backedge-taken count is (-2147483632 + ((-1 + (-1 * %{{[xy]}})) smax (-1 + (-1 * %{{[xy]}})))) +; CHECK: backedge-taken count is (-2147483633 + (-1 * (%{{[xy]}} smin %{{[xy]}}))) Index: llvm/trunk/test/Analysis/ScalarEvolution/max-expr-cache.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/max-expr-cache.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/max-expr-cache.ll @@ -55,7 +55,7 @@ %tmp45 = icmp slt i32 %tmp43, 256 %tmp46 = select i1 %tmp45, i32 %tmp43, i32 256 ; CHECK: %tmp46 = select i1 %tmp45, i32 %tmp43, i32 256 -; CHECK-NEXT: --> (-1 + (-1 * (-257 smax (-1 + (-257 smax (-1 + (-257 smax (-1 + (-257 smax (-1 + (-257 smax (-1 + (-257 smax (-1 + (-257 smax (-1 + (-257 smax {(-1 + (-1 * %tmp3)),+,256}<%bb4>)) smax {(-1 + (-1 * %tmp3)),+,256}<%bb4>)) smax {(-1 + (-1 * %tmp3)),+,256}<%bb4>)) smax {(-1 + (-1 * %tmp3)),+,256}<%bb4>)) smax {(-1 + (-1 * %tmp3)),+,256}<%bb4>)) smax {(-1 + (-1 * %tmp3)),+,256}<%bb4>)) smax {(-1 + (-1 * %tmp3)),+,256}<%bb4>)) smax {(-1 + (-1 * %tmp3)),+,256}<%bb4>))) +; CHECK-NEXT: --> (256 smin (1 + (256 smin (1 + (256 smin (1 + (256 smin (1 + (256 smin (1 + (256 smin (1 + (256 smin (1 + (256 smin {%tmp3,+,-256}<%bb4>)) smin {%tmp3,+,-256}<%bb4>)) smin {%tmp3,+,-256}<%bb4>)) smin {%tmp3,+,-256}<%bb4>)) smin {%tmp3,+,-256}<%bb4>)) smin {%tmp3,+,-256}<%bb4>)) smin {%tmp3,+,-256}<%bb4>)) smin {%tmp3,+,-256}<%bb4>) %tmp47 = icmp sgt i32 %tmp44, %tmp46 %tmp48 = select i1 %tmp47, i32 %tmp44, i32 %tmp46 %tmp49 = ashr i32 %tmp48, 3 @@ -130,7 +130,7 @@ %tmp45 = icmp ult i32 %tmp43, 256 %tmp46 = select i1 %tmp45, i32 %tmp43, i32 256 ; CHECK: %tmp46 = select i1 %tmp45, i32 %tmp43, i32 256 -; CHECK-NEXT: --> (-1 + (-1 * (-257 umax (-1 + (-257 umax (-1 + (-257 umax (-1 + (-257 umax (-1 + (-257 umax (-1 + (-257 umax (-1 + (-257 umax (-1 + (-257 umax {(-1 + (-1 * %tmp3)),+,256}<%bb4>)) umax {(-1 + (-1 * %tmp3)),+,256}<%bb4>)) umax {(-1 + (-1 * %tmp3)),+,256}<%bb4>)) umax {(-1 + (-1 * %tmp3)),+,256}<%bb4>)) umax {(-1 + (-1 * %tmp3)),+,256}<%bb4>)) umax {(-1 + (-1 * %tmp3)),+,256}<%bb4>)) umax {(-1 + (-1 * %tmp3)),+,256}<%bb4>)) umax {(-1 + (-1 * %tmp3)),+,256}<%bb4>))) +; CHECK-NEXT: --> (256 umin (1 + (256 umin (1 + (256 umin (1 + (256 umin (1 + (256 umin (1 + (256 umin (1 + (256 umin (1 + (256 umin {%tmp3,+,-256}<%bb4>)) umin {%tmp3,+,-256}<%bb4>)) umin {%tmp3,+,-256}<%bb4>)) umin {%tmp3,+,-256}<%bb4>)) umin {%tmp3,+,-256}<%bb4>)) umin {%tmp3,+,-256}<%bb4>)) umin {%tmp3,+,-256}<%bb4>)) umin {%tmp3,+,-256}<%bb4>) %tmp47 = icmp ugt i32 %tmp44, %tmp46 %tmp48 = select i1 %tmp47, i32 %tmp44, i32 %tmp46 %tmp49 = ashr i32 %tmp48, 3 Index: llvm/trunk/test/Analysis/ScalarEvolution/min-max-exprs.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/min-max-exprs.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/min-max-exprs.ll @@ -33,7 +33,7 @@ %tmp9 = select i1 %tmp4, i64 %tmp5, i64 %tmp6 ; min(N, i+3) ; CHECK: select i1 %tmp4, i64 %tmp5, i64 %tmp6 -; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<%bb1> to i64))) smax (-1 + (-1 * (sext i32 %N to i64)))))) +; CHECK-NEXT: --> ((sext i32 {3,+,1}<%bb1> to i64) smin (sext i32 %N to i64)) %tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9 %tmp12 = load i32, i32* %tmp11, align 4 %tmp13 = shl nsw i32 %tmp12, 1 Index: llvm/trunk/test/Analysis/ScalarEvolution/predicated-trip-count.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/predicated-trip-count.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/predicated-trip-count.ll @@ -80,7 +80,7 @@ ; CHECK-NEXT: --> (sext i16 {%Start,+,-1}<%bb3> to i32) ; CHECK: Loop %bb3: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. -; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) smax (-1 + (-1 * %M)))) +; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (1 + (sext i16 %Start to i32) + (-1 * ((1 + (sext i16 %Start to i32)) smin %M))) ; CHECK-NEXT: Predicates: ; CHECK-NEXT: {%Start,+,-1}<%bb3> Added Flags: Index: llvm/trunk/test/Analysis/ScalarEvolution/trip-count14.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/trip-count14.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/trip-count14.ll @@ -81,7 +81,7 @@ br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times ; CHECK-LABEL: Determining loop execution counts for: @s32_max2_unpredictable_exit -; CHECK-NEXT: Loop %do.body: backedge-taken count is (-1 + (-1 * ((-1 + (-1 * ((2 + %n) smax %n)) + %n) umax (-1 + (-1 * %x) + %n)))) +; CHECK-NEXT: Loop %do.body: backedge-taken count is (((-1 * %n) + ((2 + %n) smax %n)) umin ((-1 * %n) + %x)) ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}} do.end: @@ -169,7 +169,7 @@ br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times ; CHECK-LABEL: Determining loop execution counts for: @u32_max2_unpredictable_exit -; CHECK-NEXT: Loop %do.body: backedge-taken count is (-1 + (-1 * ((-1 + (-1 * ((2 + %n) umax %n)) + %n) umax (-1 + (-1 * %x) + %n)))) +; CHECK-NEXT: Loop %do.body: backedge-taken count is (((-1 * %n) + ((2 + %n) umax %n)) umin ((-1 * %n) + %x)) ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}} do.end: Index: llvm/trunk/test/Analysis/ScalarEvolution/trip-count3.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/trip-count3.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/trip-count3.ll @@ -4,7 +4,7 @@ ; dividing by the stride will have a remainder. This could theoretically ; be teaching it how to use a more elaborate trip count computation. -; CHECK: Loop %bb3.i: backedge-taken count is ((64 + (-64 smax (-1 + (-1 * %0))) + %0) /u 64) +; CHECK: Loop %bb3.i: backedge-taken count is ((63 + (-1 * (63 smin %0)) + %0) /u 64) ; CHECK: Loop %bb3.i: max backedge-taken count is 33554431 %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } Index: llvm/trunk/test/Transforms/IRCE/conjunctive-checks.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/conjunctive-checks.ll +++ llvm/trunk/test/Transforms/IRCE/conjunctive-checks.ll @@ -5,17 +5,15 @@ ; CHECK-LABEL: @f_0( ; CHECK: loop.preheader: -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len -; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] -; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] -; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] +; CHECK: [[len_sub:[^ ]+]] = add i32 %len, -4 +; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[len_sub]] +; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[len_sub]] ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] -; CHECK: br i1 [[enter_main_loop]], label %loop.preheader2, label %main.pseudo.exit +; CHECK: br i1 [[enter_main_loop]], label %[[loop_preheader2:[^ ,]+]], label %main.pseudo.exit -; CHECK: loop.preheader2: +; CHECK: [[loop_preheader2]]: ; CHECK: br label %loop entry: @@ -35,9 +33,9 @@ ; CHECK: loop: ; CHECK: %cond = load volatile i1, i1* %cond_buf ; CHECK: %abc = and i1 %cond, true -; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit3, !prof !1 +; CHECK: br i1 %abc, label %in.bounds, label %[[loop_exit:[^ ,]+]], !prof !1 -; CHECK: out.of.bounds.loopexit: +; CHECK: [[loop_exit]]: ; CHECK: br label %out.of.bounds in.bounds: @@ -58,14 +56,10 @@ ; CHECK-LABEL: @f_1( ; CHECK: loop.preheader: -; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b -; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a -; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]] -; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]] -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]] -; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]] -; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]] +; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a +; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a +; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n +; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0 ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0 @@ -85,9 +79,9 @@ ; CHECK: loop: ; CHECK: %abc = and i1 true, true -; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit4, !prof !1 +; CHECK: br i1 %abc, label %in.bounds, label %[[oob_loopexit:[^ ,]+]], !prof !1 -; CHECK: out.of.bounds.loopexit: +; CHECK: [[oob_loopexit]]: ; CHECK-NEXT: br label %out.of.bounds Index: llvm/trunk/test/Transforms/IRCE/decrementing-loop.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/decrementing-loop.ll +++ llvm/trunk/test/Transforms/IRCE/decrementing-loop.ll @@ -29,11 +29,8 @@ ret void ; CHECK: loop.preheader: -; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]] -; CHECK: [[not_len_hiclamp:[^ ]+]] = select i1 [[not_len_hiclamp_cmp]], i32 [[not_len]], i32 [[not_n]] -; CHECK: [[len_hiclamp:[^ ]+]] = sub i32 -1, [[not_len_hiclamp]] +; CHECK: [[len_hiclamp_cmp:[^ ]+]] = icmp slt i32 %len, %n +; CHECK: [[len_hiclamp:[^ ]+]] = select i1 [[len_hiclamp_cmp]], i32 %len, i32 %n ; CHECK: [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 [[len_hiclamp]], 0 ; CHECK: [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 [[len_hiclamp]], i32 0 ; CHECK: %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1 Index: llvm/trunk/test/Transforms/IRCE/multiple-access-no-preloop.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/multiple-access-no-preloop.ll +++ llvm/trunk/test/Transforms/IRCE/multiple-access-no-preloop.ll @@ -38,14 +38,10 @@ ; CHECK-LABEL: @multiple_access_no_preloop( ; CHECK: loop.preheader: -; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b -; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a -; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]] -; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]] -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]] -; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]] -; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]] +; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a +; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a +; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n +; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0 ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0 Index: llvm/trunk/test/Transforms/IRCE/ranges_of_different_types.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/ranges_of_different_types.ll +++ llvm/trunk/test/Transforms/IRCE/ranges_of_different_types.ll @@ -23,12 +23,11 @@ ; CHECK-NOT: preloop ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 12, %len -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 -; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 -; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]] -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, -13 +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 +; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101 +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0 ; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at ; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit ; CHECK: loop @@ -83,13 +82,11 @@ ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647 ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13 ; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13 -; CHECK-NEXT: [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 -; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102 -; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]] -; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]] +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 +; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101 +; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0 ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader ; CHECK: loop.preloop: ; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 0, %loop.preloop.preheader ] @@ -151,14 +148,11 @@ ; CHECK-NOT: preloop ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len -; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14 -; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14 -; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]] -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102 -; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102 -; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13 +; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13 +; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]] +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101 ; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at ; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit ; CHECK: postloop: @@ -208,10 +202,9 @@ ; CHECK-LABEL: test_04( ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -14, %len -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102 -; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 -; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] +; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, 13 +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101 ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader ; CHECK: in.bounds.preloop: ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop @@ -252,12 +245,11 @@ ; CHECK-NOT: preloop ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 12, %len -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 -; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 -; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]] -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, -13 +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 +; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101 +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0 ; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at ; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit ; CHECK: loop @@ -297,13 +289,11 @@ ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647 ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13 ; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13 -; CHECK-NEXT: [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 -; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102 -; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]] -; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]] +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 +; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101 +; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0 ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader ; CHECK: in.bounds.preloop: ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop @@ -344,14 +334,11 @@ ; CHECK-NOT: preloop ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len -; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14 -; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14 -; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]] -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102 -; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102 -; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13 +; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13 +; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]] +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101 ; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at ; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit ; CHECK: loop @@ -388,10 +375,9 @@ ; CHECK-LABEL: test_08( ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -14, %len -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102 -; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 -; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] +; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, 13 +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101 ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader ; CHECK: in.bounds.preloop: ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop Index: llvm/trunk/test/Transforms/IRCE/rc-negative-bound.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/rc-negative-bound.ll +++ llvm/trunk/test/Transforms/IRCE/rc-negative-bound.ll @@ -114,49 +114,44 @@ ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i32 -1, [[BOUND]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], -1 -; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 -1 -; CHECK-NEXT: [[TMP5:%.*]] = sub i32 -1, [[SMAX1]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], -1 -; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 -1 -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SMAX2]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP2]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = sub i32 -1, [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = sub i32 -1, [[N]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = sub i32 -1, [[SMAX3]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], 0 -; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP14]], label [[LOOP_PREHEADER5:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] -; CHECK: loop.preheader5: +; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMIN]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[BOUND]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP3]], i32 [[BOUND]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[SMAX]], -1 +; CHECK-NEXT: [[SMIN1:%.*]] = select i1 [[TMP4]], i32 [[SMAX]], i32 -1 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SMIN1]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[N]], [[TMP6]] +; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP7]], i32 [[N]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[SMAX2]], 0 +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP8]], i32 [[SMAX2]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP9]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader4: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER5]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER4]] ] ; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 ; CHECK-NEXT: [[ABC:%.*]] = icmp slt i32 [[IDX]], [[BOUND]] -; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT6:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof !0 ; CHECK: in.bounds: ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] ; CHECK-NEXT: store i32 0, i32* [[ADDR]] ; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP15]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP10]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] ; CHECK: main.exit.selector: ; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] -; CHECK-NEXT: [[TMP16:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: main.pseudo.exit: ; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] ; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] ; CHECK-NEXT: br label [[POSTLOOP:%.*]] ; CHECK: out.of.bounds.loopexit: ; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] -; CHECK: out.of.bounds.loopexit6: +; CHECK: out.of.bounds.loopexit5: ; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] ; CHECK: out.of.bounds: ; CHECK-NEXT: ret void @@ -211,47 +206,41 @@ ; CHECK-NEXT: [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BOUND]], [[SMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = sub i32 -1, [[SMAX]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], -1 -; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 -1 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[SMAX1]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP3]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = sub i32 -1, [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = sub i32 -1, [[N]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] -; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = sub i32 -1, [[UMAX]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP11]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] -; CHECK: loop.preheader2: +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[BOUND:%.*]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[BOUND]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[BOUND]], [[SMAX]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[SMAX]], -1 +; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP2]], i32 [[SMAX]], i32 -1 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SMIN]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[N]], [[TMP4]] +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP5]], i32 [[N]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP6]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader1: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER2]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER1]] ] ; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 ; CHECK-NEXT: [[ABC:%.*]] = icmp slt i32 [[IDX]], [[BOUND]] -; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]], !prof !0 ; CHECK: in.bounds: ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] ; CHECK-NEXT: store i32 0, i32* [[ADDR]] ; CHECK-NEXT: [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP12]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] ; CHECK: main.exit.selector: ; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] -; CHECK-NEXT: [[TMP13:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: main.pseudo.exit: ; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] ; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] ; CHECK-NEXT: br label [[POSTLOOP:%.*]] ; CHECK: out.of.bounds.loopexit: ; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] -; CHECK: out.of.bounds.loopexit3: +; CHECK: out.of.bounds.loopexit2: ; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] ; CHECK: out.of.bounds: ; CHECK-NEXT: ret void @@ -413,49 +402,44 @@ ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i32 -1, [[BOUND]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], -1 -; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 -1 -; CHECK-NEXT: [[TMP5:%.*]] = sub i32 -1, [[SMAX1]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], -1 -; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 -1 -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SMAX2]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP2]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = sub i32 -1, [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = sub i32 -1, [[N]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = sub i32 -1, [[SMAX3]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], 0 -; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP14]], label [[LOOP_PREHEADER5:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] -; CHECK: loop.preheader5: +; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMIN]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[BOUND]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP3]], i32 [[BOUND]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[SMAX]], -1 +; CHECK-NEXT: [[SMIN1:%.*]] = select i1 [[TMP4]], i32 [[SMAX]], i32 -1 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SMIN1]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[N]], [[TMP6]] +; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP7]], i32 [[N]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[SMAX2]], 0 +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP8]], i32 [[SMAX2]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP9]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader4: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER5]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER4]] ] ; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 ; CHECK-NEXT: [[ABC:%.*]] = icmp ult i32 [[IDX]], [[BOUND]] -; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT6:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof !0 ; CHECK: in.bounds: ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] ; CHECK-NEXT: store i32 0, i32* [[ADDR]] ; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP15]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP10]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] ; CHECK: main.exit.selector: ; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] -; CHECK-NEXT: [[TMP16:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: main.pseudo.exit: ; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] ; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] ; CHECK-NEXT: br label [[POSTLOOP:%.*]] ; CHECK: out.of.bounds.loopexit: ; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] -; CHECK: out.of.bounds.loopexit6: +; CHECK: out.of.bounds.loopexit5: ; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] ; CHECK: out.of.bounds: ; CHECK-NEXT: ret void @@ -512,47 +496,41 @@ ; CHECK-NEXT: [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BOUND]], [[SMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = sub i32 -1, [[SMAX]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], -1 -; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 -1 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[SMAX1]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP3]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = sub i32 -1, [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = sub i32 -1, [[N]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] -; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = sub i32 -1, [[UMAX]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP11]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] -; CHECK: loop.preheader2: +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[BOUND:%.*]], 0 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[BOUND]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[BOUND]], [[SMAX]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[SMAX]], -1 +; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP2]], i32 [[SMAX]], i32 -1 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SMIN]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[N]], [[TMP4]] +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP5]], i32 [[N]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP6]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader1: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER2]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER1]] ] ; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 ; CHECK-NEXT: [[ABC:%.*]] = icmp ult i32 [[IDX]], [[BOUND]] -; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]], !prof !0 ; CHECK: in.bounds: ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] ; CHECK-NEXT: store i32 0, i32* [[ADDR]] ; CHECK-NEXT: [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] -; CHECK-NEXT: br i1 [[TMP12]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] ; CHECK: main.exit.selector: ; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] -; CHECK-NEXT: [[TMP13:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: main.pseudo.exit: ; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] ; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] ; CHECK-NEXT: br label [[POSTLOOP:%.*]] ; CHECK: out.of.bounds.loopexit: ; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] -; CHECK: out.of.bounds.loopexit3: +; CHECK: out.of.bounds.loopexit2: ; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] ; CHECK: out.of.bounds: ; CHECK-NEXT: ret void Index: llvm/trunk/test/Transforms/IRCE/single-access-no-preloop.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/single-access-no-preloop.ll +++ llvm/trunk/test/Transforms/IRCE/single-access-no-preloop.ll @@ -86,15 +86,13 @@ ; CHECK-LABEL: @single_access_no_preloop_with_offset( ; CHECK: loop.preheader: -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len -; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] -; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] -; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] +; CHECK: [[safe_range_end:[^ ]+]] = add i32 %len, -4 +; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[safe_range_end]] +; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[safe_range_end]] ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] -; CHECK: br i1 [[enter_main_loop]], label %loop.preheader2, label %main.pseudo.exit +; CHECK: br i1 [[enter_main_loop]], label %[[loop_preheader:[^ ,]+]], label %main.pseudo.exit ; CHECK: loop: ; CHECK: br i1 true, label %in.bounds, label %out.of.bounds Index: llvm/trunk/test/Transforms/IRCE/single-access-with-preloop.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/single-access-with-preloop.ll +++ llvm/trunk/test/Transforms/IRCE/single-access-with-preloop.ll @@ -34,11 +34,9 @@ ; CHECK: [[check_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, -2147483647 ; CHECK: [[safe_offset_preloop:[^ ]+]] = select i1 [[check_min_sint_offset]], i32 %offset, i32 -2147483647 ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. -; CHECK: [[not_safe_start:[^ ]+]] = add i32 [[safe_offset_preloop]], -1 -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]] -; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], i32 [[not_safe_start]], i32 [[not_n]] -; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at_loclamp]] +; CHECK: [[safe_start:[^ ]+]] = sub i32 0, [[safe_offset_preloop]] +; CHECK: [[exit_preloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_start]] +; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = select i1 [[exit_preloop_at_cond_loclamp]], i32 %n, i32 [[safe_start]] ; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[exit_preloop_at_loclamp]], 0 ; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0 @@ -46,17 +44,15 @@ ; CHECK: [[len_minus_sint_max:[^ ]+]] = add i32 %len, -2147483647 ; CHECK: [[check_len_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, [[len_minus_sint_max]] ; CHECK: [[safe_offset_mainloop:[^ ]+]] = select i1 [[check_len_min_sint_offset]], i32 %offset, i32 [[len_minus_sint_max]] -; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 [[safe_offset_mainloop]], -1 ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. -; CHECK: [[not_safe_upper_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len -; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_upper_end]], [[not_n]] -; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_upper_end]], i32 [[not_n]] +; CHECK: [[safe_upper_end:[^ ]+]] = sub i32 %len, [[safe_offset_mainloop]] +; CHECK: [[exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_upper_end]] +; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_loclamp]], i32 %n, i32 [[safe_upper_end]] ; CHECK: [[check_offset_mainloop_2:[^ ]+]] = icmp sgt i32 %offset, 0 ; CHECK: [[safe_offset_mainloop_2:[^ ]+]] = select i1 [[check_offset_mainloop_2]], i32 %offset, i32 0 -; CHECK: [[not_safe_lower_end:[^ ]+]] = add i32 [[safe_offset_mainloop_2]], -2147483648 -; CHECK: [[not_exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp sgt i32 [[not_exit_mainloop_at_loclamp]], [[not_safe_lower_end]] -; CHECK: [[not_exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_hiclamp]], i32 [[not_exit_mainloop_at_loclamp]], i32 [[not_safe_lower_end]] -; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_hiclamp]] +; CHECK: [[safe_lower_end:[^ ]+]] = sub i32 2147483647, [[safe_offset_mainloop_2]] +; CHECK: [[exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp slt i32 [[exit_mainloop_at_loclamp]], [[safe_lower_end]] +; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_hiclamp]], i32 [[exit_mainloop_at_loclamp]], i32 [[safe_lower_end]] ; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_hiclamp]], 0 ; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 [[exit_mainloop_at_hiclamp]], i32 0 @@ -67,7 +63,7 @@ ; CHECK: %abc.high = icmp slt i32 %array.idx, %len ; CHECK: %abc.low = icmp sge i32 %array.idx, 0 ; CHECK: %abc = and i1 true, true -; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit11 +; CHECK: br i1 %abc, label %in.bounds, label %[[loopexit:[^ ,]+]] ; CHECK: in.bounds: ; CHECK: [[continue_mainloop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[exit_mainloop_at]] Index: llvm/trunk/test/Transforms/IRCE/unsigned_comparisons_ugt.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/unsigned_comparisons_ugt.ll +++ llvm/trunk/test/Transforms/IRCE/unsigned_comparisons_ugt.ll @@ -58,8 +58,8 @@ ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1 -; CHECK-NEXT: %umax = select i1 [[COND1]], i32 %len, i32 1 -; CHECK-NEXT: %exit.preloop.at = add i32 %umax, -1 +; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1 +; CHECK-NEXT: %exit.preloop.at = add i32 [[UMIN]], -1 ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 100, %exit.preloop.at ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit ; CHECK: mainloop: @@ -149,8 +149,8 @@ ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1 -; CHECK-NEXT: %umax = select i1 [[COND1]], i32 %len, i32 1 -; CHECK-NEXT: %exit.preloop.at = add i32 %umax, -1 +; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1 +; CHECK-NEXT: %exit.preloop.at = add i32 [[UMIN]], -1 ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 -2147483648, %exit.preloop.at ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit ; CHECK: mainloop: Index: llvm/trunk/test/Transforms/IRCE/unsigned_comparisons_ult.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/unsigned_comparisons_ult.ll +++ llvm/trunk/test/Transforms/IRCE/unsigned_comparisons_ult.ll @@ -61,8 +61,8 @@ ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1 -; CHECK-NEXT: %umax = select i1 [[COND1]], i32 %len, i32 1 -; CHECK-NEXT: %exit.preloop.at = add i32 %umax, -1 +; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1 +; CHECK-NEXT: %exit.preloop.at = add i32 [[UMIN]], -1 ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 100, %exit.preloop.at ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit ; CHECK: mainloop: @@ -194,8 +194,8 @@ ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1 -; CHECK-NEXT: %umax = select i1 [[COND1]], i32 %len, i32 1 -; CHECK-NEXT: %exit.preloop.at = add i32 %umax, -1 +; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1 +; CHECK-NEXT: %exit.preloop.at = add i32 [[UMIN]], -1 ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 -2147483648, %exit.preloop.at ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit ; CHECK: mainloop: Index: llvm/trunk/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll @@ -6,13 +6,12 @@ define i32 @remove_loop(i32 %size) { ; CHECK-LABEL: @remove_loop( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[SIZE:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], -32 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -32 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SIZE]], [[UMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 32 -; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 5 -; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 5 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SIZE]], 31 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[SIZE]], i32 31 +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[UMAX]] +; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 5 +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], 5 ; CHECK-NEXT: br label [[WHILE_COND:%.*]] ; CHECK: while.cond: ; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_COND]] ] @@ -20,8 +19,8 @@ ; CHECK-NEXT: [[SUB]] = add i32 [[SIZE_ADDR_0]], -32 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_COND]], label [[WHILE_END:%.*]] ; CHECK: while.end: -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[SIZE]], [[TMP5]] -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[SIZE]], [[TMP4]] +; CHECK-NEXT: ret i32 [[TMP5]] ; entry: br label %while.cond Index: llvm/trunk/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll =================================================================== --- llvm/trunk/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll +++ llvm/trunk/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll @@ -14,8 +14,6 @@ ; current LSR cost model. ; CHECK-NOT: = ptrtoint i8* undef to i64 ; CHECK: .lr.ph -; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp{{[0-9]+}}, -1 -; CHECK: sub i64 [[TMP]], %tmp{{[0-9]+}} ; CHECK: ret void define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 { bb: Index: llvm/trunk/test/Transforms/LoopVectorize/X86/pr35432.ll =================================================================== --- llvm/trunk/test/Transforms/LoopVectorize/X86/pr35432.ll +++ llvm/trunk/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -27,7 +27,6 @@ ; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[CONV17]], 0 ; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END12:%.*]] ; CHECK: for.body.lr.ph: -; CHECK-NEXT: [[TMP3:%.*]] = sub i32 -1, [[TMP2]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[STOREMERGE_IN9:%.*]] = phi i32 [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], [[FOR_INC9:%.*]] ] @@ -37,77 +36,74 @@ ; CHECK: for.body8.lr.ph: ; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[STOREMERGE_IN9]] to i8 ; CHECK-NEXT: [[DOTPROMOTED:%.*]] = load i32, i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[CONV3]], -1 -; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32 -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 -1, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP3]], [[TMP6]] -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP7]], i32 [[TMP3]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[UMAX]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], [[TMP5]] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP9]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[CONV3]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP6]], i32 [[TMP2]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[UMAX]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP7]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[CONV3]], -1 -; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32 -; CHECK-NEXT: [[TMP12:%.*]] = sub i32 -1, [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i32 [[TMP3]], [[TMP12]] -; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[TMP13]], i32 [[TMP3]], i32 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[UMAX1]], 1 -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP11]] -; CHECK-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i8 -; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP16]]) +; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[CONV3]], -1 +; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP2]], [[TMP9]] +; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[TMP10]], i32 [[TMP2]], i32 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[UMAX1]] +; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8 +; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP12]]) ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP17:%.*]] = add i8 [[TMP10]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP18:%.*]] = sub i8 [[TMP10]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP19:%.*]] = icmp ugt i8 [[TMP18]], [[TMP10]] -; CHECK-NEXT: [[TMP20:%.*]] = icmp ult i8 [[TMP17]], [[TMP10]] -; CHECK-NEXT: [[TMP21:%.*]] = select i1 true, i1 [[TMP19]], i1 [[TMP20]] -; CHECK-NEXT: [[TMP22:%.*]] = icmp ugt i32 [[TMP15]], 255 -; CHECK-NEXT: [[TMP23:%.*]] = or i1 [[TMP21]], [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = or i1 [[TMP23]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP25:%.*]] = or i1 false, [[TMP24]] -; CHECK-NEXT: br i1 [[TMP25]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP13:%.*]] = add i8 [[TMP8]], [[MUL_RESULT]] +; CHECK-NEXT: [[TMP14:%.*]] = sub i8 [[TMP8]], [[MUL_RESULT]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ugt i8 [[TMP14]], [[TMP8]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ult i8 [[TMP13]], [[TMP8]] +; CHECK-NEXT: [[TMP17:%.*]] = select i1 true, i1 [[TMP15]], i1 [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp ugt i32 [[TMP11]], 255 +; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP21:%.*]] = or i1 false, [[TMP20]] +; CHECK-NEXT: br i1 [[TMP21]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP9]], 8 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP9]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP7]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP7]], [[N_MOD_VF]] ; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[IND_END:%.*]] = sub i8 [[CONV3]], [[CAST_CRD]] -; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP26]], [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP27:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP27]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP22]], [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[INDEX]] to i8 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP23]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> undef, i8 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[INDUCTION3:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP28:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP29:%.*]] = add i8 [[OFFSET_IDX]], -4 -; CHECK-NEXT: [[TMP30]] = add <4 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP31]] = add <4 x i32> [[VEC_PHI2]], -; CHECK-NEXT: [[TMP32:%.*]] = add i8 [[TMP28]], -1 -; CHECK-NEXT: [[TMP33:%.*]] = add i8 [[TMP29]], -1 -; CHECK-NEXT: [[TMP34:%.*]] = zext i8 [[TMP32]] to i32 -; CHECK-NEXT: [[TMP35:%.*]] = zext i8 [[TMP33]] to i32 +; CHECK-NEXT: [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = add i8 [[OFFSET_IDX]], -4 +; CHECK-NEXT: [[TMP26]] = add <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP27]] = add <4 x i32> [[VEC_PHI2]], +; CHECK-NEXT: [[TMP28:%.*]] = add i8 [[TMP24]], -1 +; CHECK-NEXT: [[TMP29:%.*]] = add i8 [[TMP25]], -1 +; CHECK-NEXT: [[TMP30:%.*]] = zext i8 [[TMP28]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = zext i8 [[TMP29]] to i32 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP31]], [[TMP30]] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP27]], [[TMP26]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <4 x i32> [[BIN_RDX4]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX6:%.*]] = add <4 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0 -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP9]], [[N_VEC]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP7]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY8:%.*]] ; CHECK: for.body8: ; CHECK-NEXT: [[INC5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ] @@ -118,7 +114,7 @@ ; CHECK-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP2]], [[CONV5]] ; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], !llvm.loop !2 ; CHECK: for.cond4.for.inc9_crit_edge: -; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: store i32 [[INC_LCSSA]], i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16 ; CHECK-NEXT: br label [[FOR_INC9]] ; CHECK: for.inc9: