Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -15,7 +15,9 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/IVDescriptors.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/Transforms/Utils/ValueMapper.h" namespace llvm { @@ -450,6 +452,150 @@ const SmallVectorImpl &PointerChecks, ScalarEvolution *SE); +//===----------------------------------------------------------------------===// +// Widen Induction Variables - Extend the width of an IV to cover its +// widest uses. +//===----------------------------------------------------------------------===// + +class WidenIV { + // Parameters + PHINode *OrigPhi; + Type *WideType; + + // Context + LoopInfo *LI; + Loop *L; + ScalarEvolution *SE; + DominatorTree *DT; + + // Does the module have any calls to the llvm.experimental.guard intrinsic + // at all? If not we can avoid scanning instructions looking for guards. + bool HasGuards; + + bool UsePostIncrementRanges; + + // Statistics + unsigned NumElimExt = 0; + unsigned NumWidened = 0; + + // Result + PHINode *WidePhi = nullptr; + Instruction *WideInc = nullptr; + const SCEV *WideIncExpr = nullptr; + SmallVectorImpl &DeadInsts; + + SmallPtrSet Widened; + + enum ExtendKind { ZeroExtended, SignExtended, Unknown }; + + // A map tracking the kind of extension used to widen each narrow IV + // and narrow IV user. + // Key: pointer to a narrow IV or IV user. + // Value: the kind of extension used to widen this Instruction. + DenseMap, ExtendKind> ExtendKindMap; + + using DefUserPair = std::pair, AssertingVH>; + + // A map with control-dependent ranges for post increment IV uses. The key is + // a pair of IV def and a use of this def denoting the context. The value is + // a ConstantRange representing possible values of the def at the given + // context. + DenseMap PostIncRangeInfos; + + Optional getPostIncRangeInfo(Value *Def, + Instruction *UseI) { + DefUserPair Key(Def, UseI); + auto It = PostIncRangeInfos.find(Key); + return It == PostIncRangeInfos.end() + ? Optional(None) + : Optional(It->second); + } + + void calculatePostIncRanges(PHINode *OrigPhi); + void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); + + void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { + DefUserPair Key(Def, UseI); + auto It = PostIncRangeInfos.find(Key); + if (It == PostIncRangeInfos.end()) + PostIncRangeInfos.insert({Key, R}); + else + It->second = R.intersectWith(It->second); + } + +public: + // Collect information about induction variables that are used by sign/zero + // extend operations. This information is recorded by CollectExtend and provides + // the input to WidenIV. + struct WideIVInfo { + PHINode *NarrowIV = nullptr; + + // Widest integer type created [sz]ext + Type *WidestNativeType = nullptr; + + // Was a sext user seen before a zext? + bool IsSigned = false; + }; + + /// Record a link in the Narrow IV def-use chain along with the WideIV that + /// computes the same value as the Narrow IV def. This avoids caching Use* + /// pointers. + struct NarrowIVDefUse { + Instruction *NarrowDef = nullptr; + Instruction *NarrowUse = nullptr; + Instruction *WideDef = nullptr; + + // True if the narrow def is never negative. Tracking this information lets + // us use a sign extension instead of a zero extension or vice versa, when + // profitable and legal. + bool NeverNegative = false; + + NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, + bool NeverNegative) + : NarrowDef(ND), NarrowUse(NU), WideDef(WD), + NeverNegative(NeverNegative) {} + }; + + WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, + DominatorTree *DTree, SmallVectorImpl &DI, + bool HasGuards, bool UsePostIncrementRanges = true); + + PHINode *createWideIV(SCEVExpander &Rewriter); + + unsigned getNumElimExt() { return NumElimExt; }; + unsigned getNumWidened() { return NumWidened; }; + +protected: + Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, + Instruction *Use); + + Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); + Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR); + Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU); + + ExtendKind getExtendKind(Instruction *I); + + using WidenedRecTy = std::pair; + + WidenedRecTy getWideRecurrence(NarrowIVDefUse DU); + + WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU); + + const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, + unsigned OpCode) const; + + Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); + + bool widenLoopCompare(NarrowIVDefUse DU); + bool widenWithVariantUse(NarrowIVDefUse DU); + + void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); + +private: + SmallVector NarrowIVUsers; +}; + } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H Index: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -182,57 +182,6 @@ } // end anonymous namespace -/// Determine the insertion point for this user. By default, insert immediately -/// before the user. SCEVExpander or LICM will hoist loop invariants out of the -/// loop. For PHI nodes, there may be multiple uses, so compute the nearest -/// common dominator for the incoming blocks. A nullptr can be returned if no -/// viable location is found: it may happen if User is a PHI and Def only comes -/// to this PHI from unreachable blocks. -static Instruction *getInsertPointForUses(Instruction *User, Value *Def, - DominatorTree *DT, LoopInfo *LI) { - PHINode *PHI = dyn_cast(User); - if (!PHI) - return User; - - Instruction *InsertPt = nullptr; - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { - if (PHI->getIncomingValue(i) != Def) - continue; - - BasicBlock *InsertBB = PHI->getIncomingBlock(i); - - if (!DT->isReachableFromEntry(InsertBB)) - continue; - - if (!InsertPt) { - InsertPt = InsertBB->getTerminator(); - continue; - } - InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); - InsertPt = InsertBB->getTerminator(); - } - - // If we have skipped all inputs, it means that Def only comes to Phi from - // unreachable blocks. - if (!InsertPt) - return nullptr; - - auto *DefI = dyn_cast(Def); - if (!DefI) - return InsertPt; - - assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); - - auto *L = LI->getLoopFor(DefI->getParent()); - assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); - - for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) - if (LI->getLoopFor(DTN->getBlock()) == L) - return DTN->getBlock()->getTerminator(); - - llvm_unreachable("DefI dominates InsertPt!"); -} - //===----------------------------------------------------------------------===// // rewriteNonIntegerIVs and helpers. Prefer integer IVs. //===----------------------------------------------------------------------===// @@ -554,27 +503,11 @@ // IV Widening - Extend the width of an IV to cover its widest uses. //===----------------------------------------------------------------------===// -namespace { - -// Collect information about induction variables that are used by sign/zero -// extend operations. This information is recorded by CollectExtend and provides -// the input to WidenIV. -struct WideIVInfo { - PHINode *NarrowIV = nullptr; - - // Widest integer type created [sz]ext - Type *WidestNativeType = nullptr; - - // Was a sext user seen before a zext? - bool IsSigned = false; -}; - -} // end anonymous namespace - /// Update information about the induction variable that is extended by this /// sign or zero extend operation. This is used to determine the final width of /// the IV before actually widening it. -static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, +static void visitIVCast(CastInst *Cast, WidenIV::WideIVInfo &WI, + ScalarEvolution *SE, const TargetTransformInfo *TTI) { bool IsSigned = Cast->getOpcode() == Instruction::SExt; if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) @@ -620,966 +553,6 @@ WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); } -namespace { - -/// Record a link in the Narrow IV def-use chain along with the WideIV that -/// computes the same value as the Narrow IV def. This avoids caching Use* -/// pointers. -struct NarrowIVDefUse { - Instruction *NarrowDef = nullptr; - Instruction *NarrowUse = nullptr; - Instruction *WideDef = nullptr; - - // True if the narrow def is never negative. Tracking this information lets - // us use a sign extension instead of a zero extension or vice versa, when - // profitable and legal. - bool NeverNegative = false; - - NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, - bool NeverNegative) - : NarrowDef(ND), NarrowUse(NU), WideDef(WD), - NeverNegative(NeverNegative) {} -}; - -/// The goal of this transform is to remove sign and zero extends without -/// creating any new induction variables. To do this, it creates a new phi of -/// the wider type and redirects all users, either removing extends or inserting -/// truncs whenever we stop propagating the type. -class WidenIV { - // Parameters - PHINode *OrigPhi; - Type *WideType; - - // Context - LoopInfo *LI; - Loop *L; - ScalarEvolution *SE; - DominatorTree *DT; - - // Does the module have any calls to the llvm.experimental.guard intrinsic - // at all? If not we can avoid scanning instructions looking for guards. - bool HasGuards; - - // Result - PHINode *WidePhi = nullptr; - Instruction *WideInc = nullptr; - const SCEV *WideIncExpr = nullptr; - SmallVectorImpl &DeadInsts; - - SmallPtrSet Widened; - SmallVector NarrowIVUsers; - - enum ExtendKind { ZeroExtended, SignExtended, Unknown }; - - // A map tracking the kind of extension used to widen each narrow IV - // and narrow IV user. - // Key: pointer to a narrow IV or IV user. - // Value: the kind of extension used to widen this Instruction. - DenseMap, ExtendKind> ExtendKindMap; - - using DefUserPair = std::pair, AssertingVH>; - - // A map with control-dependent ranges for post increment IV uses. The key is - // a pair of IV def and a use of this def denoting the context. The value is - // a ConstantRange representing possible values of the def at the given - // context. - DenseMap PostIncRangeInfos; - - Optional getPostIncRangeInfo(Value *Def, - Instruction *UseI) { - DefUserPair Key(Def, UseI); - auto It = PostIncRangeInfos.find(Key); - return It == PostIncRangeInfos.end() - ? Optional(None) - : Optional(It->second); - } - - void calculatePostIncRanges(PHINode *OrigPhi); - void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); - - void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { - DefUserPair Key(Def, UseI); - auto It = PostIncRangeInfos.find(Key); - if (It == PostIncRangeInfos.end()) - PostIncRangeInfos.insert({Key, R}); - else - It->second = R.intersectWith(It->second); - } - -public: - WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, - DominatorTree *DTree, SmallVectorImpl &DI, - bool HasGuards) - : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), - L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), - HasGuards(HasGuards), DeadInsts(DI) { - assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); - ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended; - } - - PHINode *createWideIV(SCEVExpander &Rewriter); - -protected: - Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, - Instruction *Use); - - Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); - Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU, - const SCEVAddRecExpr *WideAR); - Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU); - - ExtendKind getExtendKind(Instruction *I); - - using WidenedRecTy = std::pair; - - WidenedRecTy getWideRecurrence(NarrowIVDefUse DU); - - WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU); - - const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, - unsigned OpCode) const; - - Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); - - bool widenLoopCompare(NarrowIVDefUse DU); - bool widenWithVariantUse(NarrowIVDefUse DU); - - void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); -}; - -} // end anonymous namespace - -Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, - bool IsSigned, Instruction *Use) { - // Set the debug location and conservative insertion point. - IRBuilder<> Builder(Use); - // Hoist the insertion point into loop preheaders as far as possible. - for (const Loop *L = LI->getLoopFor(Use->getParent()); - L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper); - L = L->getParentLoop()) - Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); - - return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : - Builder.CreateZExt(NarrowOper, WideType); -} - -/// Instantiate a wide operation to replace a narrow operation. This only needs -/// to handle operations that can evaluation to SCEVAddRec. It can safely return -/// 0 for any operation we decide not to clone. -Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU, - const SCEVAddRecExpr *WideAR) { - unsigned Opcode = DU.NarrowUse->getOpcode(); - switch (Opcode) { - default: - return nullptr; - case Instruction::Add: - case Instruction::Mul: - case Instruction::UDiv: - case Instruction::Sub: - return cloneArithmeticIVUser(DU, WideAR); - - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - return cloneBitwiseIVUser(DU); - } -} - -Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) { - Instruction *NarrowUse = DU.NarrowUse; - Instruction *NarrowDef = DU.NarrowDef; - Instruction *WideDef = DU.WideDef; - - LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n"); - - // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything - // about the narrow operand yet so must insert a [sz]ext. It is probably loop - // invariant and will be folded or hoisted. If it actually comes from a - // widened IV, it should be removed during a future call to widenIVUse. - bool IsSigned = getExtendKind(NarrowDef) == SignExtended; - Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(0), WideType, - IsSigned, NarrowUse); - Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(1), WideType, - IsSigned, NarrowUse); - - auto *NarrowBO = cast(NarrowUse); - auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, - NarrowBO->getName()); - IRBuilder<> Builder(NarrowUse); - Builder.Insert(WideBO); - WideBO->copyIRFlags(NarrowBO); - return WideBO; -} - -Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU, - const SCEVAddRecExpr *WideAR) { - Instruction *NarrowUse = DU.NarrowUse; - Instruction *NarrowDef = DU.NarrowDef; - Instruction *WideDef = DU.WideDef; - - LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); - - unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; - - // We're trying to find X such that - // - // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X - // - // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), - // and check using SCEV if any of them are correct. - - // Returns true if extending NonIVNarrowDef according to `SignExt` is a - // correct solution to X. - auto GuessNonIVOperand = [&](bool SignExt) { - const SCEV *WideLHS; - const SCEV *WideRHS; - - auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { - if (SignExt) - return SE->getSignExtendExpr(S, Ty); - return SE->getZeroExtendExpr(S, Ty); - }; - - if (IVOpIdx == 0) { - WideLHS = SE->getSCEV(WideDef); - const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); - WideRHS = GetExtend(NarrowRHS, WideType); - } else { - const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); - WideLHS = GetExtend(NarrowLHS, WideType); - WideRHS = SE->getSCEV(WideDef); - } - - // WideUse is "WideDef `op.wide` X" as described in the comment. - const SCEV *WideUse = nullptr; - - switch (NarrowUse->getOpcode()) { - default: - llvm_unreachable("No other possibility!"); - - case Instruction::Add: - WideUse = SE->getAddExpr(WideLHS, WideRHS); - break; - - case Instruction::Mul: - WideUse = SE->getMulExpr(WideLHS, WideRHS); - break; - - case Instruction::UDiv: - WideUse = SE->getUDivExpr(WideLHS, WideRHS); - break; - - case Instruction::Sub: - WideUse = SE->getMinusSCEV(WideLHS, WideRHS); - break; - } - - return WideUse == WideAR; - }; - - bool SignExtend = getExtendKind(NarrowDef) == SignExtended; - if (!GuessNonIVOperand(SignExtend)) { - SignExtend = !SignExtend; - if (!GuessNonIVOperand(SignExtend)) - return nullptr; - } - - Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(0), WideType, - SignExtend, NarrowUse); - Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(1), WideType, - SignExtend, NarrowUse); - - auto *NarrowBO = cast(NarrowUse); - auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, - NarrowBO->getName()); - - IRBuilder<> Builder(NarrowUse); - Builder.Insert(WideBO); - WideBO->copyIRFlags(NarrowBO); - return WideBO; -} - -WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) { - auto It = ExtendKindMap.find(I); - assert(It != ExtendKindMap.end() && "Instruction not yet extended!"); - return It->second; -} - -const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, - unsigned OpCode) const { - if (OpCode == Instruction::Add) - return SE->getAddExpr(LHS, RHS); - if (OpCode == Instruction::Sub) - return SE->getMinusSCEV(LHS, RHS); - if (OpCode == Instruction::Mul) - return SE->getMulExpr(LHS, RHS); - - llvm_unreachable("Unsupported opcode."); -} - -/// No-wrap operations can transfer sign extension of their result to their -/// operands. Generate the SCEV value for the widened operation without -/// actually modifying the IR yet. If the expression after extending the -/// operands is an AddRec for this loop, return the AddRec and the kind of -/// extension used. -WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) { - // Handle the common case of add - const unsigned OpCode = DU.NarrowUse->getOpcode(); - // Only Add/Sub/Mul instructions supported yet. - if (OpCode != Instruction::Add && OpCode != Instruction::Sub && - OpCode != Instruction::Mul) - return {nullptr, Unknown}; - - // One operand (NarrowDef) has already been extended to WideDef. Now determine - // if extending the other will lead to a recurrence. - const unsigned ExtendOperIdx = - DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; - assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); - - const SCEV *ExtendOperExpr = nullptr; - const OverflowingBinaryOperator *OBO = - cast(DU.NarrowUse); - ExtendKind ExtKind = getExtendKind(DU.NarrowDef); - if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) - ExtendOperExpr = SE->getSignExtendExpr( - SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); - else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) - ExtendOperExpr = SE->getZeroExtendExpr( - SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); - else - return {nullptr, Unknown}; - - // When creating this SCEV expr, don't apply the current operations NSW or NUW - // flags. This instruction may be guarded by control flow that the no-wrap - // behavior depends on. Non-control-equivalent instructions can be mapped to - // the same SCEV expression, and it would be incorrect to transfer NSW/NUW - // semantics to those operations. - const SCEV *lhs = SE->getSCEV(DU.WideDef); - const SCEV *rhs = ExtendOperExpr; - - // Let's swap operands to the initial order for the case of non-commutative - // operations, like SUB. See PR21014. - if (ExtendOperIdx == 0) - std::swap(lhs, rhs); - const SCEVAddRecExpr *AddRec = - dyn_cast(getSCEVByOpCode(lhs, rhs, OpCode)); - - if (!AddRec || AddRec->getLoop() != L) - return {nullptr, Unknown}; - - return {AddRec, ExtKind}; -} - -/// Is this instruction potentially interesting for further simplification after -/// widening it's type? In other words, can the extend be safely hoisted out of -/// the loop with SCEV reducing the value to a recurrence on the same loop. If -/// so, return the extended recurrence and the kind of extension used. Otherwise -/// return {nullptr, Unknown}. -WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) { - if (!SE->isSCEVable(DU.NarrowUse->getType())) - return {nullptr, Unknown}; - - const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); - if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= - SE->getTypeSizeInBits(WideType)) { - // NarrowUse implicitly widens its operand. e.g. a gep with a narrow - // index. So don't follow this use. - return {nullptr, Unknown}; - } - - const SCEV *WideExpr; - ExtendKind ExtKind; - if (DU.NeverNegative) { - WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); - if (isa(WideExpr)) - ExtKind = SignExtended; - else { - WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); - ExtKind = ZeroExtended; - } - } else if (getExtendKind(DU.NarrowDef) == SignExtended) { - WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); - ExtKind = SignExtended; - } else { - WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); - ExtKind = ZeroExtended; - } - const SCEVAddRecExpr *AddRec = dyn_cast(WideExpr); - if (!AddRec || AddRec->getLoop() != L) - return {nullptr, Unknown}; - return {AddRec, ExtKind}; -} - -/// This IV user cannot be widened. Replace this use of the original narrow IV -/// with a truncation of the new wide IV to isolate and eliminate the narrow IV. -static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) { - auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); - if (!InsertPt) - return; - LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " - << *DU.NarrowUse << "\n"); - IRBuilder<> Builder(InsertPt); - Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); - DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); -} - -/// If the narrow use is a compare instruction, then widen the compare -// (and possibly the other operand). The extend operation is hoisted into the -// loop preheader as far as possible. -bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { - ICmpInst *Cmp = dyn_cast(DU.NarrowUse); - if (!Cmp) - return false; - - // We can legally widen the comparison in the following two cases: - // - // - The signedness of the IV extension and comparison match - // - // - The narrow IV is always positive (and thus its sign extension is equal - // to its zero extension). For instance, let's say we're zero extending - // %narrow for the following use - // - // icmp slt i32 %narrow, %val ... (A) - // - // and %narrow is always positive. Then - // - // (A) == icmp slt i32 sext(%narrow), sext(%val) - // == icmp slt i32 zext(%narrow), sext(%val) - bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended; - if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) - return false; - - Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); - unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); - unsigned IVWidth = SE->getTypeSizeInBits(WideType); - assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); - - // Widen the compare instruction. - auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); - if (!InsertPt) - return false; - IRBuilder<> Builder(InsertPt); - DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); - - // Widen the other operand of the compare, if necessary. - if (CastWidth < IVWidth) { - Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); - DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); - } - return true; -} - -// The widenIVUse avoids generating trunc by evaluating the use as AddRec, this -// will not work when: -// 1) SCEV traces back to an instruction inside the loop that SCEV can not -// expand, eg. add %indvar, (load %addr) -// 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant -// While SCEV fails to avoid trunc, we can still try to use instruction -// combining approach to prove trunc is not required. This can be further -// extended with other instruction combining checks, but for now we handle the -// following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext") -// -// Src: -// %c = sub nsw %b, %indvar -// %d = sext %c to i64 -// Dst: -// %indvar.ext1 = sext %indvar to i64 -// %m = sext %b to i64 -// %d = sub nsw i64 %m, %indvar.ext1 -// Therefore, as long as the result of add/sub/mul is extended to wide type, no -// trunc is required regardless of how %b is generated. This pattern is common -// when calculating address in 64 bit architecture -bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) { - Instruction *NarrowUse = DU.NarrowUse; - Instruction *NarrowDef = DU.NarrowDef; - Instruction *WideDef = DU.WideDef; - - // Handle the common case of add - const unsigned OpCode = NarrowUse->getOpcode(); - // Only Add/Sub/Mul instructions are supported. - if (OpCode != Instruction::Add && OpCode != Instruction::Sub && - OpCode != Instruction::Mul) - return false; - - // The operand that is not defined by NarrowDef of DU. Let's call it the - // other operand. - assert((NarrowUse->getOperand(0) == NarrowDef || - NarrowUse->getOperand(1) == NarrowDef) && - "bad DU"); - - const OverflowingBinaryOperator *OBO = - cast(NarrowUse); - ExtendKind ExtKind = getExtendKind(NarrowDef); - bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap(); - bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap(); - if (!CanSignExtend && !CanZeroExtend) - return false; - - // Verifying that Defining operand is an AddRec - const SCEV *Op1 = SE->getSCEV(WideDef); - const SCEVAddRecExpr *AddRecOp1 = dyn_cast(Op1); - if (!AddRecOp1 || AddRecOp1->getLoop() != L) - return false; - - if (ExtKind == SignExtended) { - for (Use &U : NarrowUse->uses()) { - SExtInst *User = dyn_cast(U.getUser()); - if (!User || User->getType() != WideType) - return false; - } - } else { // ExtKind == ZeroExtended - for (Use &U : NarrowUse->uses()) { - ZExtInst *User = dyn_cast(U.getUser()); - if (!User || User->getType() != WideType) - return false; - } - } - - LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); - - // Generating a widening use instruction. - Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(0), WideType, - ExtKind, NarrowUse); - Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(1), WideType, - ExtKind, NarrowUse); - - auto *NarrowBO = cast(NarrowUse); - auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, - NarrowBO->getName()); - IRBuilder<> Builder(NarrowUse); - Builder.Insert(WideBO); - WideBO->copyIRFlags(NarrowBO); - ExtendKindMap[NarrowUse] = ExtKind; - - for (Use &U : NarrowUse->uses()) { - Instruction *User = nullptr; - if (ExtKind == SignExtended) - User = cast(U.getUser()); - else - User = cast(U.getUser()); - assert(User->getType() == WideType && "Checked before!"); - LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " - << *WideBO << "\n"); - ++NumElimExt; - User->replaceAllUsesWith(WideBO); - DeadInsts.emplace_back(User); - } - return true; -} - -/// Determine whether an individual user of the narrow IV can be widened. If so, -/// return the wide clone of the user. -Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { - assert(ExtendKindMap.count(DU.NarrowDef) && - "Should already know the kind of extension used to widen NarrowDef"); - - // Stop traversing the def-use chain at inner-loop phis or post-loop phis. - if (PHINode *UsePhi = dyn_cast(DU.NarrowUse)) { - if (LI->getLoopFor(UsePhi->getParent()) != L) { - // For LCSSA phis, sink the truncate outside the loop. - // After SimplifyCFG most loop exit targets have a single predecessor. - // Otherwise fall back to a truncate within the loop. - if (UsePhi->getNumOperands() != 1) - truncateIVUse(DU, DT, LI); - else { - // Widening the PHI requires us to insert a trunc. The logical place - // for this trunc is in the same BB as the PHI. This is not possible if - // the BB is terminated by a catchswitch. - if (isa(UsePhi->getParent()->getTerminator())) - return nullptr; - - PHINode *WidePhi = - PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", - UsePhi); - WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); - IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); - Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); - UsePhi->replaceAllUsesWith(Trunc); - DeadInsts.emplace_back(UsePhi); - LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " - << *WidePhi << "\n"); - } - return nullptr; - } - } - - // This narrow use can be widened by a sext if it's non-negative or its narrow - // def was widended by a sext. Same for zext. - auto canWidenBySExt = [&]() { - return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; - }; - auto canWidenByZExt = [&]() { - return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; - }; - - // Our raison d'etre! Eliminate sign and zero extension. - if ((isa(DU.NarrowUse) && canWidenBySExt()) || - (isa(DU.NarrowUse) && canWidenByZExt())) { - Value *NewDef = DU.WideDef; - if (DU.NarrowUse->getType() != WideType) { - unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); - unsigned IVWidth = SE->getTypeSizeInBits(WideType); - if (CastWidth < IVWidth) { - // The cast isn't as wide as the IV, so insert a Trunc. - IRBuilder<> Builder(DU.NarrowUse); - NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); - } - else { - // A wider extend was hidden behind a narrower one. This may induce - // another round of IV widening in which the intermediate IV becomes - // dead. It should be very rare. - LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi - << " not wide enough to subsume " << *DU.NarrowUse - << "\n"); - DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); - NewDef = DU.NarrowUse; - } - } - if (NewDef != DU.NarrowUse) { - LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse - << " replaced by " << *DU.WideDef << "\n"); - ++NumElimExt; - DU.NarrowUse->replaceAllUsesWith(NewDef); - DeadInsts.emplace_back(DU.NarrowUse); - } - // Now that the extend is gone, we want to expose it's uses for potential - // further simplification. We don't need to directly inform SimplifyIVUsers - // of the new users, because their parent IV will be processed later as a - // new loop phi. If we preserved IVUsers analysis, we would also want to - // push the uses of WideDef here. - - // No further widening is needed. The deceased [sz]ext had done it for us. - return nullptr; - } - - // Does this user itself evaluate to a recurrence after widening? - WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); - if (!WideAddRec.first) - WideAddRec = getWideRecurrence(DU); - - assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); - if (!WideAddRec.first) { - // If use is a loop condition, try to promote the condition instead of - // truncating the IV first. - if (widenLoopCompare(DU)) - return nullptr; - - // We are here about to generate a truncate instruction that may hurt - // performance because the scalar evolution expression computed earlier - // in WideAddRec.first does not indicate a polynomial induction expression. - // In that case, look at the operands of the use instruction to determine - // if we can still widen the use instead of truncating its operand. - if (widenWithVariantUse(DU)) - return nullptr; - - // This user does not evaluate to a recurrence after widening, so don't - // follow it. Instead insert a Trunc to kill off the original use, - // eventually isolating the original narrow IV so it can be removed. - truncateIVUse(DU, DT, LI); - return nullptr; - } - // Assume block terminators cannot evaluate to a recurrence. We can't to - // insert a Trunc after a terminator if there happens to be a critical edge. - assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && - "SCEV is not expected to evaluate a block terminator"); - - // Reuse the IV increment that SCEVExpander created as long as it dominates - // NarrowUse. - Instruction *WideUse = nullptr; - if (WideAddRec.first == WideIncExpr && - Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) - WideUse = WideInc; - else { - WideUse = cloneIVUser(DU, WideAddRec.first); - if (!WideUse) - return nullptr; - } - // Evaluation of WideAddRec ensured that the narrow expression could be - // extended outside the loop without overflow. This suggests that the wide use - // evaluates to the same expression as the extended narrow use, but doesn't - // absolutely guarantee it. Hence the following failsafe check. In rare cases - // where it fails, we simply throw away the newly created wide use. - if (WideAddRec.first != SE->getSCEV(WideUse)) { - LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " - << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first - << "\n"); - DeadInsts.emplace_back(WideUse); - return nullptr; - } - - // if we reached this point then we are going to replace - // DU.NarrowUse with WideUse. Reattach DbgValue then. - replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); - - ExtendKindMap[DU.NarrowUse] = WideAddRec.second; - // Returning WideUse pushes it on the worklist. - return WideUse; -} - -/// Add eligible users of NarrowDef to NarrowIVUsers. -void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { - const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); - bool NonNegativeDef = - SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, - SE->getZero(NarrowSCEV->getType())); - for (User *U : NarrowDef->users()) { - Instruction *NarrowUser = cast(U); - - // Handle data flow merges and bizarre phi cycles. - if (!Widened.insert(NarrowUser).second) - continue; - - bool NonNegativeUse = false; - if (!NonNegativeDef) { - // We might have a control-dependent range information for this context. - if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) - NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); - } - - NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, - NonNegativeDef || NonNegativeUse); - } -} - -/// Process a single induction variable. First use the SCEVExpander to create a -/// wide induction variable that evaluates to the same recurrence as the -/// original narrow IV. Then use a worklist to forward traverse the narrow IV's -/// def-use chain. After widenIVUse has processed all interesting IV users, the -/// narrow IV will be isolated for removal by DeleteDeadPHIs. -/// -/// It would be simpler to delete uses as they are processed, but we must avoid -/// invalidating SCEV expressions. -PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { - // Bail if we disallowed widening. - if(!AllowIVWidening) - return nullptr; - - // Is this phi an induction variable? - const SCEVAddRecExpr *AddRec = dyn_cast(SE->getSCEV(OrigPhi)); - if (!AddRec) - return nullptr; - - // Widen the induction variable expression. - const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended - ? SE->getSignExtendExpr(AddRec, WideType) - : SE->getZeroExtendExpr(AddRec, WideType); - - assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && - "Expect the new IV expression to preserve its type"); - - // Can the IV be extended outside the loop without overflow? - AddRec = dyn_cast(WideIVExpr); - if (!AddRec || AddRec->getLoop() != L) - return nullptr; - - // An AddRec must have loop-invariant operands. Since this AddRec is - // materialized by a loop header phi, the expression cannot have any post-loop - // operands, so they must dominate the loop header. - assert( - SE->properlyDominates(AddRec->getStart(), L->getHeader()) && - SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && - "Loop header phi recurrence inputs do not dominate the loop"); - - // Iterate over IV uses (including transitive ones) looking for IV increments - // of the form 'add nsw %iv, '. For each increment and each use of - // the increment calculate control-dependent range information basing on - // dominating conditions inside of the loop (e.g. a range check inside of the - // loop). Calculated ranges are stored in PostIncRangeInfos map. - // - // Control-dependent range information is later used to prove that a narrow - // definition is not negative (see pushNarrowIVUsers). It's difficult to do - // this on demand because when pushNarrowIVUsers needs this information some - // of the dominating conditions might be already widened. - if (UsePostIncrementRanges) - calculatePostIncRanges(OrigPhi); - - // The rewriter provides a value for the desired IV expression. This may - // either find an existing phi or materialize a new one. Either way, we - // expect a well-formed cyclic phi-with-increments. i.e. any operand not part - // of the phi-SCC dominates the loop entry. - Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt(); - Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt); - // If the wide phi is not a phi node, for example a cast node, like bitcast, - // inttoptr, ptrtoint, just skip for now. - if (!(WidePhi = dyn_cast(ExpandInst))) { - // if the cast node is an inserted instruction without any user, we should - // remove it to make sure the pass don't touch the function as we can not - // wide the phi. - if (ExpandInst->hasNUses(0) && - Rewriter.isInsertedInstruction(cast(ExpandInst))) - DeadInsts.emplace_back(ExpandInst); - return nullptr; - } - - // Remembering the WideIV increment generated by SCEVExpander allows - // widenIVUse to reuse it when widening the narrow IV's increment. We don't - // employ a general reuse mechanism because the call above is the only call to - // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. - if (BasicBlock *LatchBlock = L->getLoopLatch()) { - WideInc = - cast(WidePhi->getIncomingValueForBlock(LatchBlock)); - WideIncExpr = SE->getSCEV(WideInc); - // Propagate the debug location associated with the original loop increment - // to the new (widened) increment. - auto *OrigInc = - cast(OrigPhi->getIncomingValueForBlock(LatchBlock)); - WideInc->setDebugLoc(OrigInc->getDebugLoc()); - } - - LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); - ++NumWidened; - - // Traverse the def-use chain using a worklist starting at the original IV. - assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); - - Widened.insert(OrigPhi); - pushNarrowIVUsers(OrigPhi, WidePhi); - - while (!NarrowIVUsers.empty()) { - NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); - - // Process a def-use edge. This may replace the use, so don't hold a - // use_iterator across it. - Instruction *WideUse = widenIVUse(DU, Rewriter); - - // Follow all def-use edges from the previous narrow use. - if (WideUse) - pushNarrowIVUsers(DU.NarrowUse, WideUse); - - // widenIVUse may have removed the def-use edge. - if (DU.NarrowDef->use_empty()) - DeadInsts.emplace_back(DU.NarrowDef); - } - - // Attach any debug information to the new PHI. - replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); - - return WidePhi; -} - -/// Calculates control-dependent range for the given def at the given context -/// by looking at dominating conditions inside of the loop -void WidenIV::calculatePostIncRange(Instruction *NarrowDef, - Instruction *NarrowUser) { - using namespace llvm::PatternMatch; - - Value *NarrowDefLHS; - const APInt *NarrowDefRHS; - if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), - m_APInt(NarrowDefRHS))) || - !NarrowDefRHS->isNonNegative()) - return; - - auto UpdateRangeFromCondition = [&] (Value *Condition, - bool TrueDest) { - CmpInst::Predicate Pred; - Value *CmpRHS; - if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), - m_Value(CmpRHS)))) - return; - - CmpInst::Predicate P = - TrueDest ? Pred : CmpInst::getInversePredicate(Pred); - - auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); - auto CmpConstrainedLHSRange = - ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); - auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( - *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); - - updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); - }; - - auto UpdateRangeFromGuards = [&](Instruction *Ctx) { - if (!HasGuards) - return; - - for (Instruction &I : make_range(Ctx->getIterator().getReverse(), - Ctx->getParent()->rend())) { - Value *C = nullptr; - if (match(&I, m_Intrinsic(m_Value(C)))) - UpdateRangeFromCondition(C, /*TrueDest=*/true); - } - }; - - UpdateRangeFromGuards(NarrowUser); - - BasicBlock *NarrowUserBB = NarrowUser->getParent(); - // If NarrowUserBB is statically unreachable asking dominator queries may - // yield surprising results. (e.g. the block may not have a dom tree node) - if (!DT->isReachableFromEntry(NarrowUserBB)) - return; - - for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); - L->contains(DTB->getBlock()); - DTB = DTB->getIDom()) { - auto *BB = DTB->getBlock(); - auto *TI = BB->getTerminator(); - UpdateRangeFromGuards(TI); - - auto *BI = dyn_cast(TI); - if (!BI || !BI->isConditional()) - continue; - - auto *TrueSuccessor = BI->getSuccessor(0); - auto *FalseSuccessor = BI->getSuccessor(1); - - auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { - return BBE.isSingleEdge() && - DT->dominates(BBE, NarrowUser->getParent()); - }; - - if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) - UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); - - if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) - UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); - } -} - -/// Calculates PostIncRangeInfos map for the given IV -void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { - SmallPtrSet Visited; - SmallVector Worklist; - Worklist.push_back(OrigPhi); - Visited.insert(OrigPhi); - - while (!Worklist.empty()) { - Instruction *NarrowDef = Worklist.pop_back_val(); - - for (Use &U : NarrowDef->uses()) { - auto *NarrowUser = cast(U.getUser()); - - // Don't go looking outside the current loop. - auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; - if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) - continue; - - if (!Visited.insert(NarrowUser).second) - continue; - - Worklist.push_back(NarrowUser); - - calculatePostIncRange(NarrowDef, NarrowUser); - } - } -} - //===----------------------------------------------------------------------===// // Live IV Reduction - Minimize IVs live across the loop. //===----------------------------------------------------------------------===// @@ -1596,7 +569,7 @@ PHINode *IVPhi; public: - WideIVInfo WI; + WidenIV::WideIVInfo WI; IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, const TargetTransformInfo *TTI, @@ -1620,7 +593,7 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI) { - SmallVector WideIVs; + SmallVector WideIVs; auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( Intrinsic::getName(Intrinsic::experimental_guard)); @@ -1656,9 +629,16 @@ } } while(!LoopPhis.empty()); + // Continue if we disallowed widening. + if (!AllowIVWidening) + continue; + for (; !WideIVs.empty(); WideIVs.pop_back()) { - WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards); + WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards, + UsePostIncrementRanges); if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { + NumElimExt += Widener.getNumElimExt(); + NumWidened += Widener.getNumWidened(); Changed = true; LoopPhis.push_back(WidePhi); } Index: llvm/lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUtils.cpp +++ llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1709,3 +1709,899 @@ FirstInst = GetFirstInst(FirstInst, Check, Loc); return std::make_pair(FirstInst, Check); } + +//===----------------------------------------------------------------------===// +// Widen Induction Variables - Extend the width of an IV to cover its +// widest uses. +//===----------------------------------------------------------------------===// + +/// Determine the insertion point for this user. By default, insert immediately +/// before the user. SCEVExpander or LICM will hoist loop invariants out of the +/// loop. For PHI nodes, there may be multiple uses, so compute the nearest +/// common dominator for the incoming blocks. A nullptr can be returned if no +/// viable location is found: it may happen if User is a PHI and Def only comes +/// to this PHI from unreachable blocks. +static Instruction *getInsertPointForUses(Instruction *User, Value *Def, + DominatorTree *DT, LoopInfo *LI) { + PHINode *PHI = dyn_cast(User); + if (!PHI) + return User; + + Instruction *InsertPt = nullptr; + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { + if (PHI->getIncomingValue(i) != Def) + continue; + + BasicBlock *InsertBB = PHI->getIncomingBlock(i); + + if (!DT->isReachableFromEntry(InsertBB)) + continue; + + if (!InsertPt) { + InsertPt = InsertBB->getTerminator(); + continue; + } + InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); + InsertPt = InsertBB->getTerminator(); + } + + // If we have skipped all inputs, it means that Def only comes to Phi from + // unreachable blocks. + if (!InsertPt) + return nullptr; + + auto *DefI = dyn_cast(Def); + if (!DefI) + return InsertPt; + + assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); + + auto *L = LI->getLoopFor(DefI->getParent()); + assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); + + for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) + if (LI->getLoopFor(DTN->getBlock()) == L) + return DTN->getBlock()->getTerminator(); + + llvm_unreachable("DefI dominates InsertPt!"); +} + +WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, + DominatorTree *DTree, SmallVectorImpl &DI, + bool HasGuards, bool UsePostIncrementRanges) + : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), + L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), + HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges), + DeadInsts(DI) { + assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); + ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended; +} + +Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, + bool IsSigned, Instruction *Use) { + // Set the debug location and conservative insertion point. + IRBuilder<> Builder(Use); + // Hoist the insertion point into loop preheaders as far as possible. + for (const Loop *L = LI->getLoopFor(Use->getParent()); + L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper); + L = L->getParentLoop()) + Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); + + return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : + Builder.CreateZExt(NarrowOper, WideType); +} + +/// Instantiate a wide operation to replace a narrow operation. This only needs +/// to handle operations that can evaluation to SCEVAddRec. It can safely return +/// 0 for any operation we decide not to clone. +Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR) { + unsigned Opcode = DU.NarrowUse->getOpcode(); + switch (Opcode) { + default: + return nullptr; + case Instruction::Add: + case Instruction::Mul: + case Instruction::UDiv: + case Instruction::Sub: + return cloneArithmeticIVUser(DU, WideAR); + + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + return cloneBitwiseIVUser(DU); + } +} + +Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) { + Instruction *NarrowUse = DU.NarrowUse; + Instruction *NarrowDef = DU.NarrowDef; + Instruction *WideDef = DU.WideDef; + + LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n"); + + // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything + // about the narrow operand yet so must insert a [sz]ext. It is probably loop + // invariant and will be folded or hoisted. If it actually comes from a + // widened IV, it should be removed during a future call to widenIVUse. + bool IsSigned = getExtendKind(NarrowDef) == SignExtended; + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + IsSigned, NarrowUse); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + IsSigned, NarrowUse); + + auto *NarrowBO = cast(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + return WideBO; +} + +Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR) { + Instruction *NarrowUse = DU.NarrowUse; + Instruction *NarrowDef = DU.NarrowDef; + Instruction *WideDef = DU.WideDef; + + LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); + + unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; + + // We're trying to find X such that + // + // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X + // + // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), + // and check using SCEV if any of them are correct. + + // Returns true if extending NonIVNarrowDef according to `SignExt` is a + // correct solution to X. + auto GuessNonIVOperand = [&](bool SignExt) { + const SCEV *WideLHS; + const SCEV *WideRHS; + + auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { + if (SignExt) + return SE->getSignExtendExpr(S, Ty); + return SE->getZeroExtendExpr(S, Ty); + }; + + if (IVOpIdx == 0) { + WideLHS = SE->getSCEV(WideDef); + const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); + WideRHS = GetExtend(NarrowRHS, WideType); + } else { + const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); + WideLHS = GetExtend(NarrowLHS, WideType); + WideRHS = SE->getSCEV(WideDef); + } + + // WideUse is "WideDef `op.wide` X" as described in the comment. + const SCEV *WideUse = nullptr; + + switch (NarrowUse->getOpcode()) { + default: + llvm_unreachable("No other possibility!"); + + case Instruction::Add: + WideUse = SE->getAddExpr(WideLHS, WideRHS); + break; + + case Instruction::Mul: + WideUse = SE->getMulExpr(WideLHS, WideRHS); + break; + + case Instruction::UDiv: + WideUse = SE->getUDivExpr(WideLHS, WideRHS); + break; + + case Instruction::Sub: + WideUse = SE->getMinusSCEV(WideLHS, WideRHS); + break; + } + + return WideUse == WideAR; + }; + + bool SignExtend = getExtendKind(NarrowDef) == SignExtended; + if (!GuessNonIVOperand(SignExtend)) { + SignExtend = !SignExtend; + if (!GuessNonIVOperand(SignExtend)) + return nullptr; + } + + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + SignExtend, NarrowUse); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + SignExtend, NarrowUse); + + auto *NarrowBO = cast(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + return WideBO; +} + +WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) { + auto It = ExtendKindMap.find(I); + assert(It != ExtendKindMap.end() && "Instruction not yet extended!"); + return It->second; +} + +const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, + unsigned OpCode) const { + if (OpCode == Instruction::Add) + return SE->getAddExpr(LHS, RHS); + if (OpCode == Instruction::Sub) + return SE->getMinusSCEV(LHS, RHS); + if (OpCode == Instruction::Mul) + return SE->getMulExpr(LHS, RHS); + + llvm_unreachable("Unsupported opcode."); +} + +/// No-wrap operations can transfer sign extension of their result to their +/// operands. Generate the SCEV value for the widened operation without +/// actually modifying the IR yet. If the expression after extending the +/// operands is an AddRec for this loop, return the AddRec and the kind of +/// extension used. +WidenIV::WidenedRecTy +WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { + // Handle the common case of add + const unsigned OpCode = DU.NarrowUse->getOpcode(); + // Only Add/Sub/Mul instructions supported yet. + if (OpCode != Instruction::Add && OpCode != Instruction::Sub && + OpCode != Instruction::Mul) + return {nullptr, Unknown}; + + // One operand (NarrowDef) has already been extended to WideDef. Now determine + // if extending the other will lead to a recurrence. + const unsigned ExtendOperIdx = + DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; + assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); + + const SCEV *ExtendOperExpr = nullptr; + const OverflowingBinaryOperator *OBO = + cast(DU.NarrowUse); + ExtendKind ExtKind = getExtendKind(DU.NarrowDef); + if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) + ExtendOperExpr = SE->getSignExtendExpr( + SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); + else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) + ExtendOperExpr = SE->getZeroExtendExpr( + SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); + else + return {nullptr, Unknown}; + + // When creating this SCEV expr, don't apply the current operations NSW or NUW + // flags. This instruction may be guarded by control flow that the no-wrap + // behavior depends on. Non-control-equivalent instructions can be mapped to + // the same SCEV expression, and it would be incorrect to transfer NSW/NUW + // semantics to those operations. + const SCEV *lhs = SE->getSCEV(DU.WideDef); + const SCEV *rhs = ExtendOperExpr; + + // Let's swap operands to the initial order for the case of non-commutative + // operations, like SUB. See PR21014. + if (ExtendOperIdx == 0) + std::swap(lhs, rhs); + const SCEVAddRecExpr *AddRec = + dyn_cast(getSCEVByOpCode(lhs, rhs, OpCode)); + + if (!AddRec || AddRec->getLoop() != L) + return {nullptr, Unknown}; + + return {AddRec, ExtKind}; +} + +/// Is this instruction potentially interesting for further simplification after +/// widening it's type? In other words, can the extend be safely hoisted out of +/// the loop with SCEV reducing the value to a recurrence on the same loop. If +/// so, return the extended recurrence and the kind of extension used. Otherwise +/// return {nullptr, Unknown}. +WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) { + if (!SE->isSCEVable(DU.NarrowUse->getType())) + return {nullptr, Unknown}; + + const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); + if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= + SE->getTypeSizeInBits(WideType)) { + // NarrowUse implicitly widens its operand. e.g. a gep with a narrow + // index. So don't follow this use. + return {nullptr, Unknown}; + } + + const SCEV *WideExpr; + ExtendKind ExtKind; + if (DU.NeverNegative) { + WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); + if (isa(WideExpr)) + ExtKind = SignExtended; + else { + WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); + ExtKind = ZeroExtended; + } + } else if (getExtendKind(DU.NarrowDef) == SignExtended) { + WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); + ExtKind = SignExtended; + } else { + WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); + ExtKind = ZeroExtended; + } + const SCEVAddRecExpr *AddRec = dyn_cast(WideExpr); + if (!AddRec || AddRec->getLoop() != L) + return {nullptr, Unknown}; + return {AddRec, ExtKind}; +} + +/// This IV user cannot be widened. Replace this use of the original narrow IV +/// with a truncation of the new wide IV to isolate and eliminate the narrow IV. +static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT, + LoopInfo *LI) { + auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); + if (!InsertPt) + return; + LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " + << *DU.NarrowUse << "\n"); + IRBuilder<> Builder(InsertPt); + Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); +} + +/// If the narrow use is a compare instruction, then widen the compare +// (and possibly the other operand). The extend operation is hoisted into the +// loop preheader as far as possible. +bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) { + ICmpInst *Cmp = dyn_cast(DU.NarrowUse); + if (!Cmp) + return false; + + // We can legally widen the comparison in the following two cases: + // + // - The signedness of the IV extension and comparison match + // + // - The narrow IV is always positive (and thus its sign extension is equal + // to its zero extension). For instance, let's say we're zero extending + // %narrow for the following use + // + // icmp slt i32 %narrow, %val ... (A) + // + // and %narrow is always positive. Then + // + // (A) == icmp slt i32 sext(%narrow), sext(%val) + // == icmp slt i32 zext(%narrow), sext(%val) + bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended; + if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) + return false; + + Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); + unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); + unsigned IVWidth = SE->getTypeSizeInBits(WideType); + assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); + + // Widen the compare instruction. + auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); + if (!InsertPt) + return false; + IRBuilder<> Builder(InsertPt); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); + + // Widen the other operand of the compare, if necessary. + if (CastWidth < IVWidth) { + Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); + DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); + } + return true; +} + +// The widenIVUse avoids generating trunc by evaluating the use as AddRec, this +// will not work when: +// 1) SCEV traces back to an instruction inside the loop that SCEV can not +// expand, eg. add %indvar, (load %addr) +// 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant +// While SCEV fails to avoid trunc, we can still try to use instruction +// combining approach to prove trunc is not required. This can be further +// extended with other instruction combining checks, but for now we handle the +// following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext") +// +// Src: +// %c = sub nsw %b, %indvar +// %d = sext %c to i64 +// Dst: +// %indvar.ext1 = sext %indvar to i64 +// %m = sext %b to i64 +// %d = sub nsw i64 %m, %indvar.ext1 +// Therefore, as long as the result of add/sub/mul is extended to wide type, no +// trunc is required regardless of how %b is generated. This pattern is common +// when calculating address in 64 bit architecture +bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { + Instruction *NarrowUse = DU.NarrowUse; + Instruction *NarrowDef = DU.NarrowDef; + Instruction *WideDef = DU.WideDef; + + // Handle the common case of add + const unsigned OpCode = NarrowUse->getOpcode(); + // Only Add/Sub/Mul instructions are supported. + if (OpCode != Instruction::Add && OpCode != Instruction::Sub && + OpCode != Instruction::Mul) + return false; + + // The operand that is not defined by NarrowDef of DU. Let's call it the + // other operand. + assert((NarrowUse->getOperand(0) == NarrowDef || + NarrowUse->getOperand(1) == NarrowDef) && + "bad DU"); + + const OverflowingBinaryOperator *OBO = + cast(NarrowUse); + ExtendKind ExtKind = getExtendKind(NarrowDef); + bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap(); + bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap(); + if (!CanSignExtend && !CanZeroExtend) + return false; + + // Verifying that Defining operand is an AddRec + const SCEV *Op1 = SE->getSCEV(WideDef); + const SCEVAddRecExpr *AddRecOp1 = dyn_cast(Op1); + if (!AddRecOp1 || AddRecOp1->getLoop() != L) + return false; + + if (ExtKind == SignExtended) { + for (Use &U : NarrowUse->uses()) { + SExtInst *User = dyn_cast(U.getUser()); + if (!User || User->getType() != WideType) + return false; + } + } else { // ExtKind == ZeroExtended + for (Use &U : NarrowUse->uses()) { + ZExtInst *User = dyn_cast(U.getUser()); + if (!User || User->getType() != WideType) + return false; + } + } + + LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); + + // Generating a widening use instruction. + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + ExtKind, NarrowUse); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + ExtKind, NarrowUse); + + auto *NarrowBO = cast(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + ExtendKindMap[NarrowUse] = ExtKind; + + for (Use &U : NarrowUse->uses()) { + Instruction *User = nullptr; + if (ExtKind == SignExtended) + User = cast(U.getUser()); + else + User = cast(U.getUser()); + assert(User->getType() == WideType && "Checked before!"); + LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " + << *WideBO << "\n"); + ++NumElimExt; + User->replaceAllUsesWith(WideBO); + DeadInsts.emplace_back(User); + } + return true; +} + +/// Determine whether an individual user of the narrow IV can be widened. If so, +/// return the wide clone of the user. +Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewriter) { + assert(ExtendKindMap.count(DU.NarrowDef) && + "Should already know the kind of extension used to widen NarrowDef"); + + // Stop traversing the def-use chain at inner-loop phis or post-loop phis. + if (PHINode *UsePhi = dyn_cast(DU.NarrowUse)) { + if (LI->getLoopFor(UsePhi->getParent()) != L) { + // For LCSSA phis, sink the truncate outside the loop. + // After SimplifyCFG most loop exit targets have a single predecessor. + // Otherwise fall back to a truncate within the loop. + if (UsePhi->getNumOperands() != 1) + truncateIVUse(DU, DT, LI); + else { + // Widening the PHI requires us to insert a trunc. The logical place + // for this trunc is in the same BB as the PHI. This is not possible if + // the BB is terminated by a catchswitch. + if (isa(UsePhi->getParent()->getTerminator())) + return nullptr; + + PHINode *WidePhi = + PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", + UsePhi); + WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); + IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); + Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); + UsePhi->replaceAllUsesWith(Trunc); + DeadInsts.emplace_back(UsePhi); + LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " + << *WidePhi << "\n"); + } + return nullptr; + } + } + + // This narrow use can be widened by a sext if it's non-negative or its narrow + // def was widended by a sext. Same for zext. + auto canWidenBySExt = [&]() { + return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; + }; + auto canWidenByZExt = [&]() { + return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; + }; + + // Our raison d'etre! Eliminate sign and zero extension. + if ((isa(DU.NarrowUse) && canWidenBySExt()) || + (isa(DU.NarrowUse) && canWidenByZExt())) { + Value *NewDef = DU.WideDef; + if (DU.NarrowUse->getType() != WideType) { + unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); + unsigned IVWidth = SE->getTypeSizeInBits(WideType); + if (CastWidth < IVWidth) { + // The cast isn't as wide as the IV, so insert a Trunc. + IRBuilder<> Builder(DU.NarrowUse); + NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); + } + else { + // A wider extend was hidden behind a narrower one. This may induce + // another round of IV widening in which the intermediate IV becomes + // dead. It should be very rare. + LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi + << " not wide enough to subsume " << *DU.NarrowUse + << "\n"); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); + NewDef = DU.NarrowUse; + } + } + if (NewDef != DU.NarrowUse) { + LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse + << " replaced by " << *DU.WideDef << "\n"); + ++NumElimExt; + DU.NarrowUse->replaceAllUsesWith(NewDef); + DeadInsts.emplace_back(DU.NarrowUse); + } + // Now that the extend is gone, we want to expose it's uses for potential + // further simplification. We don't need to directly inform SimplifyIVUsers + // of the new users, because their parent IV will be processed later as a + // new loop phi. If we preserved IVUsers analysis, we would also want to + // push the uses of WideDef here. + + // No further widening is needed. The deceased [sz]ext had done it for us. + return nullptr; + } + + // Does this user itself evaluate to a recurrence after widening? + WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); + if (!WideAddRec.first) + WideAddRec = getWideRecurrence(DU); + + assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); + if (!WideAddRec.first) { + // If use is a loop condition, try to promote the condition instead of + // truncating the IV first. + if (widenLoopCompare(DU)) + return nullptr; + + // We are here about to generate a truncate instruction that may hurt + // performance because the scalar evolution expression computed earlier + // in WideAddRec.first does not indicate a polynomial induction expression. + // In that case, look at the operands of the use instruction to determine + // if we can still widen the use instead of truncating its operand. + if (widenWithVariantUse(DU)) + return nullptr; + + // This user does not evaluate to a recurrence after widening, so don't + // follow it. Instead insert a Trunc to kill off the original use, + // eventually isolating the original narrow IV so it can be removed. + truncateIVUse(DU, DT, LI); + return nullptr; + } + // Assume block terminators cannot evaluate to a recurrence. We can't to + // insert a Trunc after a terminator if there happens to be a critical edge. + assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && + "SCEV is not expected to evaluate a block terminator"); + + // Reuse the IV increment that SCEVExpander created as long as it dominates + // NarrowUse. + Instruction *WideUse = nullptr; + if (WideAddRec.first == WideIncExpr && + Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) + WideUse = WideInc; + else { + WideUse = cloneIVUser(DU, WideAddRec.first); + if (!WideUse) + return nullptr; + } + // Evaluation of WideAddRec ensured that the narrow expression could be + // extended outside the loop without overflow. This suggests that the wide use + // evaluates to the same expression as the extended narrow use, but doesn't + // absolutely guarantee it. Hence the following failsafe check. In rare cases + // where it fails, we simply throw away the newly created wide use. + if (WideAddRec.first != SE->getSCEV(WideUse)) { + LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " + << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first + << "\n"); + DeadInsts.emplace_back(WideUse); + return nullptr; + } + + // if we reached this point then we are going to replace + // DU.NarrowUse with WideUse. Reattach DbgValue then. + replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); + + ExtendKindMap[DU.NarrowUse] = WideAddRec.second; + // Returning WideUse pushes it on the worklist. + return WideUse; +} + +/// Add eligible users of NarrowDef to NarrowIVUsers. +void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { + const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); + bool NonNegativeDef = + SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, + SE->getZero(NarrowSCEV->getType())); + for (User *U : NarrowDef->users()) { + Instruction *NarrowUser = cast(U); + + // Handle data flow merges and bizarre phi cycles. + if (!Widened.insert(NarrowUser).second) + continue; + + bool NonNegativeUse = false; + if (!NonNegativeDef) { + // We might have a control-dependent range information for this context. + if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) + NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); + } + + NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, + NonNegativeDef || NonNegativeUse); + } +} + +/// Process a single induction variable. First use the SCEVExpander to create a +/// wide induction variable that evaluates to the same recurrence as the +/// original narrow IV. Then use a worklist to forward traverse the narrow IV's +/// def-use chain. After widenIVUse has processed all interesting IV users, the +/// narrow IV will be isolated for removal by DeleteDeadPHIs. +/// +/// It would be simpler to delete uses as they are processed, but we must avoid +/// invalidating SCEV expressions. +PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { + // Is this phi an induction variable? + const SCEVAddRecExpr *AddRec = dyn_cast(SE->getSCEV(OrigPhi)); + if (!AddRec) + return nullptr; + + // Widen the induction variable expression. + const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended + ? SE->getSignExtendExpr(AddRec, WideType) + : SE->getZeroExtendExpr(AddRec, WideType); + + assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && + "Expect the new IV expression to preserve its type"); + + // Can the IV be extended outside the loop without overflow? + AddRec = dyn_cast(WideIVExpr); + if (!AddRec || AddRec->getLoop() != L) + return nullptr; + + // An AddRec must have loop-invariant operands. Since this AddRec is + // materialized by a loop header phi, the expression cannot have any post-loop + // operands, so they must dominate the loop header. + assert( + SE->properlyDominates(AddRec->getStart(), L->getHeader()) && + SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && + "Loop header phi recurrence inputs do not dominate the loop"); + + // Iterate over IV uses (including transitive ones) looking for IV increments + // of the form 'add nsw %iv, '. For each increment and each use of + // the increment calculate control-dependent range information basing on + // dominating conditions inside of the loop (e.g. a range check inside of the + // loop). Calculated ranges are stored in PostIncRangeInfos map. + // + // Control-dependent range information is later used to prove that a narrow + // definition is not negative (see pushNarrowIVUsers). It's difficult to do + // this on demand because when pushNarrowIVUsers needs this information some + // of the dominating conditions might be already widened. + if (UsePostIncrementRanges) + calculatePostIncRanges(OrigPhi); + + // The rewriter provides a value for the desired IV expression. This may + // either find an existing phi or materialize a new one. Either way, we + // expect a well-formed cyclic phi-with-increments. i.e. any operand not part + // of the phi-SCC dominates the loop entry. + Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt(); + Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt); + // If the wide phi is not a phi node, for example a cast node, like bitcast, + // inttoptr, ptrtoint, just skip for now. + if (!(WidePhi = dyn_cast(ExpandInst))) { + // if the cast node is an inserted instruction without any user, we should + // remove it to make sure the pass don't touch the function as we can not + // wide the phi. + if (ExpandInst->hasNUses(0) && + Rewriter.isInsertedInstruction(cast(ExpandInst))) + DeadInsts.emplace_back(ExpandInst); + return nullptr; + } + + // Remembering the WideIV increment generated by SCEVExpander allows + // widenIVUse to reuse it when widening the narrow IV's increment. We don't + // employ a general reuse mechanism because the call above is the only call to + // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + WideInc = + cast(WidePhi->getIncomingValueForBlock(LatchBlock)); + WideIncExpr = SE->getSCEV(WideInc); + // Propagate the debug location associated with the original loop increment + // to the new (widened) increment. + auto *OrigInc = + cast(OrigPhi->getIncomingValueForBlock(LatchBlock)); + WideInc->setDebugLoc(OrigInc->getDebugLoc()); + } + + LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); + ++NumWidened; + + // Traverse the def-use chain using a worklist starting at the original IV. + assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); + + Widened.insert(OrigPhi); + pushNarrowIVUsers(OrigPhi, WidePhi); + + while (!NarrowIVUsers.empty()) { + WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); + + // Process a def-use edge. This may replace the use, so don't hold a + // use_iterator across it. + Instruction *WideUse = widenIVUse(DU, Rewriter); + + // Follow all def-use edges from the previous narrow use. + if (WideUse) + pushNarrowIVUsers(DU.NarrowUse, WideUse); + + // widenIVUse may have removed the def-use edge. + if (DU.NarrowDef->use_empty()) + DeadInsts.emplace_back(DU.NarrowDef); + } + + // Attach any debug information to the new PHI. + replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); + + return WidePhi; +} + +/// Calculates control-dependent range for the given def at the given context +/// by looking at dominating conditions inside of the loop +void WidenIV::calculatePostIncRange(Instruction *NarrowDef, + Instruction *NarrowUser) { + using namespace llvm::PatternMatch; + + Value *NarrowDefLHS; + const APInt *NarrowDefRHS; + if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), + m_APInt(NarrowDefRHS))) || + !NarrowDefRHS->isNonNegative()) + return; + + auto UpdateRangeFromCondition = [&] (Value *Condition, + bool TrueDest) { + CmpInst::Predicate Pred; + Value *CmpRHS; + if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), + m_Value(CmpRHS)))) + return; + + CmpInst::Predicate P = + TrueDest ? Pred : CmpInst::getInversePredicate(Pred); + + auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); + auto CmpConstrainedLHSRange = + ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); + auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( + *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); + + updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); + }; + + auto UpdateRangeFromGuards = [&](Instruction *Ctx) { + if (!HasGuards) + return; + + for (Instruction &I : make_range(Ctx->getIterator().getReverse(), + Ctx->getParent()->rend())) { + Value *C = nullptr; + if (match(&I, m_Intrinsic(m_Value(C)))) + UpdateRangeFromCondition(C, /*TrueDest=*/true); + } + }; + + UpdateRangeFromGuards(NarrowUser); + + BasicBlock *NarrowUserBB = NarrowUser->getParent(); + // If NarrowUserBB is statically unreachable asking dominator queries may + // yield surprising results. (e.g. the block may not have a dom tree node) + if (!DT->isReachableFromEntry(NarrowUserBB)) + return; + + for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); + L->contains(DTB->getBlock()); + DTB = DTB->getIDom()) { + auto *BB = DTB->getBlock(); + auto *TI = BB->getTerminator(); + UpdateRangeFromGuards(TI); + + auto *BI = dyn_cast(TI); + if (!BI || !BI->isConditional()) + continue; + + auto *TrueSuccessor = BI->getSuccessor(0); + auto *FalseSuccessor = BI->getSuccessor(1); + + auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { + return BBE.isSingleEdge() && + DT->dominates(BBE, NarrowUser->getParent()); + }; + + if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) + UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); + + if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) + UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); + } +} + +/// Calculates PostIncRangeInfos map for the given IV +void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { + SmallPtrSet Visited; + SmallVector Worklist; + Worklist.push_back(OrigPhi); + Visited.insert(OrigPhi); + + while (!Worklist.empty()) { + Instruction *NarrowDef = Worklist.pop_back_val(); + + for (Use &U : NarrowDef->uses()) { + auto *NarrowUser = cast(U.getUser()); + + // Don't go looking outside the current loop. + auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; + if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) + continue; + + if (!Visited.insert(NarrowUser).second) + continue; + + Worklist.push_back(NarrowUser); + + calculatePostIncRange(NarrowDef, NarrowUser); + } + } +}