Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h +++ include/llvm/Analysis/ScalarEvolution.h @@ -1884,6 +1884,11 @@ /// The printed text is indented by \p Depth. void print(raw_ostream &OS, unsigned Depth) const; + /// Check if \p AR1 and \p AR2 are equal, while taking into account + /// Equal predicates in Preds. + bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1, + const SCEVAddRecExpr *AR2) const; + private: /// Increments the version number of the predicate. This needs to be called /// every time the SCEV predicate changes. Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -306,10 +306,13 @@ /// induction, the induction descriptor \p D will contain the data describing /// this induction. If by some other means the caller has a better SCEV /// expression for \p Phi than the one returned by the ScalarEvolution - /// analysis, it can be passed through \p Expr. - static bool isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE, - InductionDescriptor &D, - const SCEV *Expr = nullptr); + /// analysis, it can be passed through \p Expr. If the def-use chain + /// associated with the phi includes casts (that we know we can ignore + /// under proper runtime checks), they are passed through \p CastsToIgnore. + static bool + isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE, + InductionDescriptor &D, const SCEV *Expr = nullptr, + SmallVectorImpl *CastsToIgnore = nullptr); /// Returns true if \p Phi is a floating point induction in the loop \p L. /// If \p Phi is an induction, the induction descriptor \p D will contain @@ -348,10 +351,18 @@ Instruction::BinaryOpsEnd; } + /// Returns a reference to the type cast instructions in the induction + /// update chain, that are redundant when guarded with a runtime + /// SCEV overflow check. + const SmallVectorImpl &getCastInsts() const { + return RedundantCasts; + } + private: /// Private constructor - used by \c isInductionPHI. InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step, - BinaryOperator *InductionBinOp = nullptr); + BinaryOperator *InductionBinOp = nullptr, + SmallVectorImpl *Casts = nullptr); /// Start value. TrackingVH StartValue; @@ -361,6 +372,9 @@ const SCEV *Step = nullptr; // Instruction that advances induction variable. BinaryOperator *InductionBinOp = nullptr; + // Instructions used for type-casts of the induction variable, + // that are redundant when guarded with a runtime SCEV overflow check. + SmallVector RedundantCasts; }; BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -4732,6 +4732,30 @@ return Rewrite; } +// FIXME: This utility is currently required because the Rewriter currently +// does not rewrite this expression: +// {0, +, (sext ix (trunc iy to ix) to iy)} +// into {0, +, %step}, +// even when the following Equal predicate exists: +// "%step == (sext ix (trunc iy to ix) to iy)". +bool PredicatedScalarEvolution::areAddRecsEqualWithPreds( + const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const { + if (AR1 == AR2) + return true; + + auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool { + if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2)) && + !Preds.implies(SE.getEqualPredicate(Expr2, Expr1))) + return false; + return true; + }; + + if (!areExprsEqual(AR1->getStart(), AR2->getStart()) || + !areExprsEqual(AR1->getStepRecurrence(SE), AR2->getStepRecurrence(SE))) + return false; + return true; +} + /// A helper function for createAddRecFromPHI to handle simple cases. /// /// This function tries to find an AddRec expression for the simplest (yet most @@ -4874,33 +4898,33 @@ // indices form a positive value. if (GEP->isInBounds() && GEP->getOperand(0) == PN) { Flags = setFlags(Flags, SCEV::FlagNW); - + const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) Flags = setFlags(Flags, SCEV::FlagNUW); } - + // We cannot transfer nuw and nsw flags from subtraction // operations -- sub nuw X, Y is not the same as add nuw X, -Y // for instance. } - + const SCEV *StartVal = getSCEV(StartValueV); const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); - + // Okay, for the entire analysis of this edge we assumed the PHI // to be symbolic. We now need to go back and purge all of the // entries for the scalars that use the symbolic expression. forgetSymbolicName(PN, SymbolicName); ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; - + // We can add Flags to the post-inc expression only if we // know that it is *undefined behavior* for BEValueV to // overflow. if (auto *BEInst = dyn_cast(BEValueV)) if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); - + return PHISCEV; } } Index: lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- lib/Transforms/Utils/LoopUtils.cpp +++ lib/Transforms/Utils/LoopUtils.cpp @@ -678,7 +678,8 @@ } InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, - const SCEV *Step, BinaryOperator *BOp) + const SCEV *Step, BinaryOperator *BOp, + SmallVectorImpl *Casts) : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { assert(IK != IK_NoInduction && "Not an induction"); @@ -705,6 +706,12 @@ (InductionBinOp->getOpcode() == Instruction::FAdd || InductionBinOp->getOpcode() == Instruction::FSub))) && "Binary opcode should be specified for FP induction"); + + if (Casts) { + for (auto &Inst : *Casts) { + RedundantCasts.push_back(Inst); + } + } } int InductionDescriptor::getConsecutiveDirection() const { @@ -808,7 +815,7 @@ StartValue = Phi->getIncomingValue(1); } else { assert(TheLoop->contains(Phi->getIncomingBlock(1)) && - "Unexpected Phi node in the loop"); + "Unexpected Phi node in the loop"); BEValue = Phi->getIncomingValue(1); StartValue = Phi->getIncomingValue(0); } @@ -841,6 +848,110 @@ return true; } +/// This function is called when we suspect that the update-chain of a phi node +/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts, +/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime +/// predicate P under which the SCEV expression for the phi can be the +/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the +/// cast instructions that are involved in the update-chain of this induction. +/// A caller that adds the required runtime predicate can be free to drop these +/// cast instructions, and compute the phi using \p AR (instead of some scev +/// expression with casts). +/// +/// For example, without a predicate the scev expression can take the following +/// form: +/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) +/// +/// It corresponds to the following IR sequence: +/// %for.body: +/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] +/// %casted_phi = "ExtTrunc i64 %x" +/// %add = add i64 %casted_phi, %step +/// +/// where %x is given in \p PN, +/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, +/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of +/// several forms, for example, such as: +/// ExtTrunc1: %casted_phi = and %x, 2^n-1 +/// or: +/// ExtTrunc2: %t = shl %x, m +/// %casted_phi = ashr %t, m +/// +/// If we are able to find such sequence, we return the instructions +/// we found, namely %casted_phi and the instructions on its use-def chain up +/// to the phi (not including the phi). +bool getCastsForInductionPHI( + PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, + const SCEVAddRecExpr *AR, SmallVectorImpl &CastInsts) { + + assert(CastInsts.empty() && "CastInsts is expected to be empty."); + auto *PN = cast(PhiScev->getValue()); + assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); + const Loop *L = AR->getLoop(); + + // Find any cast instructions that participate in the def-use chain of + // PhiScev in the loop. + // FORNOW/TODO: We currently expect the def-use chain to include only + // two-operand instructions, where one of the operands is an invariant. + // createAddRecFromPHIWithCasts() currently does not support anything more + // involved than that, so we keep the search simple. This can be + // extended/generalized as needed. + + auto getDef = [&](const Value *Val) -> Value * { + const BinaryOperator *BinOp = dyn_cast(Val); + if (!BinOp) + return nullptr; + Value *Op0 = BinOp->getOperand(0); + Value *Op1 = BinOp->getOperand(1); + Value *Def = nullptr; + if (L->isLoopInvariant(Op0)) + Def = Op1; + else if (L->isLoopInvariant(Op1)) + Def = Op0; + return Def; + }; + + // Look for the instruction that defines the induction via the + // loop backedge. + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return false; + Value *Val = PN->getIncomingValueForBlock(Latch); + if (!Val) + return false; + + // Follow the def-use chain until the induction phi is reached. + // If on the way we encounter a Value that has the same SCEV Expr as the + // phi node, we can consider the instructions we visit from that point + // as part of the cast-sequence that can be ignored. + bool InCastSequence = false; + auto *Inst = dyn_cast(Val); + while (Val != PN) { + // If we encountered a phi node other than PN, or if we left the loop, + // we bail out. + if (!Inst || !L->contains(Inst)) { + return false; + } + auto *AddRec = dyn_cast(PSE.getSCEV(Val)); + if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR)) + InCastSequence = true; + if (InCastSequence) { + // Only the last instruction in the cast sequence is expected to have + // uses outside the induction def-use chain. + if (!CastInsts.empty()) + if (!Inst->hasOneUse()) + return false; + CastInsts.push_back(Inst); + } + Val = getDef(Val); + if (!Val) + return false; + Inst = dyn_cast(Val); + } + + return InCastSequence; +} + bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, PredicatedScalarEvolution &PSE, InductionDescriptor &D, @@ -870,13 +981,26 @@ return false; } + // Record any Cast instructions that participate in the induction update + const auto *SymbolicPhi = dyn_cast(PhiScev); + // If we started from an UnknownSCEV, and managed to build an addRecurrence + // only after enabling Assume with PSCEV, this means we may have encountered + // cast instructions that required adding a runtime check in order to + // guarantee the correctness of the AddRecurence respresentation of the + // induction. + if (PhiScev != AR && SymbolicPhi) { + SmallVector Casts; + if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts)) + return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts); + } + return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); } -bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, - ScalarEvolution *SE, - InductionDescriptor &D, - const SCEV *Expr) { +bool InductionDescriptor::isInductionPHI( + PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, + InductionDescriptor &D, const SCEV *Expr, + SmallVectorImpl *CastsToIgnore) { Type *PhiTy = Phi->getType(); // We only handle integer and pointer inductions variables. if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) @@ -895,7 +1019,7 @@ // FIXME: We should treat this as a uniform. Unfortunately, we // don't currently know how to handled uniform PHIs. DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); - return false; + return false; } Value *StartValue = @@ -908,7 +1032,8 @@ return false; if (PhiTy->isIntegerTy()) { - D = InductionDescriptor(StartValue, IK_IntInduction, Step); + D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr, + CastsToIgnore); return true; } Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -599,6 +599,20 @@ /// Returns true if we should generate a scalar version of \p IV. bool needsScalarInduction(Instruction *IV) const; + /// If there is a cast involved in the induction variable \p ID, which should + /// be ignored in the vectorized loop body, this function records the + /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the + /// cast. We had already proved that the casted Phi is equal to the uncasted + /// Phi in the vectorized loop (under a runtime guard), and therefore + /// there is no need to vectorize the cast - the same value can be used in the + /// vector loop for both the Phi and the cast. + /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified, + /// Otherwise, \p VectorLoopValue is a widened/vectorized value. + void recordVectorLoopValueForInductionCast (const InductionDescriptor &ID, + Value *VectorLoopValue, + unsigned Part, + unsigned Lane = UINT_MAX); + /// Generate a shuffle sequence that will reverse the vector Vec. virtual Value *reverseVector(Value *Vec); @@ -1570,7 +1584,17 @@ /// Returns the widest induction type. Type *getWidestInductionType() { return WidestIndTy; } - /// Returns True if V is an induction variable in this loop. + /// Returns True if V is a Phi node of an induction variable in this loop. + bool isInductionPhi(const Value *V); + + /// Returns True if V is a cast that is part of an induction def-use chain, + /// and had been proven to be redundant under a runtime guard (in other + /// words, the cast has the same SCEV expression as the induction phi). + bool isCastedInductionVariable(const Value *V); + + /// Returns True if V can be considered as an induction variable in this + /// loop. V can be the induction phi, or some redundant cast in the def-use + /// chain of the inducion phi. bool isInductionVariable(const Value *V); /// Returns True if PN is a reduction variable in this loop. @@ -1781,6 +1805,12 @@ /// variables can be pointers. InductionList Inductions; + /// Holds all the casts that participate in the update chain of the induction + /// variables, and that have been proven to be redundant (possibly under a + /// runtime guard). These casts can be ignored when creating the vectorized + /// loop body. + SmallPtrSet InductionCastsToIgnore; + /// Holds the phi nodes that are first-order recurrences. RecurrenceSet FirstOrderRecurrences; @@ -2014,7 +2044,7 @@ return false; // If the truncated value is not an induction variable, return false. - return Legal->isInductionVariable(Op); + return Legal->isInductionPhi(Op); } /// Collects the instructions to scalarize for each predicated instruction in @@ -2600,6 +2630,7 @@ Instruction *LastInduction = VecInd; for (unsigned Part = 0; Part < UF; ++Part) { VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction); + recordVectorLoopValueForInductionCast(II, LastInduction, Part); if (isa(EntryVal)) addMetadata(LastInduction, EntryVal); LastInduction = cast(addFastMathFlag( @@ -2633,6 +2664,22 @@ return llvm::any_of(IV->users(), isScalarInst); } +void InnerLoopVectorizer::recordVectorLoopValueForInductionCast( + const InductionDescriptor &ID, Value *VectorLoopVal, unsigned Part, + unsigned Lane) { + const SmallVectorImpl &Casts = ID.getCastInsts(); + if (Casts.empty()) + return; + // Only the first Cast instruction in the Casts vector is of interest. + // The rest of the Casts (if exist) have no uses outside the + // induction update chain itself. + Instruction *CastInst = *Casts.begin(); + if (Lane < UINT_MAX) + VectorLoopValueMap.setScalarValue(CastInst, {Part, Lane}, VectorLoopVal); + else + VectorLoopValueMap.setVectorValue(CastInst, Part, VectorLoopVal); +} + void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) { assert((IV->getType()->isIntegerTy() || IV != OldInduction) && "Primary induction variable must have an integer type"); @@ -2713,6 +2760,7 @@ Value *EntryPart = getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode()); VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart); + recordVectorLoopValueForInductionCast(ID, EntryPart, Part); if (Trunc) addMetadata(EntryPart, Trunc); } @@ -2820,6 +2868,7 @@ auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step)); auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul)); VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add); + recordVectorLoopValueForInductionCast(ID, Add, Part, Lane); } } } @@ -5169,6 +5218,15 @@ PHINode *Phi, const InductionDescriptor &ID, SmallPtrSetImpl &AllowedExit) { Inductions[Phi] = ID; + + // In case this induction also comes with casts that we know we can ignore + // in the vectorized loop body, record them here. All casts could be recorded + // here for ignoring, but suffices to record only the first (as it is the + // only one that may bw used outside the cast sequence). + const SmallVectorImpl &Casts = ID.getCastInsts(); + if (!Casts.empty()) + InductionCastsToIgnore.insert(*Casts.begin()); + Type *PhiTy = Phi->getType(); const DataLayout &DL = Phi->getModule()->getDataLayout(); @@ -5798,7 +5856,7 @@ return true; } -bool LoopVectorizationLegality::isInductionVariable(const Value *V) { +bool LoopVectorizationLegality::isInductionPhi(const Value *V) { Value *In0 = const_cast(V); PHINode *PN = dyn_cast_or_null(In0); if (!PN) @@ -5807,6 +5865,15 @@ return Inductions.count(PN); } +bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) { + auto *Inst = dyn_cast(V); + return (Inst && InductionCastsToIgnore.count(Inst)); +} + +bool LoopVectorizationLegality::isInductionVariable(const Value *V) { + return isInductionPhi(V) || isCastedInductionVariable(V); +} + bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { return FirstOrderRecurrences.count(Phi); } @@ -6916,14 +6983,16 @@ static const SCEV *getAddressAccessSCEV( Value *Ptr, LoopVectorizationLegality *Legal, - ScalarEvolution *SE, + PredicatedScalarEvolution &PSE, const Loop *TheLoop) { + auto *Gep = dyn_cast(Ptr); if (!Gep) return nullptr; // We are looking for a gep with all loop invariant indices except for one // which should be an induction variable. + auto SE = PSE.getSE(); unsigned NumOperands = Gep->getNumOperands(); for (unsigned i = 1; i < NumOperands; ++i) { Value *Opd = Gep->getOperand(i); @@ -6933,7 +7002,7 @@ } // Now we know we have a GEP ptr, %inv, %ind, %inv. return the Ptr SCEV. - return SE->getSCEV(Ptr); + return PSE.getSCEV(Ptr); } static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) { @@ -6953,7 +7022,7 @@ // Figure out whether the access is strided and get the stride value // if it's known in compile time - const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, SE, TheLoop); + const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop); // Get the cost of the scalar memory instruction and address computation. unsigned Cost = VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV); @@ -7507,6 +7576,13 @@ SmallPtrSetImpl &Casts = RedDes.getCastInsts(); VecValuesToIgnore.insert(Casts.begin(), Casts.end()); } + // Ignore type-casting instructions we identified during induction + // detection. + for (auto &Induction : *Legal->getInductionVars()) { + InductionDescriptor &IndDes = Induction.second; + const SmallVectorImpl &Casts = IndDes.getCastInsts(); + VecValuesToIgnore.insert(Casts.begin(), Casts.end()); + } } LoopVectorizationCostModel::VectorizationFactor @@ -7612,6 +7688,18 @@ return U == Ind || DeadInstructions.count(cast(U)); })) DeadInstructions.insert(IndUpdate); + + // We record as "Dead" also the type-casting instructions we had identified + // during induction analysis. We don't need any handling for them in the + // vectorized loop because we have proven that, under a proper runtime + // test guarding the vectorized loop, the value of the phi, and the casted + // value of the phi, are the same. The last instruction in this casting chain + // will get its scalar/vector/widened def from the scalar/vector/widened def + // of the respective phi node. Any other casts in the induction def-use chain + // have no other uses outside the phi update chain, and will be ignored. + InductionDescriptor &IndDes = Induction.second; + const SmallVectorImpl &Casts = IndDes.getCastInsts(); + DeadInstructions.insert(Casts.begin(), Casts.end()); } } Index: test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll =================================================================== --- test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll +++ test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll @@ -0,0 +1,211 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8 +; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Given a loop with an induction variable which is being +; truncated/extended using casts that had been proven to +; be redundant under a runtime test, we want to make sure +; that these casts, do not get vectorized/scalarized/widened. +; This is the case for inductions whose SCEV expression is +; of the form "ExtTrunc(%phi) + %step", where "ExtTrunc" +; can be a result of the IR sequences we check below. +; +; See also pr30654. +; + +; Case1: Check the following induction pattern: +; +; %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] +; %sext = shl i32 %p.09, 24 +; %conv = ashr exact i32 %sext, 24 +; %add = add nsw i32 %conv, %step +; +; This is the case in the following code: +; +; void doit1(int n, int step) { +; int i; +; char p = 0; +; for (i = 0; i < n; i++) { +; a[i] = p; +; p = p + step; +; } +; } +; +; The "ExtTrunc" IR sequence here is: +; "%sext = shl i32 %p.09, 24" +; "%conv = ashr exact i32 %sext, 24" +; We check that it does not appear in the vector loop body, whether +; we vectorize or scalarize the induction. +; In the case of widened induction, this means that the induction phi +; is directly used, without shl/ashr on the way. + +; VF8-LABEL: @doit1 +; VF8: vector.body: +; VF8: %vec.ind = phi <8 x i32> +; VF8: store <8 x i32> %vec.ind +; VF8: middle.block: + +; VF1-LABEL: @doit1 +; VF1: vector.body: +; VF1-NOT: %{{.*}} = shl i32 +; VF1: middle.block: + +@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16 + +define void @doit1(i32 %n, i32 %step) { +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %sext = shl i32 %p.09, 24 + %conv = ashr exact i32 %sext, 24 + %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %conv, i32* %arrayidx, align 4 + %add = add nsw i32 %conv, %step + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + + +; Case2: Another variant of the above pattern is where the induction variable +; is used only for address compuation (i.e. it is a GEP index) and therefore +; the induction is not vectorized but rather only the step is widened. +; +; This is the case in the following code, where the induction variable 'w_ix' +; is only used to access the array 'in': +; +; void doit2(int *in, int *out, size_t size, size_t step) +; { +; int w_ix = 0; +; for (size_t offset = 0; offset < size; ++offset) +; { +; int w = in[w_ix]; +; out[offset] = w; +; w_ix += step; +; } +; } +; +; The "ExtTrunc" IR sequence here is similar to the previous case: +; "%sext = shl i64 %w_ix.012, 32 +; %idxprom = ashr exact i64 %sext, 32" +; We check that it does not appear in the vector loop body, whether +; we widen or scalarize the induction. +; In the case of widened induction, this means that the induction phi +; is directly used, without shl/ashr on the way. + +; VF8-LABEL: @doit2 +; VF8: vector.body: +; VF8: %vec.ind = phi <8 x i64> +; VF8: %{{.*}} = extractelement <8 x i64> %vec.ind +; VF8: middle.block: + +; VF1-LABEL: @doit2 +; VF1: vector.body: +; VF1-NOT: %{{.*}} = shl i64 +; VF1: middle.block: +; + +define void @doit2(i32* nocapture readonly %in, i32* nocapture %out, i64 %size, i64 %step) { +entry: + %cmp9 = icmp eq i64 %size, 0 + br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph + +for.body.lr.ph: + br label %for.body + +for.cond.cleanup.loopexit: + br label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %w_ix.011 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %sext = shl i64 %w_ix.011, 32 + %idxprom = ashr exact i64 %sext, 32 + %arrayidx = getelementptr inbounds i32, i32* %in, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %out, i64 %offset.010 + store i32 %0, i32* %arrayidx1, align 4 + %add = add i64 %idxprom, %step + %inc = add nuw i64 %offset.010, 1 + %exitcond = icmp eq i64 %inc, %size + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + +; Case3: Lastly, check also the following induction pattern: +; +; %p.09 = phi i32 [ %val0, %scalar.ph ], [ %add, %for.body ] +; %conv = and i32 %p.09, 255 +; %add = add nsw i32 %conv, %step +; +; This is the case in the following code: +; +; int a[N]; +; void doit3(int n, int step) { +; int i; +; unsigned char p = 0; +; for (i = 0; i < n; i++) { +; a[i] = p; +; p = p + step; +; } +; } +; +; The "ExtTrunc" IR sequence here is: +; "%conv = and i32 %p.09, 255". +; We check that it does not appear in the vector loop body, whether +; we vectorize or scalarize the induction. + +; VF8-LABEL: @doit3 +; VF8: vector.body: +; VF8: %vec.ind = phi <8 x i32> +; VF8: store <8 x i32> %vec.ind +; VF8: middle.block: + +; VF1-LABEL: @doit3 +; VF1: vector.body: +; VF1-NOT: %{{.*}} = and i32 +; VF1: middle.block: + +define void @doit3(i32 %n, i32 %step) { +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %conv = and i32 %p.09, 255 + %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %conv, i32* %arrayidx, align 4 + %add = add nsw i32 %conv, %step + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +}