Index: include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- include/llvm/Analysis/ScalarEvolution.h
+++ include/llvm/Analysis/ScalarEvolution.h
@@ -575,6 +575,15 @@
   Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
   createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI);
 
+  /// Check if \p PhiScev is an integer loop header phi for which PSCEV was
+  /// able to build the AddRecurrence \p AR under some predicates. If so, the
+  /// function returns in \p CastInsts the cast instructions that participate
+  /// in the induction update chain associated with this phi, and that can be
+  /// ignored under these predicates.
+  bool getCastsForInductionPHI(const SCEVUnknown *PhiScev,
+                               const SCEVAddRecExpr *AR,
+                               SmallVectorImpl<Instruction *> &CastInsts);
+
   /// Returns an expression for a GEP
   ///
   /// \p GEP The GEP. The indices contained in the GEP itself are ignored,
Index: include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- include/llvm/Transforms/Utils/LoopUtils.h
+++ include/llvm/Transforms/Utils/LoopUtils.h
@@ -306,10 +306,13 @@
   /// induction, the induction descriptor \p D will contain the data describing
   /// this induction. If by some other means the caller has a better SCEV
   /// expression for \p Phi than the one returned by the ScalarEvolution
-  /// analysis, it can be passed through \p Expr.
-  static bool isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE,
-                             InductionDescriptor &D,
-                             const SCEV *Expr = nullptr);
+  /// analysis, it can be passed through \p Expr. If the def-use chain
+  /// associated with the phi includes casts (that we know we can ignore
+  /// under proper runtime checks), they are passed through \p CastsToIgnore.
+  static bool
+  isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE,
+                 InductionDescriptor &D, const SCEV *Expr = nullptr,
+                 SmallVectorImpl<Instruction *> *CastsToIgnore = nullptr);
 
   /// Returns true if \p Phi is a floating point induction in the loop \p L.
   /// If \p Phi is an induction, the induction descriptor \p D will contain
@@ -350,10 +353,18 @@
            Instruction::BinaryOpsEnd;
   }
 
+  /// Returns a reference to the type-cast instructions in the induction
+  /// update chain that are redundant when guarded with a runtime
+  /// SCEV overflow check.
+  const SmallVectorImpl<Instruction *> &getCastInsts() const {
+    return RedundantCasts;
+  }
+
 private:
   /// Private constructor - used by \c isInductionPHI.
   InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step,
-                      BinaryOperator *InductionBinOp = nullptr);
+                      BinaryOperator *InductionBinOp = nullptr,
+                      SmallVectorImpl<Instruction *> *Casts = nullptr);
 
   /// Start value.
   TrackingVH<Value> StartValue;
@@ -363,6 +374,9 @@
   const SCEV *Step = nullptr;
   // Instruction that advances induction variable.
   BinaryOperator *InductionBinOp = nullptr;
+  // Instructions used for type-casts of the induction variable,
+  // that are redundant when guarded with a runtime SCEV overflow check.
+  SmallVector<Instruction *, 2> RedundantCasts;
 };
 
 BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -4342,6 +4342,184 @@
   return L;
 }
 
+/// Helper function for getCastsForInductionPHIImpl.
+/// Given a Value \p CastedPhi and a PHINode Value \p PN,
+/// look for the following sequence:
+///   %Ext = shl %PN, m
+///   %CastedPhi = ashr %Ext, m
+/// If found, return true, and insert the intermediate casts in \p CastInsts.
+/// Otherwise, return false.
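+/// For example, with m = 24 this matches the sign-extension-via-shifts
+/// idiom for an i8 value kept in an i32 phi, as in the doit1 test case
+/// added by this patch:
+///   %sext = shl i32 %p.09, 24
+///   %conv = ashr exact i32 %sext, 24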
+bool isAshrShlInductionCastSequence(Value *CastedPhi, Value *PN,
+                                    const DataLayout &DL,
+                                    SmallVectorImpl<Instruction *> &CastInsts) {
+
+  // 1) Look for the ashr instruction.
+  auto *AshrInst = dyn_cast<Instruction>(CastedPhi);
+  if (!AshrInst || AshrInst->getOpcode() != Instruction::AShr)
+    return false;
+
+  Value *Ext = AshrInst->getOperand(0);
+  uint64_t ExtSrcBitWidth = DL.getTypeSizeInBits(Ext->getType());
+  ConstantInt *AshrAmt = dyn_cast<ConstantInt>(AshrInst->getOperand(1));
+  if (!AshrAmt || !AshrAmt->getValue().ult(ExtSrcBitWidth))
+    return false;
+
+  // 2) Look for the shl instruction.
+  auto *ShlInst = dyn_cast<Instruction>(Ext);
+  if (!ShlInst || ShlInst->getOpcode() != Instruction::Shl ||
+      !ShlInst->hasOneUse() || ShlInst->getOperand(0) != PN)
+    return false;
+
+  auto *ShlAmt = dyn_cast<ConstantInt>(ShlInst->getOperand(1));
+  if (!ShlAmt || ShlAmt->getZExtValue() != AshrAmt->getZExtValue())
+    return false;
+
+  // First insert the last instruction of the ExtTrunc cast sequence. This
+  // last instruction is the only one that may be used outside of the
+  // ExtTrunc cast sequence, so it is placed in position 0 of CastInsts,
+  // for special handling later on.
+  assert(CastInsts.empty() && "CastInsts is expected to be empty.");
+  CastInsts.push_back(AshrInst);
+  CastInsts.push_back(ShlInst);
+  return true;
+}
+
+/// Helper function for getCastsForInductionPHIImpl.
+/// Given a Value \p CastedPhi and a PHINode Value \p PN,
+/// look for the following sequence:
+///   %CastedPhi = and i64 %PN, 2^n-1
+/// If found, return true, and insert the intermediate cast in \p CastInsts.
+/// Otherwise, return false.
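+/// For example, with n = 8 this matches the zero-extension-via-masking
+/// idiom for an i8 value kept in an i32 phi, as in the doit3 test case
+/// added by this patch:
+///   %conv = and i32 %p.09, 255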
+bool isAndInductionCastSequence(Value *CastedPhi, Value *PN,
+                                const DataLayout &DL,
+                                SmallVectorImpl<Instruction *> &CastInsts) {
+
+  auto *AndInst = dyn_cast<Instruction>(CastedPhi);
+  if (!AndInst || AndInst->getOpcode() != Instruction::And ||
+      AndInst->getOperand(0) != PN)
+    return false;
+
+  ConstantInt *Mask = dyn_cast<ConstantInt>(AndInst->getOperand(1));
+  if (!Mask || !Mask->getValue().isMask())
+    return false;
+
+  assert(CastInsts.empty() && "CastInsts is expected to be empty.");
+  CastInsts.push_back(AndInst);
+  return true;
+}
+
+/// Look for the following IR sequence:
+///   %for.body:
+///     %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
+///     %casted_phi = "ExtTrunc i64 %x"
+///     %add = add i64 %casted_phi, %step
+///
+/// where %x is given in \p PN, and where "ExtTrunc i64 %x" can take one of
+/// the following forms:
+///
+/// a) %casted_phi = and i64 %x, 2^n-1
+///    e.g.:
+///    %for.body:
+///      %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
+///      %casted_phi = and i64 %x, 255
+///      %add = add i64 %casted_phi, %step
+///
+/// b) %tmp = shl i64 %x, m
+///    %casted_phi = ashr exact i64 %tmp, m
+///    e.g.:
+///    %for.body:
+///      %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
+///      %sext = shl i64 %x, 32
+///      %casted_phi = ashr exact i64 %sext, 32
+///      %add = add i64 %casted_phi, %step
+///
+/// If found, return true, and insert the intermediate casts in \p CastInsts.
+/// Otherwise, return false.
+bool getCastsForInductionPHIImpl(PHINode *PN, const Loop *L,
+                                 SmallVectorImpl<Instruction *> &CastInsts) {
+
+  // 1) Look for the add instruction that increments the induction via the
+  //    loop backedge.
+  BasicBlock *Latch = L->getLoopLatch();
+  if (!Latch)
+    return false;
+  Value *BEValueV = PN->getIncomingValueForBlock(Latch);
+  auto *IndUpdate = dyn_cast<Instruction>(BEValueV);
+  if (!IndUpdate || IndUpdate->getOpcode() != Instruction::Add)
+    return false;
+  Value *Op0 = IndUpdate->getOperand(0);
+  Value *Op1 = IndUpdate->getOperand(1);
+  Value *CastedPhi = nullptr;
+  // If the add uses the phi directly, there is no cast sequence to find.
+  if (Op0 == PN || Op1 == PN)
+    return false;
+  if (L->isLoopInvariant(Op0))
+    CastedPhi = Op1;
+  else if (L->isLoopInvariant(Op1))
+    CastedPhi = Op0;
+  else
+    return false;
+
+  // 2) Look for the ExtTrunc sequence.
+  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+  return (isAshrShlInductionCastSequence(CastedPhi, PN, DL, CastInsts) ||
+          isAndInductionCastSequence(CastedPhi, PN, DL, CastInsts));
+}
+
+/// If PredicatedSCEVRewrites contains an entry that maps \p PhiScev to \p AR
+/// under a runtime predicate, then we know the following:
+/// (1) The value of the phi at iteration i is:
+///       (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
+/// (2) Under the runtime predicate, the above expression is equal to:
+///       Start + i*Step
+/// where Step is loop invariant.
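+///
+/// For example, for an i64 phi whose update is truncated to i32 and
+/// sign-extended back (the shl/ashr idiom above with m = 32), (1) is
+///   (sext i32 (trunc i64 (Start + i*Step) to i32) to i64)
+/// and the runtime predicate asserts that the truncation never changes the
+/// value, which is what justifies (2).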
+///
+/// We want to find the cast instructions that are involved in the
+/// update-chain of this induction. A caller that adds the required runtime
+/// predicate is free to drop these cast instructions, and compute
+/// the phi using (2) above instead of (1).
+///
+/// We look for the following sequence:
+///   x1 = phi (x0, x_next)
+///   x2 = ExtTrunc (x1)
+///   x_next = add (x2, Step)
+///
+/// where ExtTrunc is the IR sequence that resulted in the SCEV "sext(trunc("
+/// or "zext(trunc(" expression. This can be one of several patterns; we look
+/// for one of the following two:
+/// ExtTrunc1:
+///   Cast: %x2 = and %x1, 2^n-1
+/// ExtTrunc2:
+///   Cast: %t = shl %x1, m
+///   Cast: %x2 = ashr %t, m
+///
+/// If we are able to find this sequence, we return the "Cast:" instructions
+/// from the pattern we found.
+/// (TODO: Check for more IR induction patterns that can result in the SCEV
+/// expression in (1) above.)
+bool ScalarEvolution::getCastsForInductionPHI(
+    const SCEVUnknown *PhiScev, const SCEVAddRecExpr *AR,
+    SmallVectorImpl<Instruction *> &CastInsts) {
+
+  auto *PN = cast<PHINode>(PhiScev->getValue());
+  const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
+  if (!L)
+    return false;
+
+  auto I = PredicatedSCEVRewrites.find({PhiScev, L});
+  if (I == PredicatedSCEVRewrites.end())
+    return false;
+  std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite =
+      I->second;
+  assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec");
+  if (Rewrite.first != AR)
+    return false;
+
+  // PhiScev was found in PredicatedSCEVRewrites, and it is mapped to \p AR
+  // under some runtime tests. Find the cast instructions that participate in
+  // the def-use chain of PhiScev in the loop.
+  return getCastsForInductionPHIImpl(PN, L, CastInsts);
+}
+
 // Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the
 // computation that updates the phi follows the following pattern:
 //   (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
Index: lib/Transforms/Utils/LoopUtils.cpp
===================================================================
--- lib/Transforms/Utils/LoopUtils.cpp
+++ lib/Transforms/Utils/LoopUtils.cpp
@@ -678,7 +678,8 @@
 }
 
 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
-                                         const SCEV *Step, BinaryOperator *BOp)
+                                         const SCEV *Step, BinaryOperator *BOp,
+                                         SmallVectorImpl<Instruction *> *Casts)
     : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
   assert(IK != IK_NoInduction && "Not an induction");
@@ -705,6 +706,12 @@
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub))) &&
          "Binary opcode should be specified for FP induction");
+
+  if (Casts) {
+    for (auto &Inst : *Casts) {
+      RedundantCasts.push_back(Inst);
+    }
+  }
 }
 
 int InductionDescriptor::getConsecutiveDirection() const {
@@ -870,13 +877,27 @@
     return false;
   }
 
-  return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
+  // Record any cast instructions that participate in the induction update.
+  const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
+  // If we started from an UnknownSCEV, and managed to build an AddRecurrence
+  // only after enabling Assume with PSCEV, this means we may have encountered
+  // cast instructions that required adding a runtime check in order to
+  // guarantee the correctness of the AddRecurrence representation of the
+  // induction.
+  if (PhiScev != AR && SymbolicPhi) {
+    SmallVector<Instruction *, 2> Casts;
+    // Return from inside the 'if' so that Casts is still in scope at the
+    // point of use; escaping its address past this block would leave a
+    // dangling pointer.
+    if (PSE.getSE()->getCastsForInductionPHI(SymbolicPhi, AR, Casts))
+      return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
+  }
+
+  return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
 }
 
-bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
-                                         ScalarEvolution *SE,
-                                         InductionDescriptor &D,
-                                         const SCEV *Expr) {
+bool InductionDescriptor::isInductionPHI(
+    PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
+    InductionDescriptor &D, const SCEV *Expr,
+    SmallVectorImpl<Instruction *> *CastsToIgnore) {
   Type *PhiTy = Phi->getType();
   // We only handle integer and pointer inductions variables.
   if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
@@ -908,7 +929,8 @@
     return false;
 
   if (PhiTy->isIntegerTy()) {
-    D = InductionDescriptor(StartValue, IK_IntInduction, Step);
+    D = InductionDescriptor(StartValue, IK_IntInduction, Step, nullptr,
+                            CastsToIgnore);
     return true;
   }
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -604,6 +604,20 @@
   /// Returns true if we should generate a scalar version of \p IV.
   bool needsScalarInduction(Instruction *IV) const;
 
+  /// If there is a cast involved in the induction variable \p ID, which
+  /// should be ignored in the vectorized loop body, this function records the
+  /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the
+  /// cast. We have already proved that the casted Phi is equal to the uncasted
+  /// Phi in the vectorized loop (under a runtime guard), and therefore
+  /// there is no need to vectorize the cast - the same value can be used in
+  /// the vector loop for both the Phi and the cast.
+  /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified;
+  /// otherwise, \p VectorLoopValue is a widened/vectorized value.
+  void recordVectorLoopValueForInductionCast(const InductionDescriptor &ID,
+                                             unsigned Part,
+                                             Value *VectorLoopValue,
+                                             unsigned Lane = UINT_MAX);
+
   /// Generate a shuffle sequence that will reverse the vector Vec.
   virtual Value *reverseVector(Value *Vec);
@@ -1572,6 +1586,11 @@
   /// Returns True if V is an induction variable in this loop.
   bool isInductionVariable(const Value *V);
 
+  /// Returns True if V is a casted induction variable in this loop, one that
+  /// has been proven to be redundant (possibly under runtime guards) and can
+  /// be ignored when creating the vectorized loop body.
+  bool isCastedInductionVariable(Value *V);
+
   /// Returns True if PN is a reduction variable in this loop.
   bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
@@ -1780,6 +1799,12 @@
   /// variables can be pointers.
   InductionList Inductions;
 
+  /// Holds all the casts that participate in the update chain of the
+  /// induction variables, and that have been proven to be redundant (possibly
+  /// under a runtime guard). These casts can be ignored when creating the
+  /// vectorized loop body.
+  SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
+
   /// Holds the phi nodes that are first-order recurrences.
   RecurrenceSet FirstOrderRecurrences;
@@ -2549,6 +2574,7 @@
   Instruction *LastInduction = VecInd;
   for (unsigned Part = 0; Part < UF; ++Part) {
     VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction);
+    recordVectorLoopValueForInductionCast(II, Part, LastInduction);
     if (isa<TruncInst>(EntryVal))
       addMetadata(LastInduction, EntryVal);
     LastInduction = cast<Instruction>(addFastMathFlag(
@@ -2582,6 +2608,22 @@
   return llvm::any_of(IV->users(), isScalarInst);
 }
 
+void InnerLoopVectorizer::recordVectorLoopValueForInductionCast(
+    const InductionDescriptor &ID, unsigned Part, Value *VectorLoopVal,
+    unsigned Lane) {
+  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
+  if (!Casts.empty()) {
+    // Only the first Cast instruction in the Casts vector is of interest.
+    // The rest of the Casts (if they exist) have no uses outside the
+    // induction update chain itself.
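+    // For example, for the shl/ashr pair matched by
+    // isAshrShlInductionCastSequence, Casts holds [ashr, shl]; only the
+    // ashr may have users outside the cast sequence, so only it needs the
+    // mapping recorded below.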
+    Instruction *CastInst = *Casts.begin();
+    if (Lane < UINT_MAX)
+      VectorLoopValueMap.setScalarValue(CastInst, {Part, Lane}, VectorLoopVal);
+    else
+      VectorLoopValueMap.setVectorValue(CastInst, Part, VectorLoopVal);
+  }
+}
+
 void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
   assert((IV->getType()->isIntegerTy() || IV != OldInduction) &&
          "Primary induction variable must have an integer type");
@@ -2662,6 +2704,7 @@
     Value *EntryPart = getStepVector(Broadcasted, VF * Part, Step,
                                      ID.getInductionOpcode());
     VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
+    recordVectorLoopValueForInductionCast(ID, Part, EntryPart);
     if (Trunc)
       addMetadata(EntryPart, Trunc);
   }
@@ -2769,6 +2812,7 @@
       auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
       auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
       VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add);
+      recordVectorLoopValueForInductionCast(ID, Part, Add, Lane);
     }
   }
 }
@@ -5221,6 +5265,16 @@
     PHINode *Phi, const InductionDescriptor &ID,
     SmallPtrSetImpl<Value *> &AllowedExit) {
   Inductions[Phi] = ID;
+
+  // In case this induction also comes with casts that we know we can ignore
+  // in the vectorized loop body, record them here. Only one of these casts
+  // (the first in Casts, the last in the actual IR sequence) is needed here,
+  // because any other casts, if they exist, are not used outside the cast
+  // sequence and will therefore not be queried.
+  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
+  if (!Casts.empty())
+    InductionCastsToIgnore.insert(*Casts.begin());
+
   Type *PhiTy = Phi->getType();
   const DataLayout &DL = Phi->getModule()->getDataLayout();
@@ -5859,6 +5913,14 @@
   return Inductions.count(PN);
 }
 
+bool LoopVectorizationLegality::isCastedInductionVariable(Value *V) {
+  auto *Inst = dyn_cast<Instruction>(V);
+  if (!Inst)
+    return false;
+
+  return InductionCastsToIgnore.count(Inst);
+}
+
 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
   return FirstOrderRecurrences.count(Phi);
 }
@@ -6932,6 +6994,9 @@
       if (ValuesToIgnore.count(&I))
         continue;
 
+      if (VF > 1 && VecValuesToIgnore.count(&I))
+        continue;
+
       VectorizationCostTy C = getInstructionCost(&I, VF);
 
       // Check if we should override the cost.
@@ -6968,24 +7033,27 @@
 static const SCEV *getAddressAccessSCEV(
               Value *Ptr,
               LoopVectorizationLegality *Legal,
-              ScalarEvolution *SE,
+              PredicatedScalarEvolution &PSE,
              const Loop *TheLoop) {
+
   auto *Gep = dyn_cast<GetElementPtrInst>(Ptr);
   if (!Gep)
     return nullptr;
 
   // We are looking for a gep with all loop invariant indices except for one
   // which should be an induction variable.
+  auto *SE = PSE.getSE();
   unsigned NumOperands = Gep->getNumOperands();
   for (unsigned i = 1; i < NumOperands; ++i) {
     Value *Opd = Gep->getOperand(i);
     if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
-        !Legal->isInductionVariable(Opd))
+        !Legal->isInductionVariable(Opd) &&
+        !Legal->isCastedInductionVariable(Opd))
       return nullptr;
   }
 
   // Now we know we have a GEP ptr, %inv, %ind, %inv. return the Ptr SCEV.
-  return SE->getSCEV(Ptr);
+  return PSE.getSCEV(Ptr);
 }
 
 static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
@@ -7005,7 +7073,7 @@
 
   // Figure out whether the access is strided and get the stride value
   // if it's known in compile time
-  const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, SE, TheLoop);
+  const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop);
 
   // Get the cost of the scalar memory instruction and address computation.
   unsigned Cost = VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
@@ -7559,6 +7627,13 @@
     SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
     VecValuesToIgnore.insert(Casts.begin(), Casts.end());
   }
+  // Ignore type-casting instructions we identified during induction
+  // detection.
+  for (auto &Induction : *Legal->getInductionVars()) {
+    InductionDescriptor &IndDes = Induction.second;
+    const SmallVectorImpl<Instruction *> &Casts = IndDes.getCastInsts();
+    VecValuesToIgnore.insert(Casts.begin(), Casts.end());
+  }
 }
 
 LoopVectorizationCostModel::VectorizationFactor
@@ -7661,6 +7736,18 @@
           return U == Ind || DeadInstructions.count(cast<Instruction>(U));
         }))
       DeadInstructions.insert(IndUpdate);
+
+    // We also record as "dead" the type-casting instructions we had
+    // identified during induction analysis. We don't need any handling for
+    // them in the vectorized loop because we have proven that, under a
+    // proper runtime test guarding the vectorized loop, the value of the phi
+    // and the casted value of the phi are the same. The last instruction in
+    // this casting chain will get its scalar/vector/widened def from the
+    // scalar/vector/widened def of the respective phi node. Any other casts
+    // in the induction def-use chain have no other uses outside the phi
+    // update chain, and will be ignored.
+    InductionDescriptor &IndDes = Induction.second;
+    const SmallVectorImpl<Instruction *> &Casts = IndDes.getCastInsts();
+    DeadInstructions.insert(Casts.begin(), Casts.end());
   }
 }
 
Index: test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
@@ -0,0 +1,211 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8
+; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Given a loop with an induction variable which is being
+; truncated/extended using casts that have been proven to
+; be redundant under a runtime test, we want to make sure
+; that these casts do not get vectorized/scalarized/widened.
+; This is the case for inductions whose SCEV expression is
+; of the form "ExtTrunc(%phi) + %step", where "ExtTrunc"
+; can be a result of the IR sequences we check below.
+;
+; See also pr30654.
+;
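+; Note: the vector loop produced for each of these functions is expected to
+; be guarded by a runtime SCEV overflow (wrap) check added by PSCEV; the
+; CHECK lines below only verify that the redundant casts are gone from the
+; vector body.
+;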
+
+; Case1: Check the following induction pattern:
+;
+;  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+;  %sext = shl i32 %p.09, 24
+;  %conv = ashr exact i32 %sext, 24
+;  %add = add nsw i32 %conv, %step
+;
+; This is the case in the following code:
+;
+; void doit1(int n, int step) {
+;   int i;
+;   char p = 0;
+;   for (i = 0; i < n; i++) {
+;     a[i] = p;
+;     p = p + step;
+;   }
+; }
+;
+; The "ExtTrunc" IR sequence here is:
+;  "%sext = shl i32 %p.09, 24"
+;  "%conv = ashr exact i32 %sext, 24"
+; We check that it does not appear in the vector loop body, whether
+; we vectorize or scalarize the induction.
+; In the case of widened induction, this means that the induction phi
+; is directly used, without shl/ashr on the way.
+
+; VF8-LABEL: @doit1
+; VF8: vector.body:
+; VF8: %vec.ind = phi <8 x i32>
+; VF8: store <8 x i32> %vec.ind
+; VF8: middle.block:
+
+; VF1-LABEL: @doit1
+; VF1: vector.body:
+; VF1-NOT: %[[TEST:[a-zA-Z0-9.]+]] = shl i32
+; VF1: middle.block:
+
+@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16
+
+define void @doit1(i32 %n, i32 %step) {
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+  %sext = shl i32 %p.09, 24
+  %conv = ashr exact i32 %sext, 24
+  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %add = add nsw i32 %conv, %step
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; Case2: Another variant of the above pattern is where the induction variable
+; is used only for address computation (i.e. it is a GEP index) and therefore
+; the induction is not vectorized but rather only the step is widened.
+;
+; This is the case in the following code, where the induction variable 'w_ix'
+; is only used to access the array 'in':
+;
+; void doit2(int *in, int *out, size_t size, size_t step)
+; {
+;   int w_ix = 0;
+;   for (size_t offset = 0; offset < size; ++offset)
+;   {
+;     int w = in[w_ix];
+;     out[offset] = w;
+;     w_ix += step;
+;   }
+; }
+;
+; The "ExtTrunc" IR sequence here is similar to the previous case:
+;  "%sext = shl i64 %w_ix.011, 32
+;   %idxprom = ashr exact i64 %sext, 32"
+; We check that it does not appear in the vector loop body, whether
+; we widen or scalarize the induction.
+; In the case of widened induction, this means that the induction phi
+; is directly used, without shl/ashr on the way.
+
+; VF8-LABEL: @doit2
+; VF8: vector.body:
+; VF8: %vec.ind = phi <8 x i64>
+; VF8: %{{.*}} = extractelement <8 x i64> %vec.ind
+; VF8: middle.block:
+
+; VF1-LABEL: @doit2
+; VF1: vector.body:
+; VF1-NOT: %[[TEST:[a-zA-Z0-9.]+]] = shl i64
+; VF1: middle.block:
+;
+
+define void @doit2(i32* nocapture readonly %in, i32* nocapture %out, i64 %size, i64 %step) {
+entry:
+  %cmp9 = icmp eq i64 %size, 0
+  br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph
+
+for.body.lr.ph:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %w_ix.011 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+  %offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %sext = shl i64 %w_ix.011, 32
+  %idxprom = ashr exact i64 %sext, 32
+  %arrayidx = getelementptr inbounds i32, i32* %in, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %out, i64 %offset.010
+  store i32 %0, i32* %arrayidx1, align 4
+  %add = add i64 %idxprom, %step
+  %inc = add nuw i64 %offset.010, 1
+  %exitcond = icmp eq i64 %inc, %size
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+; Case3: Lastly, check the following induction pattern:
+;
+;  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+;  %conv = and i32 %p.09, 255
+;  %add = add nsw i32 %conv, %step
+;
+; This is the case in the following code:
+;
+; int a[N];
+; void doit3(int n, int step) {
+;   int i;
+;   unsigned char p = 0;
+;   for (i = 0; i < n; i++) {
+;     a[i] = p;
+;     p = p + step;
+;   }
+; }
+;
+; The "ExtTrunc" IR sequence here is:
+;  "%conv = and i32 %p.09, 255".
+; We check that it does not appear in the vector loop body, whether
+; we vectorize or scalarize the induction.
+
+; VF8-LABEL: @doit3
+; VF8: vector.body:
+; VF8: %vec.ind = phi <8 x i32>
+; VF8: store <8 x i32> %vec.ind
+; VF8: middle.block:
+
+; VF1-LABEL: @doit3
+; VF1: vector.body:
+; VF1-NOT: %{{.*}} = and i32
+; VF1: middle.block:
+
+define void @doit3(i32 %n, i32 %step) {
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+  %conv = and i32 %p.09, 255
+  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %add = add nsw i32 %conv, %step
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
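
A minimal usage sketch (not part of the patch): how a client transform might
consume the new CastsToIgnore output, given a loop-header phi Phi in loop L
and a PredicatedScalarEvolution PSE for that loop. Only the APIs added above
are used; the WidenMap name is an illustrative stand-in for the client's own
value map.

  InductionDescriptor ID;
  if (InductionDescriptor::isInductionPHI(Phi, L, PSE, ID)) {
    // These casts were proven redundant under PSE's predicates. A client
    // that emits the corresponding runtime check can reuse the value it
    // computed for the phi as the value of each cast, instead of widening
    // the cast itself.
    for (Instruction *CastInst : ID.getCastInsts())
      WidenMap[CastInst] = WidenMap[Phi]; // illustrative value map
  }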