Index: ../include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- ../include/llvm/Transforms/Utils/LoopUtils.h +++ ../include/llvm/Transforms/Utils/LoopUtils.h @@ -268,7 +268,7 @@ public: /// Default constructor - creates an invalid induction. InductionDescriptor() - : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {} + : StartValue(nullptr), IK(IK_NoInduction), Step(nullptr) {} /// Get the consecutive direction. Returns: /// 0 - unknown or non-consecutive. @@ -282,25 +282,27 @@ /// For pointer induction, returns StartValue[Index * StepValue]. /// FIXME: The newly created binary instructions should contain nsw/nuw /// flags, which can be found from the original scalar operations. - Value *transform(IRBuilder<> &B, Value *Index) const; + Value *transform(IRBuilder<> &B, Value *Index, ScalarEvolution *SE, + const DataLayout& DL) const; Value *getStartValue() const { return StartValue; } InductionKind getKind() const { return IK; } - ConstantInt *getStepValue() const { return StepValue; } + const SCEV *getStep() const { return Step; } + ConstantInt *getConstIntStepValue() const; static bool isInductionPHI(PHINode *Phi, ScalarEvolution *SE, InductionDescriptor &D); private: /// Private constructor - used by \c isInductionPHI. - InductionDescriptor(Value *Start, InductionKind K, ConstantInt *Step); + InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step); /// Start value. TrackingVH StartValue; /// Induction kind. InductionKind IK; /// Step value. - ConstantInt *StepValue; + const SCEV *Step; }; BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Index: ../lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- ../lib/Transforms/Utils/LoopUtils.cpp +++ ../lib/Transforms/Utils/LoopUtils.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Dominators.h" @@ -653,42 +654,65 @@ } InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, - ConstantInt *Step) - : StartValue(Start), IK(K), StepValue(Step) { + const SCEV *Step) + : StartValue(Start), IK(K), Step(Step) { assert(IK != IK_NoInduction && "Not an induction"); + + // Check Start Value assert(StartValue && "StartValue is null"); - assert(StepValue && !StepValue->isZero() && "StepValue is zero"); assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && "StartValue is not a pointer for pointer induction"); assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && "StartValue is not an integer for integer induction"); - assert(StepValue->getType()->isIntegerTy() && - "StepValue is not an integer"); + + // Check Step + assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && + "Step value is zero"); + + assert((IK != IK_PtrInduction || getConstIntStepValue()) && + "Step value should be constant for pointer induction"); + assert(Step->getType()->isIntegerTy() && "StepValue is not an integer"); } int InductionDescriptor::getConsecutiveDirection() const { - if (StepValue && (StepValue->isOne() || StepValue->isMinusOne())) - return StepValue->getSExtValue(); + ConstantInt *ConstStep = getConstIntStepValue(); + if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) + return ConstStep->getSExtValue(); return 0; } -Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index) const { +ConstantInt *InductionDescriptor::getConstIntStepValue() const { + if (isa(Step)) + return dyn_cast(cast(Step)->getValue()); + return nullptr; +} + +Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, + ScalarEvolution *SE, + const DataLayout& DL) const { + + SCEVExpander Exp(*SE, DL, "induction"); + Value *StepValue = Exp.expandCodeFor(Step, Step->getType(), + &*B.GetInsertPoint()); switch (IK) { case IK_IntInduction: assert(Index->getType() == StartValue->getType() && "Index type does not match StartValue type"); - if (StepValue->isMinusOne()) + if (isa(StepValue) && + cast(StepValue)->isMinusOne()) return B.CreateSub(StartValue, Index); - if (!StepValue->isOne()) + if (!isa(StepValue) || !cast(StepValue)->isOne()) Index = B.CreateMul(Index, StepValue); return B.CreateAdd(StartValue, Index); case IK_PtrInduction: assert(Index->getType() == StepValue->getType() && "Index type does not match StepValue type"); - if (StepValue->isMinusOne()) + assert(isa(StepValue) && + "Expected constant step for pointer induction"); + if (cast(StepValue)->isMinusOne()) Index = B.CreateNeg(Index); - else if (!StepValue->isOne()) + else if (!cast(StepValue)->isOne()) Index = B.CreateMul(Index, StepValue); return B.CreateGEP(nullptr, StartValue, Index); @@ -719,17 +743,22 @@ Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); const SCEV *Step = AR->getStepRecurrence(*SE); // Calculate the pointer stride and check if it is consecutive. - const SCEVConstant *C = dyn_cast(Step); - if (!C) + // The stride may be a constant or a loop invariant integer value. + const SCEVConstant *ConstStep = dyn_cast(Step); + if (!ConstStep && !SE->isLoopInvariant(Step, AR->getLoop())) return false; - ConstantInt *CV = C->getValue(); if (PhiTy->isIntegerTy()) { - D = InductionDescriptor(StartValue, IK_IntInduction, CV); + D = InductionDescriptor(StartValue, IK_IntInduction, Step); return true; } assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); + // Pointer induction should be a constant. + if (!ConstStep) + return false; + + ConstantInt *CV = ConstStep->getValue(); Type *PointerElementType = PhiTy->getPointerElementType(); // The pointer stride cannot be determined if the pointer element type is not // sized. @@ -744,8 +773,8 @@ int64_t CVSize = CV->getSExtValue(); if (CVSize % Size) return false; - auto *StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size); - + auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size, + true /* signed */); D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); return true; } Index: ../lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- ../lib/Transforms/Vectorize/LoopVectorize.cpp +++ ../lib/Transforms/Vectorize/LoopVectorize.cpp @@ -420,6 +420,12 @@ /// to each vector element of Val. The sequence starts at StartIndex. virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step); + /// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...) + /// to each vector element of Val. The sequence starts at StartIndex. + /// Step is a SCEV. In order to get StepValue it takes the existing value + /// from SCEV or creates a new using SCEVExpander. + virtual Value *getStepVector(Value *Val, int StartIdx, const SCEV *Step); + /// When we go over instructions in the basic block we rely on previous /// values within the current basic block or on loop invariant values. /// When we widen (vectorize) values we place them in the map. If the values @@ -604,6 +610,7 @@ void vectorizeMemoryInstruction(Instruction *Instr) override; Value *getBroadcastInstrs(Value *V) override; Value *getStepVector(Value *Val, int StartIdx, Value *Step) override; + Value *getStepVector(Value *Val, int StartIdx, const SCEV *StepSCEV) override; Value *reverseVector(Value *Vec) override; }; @@ -2075,6 +2082,15 @@ } Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, + const SCEV *StepSCEV) { + const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout(); + SCEVExpander Exp(*PSE.getSE(), DL, "induction"); + Value *StepValue = Exp.expandCodeFor(StepSCEV, StepSCEV->getType(), + &*Builder.GetInsertPoint()); + return getStepVector(Val, StartIdx, StepValue); +} + +Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step) { assert(Val->getType()->isVectorTy() && "Must be a vector"); assert(Val->getType()->getScalarType()->isIntegerTy() && @@ -3131,9 +3147,10 @@ } else { IRBuilder<> B(LoopBypassBlocks.back()->getTerminator()); Value *CRD = B.CreateSExtOrTrunc(CountRoundDown, - II.getStepValue()->getType(), + II.getStep()->getType(), "cast.crd"); - EndValue = II.transform(B, CRD); + const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout(); + EndValue = II.transform(B, CRD, PSE.getSE(), DL); EndValue->setName("ind.end"); } @@ -4024,6 +4041,7 @@ "Not an induction variable"); InductionDescriptor II = Legal->getInductionVars()->lookup(P); + const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout(); // FIXME: The newly created binary instructions should contain nsw/nuw flags, // which can be found from the original scalar operations. @@ -4038,14 +4056,14 @@ Value *V = Induction; if (P != OldInduction) { V = Builder.CreateSExtOrTrunc(Induction, P->getType()); - V = II.transform(Builder, V); + V = II.transform(Builder, V, PSE.getSE(), DL); V->setName("offset.idx"); } Value *Broadcasted = getBroadcastInstrs(V); // After broadcasting the induction variable we need to make the vector // consecutive by adding 0, 1, 2, etc. for (unsigned part = 0; part < UF; ++part) - Entry[part] = getStepVector(Broadcasted, VF * part, II.getStepValue()); + Entry[part] = getStepVector(Broadcasted, VF * part, II.getStep()); return; } case InductionDescriptor::IK_PtrInduction: @@ -4053,7 +4071,7 @@ assert(P->getType()->isPointerTy() && "Unexpected type."); // This is the normalized GEP that starts counting at zero. Value *PtrInd = Induction; - PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType()); + PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStep()->getType()); // This is the vector of results. Notice that we don't generate // vector geps because scalar geps result in better code. for (unsigned part = 0; part < UF; ++part) { @@ -4061,7 +4079,7 @@ int EltIndex = part; Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex); Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); - Value *SclrGep = II.transform(Builder, GlobalIdx); + Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL); SclrGep->setName("next.gep"); Entry[part] = SclrGep; continue; @@ -4072,7 +4090,7 @@ int EltIndex = i + part * VF; Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex); Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); - Value *SclrGep = II.transform(Builder, GlobalIdx); + Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL); SclrGep->setName("next.gep"); VecVal = Builder.CreateInsertElement(VecVal, SclrGep, Builder.getInt32(i), @@ -4209,23 +4227,26 @@ case Instruction::BitCast: { CastInst *CI = dyn_cast(it); setDebugLocFromInst(Builder, &*it); - /// Optimize the special case where the source is the induction - /// variable. Notice that we can only optimize the 'trunc' case + /// Optimize the special case where the source is a constant integer + /// induction variable. Notice that we can only optimize the 'trunc' case /// because: a. FP conversions lose precision, b. sext/zext may wrap, /// c. other casts depend on pointer size. + if (CI->getOperand(0) == OldInduction && it->getOpcode() == Instruction::Trunc) { - Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction, - CI->getType()); - Value *Broadcasted = getBroadcastInstrs(ScalarCast); InductionDescriptor II = - Legal->getInductionVars()->lookup(OldInduction); - Constant *Step = ConstantInt::getSigned( - CI->getType(), II.getStepValue()->getSExtValue()); - for (unsigned Part = 0; Part < UF; ++Part) - Entry[Part] = getStepVector(Broadcasted, VF * Part, Step); - addMetadata(Entry, &*it); - break; + Legal->getInductionVars()->lookup(OldInduction); + if (auto StepValue = II.getConstIntStepValue()) { + StepValue = ConstantInt::getSigned(cast(CI->getType()), + StepValue->getSExtValue()); + Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction, + CI->getType()); + Value *Broadcasted = getBroadcastInstrs(ScalarCast); + for (unsigned Part = 0; Part < UF; ++Part) + Entry[Part] = getStepVector(Broadcasted, VF * Part, StepValue); + addMetadata(Entry, &*it); + break; + } } /// Vectorize casts. Type *DestTy = (VF == 1) ? CI->getType() : @@ -4636,7 +4657,8 @@ // Int inductions are special because we only allow one IV. if (ID.getKind() == InductionDescriptor::IK_IntInduction && - ID.getStepValue()->isOne() && + ID.getConstIntStepValue() && + ID.getConstIntStepValue()->isOne() && isa(ID.getStartValue()) && cast(ID.getStartValue())->isNullValue()) { // Use the phi node with the widest type as induction. Use the last @@ -6201,6 +6223,15 @@ return V; } +Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, + const SCEV *StepSCEV) { + const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout(); + SCEVExpander Exp(*PSE.getSE(), DL, "induction"); + Value *StepValue = Exp.expandCodeFor(StepSCEV, StepSCEV->getType(), + &*Builder.GetInsertPoint()); + return getStepVector(Val, StartIdx, StepValue); +} + Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) { // When unrolling and the VF is 1, we only need to add a simple scalar. Type *ITy = Val->getType(); Index: ../test/Transforms/LoopVectorize/induction-step.ll =================================================================== --- ../test/Transforms/LoopVectorize/induction-step.ll +++ ../test/Transforms/LoopVectorize/induction-step.ll @@ -0,0 +1,124 @@ +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=8 -S | FileCheck %s + +; int int_inc; +; +;int induction_with_global(int init, int *restrict A, int N) { +; int x = init; +; for (int i=0;i undef, i32 %[[INT_INC]], i32 0 +; CHECK: %[[VAR2:.*]] = shufflevector <8 x i32> %[[VAR1]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK: mul <8 x i32> , %[[VAR2]] + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + + +@int_inc = common global i32 0, align 4 + +define i32 @induction_with_global(i32 %init, i32* noalias nocapture %A, i32 %N) { +entry: + %cmp4 = icmp sgt i32 %N, 0 + br i1 %cmp4, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %0 = load i32, i32* @int_inc, align 4 + %1 = mul i32 %0, %N + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %x.05 = phi i32 [ %init, %for.body.lr.ph ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %x.05, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %x.05 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.body + %2 = add i32 %1, %init + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %x.0.lcssa = phi i32 [ %init, %entry ], [ %2, %for.end.loopexit ] + ret i32 %x.0.lcssa +} + + +;int induction_with_loop_inv(int init, int *restrict A, int N, int M) { +; int x = init; +; for (int j = 0; j < M; j++) { +; for (int i=0; i undef, i32 %[[INDVAR1]], i32 0 +; CHECK: %[[VAR2:.*]] = shufflevector <8 x i32> %[[VAR1]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK: mul <8 x i32> , %[[VAR2]] + +define i32 @induction_with_loop_inv(i32 %init, i32* noalias nocapture %A, i32 %N, i32 %M) { +entry: + %cmp10 = icmp sgt i32 %M, 0 + br i1 %cmp10, label %for.cond1.preheader.lr.ph, label %for.end6 + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp27 = icmp sgt i32 %N, 0 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc4, %for.cond1.preheader.lr.ph + %indvars.iv15 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next16, %for.inc4 ] + %j.012 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc5, %for.inc4 ] + %x.011 = phi i32 [ %init, %for.cond1.preheader.lr.ph ], [ %x.1.lcssa, %for.inc4 ] + br i1 %cmp27, label %for.body3.preheader, label %for.inc4 + +for.body3.preheader: ; preds = %for.cond1.preheader + br label %for.body3 + +for.body3: ; preds = %for.body3.preheader, %for.body3 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.body3.preheader ] + %x.18 = phi i32 [ %add, %for.body3 ], [ %x.011, %for.body3.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %x.18, i32* %arrayidx, align 4 + %add = add nsw i32 %x.18, %j.012 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %N + br i1 %exitcond, label %for.inc4.loopexit, label %for.body3 + +for.inc4.loopexit: ; preds = %for.body3 + %0 = add i32 %x.011, %indvars.iv15 + br label %for.inc4 + +for.inc4: ; preds = %for.inc4.loopexit, %for.cond1.preheader + %x.1.lcssa = phi i32 [ %x.011, %for.cond1.preheader ], [ %0, %for.inc4.loopexit ] + %inc5 = add nuw nsw i32 %j.012, 1 + %indvars.iv.next16 = add i32 %indvars.iv15, %N + %exitcond17 = icmp eq i32 %inc5, %M + br i1 %exitcond17, label %for.end6.loopexit, label %for.cond1.preheader + +for.end6.loopexit: ; preds = %for.inc4 + %x.1.lcssa.lcssa = phi i32 [ %x.1.lcssa, %for.inc4 ] + br label %for.end6 + +for.end6: ; preds = %for.end6.loopexit, %entry + %x.0.lcssa = phi i32 [ %init, %entry ], [ %x.1.lcssa.lcssa, %for.end6.loopexit ] + ret i32 %x.0.lcssa +}