Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -355,10 +355,9 @@ /// element. virtual Value *getBroadcastInstrs(Value *V); - /// This function adds 0, 1, 2 ... to each vector element, starting at zero. - /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...). - /// The sequence starts at StartIndex. - virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate); + /// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...) + /// to each vector element of Val. + virtual Value *getConsecutiveVector(Value* Val, int StartIdx, Value *Step); /// When we go over instructions in the basic block we rely on previous /// values within the current basic block or on loop invariant values. @@ -479,7 +478,7 @@ bool IfPredicateStore = false) override; void vectorizeMemoryInstruction(Instruction *Instr) override; Value *getBroadcastInstrs(Value *V) override; - Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate) override; + Value *getConsecutiveVector(Value* Val, int StartIdx, Value *Step) override; Value *reverseVector(Value *Vec) override; }; @@ -603,10 +602,8 @@ /// This enum represents the kinds of inductions that we support. enum InductionKind { IK_NoInduction, ///< Not an induction variable. - IK_IntInduction, ///< Integer induction variable. Step = 1. - IK_ReverseIntInduction, ///< Reverse int induction variable. Step = -1. - IK_PtrInduction, ///< Pointer induction var. Step = sizeof(elem). - IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem). + IK_IntInduction, ///< Integer induction variable. Step = C. + IK_PtrInduction ///< Pointer induction var. Step = C * sizeof(elem). }; // This enum represents the kind of minmax reduction. @@ -696,12 +693,60 @@ /// A struct for saving information about induction variables. struct InductionInfo { - InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} - InductionInfo() : StartValue(nullptr), IK(IK_NoInduction) {} + InductionInfo(Value *Start, InductionKind K, ConstantInt *Step) + : StartValue(Start), IK(K), StepValue(Step) { + assert(IK != IK_NoInduction && "Not an induction"); + assert(StartValue && "StartValue is null"); + assert(StepValue && "StepValue is null"); + assert((IK != IK_IntInduction || + StartValue->getType() == StepValue->getType()) && + "StartValue type does not match StepValue type"); + assert((IK != IK_PtrInduction || + StartValue->getType()->isPointerTy()) && + "StartValue is not a pointer"); + assert((IK != IK_PtrInduction || + StepValue->getType()->isIntegerTy()) && + "StepValue is not an integer"); + } + InductionInfo() + : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {} + + /// Return true if StepValue is a negative constant. + bool isReverse() const { return StepValue && StepValue->isNegative(); } + + /// Compute the transformed value of Index at offset StartValue using step + /// StepValue. + Value *transform(IRBuilder<> &B, Value *Index) const { + switch (IK) { + case IK_IntInduction: + assert(Index->getType() == StartValue->getType() && + "Index type does not match StartValue type"); + if (StepValue->isMinusOne()) + return B.CreateSub(StartValue, Index); + if (!StepValue->isOne()) + Index = B.CreateMul(Index, StepValue); + return B.CreateAdd(StartValue, Index); + + case IK_PtrInduction: + if (StepValue->isMinusOne()) + Index = B.CreateNeg(Index); + else if (!StepValue->isOne()) + Index = B.CreateMul(Index, StepValue); + return B.CreateGEP(StartValue, Index); + + case IK_NoInduction: + break; + default: + llvm_unreachable("Unknown induction"); + } + } + /// Start value. TrackingVH StartValue; /// Induction kind. InductionKind IK; + /// Step value. + ConstantInt *StepValue; }; /// ReductionList contains the reduction descriptors for all @@ -808,7 +853,7 @@ ReductionInstDesc &Prev); /// Returns the induction kind of Phi. This function may return NoInduction /// if the PHI is not an induction variable. - InductionKind isInductionVariable(PHINode *Phi); + InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue); /// \brief Collect memory access with loop invariant strides. /// @@ -1573,10 +1618,12 @@ } Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx, - bool Negate) { + Value* Step) { assert(Val->getType()->isVectorTy() && "Must be a vector"); assert(Val->getType()->getScalarType()->isIntegerTy() && "Elem must be an integer"); + assert(Step->getType() == Val->getType()->getScalarType() && + "Step has wrong type"); // Create the types. Type *ITy = Val->getType()->getScalarType(); VectorType *Ty = cast(Val->getType()); @@ -1584,15 +1631,16 @@ SmallVector Indices; // Create a vector of consecutive numbers from zero to VF. - for (int i = 0; i < VLen; ++i) { - int64_t Idx = Negate ? (-i) : i; - Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate)); - } + for (int i = 0; i < VLen; ++i) + Indices.push_back(ConstantInt::get(ITy, StartIdx + i)); // Add the consecutive indices to the vector value. Constant *Cv = ConstantVector::get(Indices); assert(Cv->getType() == Val->getType() && "Invalid consecutive vec"); - return Builder.CreateAdd(Val, Cv, "induction"); + Step = Builder.CreateVectorSplat(VLen, Step); + assert(Step->getType() == Val->getType() && "Invalid step vec"); + Step = Builder.CreateMul(Cv, Step); + return Builder.CreateAdd(Val, Step, "induction"); } /// \brief Find the operand of the GEP that should be checked for consecutive @@ -1630,10 +1678,7 @@ PHINode *Phi = dyn_cast_or_null(Ptr); if (Phi && Inductions.count(Phi)) { InductionInfo II = Inductions[Phi]; - if (IK_PtrInduction == II.IK) - return 1; - else if (IK_ReversePtrInduction == II.IK) - return -1; + return II.isReverse() ? -1 : 1; } GetElementPtrInst *Gep = dyn_cast_or_null(Ptr); @@ -1658,10 +1703,7 @@ return 0; InductionInfo II = Inductions[Phi]; - if (IK_PtrInduction == II.IK) - return 1; - else if (IK_ReversePtrInduction == II.IK) - return -1; + return II.isReverse() ? -1 : 1; } unsigned InductionOperand = getGEPInductionOperand(DL, Gep); @@ -2461,33 +2503,13 @@ Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown, II.StartValue->getType(), "cast.crd"); - EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue , "ind.end"); - break; - } - case LoopVectorizationLegality::IK_ReverseIntInduction: { - // Convert the CountRoundDown variable to the PHI size. - Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown, - II.StartValue->getType(), - "cast.crd"); - // Handle reverse integer induction counter. - EndValue = BypassBuilder.CreateSub(II.StartValue, CRD, "rev.ind.end"); + EndValue = II.transform(BypassBuilder, CRD); + EndValue->setName("ind.end"); break; } case LoopVectorizationLegality::IK_PtrInduction: { - // For pointer induction variables, calculate the offset using - // the end index. - EndValue = BypassBuilder.CreateGEP(II.StartValue, CountRoundDown, - "ptr.ind.end"); - break; - } - case LoopVectorizationLegality::IK_ReversePtrInduction: { - // The value at the end of the loop for the reverse pointer is calculated - // by creating a GEP with a negative index starting from the start value. - Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0); - Value *NegIdx = BypassBuilder.CreateSub(Zero, CountRoundDown, - "rev.ind.end"); - EndValue = BypassBuilder.CreateGEP(II.StartValue, NegIdx, - "rev.ptr.ind.end"); + EndValue = II.transform(BypassBuilder, CountRoundDown); + EndValue->setName("ptr.ind.end"); break; } }// end of case @@ -3121,80 +3143,43 @@ Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx"); NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy); - Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx, - "offset.idx"); + Broadcasted = II.transform(Builder, NormalizedIdx); + Broadcasted->setName(II.isReverse() ? "reverse.idx" : "offset.idx"); } Broadcasted = getBroadcastInstrs(Broadcasted); // After broadcasting the induction variable we need to make the vector // consecutive by adding 0, 1, 2, etc. for (unsigned part = 0; part < UF; ++part) - Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false); + Entry[part] = getConsecutiveVector(Broadcasted, VF * part, + II.StepValue); return; } - case LoopVectorizationLegality::IK_ReverseIntInduction: case LoopVectorizationLegality::IK_PtrInduction: - case LoopVectorizationLegality::IK_ReversePtrInduction: - // Handle reverse integer and pointer inductions. - Value *StartIdx = ExtendedIdx; - // This is the normalized GEP that starts counting at zero. - Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx, - "normalized.idx"); - - // Handle the reverse integer induction variable case. - if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) { - IntegerType *DstTy = cast(II.StartValue->getType()); - Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy, - "resize.norm.idx"); - Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI, - "reverse.idx"); - - // This is a new value so do not hoist it out. - Value *Broadcasted = getBroadcastInstrs(ReverseInd); - // After broadcasting the induction variable we need to make the - // vector consecutive by adding ... -3, -2, -1, 0. - for (unsigned part = 0; part < UF; ++part) - Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part, - true); - return; - } - // Handle the pointer induction variable case. assert(P->getType()->isPointerTy() && "Unexpected type."); - - // Is this a reverse induction ptr or a consecutive induction ptr. - bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction == - II.IK); - + // This is the normalized GEP that starts counting at zero. + Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx, + "normalized.idx"); // This is the vector of results. Notice that we don't generate // vector geps because scalar geps result in better code. for (unsigned part = 0; part < UF; ++part) { if (VF == 1) { - int EltIndex = (part) * (Reverse ? -1 : 1); + int EltIndex = part; Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex); - Value *GlobalIdx; - if (Reverse) - GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx"); - else - GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx"); - - Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx, - "next.gep"); + Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx); + Value *SclrGep = II.transform(Builder, GlobalIdx); + SclrGep->setName("next.gep"); Entry[part] = SclrGep; continue; } Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); for (unsigned int i = 0; i < VF; ++i) { - int EltIndex = (i + part * VF) * (Reverse ? -1 : 1); + int EltIndex = i + part * VF; Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex); - Value *GlobalIdx; - if (!Reverse) - GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx"); - else - GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx"); - - Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx, - "next.gep"); + Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx); + Value *SclrGep = II.transform(Builder, GlobalIdx); + SclrGep->setName("next.gep"); VecVal = Builder.CreateInsertElement(VecVal, SclrGep, Builder.getInt32(i), "insert.gep"); @@ -3214,7 +3199,7 @@ // Nothing to do for PHIs and BR, since we already took care of the // loop control flow instructions. continue; - case Instruction::PHI:{ + case Instruction::PHI: { // Vectorize PHINodes. widenPHIInstruction(it, Entry, UF, VF, PV); continue; @@ -3335,8 +3320,12 @@ Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction, CI->getType()); Value *Broadcasted = getBroadcastInstrs(ScalarCast); + LoopVectorizationLegality::InductionInfo II = + Legal->getInductionVars()->lookup(OldInduction); + Constant *Step = ConstantInt::getSigned(CI->getType(), + II.StepValue->getSExtValue()); for (unsigned Part = 0; Part < UF; ++Part) - Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part, false); + Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part, Step); propagateMetadata(Entry, it); break; } @@ -3674,8 +3663,9 @@ // This is the value coming from the preheader. Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); + ConstantInt *StepValue = 0; // Check if this is an induction variable. - InductionKind IK = isInductionVariable(Phi); + InductionKind IK = isInductionVariable(Phi, StepValue); if (IK_NoInduction != IK) { // Get the widest type. @@ -3685,7 +3675,7 @@ WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy); // Int inductions are special because we only allow one IV. - if (IK == IK_IntInduction) { + if (IK == IK_IntInduction && !StepValue->isNegative()) { // Use the phi node with the widest type as induction. Use the last // one if there are multiple (no good reason for doing this other // than it is expedient). @@ -3694,7 +3684,7 @@ } DEBUG(dbgs() << "LV: Found an induction variable.\n"); - Inductions[Phi] = InductionInfo(StartValue, IK); + Inductions[Phi] = InductionInfo(StartValue, IK, StepValue); // Until we explicitly handle the case of an induction variable with // an outside loop user we have to give up vectorizing this loop. @@ -5235,7 +5225,10 @@ } LoopVectorizationLegality::InductionKind -LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { +LoopVectorizationLegality::isInductionVariable(PHINode *Phi, + ConstantInt *&StepValue) { + StepValue = 0; + Type *PhiTy = Phi->getType(); // We only handle integer and pointer inductions variables. if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) @@ -5248,30 +5241,26 @@ DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); return IK_NoInduction; } - const SCEV *Step = AR->getStepRecurrence(*SE); - // Integer inductions need to have a stride of one. - if (PhiTy->isIntegerTy()) { - if (Step->isOne()) - return IK_IntInduction; - if (Step->isAllOnesValue()) - return IK_ReverseIntInduction; - return IK_NoInduction; - } - - // Calculate the pointer stride and check if it is consecutive. + const SCEV *Step = AR->getStepRecurrence(*SE); const SCEVConstant *C = dyn_cast(Step); if (!C) return IK_NoInduction; + + ConstantInt *CV = C->getValue(); - assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); - uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType()); - if (C->getValue()->equalsInt(Size)) - return IK_PtrInduction; - else if (C->getValue()->equalsInt(0 - Size)) - return IK_ReversePtrInduction; + if (PhiTy->isIntegerTy()) { + StepValue = CV; + return IK_IntInduction; + } - return IK_NoInduction; + assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); + int64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType()); + int64_t CVSize = CV->getSExtValue(); + if (CVSize % Size) + return IK_NoInduction; + StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size); + return IK_PtrInduction; } bool LoopVectorizationLegality::isInductionVariable(const Value *V) { @@ -6231,10 +6220,10 @@ } Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx, - bool Negate) { + Value *Step) { // When unrolling and the VF is 1, we only need to add a simple scalar. Type *ITy = Val->getType(); assert(!ITy->isVectorTy() && "Val must be a scalar"); - Constant *C = ConstantInt::get(ITy, StartIdx, Negate); - return Builder.CreateAdd(Val, C, "induction"); + Constant *C = ConstantInt::get(ITy, StartIdx); + return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction"); } Index: test/Transforms/LoopVectorize/const-induction.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/const-induction.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK-LABEL: @ind_plus( +;CHECK: <4 x i32> +define i32 @ind_plus(i32* nocapture readonly %a) #0 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %res.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %i.04 = phi i32 [ 0, %entry ], [ %add.1, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i32 %i.04 + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %res.05 + %add.1 = add nsw i32 %i.04, 2 + %cmp = icmp slt i32 %add.1, 101 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret i32 %add +} + +;CHECK-LABEL: @ind_minus( +;CHECK: <4 x i32> +define i32 @ind_minus(i32* nocapture readonly %a) #0 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %res.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %i.04 = phi i32 [ 100, %entry ], [ %sub, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i32 %i.04 + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %res.05 + %sub = add nsw i32 %i.04, -2 + %cmp = icmp sgt i32 %sub, 0 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret i32 %add +} + Index: test/Transforms/LoopVectorize/gcc-examples.ll =================================================================== --- test/Transforms/LoopVectorize/gcc-examples.ll +++ test/Transforms/LoopVectorize/gcc-examples.ll @@ -388,9 +388,8 @@ ret void } -; Can't vectorize because of reductions. ;CHECK-LABEL: @example13( -;CHECK-NOT: <4 x i32> +;CHECK: <4 x i32> ;CHECK: ret void define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %out) nounwind uwtable ssp { br label %.preheader