Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -355,10 +355,9 @@ /// element. virtual Value *getBroadcastInstrs(Value *V); - /// This function adds 0, 1, 2 ... to each vector element, starting at zero. - /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...). - /// The sequence starts at StartIndex. - virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate); + /// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...) + /// to each vector element of Val. The sequence starts at StartIndex. + virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step); /// When we go over instructions in the basic block we rely on previous /// values within the current basic block or on loop invariant values. @@ -479,7 +478,7 @@ bool IfPredicateStore = false) override; void vectorizeMemoryInstruction(Instruction *Instr) override; Value *getBroadcastInstrs(Value *V) override; - Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate) override; + Value *getStepVector(Value *Val, int StartIdx, Value *Step) override; Value *reverseVector(Value *Vec) override; }; @@ -603,11 +602,9 @@ /// This enum represents the kinds of inductions that we support. enum InductionKind { - IK_NoInduction, ///< Not an induction variable. - IK_IntInduction, ///< Integer induction variable. Step = 1. - IK_ReverseIntInduction, ///< Reverse int induction variable. Step = -1. - IK_PtrInduction, ///< Pointer induction var. Step = sizeof(elem). - IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem). + IK_NoInduction, ///< Not an induction variable. + IK_IntInduction, ///< Integer induction variable. Step = C. + IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem). }; // This enum represents the kind of minmax reduction. 
@@ -697,12 +694,67 @@ /// A struct for saving information about induction variables. struct InductionInfo { - InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} - InductionInfo() : StartValue(nullptr), IK(IK_NoInduction) {} + InductionInfo(Value *Start, InductionKind K, ConstantInt *Step) + : StartValue(Start), IK(K), StepValue(Step) { + assert(IK != IK_NoInduction && "Not an induction"); + assert(StartValue && "StartValue is null"); + assert(StepValue && !StepValue->isZero() && "StepValue is zero"); + assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && + "StartValue is not a pointer for pointer induction"); + assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && + "StartValue is not an integer for integer induction"); + assert(StepValue->getType()->isIntegerTy() && + "StepValue is not an integer"); + } + InductionInfo() + : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {} + + /// Get the consecutive direction. Returns: + /// 0 - unknown or non-consecutive. + /// 1 - consecutive and increasing. + /// -1 - consecutive and decreasing. + int getConsecutiveDirection() const { + if (StepValue && (StepValue->isOne() || StepValue->isMinusOne())) + return StepValue->getSExtValue(); + return 0; + } + + /// Compute the transformed value of Index at offset StartValue using step + /// StepValue. + /// For integer induction, returns StartValue + Index * StepValue. + /// For pointer induction, returns StartValue[Index * StepValue]. + /// FIXME: The newly created binary instructions should contain nsw/nuw + /// flags, which can be found from the original scalar operations. 
+ Value *transform(IRBuilder<> &B, Value *Index) const { + switch (IK) { + case IK_IntInduction: + assert(Index->getType() == StartValue->getType() && + "Index type does not match StartValue type"); + if (StepValue->isMinusOne()) + return B.CreateSub(StartValue, Index); + if (!StepValue->isOne()) + Index = B.CreateMul(Index, StepValue); + return B.CreateAdd(StartValue, Index); + + case IK_PtrInduction: + if (StepValue->isMinusOne()) + Index = B.CreateNeg(Index); + else if (!StepValue->isOne()) + Index = B.CreateMul(Index, StepValue); + return B.CreateGEP(StartValue, Index); + + case IK_NoInduction: + default: + return nullptr; + } + } + /// Start value. TrackingVH StartValue; /// Induction kind. InductionKind IK; + /// Step value. + ConstantInt *StepValue; }; /// ReductionList contains the reduction descriptors for all @@ -822,9 +874,9 @@ /// pattern corresponding to a min(X, Y) or max(X, Y). static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I, ReductionInstDesc &Prev); - /// Returns the induction kind of Phi. This function may return NoInduction - /// if the PHI is not an induction variable. - InductionKind isInductionVariable(PHINode *Phi); + /// Returns the induction kind of Phi and record the step. This function may + /// return NoInduction if the PHI is not an induction variable. + InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue); /// \brief Collect memory access with loop invariant strides. /// @@ -1592,11 +1644,13 @@ return Shuf; } -Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx, - bool Negate) { +Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, + Value *Step) { assert(Val->getType()->isVectorTy() && "Must be a vector"); assert(Val->getType()->getScalarType()->isIntegerTy() && "Elem must be an integer"); + assert(Step->getType() == Val->getType()->getScalarType() && + "Step has wrong type"); // Create the types. 
Type *ITy = Val->getType()->getScalarType(); VectorType *Ty = cast(Val->getType()); @@ -1604,15 +1658,18 @@ SmallVector Indices; // Create a vector of consecutive numbers from zero to VF. - for (int i = 0; i < VLen; ++i) { - int64_t Idx = Negate ? (-i) : i; - Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate)); - } + for (int i = 0; i < VLen; ++i) + Indices.push_back(ConstantInt::get(ITy, StartIdx + i)); // Add the consecutive indices to the vector value. Constant *Cv = ConstantVector::get(Indices); assert(Cv->getType() == Val->getType() && "Invalid consecutive vec"); - return Builder.CreateAdd(Val, Cv, "induction"); + Step = Builder.CreateVectorSplat(VLen, Step); + assert(Step->getType() == Val->getType() && "Invalid step vec"); + // FIXME: The newly created binary instructions should contain nsw/nuw flags, + // which can be found from the original scalar operations. + Step = Builder.CreateMul(Cv, Step); + return Builder.CreateAdd(Val, Step, "induction"); } /// \brief Find the operand of the GEP that should be checked for consecutive @@ -1650,10 +1707,7 @@ PHINode *Phi = dyn_cast_or_null(Ptr); if (Phi && Inductions.count(Phi)) { InductionInfo II = Inductions[Phi]; - if (IK_PtrInduction == II.IK) - return 1; - else if (IK_ReversePtrInduction == II.IK) - return -1; + return II.getConsecutiveDirection(); } GetElementPtrInst *Gep = dyn_cast_or_null(Ptr); @@ -1678,10 +1732,7 @@ return 0; InductionInfo II = Inductions[Phi]; - if (IK_PtrInduction == II.IK) - return 1; - else if (IK_ReversePtrInduction == II.IK) - return -1; + return II.getConsecutiveDirection(); } unsigned InductionOperand = getGEPInductionOperand(DL, Gep); @@ -2496,33 +2547,13 @@ Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown, II.StartValue->getType(), "cast.crd"); - EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue , "ind.end"); - break; - } - case LoopVectorizationLegality::IK_ReverseIntInduction: { - // Convert the CountRoundDown variable to the PHI size. 
- Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown, - II.StartValue->getType(), - "cast.crd"); - // Handle reverse integer induction counter. - EndValue = BypassBuilder.CreateSub(II.StartValue, CRD, "rev.ind.end"); + EndValue = II.transform(BypassBuilder, CRD); + EndValue->setName("ind.end"); break; } case LoopVectorizationLegality::IK_PtrInduction: { - // For pointer induction variables, calculate the offset using - // the end index. - EndValue = BypassBuilder.CreateGEP(II.StartValue, CountRoundDown, - "ptr.ind.end"); - break; - } - case LoopVectorizationLegality::IK_ReversePtrInduction: { - // The value at the end of the loop for the reverse pointer is calculated - // by creating a GEP with a negative index starting from the start value. - Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0); - Value *NegIdx = BypassBuilder.CreateSub(Zero, CountRoundDown, - "rev.ind.end"); - EndValue = BypassBuilder.CreateGEP(II.StartValue, NegIdx, - "rev.ptr.ind.end"); + EndValue = II.transform(BypassBuilder, CountRoundDown); + EndValue->setName("ptr.ind.end"); break; } }// end of case @@ -3137,6 +3168,8 @@ LoopVectorizationLegality::InductionInfo II = Legal->getInductionVars()->lookup(P); + // FIXME: The newly created binary instructions should contain nsw/nuw flags, + // which can be found from the original scalar operations. switch (II.IK) { case LoopVectorizationLegality::IK_NoInduction: llvm_unreachable("Unknown induction"); @@ -3154,80 +3187,42 @@ Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx"); NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy); - Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx, - "offset.idx"); + Broadcasted = II.transform(Builder, NormalizedIdx); + Broadcasted->setName("offset.idx"); } Broadcasted = getBroadcastInstrs(Broadcasted); // After broadcasting the induction variable we need to make the vector // consecutive by adding 0, 1, 2, etc. 
for (unsigned part = 0; part < UF; ++part) - Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false); + Entry[part] = getStepVector(Broadcasted, VF * part, II.StepValue); return; } - case LoopVectorizationLegality::IK_ReverseIntInduction: case LoopVectorizationLegality::IK_PtrInduction: - case LoopVectorizationLegality::IK_ReversePtrInduction: - // Handle reverse integer and pointer inductions. - Value *StartIdx = ExtendedIdx; - // This is the normalized GEP that starts counting at zero. - Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx, - "normalized.idx"); - - // Handle the reverse integer induction variable case. - if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) { - IntegerType *DstTy = cast(II.StartValue->getType()); - Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy, - "resize.norm.idx"); - Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI, - "reverse.idx"); - - // This is a new value so do not hoist it out. - Value *Broadcasted = getBroadcastInstrs(ReverseInd); - // After broadcasting the induction variable we need to make the - // vector consecutive by adding ... -3, -2, -1, 0. - for (unsigned part = 0; part < UF; ++part) - Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part, - true); - return; - } - // Handle the pointer induction variable case. assert(P->getType()->isPointerTy() && "Unexpected type."); - - // Is this a reverse induction ptr or a consecutive induction ptr. - bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction == - II.IK); - + // This is the normalized GEP that starts counting at zero. + Value *NormalizedIdx = + Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx"); // This is the vector of results. Notice that we don't generate // vector geps because scalar geps result in better code. for (unsigned part = 0; part < UF; ++part) { if (VF == 1) { - int EltIndex = (part) * (Reverse ? 
-1 : 1); + int EltIndex = part; Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex); - Value *GlobalIdx; - if (Reverse) - GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx"); - else - GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx"); - - Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx, - "next.gep"); + Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx); + Value *SclrGep = II.transform(Builder, GlobalIdx); + SclrGep->setName("next.gep"); Entry[part] = SclrGep; continue; } Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); for (unsigned int i = 0; i < VF; ++i) { - int EltIndex = (i + part * VF) * (Reverse ? -1 : 1); + int EltIndex = i + part * VF; Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex); - Value *GlobalIdx; - if (!Reverse) - GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx"); - else - GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx"); - - Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx, - "next.gep"); + Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx); + Value *SclrGep = II.transform(Builder, GlobalIdx); + SclrGep->setName("next.gep"); VecVal = Builder.CreateInsertElement(VecVal, SclrGep, Builder.getInt32(i), "insert.gep"); @@ -3247,7 +3242,7 @@ // Nothing to do for PHIs and BR, since we already took care of the // loop control flow instructions. continue; - case Instruction::PHI:{ + case Instruction::PHI: { // Vectorize PHINodes. 
widenPHIInstruction(it, Entry, UF, VF, PV); continue; @@ -3368,8 +3363,12 @@ Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction, CI->getType()); Value *Broadcasted = getBroadcastInstrs(ScalarCast); + LoopVectorizationLegality::InductionInfo II = + Legal->getInductionVars()->lookup(OldInduction); + Constant *Step = + ConstantInt::getSigned(CI->getType(), II.StepValue->getSExtValue()); for (unsigned Part = 0; Part < UF; ++Part) - Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part, false); + Entry[Part] = getStepVector(Broadcasted, VF * Part, Step); propagateMetadata(Entry, it); break; } @@ -3716,8 +3715,9 @@ // This is the value coming from the preheader. Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); + ConstantInt *StepValue = nullptr; // Check if this is an induction variable. - InductionKind IK = isInductionVariable(Phi); + InductionKind IK = isInductionVariable(Phi, StepValue); if (IK_NoInduction != IK) { // Get the widest type. @@ -3727,7 +3727,7 @@ WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy); // Int inductions are special because we only allow one IV. - if (IK == IK_IntInduction) { + if (IK == IK_IntInduction && StepValue->isOne()) { // Use the phi node with the widest type as induction. Use the last // one if there are multiple (no good reason for doing this other // than it is expedient). @@ -3736,7 +3736,7 @@ } DEBUG(dbgs() << "LV: Found an induction variable.\n"); - Inductions[Phi] = InductionInfo(StartValue, IK); + Inductions[Phi] = InductionInfo(StartValue, IK, StepValue); // Until we explicitly handle the case of an induction variable with // an outside loop user we have to give up vectorizing this loop. @@ -5287,7 +5287,8 @@ } LoopVectorizationLegality::InductionKind -LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { +LoopVectorizationLegality::isInductionVariable(PHINode *Phi, + ConstantInt *&StepValue) { Type *PhiTy = Phi->getType(); // We only handle integer and pointer inductions variables. 
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) @@ -5300,22 +5301,19 @@ DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); return IK_NoInduction; } - const SCEV *Step = AR->getStepRecurrence(*SE); - - // Integer inductions need to have a stride of one. - if (PhiTy->isIntegerTy()) { - if (Step->isOne()) - return IK_IntInduction; - if (Step->isAllOnesValue()) - return IK_ReverseIntInduction; - return IK_NoInduction; - } + const SCEV *Step = AR->getStepRecurrence(*SE); // Calculate the pointer stride and check if it is consecutive. const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); if (!C) return IK_NoInduction; + ConstantInt *CV = C->getValue(); + if (PhiTy->isIntegerTy()) { + StepValue = CV; + return IK_IntInduction; + } + assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); Type *PointerElementType = PhiTy->getPointerElementType(); // The pointer stride cannot be determined if the pointer element type is not @@ -5323,13 +5321,16 @@ if (!PointerElementType->isSized()) return IK_NoInduction; - uint64_t Size = DL->getTypeAllocSize(PointerElementType); - if (C->getValue()->equalsInt(Size)) - return IK_PtrInduction; - else if (C->getValue()->equalsInt(0 - Size)) - return IK_ReversePtrInduction; - - return IK_NoInduction; + int64_t Size = static_cast<int64_t>(DL->getTypeAllocSize(PointerElementType)); + // A sized element type can still have an alloc size of zero (e.g. an empty + // struct); bail out instead of dividing by zero in the stride check below. + if (Size == 0) + return IK_NoInduction; + int64_t CVSize = CV->getSExtValue(); + if (CVSize % Size) + return IK_NoInduction; + StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size); + return IK_PtrInduction; } bool LoopVectorizationLegality::isInductionVariable(const Value *V) { @@ -6311,11 +6312,10 @@ return V; } -Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx, - bool Negate) { +Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) { // When unrolling and the VF is 1, we only need to add a simple scalar. 
Type *ITy = Val->getType(); assert(!ITy->isVectorTy() && "Val must be a scalar"); - Constant *C = ConstantInt::get(ITy, StartIdx, Negate); - return Builder.CreateAdd(Val, C, "induction"); + Constant *C = ConstantInt::get(ITy, StartIdx); + return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction"); } Index: test/Transforms/LoopVectorize/arbitrary-induction-step.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/arbitrary-induction-step.ll @@ -0,0 +1,150 @@ +; RUN: opt -S < %s -loop-vectorize 2>&1 | FileCheck %s +; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s --check-prefix=FORCE-VEC + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnueabi" + +; Test integer induction variable of step 2: +; for (int i = 0; i < 1024; i+=2) { +; int tmp = *A++; +; sum += i * tmp; +; } + +; CHECK-LABEL: @ind_plus2( +; CHECK: load <4 x i32>* +; CHECK: load <4 x i32>* +; CHECK: mul nsw <4 x i32> +; CHECK: mul nsw <4 x i32> +; CHECK: add nsw <4 x i32> +; CHECK: add nsw <4 x i32> +; CHECK: %index.next = add i64 %index, 8 +; CHECK: icmp eq i64 %index.next, 512 + +; FORCE-VEC-LABEL: @ind_plus2( +; FORCE-VEC: %wide.load = load <2 x i32>* +; FORCE-VEC: mul nsw <2 x i32> +; FORCE-VEC: add nsw <2 x i32> +; FORCE-VEC: %index.next = add i64 %index, 2 +; FORCE-VEC: icmp eq i64 %index.next, 512 +define i32 @ind_plus2(i32* %A) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr, %for.body ] + %i = phi i32 [ 0, %entry ], [ %add1, %for.body ] + %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] + %inc.ptr = getelementptr inbounds i32* %A.addr, i64 1 + %0 = load i32* %A.addr, align 4 + %mul = mul nsw i32 %0, %i + %add = add nsw i32 %mul, %sum + %add1 = add nsw i32 %i, 2 + %cmp = icmp slt i32 %add1, 1024 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; 
preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + ret i32 %add.lcssa +} + + +; Test integer induction variable of step -2: +; for (int i = 1024; i > 0; i-=2) { +; int tmp = *A++; +; sum += i * tmp; +; } + +; CHECK-LABEL: @ind_minus2( +; CHECK: load <4 x i32>* +; CHECK: load <4 x i32>* +; CHECK: mul nsw <4 x i32> +; CHECK: mul nsw <4 x i32> +; CHECK: add nsw <4 x i32> +; CHECK: add nsw <4 x i32> +; CHECK: %index.next = add i64 %index, 8 +; CHECK: icmp eq i64 %index.next, 512 + +; FORCE-VEC-LABEL: @ind_minus2( +; FORCE-VEC: %wide.load = load <2 x i32>* +; FORCE-VEC: mul nsw <2 x i32> +; FORCE-VEC: add nsw <2 x i32> +; FORCE-VEC: %index.next = add i64 %index, 2 +; FORCE-VEC: icmp eq i64 %index.next, 512 +define i32 @ind_minus2(i32* %A) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr, %for.body ] + %i = phi i32 [ 1024, %entry ], [ %sub, %for.body ] + %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] + %inc.ptr = getelementptr inbounds i32* %A.addr, i64 1 + %0 = load i32* %A.addr, align 4 + %mul = mul nsw i32 %0, %i + %add = add nsw i32 %mul, %sum + %sub = add nsw i32 %i, -2 + %cmp = icmp sgt i32 %i, 2 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + ret i32 %add.lcssa +} + + +; Test pointer induction variable of step 2. As currently we don't support +; masked load/store, vectorization is possible but not beneficial. If loop +; vectorization is not enforced, LV will only do interleave. 
+; for (int i = 0; i < 1024; i++) { +; int tmp0 = *A++; +; int tmp1 = *A++; +; sum += tmp0 * tmp1; +; } + +; CHECK-LABEL: @ptr_ind_plus2( +; CHECK: load i32* +; CHECK: load i32* +; CHECK: load i32* +; CHECK: load i32* +; CHECK: mul nsw i32 +; CHECK: mul nsw i32 +; CHECK: add nsw i32 +; CHECK: add nsw i32 +; CHECK: %index.next = add i64 %index, 2 +; CHECK: icmp eq i64 %index.next, 1024 + +; FORCE-VEC-LABEL: @ptr_ind_plus2( +; FORCE-VEC: load i32* +; FORCE-VEC: insertelement <2 x i32> +; FORCE-VEC: load i32* +; FORCE-VEC: insertelement <2 x i32> +; FORCE-VEC: load i32* +; FORCE-VEC: insertelement <2 x i32> +; FORCE-VEC: load i32* +; FORCE-VEC: insertelement <2 x i32> +; FORCE-VEC: mul nsw <2 x i32> +; FORCE-VEC: add nsw <2 x i32> +; FORCE-VEC: %index.next = add i64 %index, 2 +; FORCE-VEC: icmp eq i64 %index.next, 1024 +define i32 @ptr_ind_plus2(i32* %A) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr1, %for.body ] + %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %inc.ptr = getelementptr inbounds i32* %A.addr, i64 1 + %0 = load i32* %A.addr, align 4 + %inc.ptr1 = getelementptr inbounds i32* %A.addr, i64 2 + %1 = load i32* %inc.ptr, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, %sum + %inc = add nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + ret i32 %add.lcssa +} Index: test/Transforms/LoopVectorize/gcc-examples.ll =================================================================== --- test/Transforms/LoopVectorize/gcc-examples.ll +++ test/Transforms/LoopVectorize/gcc-examples.ll @@ -388,9 +388,8 @@ ret void } -; Can't vectorize because of reductions. 
;CHECK-LABEL: @example13( -;CHECK-NOT: <4 x i32> +;CHECK: <4 x i32> ;CHECK: ret void define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %out) nounwind uwtable ssp { br label %.preheader Index: test/Transforms/LoopVectorize/reverse_induction.ll =================================================================== --- test/Transforms/LoopVectorize/reverse_induction.ll +++ test/Transforms/LoopVectorize/reverse_induction.ll @@ -97,7 +97,7 @@ ; CHECK: vector.body ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; CHECK: %normalized.idx = sub i64 %index, 0 -; CHECK: %reverse.idx = sub i64 1023, %normalized.idx +; CHECK: %offset.idx = sub i64 1023, %normalized.idx ; CHECK: trunc i64 %index to i8 define void @reverse_forward_induction_i64_i8() { @@ -124,7 +124,7 @@ ; CHECK: vector.body: ; CHECK: %index = phi i64 [ 129, %vector.ph ], [ %index.next, %vector.body ] ; CHECK: %normalized.idx = sub i64 %index, 129 -; CHECK: %reverse.idx = sub i64 1023, %normalized.idx +; CHECK: %offset.idx = sub i64 1023, %normalized.idx ; CHECK: trunc i64 %index to i8 define void @reverse_forward_induction_i64_i8_signed() {