Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1545,38 +1545,33 @@ if (Ptr->getType()->getPointerElementType()->isAggregateType()) return 0; - // If this value is a pointer induction variable we know it is consecutive. PHINode *Phi = dyn_cast_or_null(Ptr); - if (Phi && Inductions.count(Phi)) { - InductionInfo II = Inductions[Phi]; - return II.getConsecutiveDirection(); + GetElementPtrInst *Gep = dyn_cast_or_null(Ptr); + if (!Phi && !Gep) + return 0; + + const SCEV *PtrScev = SE->getSCEV(Ptr); + const DataLayout &DL = TheFunction->getParent()->getDataLayout(); + // If this Ptr value is a SCEVAddRecExpr whose constant stride equals the + // alloc size of the pointer element type (up to sign), it is consecutive. + if (const SCEVAddRecExpr *PtrAddRec = + dyn_cast_or_null(PtrScev)) { + if (auto C = + dyn_cast_or_null(PtrAddRec->getStepRecurrence(*SE))) { + int64_t StepVal = C->getValue()->getSExtValue(); + int64_t ElemSize = + DL.getTypeAllocSize(Ptr->getType()->getPointerElementType()); + if (StepVal == ElemSize) + return 1; + if (StepVal == -ElemSize) + return -1; + } } - GetElementPtrInst *Gep = dyn_cast_or_null(Ptr); if (!Gep) return 0; unsigned NumOperands = Gep->getNumOperands(); - Value *GpPtr = Gep->getPointerOperand(); - // If this GEP value is a consecutive pointer induction variable and all of - // the indices are constant then we know it is consecutive. We can - Phi = dyn_cast(GpPtr); - if (Phi && Inductions.count(Phi)) { - - // Make sure that the pointer does not point to structs. - PointerType *GepPtrType = cast(GpPtr->getType()); - if (GepPtrType->getElementType()->isAggregateType()) - return 0; - - // Make sure that all of the index operands are loop invariant. 
- for (unsigned i = 1; i < NumOperands; ++i) - if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) - return 0; - - InductionInfo II = Inductions[Phi]; - return II.getConsecutiveDirection(); - } - unsigned InductionOperand = getGEPInductionOperand(Gep); // Check that all of the gep indices are uniform except for our induction @@ -1696,8 +1691,13 @@ VectorParts &Entry = WidenMap.get(Instr); // Handle consecutive loads/stores. + Instruction *GepPtrInst = nullptr; GetElementPtrInst *Gep = dyn_cast(Ptr); - if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) { + if (Gep && + (GepPtrInst = dyn_cast_or_null(Gep->getPointerOperand())) && + !SE->isLoopInvariant(SE->getSCEV(GepPtrInst), OrigLoop)) { + // Handle the case where Gep->getPointerOperand() is an induction + // variable or a SCEVAddRecExpr. setDebugLocFromInst(Builder, Gep); Value *PtrOperand = Gep->getPointerOperand(); Value *FirstBasePtr = getVectorValue(PtrOperand)[0]; Index: test/Transforms/LoopVectorize/pr23580.ll =================================================================== --- test/Transforms/LoopVectorize/pr23580.ll +++ test/Transforms/LoopVectorize/pr23580.ll @@ -0,0 +1,37 @@ +; PR23580 +; RUN: opt < %s -loop-vectorize -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Check that the loop vectorizer recognizes the loads inside for.body as +; consecutive across iterations and generates %wide.load values. 
+; +; CHECK-LABEL: @fn2( +; CHECK: %wide.load{{[0-9]*}} = +; CHECK: %wide.load{{[0-9]*}} = + +%struct.B = type { i16 } +define void @fn2(%struct.B* %add.ptr, %struct.B* %add.ptr1, i32 %tmp4) align 2 { +entry: + br label %for.body + +for.body: + %k.02 = phi i32 [ 1, %entry ], [ %add, %for.body ] + %idxprom15 = sext i32 %k.02 to i64 + %arrayidx16 = getelementptr inbounds %struct.B, %struct.B* %add.ptr, i64 %idxprom15 + %ival = getelementptr inbounds %struct.B, %struct.B* %arrayidx16, i32 0, i32 0 + %tmp9 = load i16, i16* %ival, align 2 + %add = add nsw i32 %k.02, 1 + %arrayidx25 = getelementptr inbounds %struct.B, %struct.B* %add.ptr1, i64 %idxprom15 + %ival26 = getelementptr inbounds %struct.B, %struct.B* %arrayidx25, i32 0, i32 0 + store i16 %tmp9, i16* %ival26, align 2 + %cmp14 = icmp slt i32 %add, %tmp4 + br i1 %cmp14, label %for.body, label %for.end, !llvm.loop !0 + +for.end: + ret void +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.vectorize.width", i32 4}