Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1577,6 +1577,26 @@
     return II.getConsecutiveDirection();
   }
 
+  const SCEV *PtrScev = SE->getSCEV(Ptr);
+  const SCEVAddRecExpr *PtrAddRec = nullptr;
+  const DataLayout &DL = Gep->getModule()->getDataLayout();
+  // If the SCEV for Ptr is an add recurrence whose stride is a constant
+  // equal to the size of the pointer element type, the access is consecutive.
+  if (PtrScev && (PtrAddRec = dyn_cast<SCEVAddRecExpr>(PtrScev))) {
+    const SCEV *Step = PtrAddRec->getStepRecurrence(*SE);
+    const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+    if (C) {
+      const APInt &APStepVal = C->getValue()->getValue();
+      int64_t StepVal = APStepVal.getSExtValue();
+      int64_t ElemSize =
+          DL.getTypeAllocSize(Ptr->getType()->getPointerElementType());
+      if (StepVal == ElemSize)
+        return 1;
+      else if (StepVal == -ElemSize)
+        return -1;
+    }
+  }
+
   unsigned InductionOperand = getGEPInductionOperand(Gep);
 
   // Check that all of the gep indices are uniform except for our induction
@@ -1696,8 +1716,13 @@
   VectorParts &Entry = WidenMap.get(Instr);
 
   // Handle consecutive loads/stores.
+  Instruction *GepPtrInst = nullptr;
   GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
-  if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
+  if (Gep &&
+      (GepPtrInst = dyn_cast_or_null<Instruction>(Gep->getPointerOperand())) &&
+      !SE->isLoopInvariant(SE->getSCEV(GepPtrInst), OrigLoop)) {
+    // Handle the case where Gep->getPointerOperand() is an induction
+    // variable or, more generally, a loop-variant SCEVAddRecExpr.
     setDebugLocFromInst(Builder, Gep);
     Value *PtrOperand = Gep->getPointerOperand();
     Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
Index: test/Transforms/LoopVectorize/pr23580.ll
===================================================================
--- test/Transforms/LoopVectorize/pr23580.ll
+++ test/Transforms/LoopVectorize/pr23580.ll
@@ -0,0 +1,58 @@
+; PR23580
+; RUN: opt < %s -loop-vectorize -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.B = type { i16 }
+%class.G = type <{ %struct.F, [2 x i32], i8, [7 x i8] }>
+%struct.F = type { i8, i8, i8, i16, i32* }
+
+@a = global i32 0, align 4
+
+declare %struct.B* @_ZN1C5m_fn1Ev()
+
+; Check that the geps inside for.body are merged so the loop vectorizer can
+; recognize the loads in for.body as consecutive across iterations and generate %wide.loads.
+;
+; CHECK-LABEL: @fn2(
+; CHECK: %wide.load{{[0-9]*}} =
+; CHECK: %wide.load{{[0-9]*}} =
+
+define void @fn2(%class.G* nocapture readonly %this) align 2 {
+entry:
+  br label %for.preheader
+
+for.preheader:                                    ; preds = %entry
+  %call = call %struct.B* @_ZN1C5m_fn1Ev()
+  %tmp4 = load i32, i32* @a, align 4
+  %idx.ext = sext i32 %tmp4 to i64
+  %add.ptr = getelementptr inbounds %struct.B, %struct.B* %call, i64 %idx.ext
+  %cmp14.1 = icmp slt i32 1, %tmp4
+  br i1 %cmp14.1, label %for.body.lr.ph, label %for.end, !llvm.loop !0
+
+for.body.lr.ph:                                   ; preds = %for.preheader
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %k.02 = phi i32 [ 1, %for.body.lr.ph ], [ %add, %for.body ]
+  %idxprom15 = sext i32 %k.02 to i64
+  %arrayidx16 = getelementptr inbounds %struct.B, %struct.B* %add.ptr, i64 %idxprom15
+  %ival = getelementptr inbounds %struct.B, %struct.B* %arrayidx16, i32 0, i32 0
+  %tmp9 = load i16, i16* %ival, align 2
+  %add = add nsw i32 %k.02, 1
+  %arrayidx25 = getelementptr inbounds %struct.B, %struct.B* %call, i64 %idxprom15
+  %ival26 = getelementptr inbounds %struct.B, %struct.B* %arrayidx25, i32 0, i32 0
+  store i16 %tmp9, i16* %ival26, align 2
+  %cmp14 = icmp slt i32 %add, %tmp4
+  br i1 %cmp14, label %for.body, label %for.cond.for.end_crit_edge, !llvm.loop !0
+
+for.cond.for.end_crit_edge:                      ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %for.preheader
+  ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.width", i32 4}
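
For reference, a minimal standalone sketch of the stride classification the
first hunk adds, pulled out into a free function. This is an illustration
only, not part of the patch: the helper name classifyPointerStride is
hypothetical, and SE, DL, and Ptr are assumed to come from the surrounding
vectorizer context.

  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Value.h"

  using namespace llvm;

  // Returns 1 for a unit-stride forward pointer, -1 for a unit-stride
  // reverse pointer, and 0 when the stride cannot be proven consecutive.
  static int classifyPointerStride(Value *Ptr, ScalarEvolution &SE,
                                   const DataLayout &DL) {
    // The pointer must evolve as an add recurrence in the loop.
    const auto *PtrAddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
    if (!PtrAddRec)
      return 0;
    // Its step must be a compile-time constant.
    const auto *C = dyn_cast<SCEVConstant>(PtrAddRec->getStepRecurrence(SE));
    if (!C)
      return 0;
    int64_t StepVal = C->getValue()->getValue().getSExtValue();
    int64_t ElemSize =
        DL.getTypeAllocSize(Ptr->getType()->getPointerElementType());
    if (StepVal == ElemSize)
      return 1;  // Consecutive, forward.
    if (StepVal == -ElemSize)
      return -1; // Consecutive, reverse.
    return 0;    // Strided or not provably consecutive.
  }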