Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1556,25 +1556,61 @@ if (!Gep) return 0; - unsigned NumOperands = Gep->getNumOperands(); - Value *GpPtr = Gep->getPointerOperand(); - // If this GEP value is a consecutive pointer induction variable and all of - // the indices are constant then we know it is consecutive. We can - Phi = dyn_cast<PHINode>(GpPtr); - if (Phi && Inductions.count(Phi)) { + const DataLayout &DL = Gep->getModule()->getDataLayout(); + unsigned GEPAllocSize = DL.getTypeAllocSize( + cast<PointerType>(Gep->getType()->getScalarType())->getElementType()); - // Make sure that the pointer does not point to structs. - PointerType *GepPtrType = cast<PointerType>(GpPtr->getType()); - if (GepPtrType->getElementType()->isAggregateType()) - return 0; + unsigned NumOperands; + while (Gep) { + NumOperands = Gep->getNumOperands(); + Value *GpPtr = Gep->getPointerOperand(); + Phi = dyn_cast<PHINode>(GpPtr); + if (Phi && Inductions.count(Phi)) { + // If this GEP value is a consecutive pointer induction variable and + // all of the indices are constant then we know it is consecutive. + + // Make sure that the pointer does not point to structs. + PointerType *GepPtrType = cast<PointerType>(GpPtr->getType()); + + // Make sure that all of the index operands are loop invariant. + for (unsigned i = 1; i < NumOperands; ++i) + if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + return 0; - // Make sure that all of the index operands are loop invariant. 
- for (unsigned i = 1; i < NumOperands; ++i) - if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) - return 0; + if (GepPtrType->getElementType()->isAggregateType()) { + if ((Gep = dyn_cast<GetElementPtrInst>(GpPtr))) + continue; + else + return 0; + } + InductionInfo II = Inductions[Phi]; + return II.getConsecutiveDirection(); + } else { + // If the pointer operand of the GEP is a SCEVAddRecExpr, all the + // other operands are 0, and the pointer operand is another + // GetElementPtrInst, recursively find the induction variable in the + // pointer operand. + const SCEV *PtrScev = SE->getSCEV(GpPtr); + if (dyn_cast<SCEVAddRecExpr>(PtrScev)) { + for (unsigned i = 1; i < NumOperands; ++i) + if (!match(Gep->getOperand(i), m_Zero())) + return 0; + + Gep = dyn_cast<GetElementPtrInst>(GpPtr); + if (!Gep) + return 0; + + unsigned NewAllocSize = DL.getTypeAllocSize( + cast<PointerType>(Gep->getType()->getScalarType()) + ->getElementType()); + if (GEPAllocSize != NewAllocSize) + return 0; - InductionInfo II = Inductions[Phi]; - return II.getConsecutiveDirection(); + continue; + } else { + break; + } + } } unsigned InductionOperand = getGEPInductionOperand(Gep); @@ -1696,8 +1732,13 @@ VectorParts &Entry = WidenMap.get(Instr); // Handle consecutive loads/stores. + Instruction *GepPtrInst = nullptr; GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); - if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) { + if (Gep && + (GepPtrInst = dyn_cast_or_null<Instruction>(Gep->getPointerOperand())) && + !SE->isLoopInvariant(SE->getSCEV(GepPtrInst), OrigLoop)) { + // Handle the case where Gep->getPointerOperand() is an induction + // variable or a SCEVAddRecExpr. 
setDebugLocFromInst(Builder, Gep); Value *PtrOperand = Gep->getPointerOperand(); Value *FirstBasePtr = getVectorValue(PtrOperand)[0]; Index: test/Transforms/LoopVectorize/pr23580.ll =================================================================== --- test/Transforms/LoopVectorize/pr23580.ll +++ test/Transforms/LoopVectorize/pr23580.ll @@ -0,0 +1,57 @@ +; PR23580 +; RUN: opt < %s -loop-rotate -loop-vectorize -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.anon = type { [0 x %class.C] } +%class.C = type { i8 } +%struct.B = type { i16 } +%class.G = type <{ %struct.F, [2 x i32], i8, [7 x i8] }> +%struct.F = type { i8, i8, i8, i16, i32* } + +@a = global i32 0, align 4 +@d = internal global %struct.anon zeroinitializer, align 1 + +declare %struct.B* @_ZN1C5m_fn1Ev() + +; Check that geps inside for.body are merged so the loop vectorizer can +; recognize the loads inside for.body as consecutive across iterations, and generate %wide.loads. 
+; +; CHECK-LABEL: @fn2( +; CHECK: %wide.load{{[0-9]*}} = +; CHECK: %wide.load{{[0-9]*}} = + +define void @fn2(%class.G* nocapture readonly %this) align 2 { +entry: + br label %for.preheader + +for.preheader: + %call = call %struct.B* @_ZN1C5m_fn1Ev() + %tmp4 = load i32, i32* @a, align 4 + %idx.ext = sext i32 %tmp4 to i64 + %add.ptr = getelementptr inbounds %struct.B, %struct.B* %call, i64 %idx.ext + br label %for.cond + +for.cond: + %k.0 = phi i32 [ 1, %for.preheader ], [ %add, %for.body ] + %cmp14 = icmp slt i32 %k.0, %tmp4 + br i1 %cmp14, label %for.body, label %for.end, !llvm.loop !0 + +for.body: + %idxprom15 = sext i32 %k.0 to i64 + %arrayidx16 = getelementptr inbounds %struct.B, %struct.B* %add.ptr, i64 %idxprom15 + %ival = getelementptr inbounds %struct.B, %struct.B* %arrayidx16, i32 0, i32 0 + %tmp9 = load i16, i16* %ival, align 2 + %add = add nsw i32 %k.0, 1 + %arrayidx25 = getelementptr inbounds %struct.B, %struct.B* %call, i64 %idxprom15 + %ival26 = getelementptr inbounds %struct.B, %struct.B* %arrayidx25, i32 0, i32 0 + store i16 %tmp9, i16* %ival26, align 2 + br label %for.cond + +for.end: + ret void +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.vectorize.width", i32 4}