Index: ../lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- ../lib/Transforms/Vectorize/LoopVectorize.cpp
+++ ../lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2134,38 +2134,33 @@
   if (Ptr->getType()->getPointerElementType()->isAggregateType())
     return 0;
 
-  // If this value is a pointer induction variable, we know it is consecutive.
-  PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
-  if (Phi && Inductions.count(Phi)) {
-    InductionDescriptor II = Inductions[Phi];
-    return II.getConsecutiveDirection();
+  // If this value is a recurrent expression with a constant step,
+  // and the step is equal to object size, we know it is consecutive.
+  // The recurrence must belong to the loop being vectorized: a pointer that
+  // only advances in an enclosing loop is invariant here, not consecutive.
+
+  const auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
+  if (AddRec && AddRec->getLoop() == TheLoop) {
+    // The absolute step value should be equal to element size.
+    // Negative step means reversed access.
+    const SCEVConstant *Step =
+        dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
+    if (Step)
+      if (auto *ConstStep = dyn_cast<ConstantInt>(Step->getValue())) {
+        const DataLayout &DL = TheFunction->getParent()->getDataLayout();
+        unsigned AllocSize =
+            DL.getTypeAllocSize(Ptr->getType()->getPointerElementType());
+        if (ConstStep->getValue().abs() == AllocSize)
+          return ConstStep->getValue().getSExtValue() / AllocSize;
+      }
   }
 
+  // One more case that we want to check is strided access with stride 1
   GetElementPtrInst *Gep = getGEPInstruction(Ptr);
-  if (!Gep)
+  if (!Gep || !Strides.count(Gep))
     return 0;
 
   unsigned NumOperands = Gep->getNumOperands();
-  Value *GpPtr = Gep->getPointerOperand();
-  // If this GEP value is a consecutive pointer induction variable and all of
-  // the indices are constant, then we know it is consecutive.
-  Phi = dyn_cast<PHINode>(GpPtr);
-  if (Phi && Inductions.count(Phi)) {
-
-    // Make sure that the pointer does not point to structs.
-    PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
-    if (GepPtrType->getElementType()->isAggregateType())
-      return 0;
-
-    // Make sure that all of the index operands are loop invariant.
-    for (unsigned i = 1; i < NumOperands; ++i)
-      if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
-        return 0;
-
-    InductionDescriptor II = Inductions[Phi];
-    return II.getConsecutiveDirection();
-  }
-
   unsigned InductionOperand = getGEPInductionOperand(Gep);
 
   // Check that all of the gep indices are uniform except for our induction
@@ -2175,29 +2170,25 @@
         !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
       return 0;
 
-  // We can emit wide load/stores only if the last non-zero index is the
-  // induction variable.
-  const SCEV *Last = nullptr;
-  if (!Strides.count(Gep))
-    Last = PSE.getSCEV(Gep->getOperand(InductionOperand));
-  else {
-    // Because of the multiplication by a stride we can have a s/zext cast.
-    // We are going to replace this stride by 1 so the cast is safe to ignore.
-    //
-    //  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-    //  %0 = trunc i64 %indvars.iv to i32
-    //  %mul = mul i32 %0, %Stride1
-    //  %idxprom = zext i32 %mul to i64  << Safe cast.
-    //  %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
-    //
-    Last = replaceSymbolicStrideSCEV(PSE, Strides,
-                                     Gep->getOperand(InductionOperand), Gep);
-    if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
-      Last =
-          (C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend)
-              ? C->getOperand()
-              : Last;
-  }
+  // Because of the multiplication by a stride we can have a s/zext cast.
+  // We are going to replace this stride by 1 so the cast is safe to ignore.
+  //
+  //  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  //  %0 = trunc i64 %indvars.iv to i32
+  //  %mul = mul i32 %0, %Stride1
+  //  %idxprom = zext i32 %mul to i64  << Safe cast.
+  //  %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
+  //
+  const SCEV *Last =
+      replaceSymbolicStrideSCEV(PSE, Strides, Gep->getOperand(InductionOperand),
+                                Gep);
+
+  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
+    Last =
+        (C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend)
+            ? C->getOperand()
+            : Last;
+
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
     const SCEV *Step = AR->getStepRecurrence(*SE);
 
@@ -2208,7 +2199,6 @@
     if (Step->isAllOnesValue())
       return -1;
   }
-
   return 0;
 }
 
@@ -2589,7 +2579,8 @@
       Ptr = Builder.Insert(Gep2);
     } else { // No GEP
       // Use the induction element ptr.
-      assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+      assert(isa<SCEVAddRecExpr>(PSE.getSE()->getSCEV(Ptr)) &&
+             "Invalid induction ptr");
       setDebugLocFromInst(Builder, Ptr);
       VectorParts &PtrVal = getVectorValue(Ptr);
       Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
Index: ../test/Transforms/LoopVectorize/consec_no_gep.ll
===================================================================
--- ../test/Transforms/LoopVectorize/consec_no_gep.ll
+++ ../test/Transforms/LoopVectorize/consec_no_gep.ll
@@ -0,0 +1,45 @@
+;RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;; Check consecutive memory access without preceding GEP instruction
+
+; for (int i=0; i<len; i++)
+;   *to++ = *from++;
+
+; NOTE(review): the original FileCheck lines were lost in extraction; the
+; checks below are a minimal reconstruction -- confirm against upstream.
+; CHECK-LABEL: @consecutive_no_gep(
+; CHECK: vector.body
+; CHECK: load <4 x
+; CHECK: store <4 x
+
+define void @consecutive_no_gep(float* noalias nocapture readonly %from, float* noalias nocapture %to, i32 %len) #0 {
+entry:
+  %cmp2 = icmp sgt i32 %len, 0
+  br i1 %cmp2, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %from.addr.04 = phi float* [ %incdec.ptr, %for.body ], [ %from, %for.body.preheader ]
+  %to.addr.03 = phi float* [ %incdec.ptr1, %for.body ], [ %to, %for.body.preheader ]
+  %incdec.ptr = getelementptr inbounds float, float* %from.addr.04, i64 1
+  %0 = bitcast float* %from.addr.04 to i32*
+  %1 = load i32, i32* %0, align 4
+  %incdec.ptr1 = getelementptr inbounds float, float* %to.addr.03, i64 1
+  %2 = bitcast float* %to.addr.03 to i32*
+  store i32 %1, i32* %2, align 4
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %len
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}