Index: llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
+++ llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -713,7 +713,7 @@
 /// run-time assumptions.
 int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
                      const ValueToValueMap &StridesMap = ValueToValueMap(),
-                     bool Assume = false);
+                     bool Assume = false, bool ShouldCheckWrap = true);
 
 /// \brief Returns true if the memory operations \p A and \p B are consecutive.
 /// This is a simple API that does not depend on the analysis pass.
Index: llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
+++ llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
@@ -887,7 +887,7 @@
 
 /// \brief Check whether the access through \p Ptr has a constant stride.
 int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
                            const Loop *Lp, const ValueToValueMap &StridesMap,
-                           bool Assume) {
+                           bool Assume, bool ShouldCheckWrap) {
   Type *Ty = Ptr->getType();
   assert(Ty->isPointerTy() && "Unexpected non-ptr");
@@ -926,9 +926,9 @@
   // to access the pointer value "0" which is undefined behavior in address
   // space 0, therefore we can also vectorize this case.
   bool IsInBoundsGEP = isInBoundsGep(Ptr);
-  bool IsNoWrapAddRec =
-      PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
-      isNoWrapAddRec(Ptr, AR, PSE, Lp);
+  bool IsNoWrapAddRec = !ShouldCheckWrap ||
+                        PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
+                        isNoWrapAddRec(Ptr, AR, PSE, Lp);
   bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
   if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
     if (Assume) {
Index: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2296,87 +2296,13 @@
 }
 
 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
-  assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
-  auto *SE = PSE.getSE();
-  // Make sure that the pointer does not point to structs.
-  if (Ptr->getType()->getPointerElementType()->isAggregateType())
-    return 0;
-
-  // If this value is a pointer induction variable, we know it is consecutive.
-  PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
-  if (Phi && Inductions.count(Phi)) {
-    InductionDescriptor II = Inductions[Phi];
-    return II.getConsecutiveDirection();
-  }
-
-  GetElementPtrInst *Gep = getGEPInstruction(Ptr);
-  if (!Gep)
-    return 0;
-
-  unsigned NumOperands = Gep->getNumOperands();
-  Value *GpPtr = Gep->getPointerOperand();
-  // If this GEP value is a consecutive pointer induction variable and all of
-  // the indices are constant, then we know it is consecutive.
-  Phi = dyn_cast<PHINode>(GpPtr);
-  if (Phi && Inductions.count(Phi)) {
-
-    // Make sure that the pointer does not point to structs.
-    PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
-    if (GepPtrType->getElementType()->isAggregateType())
-      return 0;
-
-    // Make sure that all of the index operands are loop invariant.
-    for (unsigned i = 1; i < NumOperands; ++i)
-      if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
-        return 0;
-
-    InductionDescriptor II = Inductions[Phi];
-    return II.getConsecutiveDirection();
-  }
-
-  unsigned InductionOperand = getGEPInductionOperand(Gep);
-
-  // Check that all of the gep indices are uniform except for our induction
-  // operand.
-  for (unsigned i = 0; i != NumOperands; ++i)
-    if (i != InductionOperand &&
-        !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
-      return 0;
-
-  // We can emit wide load/stores only if the last non-zero index is the
-  // induction variable.
-  const SCEV *Last = nullptr;
-  if (!getSymbolicStrides() || !getSymbolicStrides()->count(Gep))
-    Last = PSE.getSCEV(Gep->getOperand(InductionOperand));
-  else {
-    // Because of the multiplication by a stride we can have a s/zext cast.
-    // We are going to replace this stride by 1 so the cast is safe to ignore.
-    //
-    //  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-    //  %0 = trunc i64 %indvars.iv to i32
-    //  %mul = mul i32 %0, %Stride1
-    //  %idxprom = zext i32 %mul to i64  << Safe cast.
-    //  %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
-    //
-    Last = replaceSymbolicStrideSCEV(PSE, *getSymbolicStrides(),
-                                     Gep->getOperand(InductionOperand), Gep);
-    if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
-      Last =
-          (C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend)
-              ? C->getOperand()
-              : Last;
-  }
-  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
-    const SCEV *Step = AR->getStepRecurrence(*SE);
-
-    // The memory is consecutive because the last index is consecutive
-    // and all other indices are loop invariant.
-    if (Step->isOne())
-      return 1;
-    if (Step->isAllOnesValue())
-      return -1;
-  }
+  const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() :
+    ValueToValueMap();
+  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
+  if (Stride == 1 || Stride == -1)
+    return Stride;
   return 0;
 }
@@ -2813,8 +2739,6 @@
       Ptr = Builder.Insert(Gep2);
     } else { // No GEP
-      // Use the induction element ptr.
-      assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
       setDebugLocFromInst(Builder, Ptr);
       Ptr = getScalarValue(Ptr, 0, 0);
     }
Index: llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep.ll
===================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep.ll
+++ llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep.ll
@@ -0,0 +1,43 @@
+;RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;; Check consecutive memory access without preceding GEP instruction
+
+; for (int i=0; i<len; i++) {
+;  *to++ = *from++;
+; }
+
+; CHECK-LABEL: @consecutive_no_gep(
+; CHECK: vector.body
+; CHECK: %[[index:.*]] = phi i64 [ 0, %vector.ph ]
+; CHECK: getelementptr float, float* %{{.*}}, i64 %[[index]]
+; CHECK: load <4 x float>
+
+define void @consecutive_no_gep(float* noalias nocapture readonly %from, float* noalias nocapture %to, i32 %len) #0 {
+entry:
+  %cmp2 = icmp sgt i32 %len, 0
+  br i1 %cmp2, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %from.addr.04 = phi float* [ %incdec.ptr, %for.body ], [ %from, %for.body.preheader ]
+  %to.addr.03 = phi float* [ %incdec.ptr1, %for.body ], [ %to, %for.body.preheader ]
+  %incdec.ptr = getelementptr inbounds float, float* %from.addr.04, i64 1
+  %val = load float, float* %from.addr.04, align 4
+  %incdec.ptr1 = getelementptr inbounds float, float* %to.addr.03, i64 1
+  store float %val, float* %to.addr.03, align 4
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %len
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
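
Note on the change above: when the new ShouldCheckWrap parameter is false, getPtrStride no longer requires a no-wrap proof before reporting a stride, which is what lets the simplified isConsecutivePtr recognize the GEP-less pointer inductions exercised by consec_no_gep.ll. The following is a minimal stand-alone C++ sketch of that control flow, not LLVM code; AccessInfo and checkStride are illustrative names invented for this example.

// Simplified model of the ShouldCheckWrap behavior; illustrative only, not LLVM API.
#include <cstdint>
#include <iostream>

struct AccessInfo {
  int64_t Stride;     // stride in elements derived from the recurrence
  bool ProvedNoWrap;  // analysis proved the pointer does not wrap
  bool InBoundsGEP;   // access is based on an inbounds GEP
};

// Stand-in for getPtrStride(): returns the stride, or 0 when the access
// cannot be treated as strided under the requested checks.
int64_t checkStride(const AccessInfo &A, bool ShouldCheckWrap) {
  // Mirrors: IsNoWrapAddRec = !ShouldCheckWrap || PSE.hasNoOverflow(...) || ...
  bool NoWrapOK = !ShouldCheckWrap || A.ProvedNoWrap;
  if (!NoWrapOK && !A.InBoundsGEP)
    return 0; // would need extra run-time assumptions; give up in this sketch
  return A.Stride;
}

int main() {
  // Unit-stride access with no GEP and no wrap proof, as in the new test.
  AccessInfo A{1, false, false};
  std::cout << checkStride(A, /*ShouldCheckWrap=*/true) << "\n";  // prints 0
  std::cout << checkStride(A, /*ShouldCheckWrap=*/false) << "\n"; // prints 1
}

A caller such as the rewritten isConsecutivePtr then simply treats a returned stride of 1 or -1 as consecutive, which is what the getPtrStride(PSE, Ptr, TheLoop, Strides, true, false) call in the patch does.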