Index: ../lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- ../lib/Transforms/Vectorize/LoopVectorize.cpp +++ ../lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2127,80 +2127,41 @@ return Builder.CreateAdd(Val, Step, "induction"); } -int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { - assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); - auto *SE = PSE.getSE(); - // Make sure that the pointer does not point to structs. - if (Ptr->getType()->getPointerElementType()->isAggregateType()) - return 0; - - // If this value is a pointer induction variable, we know it is consecutive. - PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr); - if (Phi && Inductions.count(Phi)) { - InductionDescriptor II = Inductions[Phi]; - return II.getConsecutiveDirection(); - } +static int getStrideFromGEPIndex(PredicatedScalarEvolution &PSE, + GetElementPtrInst *Gep, + const Loop *Lp, + const ValueToValueMap &StridesMap) { - GetElementPtrInst *Gep = getGEPInstruction(Ptr); - if (!Gep) + auto *SE = PSE.getSE(); + if (Gep->getNumOperands() != 2) return 0; - unsigned NumOperands = Gep->getNumOperands(); - Value *GpPtr = Gep->getPointerOperand(); - // If this GEP value is a consecutive pointer induction variable and all of - // the indices are constant, then we know it is consecutive. - Phi = dyn_cast<PHINode>(GpPtr); - if (Phi && Inductions.count(Phi)) { - - // Make sure that the pointer does not point to structs. - PointerType *GepPtrType = cast<PointerType>(GpPtr->getType()); - if (GepPtrType->getElementType()->isAggregateType()) - return 0; - - // Make sure that all of the index operands are loop invariant. - for (unsigned i = 1; i < NumOperands; ++i) - if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) - return 0; - - InductionDescriptor II = Inductions[Phi]; - return II.getConsecutiveDirection(); - } - + // Induction operand is the index of the GEP, not a pointer. 
unsigned InductionOperand = getGEPInductionOperand(Gep); + if (InductionOperand != 1 || + !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(0)), Lp)) + return 0; - // Check that all of the gep indices are uniform except for our induction - // operand. - for (unsigned i = 0; i != NumOperands; ++i) - if (i != InductionOperand && - !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) - return 0; + // Because of the multiplication by a stride we can have a s/zext cast. + // We are going to replace this stride by 1 so the cast is safe to ignore. + // + // %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + // %0 = trunc i64 %indvars.iv to i32 + // %mul = mul i32 %0, %Stride1 + // %idxprom = zext i32 %mul to i64 << Safe cast. + // %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom + // + const SCEV *Last = replaceSymbolicStrideSCEV(PSE, StridesMap, + Gep->getOperand(InductionOperand), + Gep); + + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last)) + Last = + (C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend) + ? C->getOperand() : Last; - // We can emit wide load/stores only if the last non-zero index is the - // induction variable. - const SCEV *Last = nullptr; - if (!Strides.count(Gep)) - Last = PSE.getSCEV(Gep->getOperand(InductionOperand)); - else { - // Because of the multiplication by a stride we can have a s/zext cast. - // We are going to replace this stride by 1 so the cast is safe to ignore. - // - // %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - // %0 = trunc i64 %indvars.iv to i32 - // %mul = mul i32 %0, %Stride1 - // %idxprom = zext i32 %mul to i64 << Safe cast. - // %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom - // - Last = replaceSymbolicStrideSCEV(PSE, Strides, - Gep->getOperand(InductionOperand), Gep); - if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last)) - Last = - (C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend) - ? 
C->getOperand() - : Last; - } if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) { const SCEV *Step = AR->getStepRecurrence(*SE); - // The memory is consecutive because the last index is consecutive // and all other indices are loop invariant. if (Step->isOne()) @@ -2208,7 +2169,24 @@ if (Step->isAllOnesValue()) return -1; } + return 0; +} +int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { + assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); + + // Make sure that the pointer does not point to structs. + if (Ptr->getType()->getPointerElementType()->isAggregateType()) + return 0; + + int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides); + if (Stride == 1 || Stride == -1) + return Stride; + + // One more case that we want to check is strided access with stride 1 + GetElementPtrInst *Gep = getGEPInstruction(Ptr); + if (Gep) + return getStrideFromGEPIndex(PSE, Gep, TheLoop, Strides); return 0; } @@ -2589,7 +2567,8 @@ Ptr = Builder.Insert(Gep2); } else { // No GEP // Use the induction element ptr. 
- assert(isa<PHINode>(Ptr) && "Invalid induction ptr"); + assert(isa<SCEVAddRecExpr>(PSE.getSE()->getSCEV(Ptr)) && + "Invalid induction ptr"); setDebugLocFromInst(Builder, Ptr); VectorParts &PtrVal = getVectorValue(Ptr); Ptr = Builder.CreateExtractElement(PtrVal[0], Zero); Index: ../test/Transforms/LoopVectorize/consec_no_gep.ll =================================================================== --- ../test/Transforms/LoopVectorize/consec_no_gep.ll +++ ../test/Transforms/LoopVectorize/consec_no_gep.ll @@ -0,0 +1,45 @@ +;RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;; Check consecutive memory access without preceding GEP instruction + +; for (int i=0; i<len; i++) *to++ = *from++; + +define void @consecutive_no_gep(float* noalias nocapture readonly %from, float* noalias nocapture %to, i32 %len) #0 { +entry: + %cmp2 = icmp sgt i32 %len, 0 + br i1 %cmp2, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %from.addr.04 = phi float* [ %incdec.ptr, %for.body ], [ %from, %for.body.preheader ] + %to.addr.03 = phi float* [ %incdec.ptr1, %for.body ], [ %to, %for.body.preheader ] + %incdec.ptr = getelementptr inbounds float, float* %from.addr.04, i64 1 + %0 = bitcast float* %from.addr.04 to i32* + %1 = load i32, i32* %0, align 4 + %incdec.ptr1 = getelementptr inbounds float, float* %to.addr.03, i64 1 + %2 = bitcast float* %to.addr.03 to i32* + store i32 %1, i32* %2, align 4 + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %len + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +}