Index: ../lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- ../lib/Analysis/LoopAccessAnalysis.cpp
+++ ../lib/Analysis/LoopAccessAnalysis.cpp
@@ -838,6 +838,55 @@
   return false;
 }
 
+/// \brief Classify a GEP access by the recurrence step of its induction index.
+///
+/// \returns 1 for a consecutive access, -1 for a reverse (decreasing)
+/// consecutive access and 0 for anything else.
+static int
+getConsecutiveAccessFromGEPIndex(PredicatedScalarEvolution &PSE,
+                                 GetElementPtrInst *Gep, const Loop *Lp,
+                                 const ValueToValueMap &StridesMap) {
+  auto *SE = PSE.getSE();
+
+  // Induction operand is the index of the GEP, not a pointer.
+  // We are going to check the recurrence step of this index,
+  // so it should be the last relevant index of the GEP.
+  unsigned InductionOperand = getGEPInductionOperand(Gep);
+
+  // Check that all of the gep operands preceding our induction operand are
+  // uniform.
+  for (unsigned i = 0; i < InductionOperand; ++i)
+    if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), Lp))
+      return 0;
+
+  // Because of the multiplication by a stride we can have a s/zext cast.
+  // We are going to replace this stride by 1 so the cast is safe to ignore.
+  //
+  //  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  //  %0 = trunc i64 %indvars.iv to i32
+  //  %mul = mul i32 %0, %Stride1
+  //  %idxprom = zext i32 %mul to i64  << Safe cast.
+  //  %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
+  //
+  const SCEV *Last = replaceSymbolicStrideSCEV(
+      PSE, StridesMap, Gep->getOperand(InductionOperand), Gep);
+
+  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
+    if (C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend)
+      Last = C->getOperand();
+
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
+    const SCEV *Step = AR->getStepRecurrence(*SE);
+    // The memory is consecutive because the last index is consecutive
+    // and all other indices are loop invariant.
+    if (Step->isOne())
+      return 1;
+    if (Step->isAllOnesValue())
+      return -1;
+  }
+  return 0;
+}
+
 /// \brief Check whether the access through \p Ptr has a constant stride.
 int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
                        const Loop *Lp, const ValueToValueMap &StridesMap,
@@ -859,6 +908,14 @@
   if (Assume && !AR)
     AR = PSE.getAsAddRec(Ptr);
 
+  // One more attempt: derive the stride from a GEP whose index is computed at
+  // run time.
+  if (!AR)
+    if (auto *Gep = dyn_cast<GetElementPtrInst>(Ptr))
+      if (int Stride =
+              getConsecutiveAccessFromGEPIndex(PSE, Gep, Lp, StridesMap))
+        return Stride;
+
   if (!AR) {
     DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
                  << " SCEV: " << *PtrScev << "\n");
Index: ../lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- ../lib/Transforms/Vectorize/LoopVectorize.cpp
+++ ../lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2129,86 +2129,10 @@
 
 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
   assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
-  auto *SE = PSE.getSE();
-  // Make sure that the pointer does not point to structs.
-  if (Ptr->getType()->getPointerElementType()->isAggregateType())
-    return 0;
-
-  // If this value is a pointer induction variable, we know it is consecutive.
-  PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
-  if (Phi && Inductions.count(Phi)) {
-    InductionDescriptor II = Inductions[Phi];
-    return II.getConsecutiveDirection();
-  }
-
-  GetElementPtrInst *Gep = getGEPInstruction(Ptr);
-  if (!Gep)
-    return 0;
-
-  unsigned NumOperands = Gep->getNumOperands();
-  Value *GpPtr = Gep->getPointerOperand();
-  // If this GEP value is a consecutive pointer induction variable and all of
-  // the indices are constant, then we know it is consecutive.
-  Phi = dyn_cast<PHINode>(GpPtr);
-  if (Phi && Inductions.count(Phi)) {
-
-    // Make sure that the pointer does not point to structs.
-    PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
-    if (GepPtrType->getElementType()->isAggregateType())
-      return 0;
-
-    // Make sure that all of the index operands are loop invariant.
-    for (unsigned i = 1; i < NumOperands; ++i)
-      if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
-        return 0;
-
-    InductionDescriptor II = Inductions[Phi];
-    return II.getConsecutiveDirection();
-  }
-
-  unsigned InductionOperand = getGEPInductionOperand(Gep);
-
-  // Check that all of the gep indices are uniform except for our induction
-  // operand.
-  for (unsigned i = 0; i != NumOperands; ++i)
-    if (i != InductionOperand &&
-        !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
-      return 0;
-
-  // We can emit wide load/stores only if the last non-zero index is the
-  // induction variable.
-  const SCEV *Last = nullptr;
-  if (!Strides.count(Gep))
-    Last = PSE.getSCEV(Gep->getOperand(InductionOperand));
-  else {
-    // Because of the multiplication by a stride we can have a s/zext cast.
-    // We are going to replace this stride by 1 so the cast is safe to ignore.
-    //
-    //  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-    //  %0 = trunc i64 %indvars.iv to i32
-    //  %mul = mul i32 %0, %Stride1
-    //  %idxprom = zext i32 %mul to i64  << Safe cast.
-    //  %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
-    //
-    Last = replaceSymbolicStrideSCEV(PSE, Strides,
-                                     Gep->getOperand(InductionOperand), Gep);
-    if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
-      Last =
-          (C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend)
-              ? C->getOperand()
-              : Last;
-  }
-  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
-    const SCEV *Step = AR->getStepRecurrence(*SE);
-
-    // The memory is consecutive because the last index is consecutive
-    // and all other indices are loop invariant.
-    if (Step->isOne())
-      return 1;
-    if (Step->isAllOnesValue())
-      return -1;
-  }
+  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides);
+  if (Stride == 1 || Stride == -1)
+    return Stride;
 
   return 0;
 }
 
@@ -2589,7 +2513,8 @@
       Ptr = Builder.Insert(Gep2);
     } else { // No GEP
       // Use the induction element ptr.
-      assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+      assert(isa<SCEVAddRecExpr>(PSE.getSE()->getSCEV(Ptr)) &&
+             "Invalid induction ptr");
       setDebugLocFromInst(Builder, Ptr);
       VectorParts &PtrVal = getVectorValue(Ptr);
       Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
Index: ../test/Transforms/LoopVectorize/consec_no_gep.ll
===================================================================
--- ../test/Transforms/LoopVectorize/consec_no_gep.ll
+++ ../test/Transforms/LoopVectorize/consec_no_gep.ll
@@ -0,0 +1,41 @@
+;RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;; Check consecutive memory access without preceding GEP instruction
+
+; for (int i=0; i<len; i++)
+;   *to++ = *from++;
+
+; NOTE(review): the loop comment above was truncated in this copy of the patch
+; and is rebuilt from the IR below; the FileCheck directives that originally
+; followed it are missing and must be restored before committing.
+
+define void @consecutive_no_gep(float* noalias nocapture readonly %from, float* noalias nocapture %to, i32 %len) #0 {
+entry:
+  %cmp2 = icmp sgt i32 %len, 0
+  br i1 %cmp2, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %from.addr.04 = phi float* [ %incdec.ptr, %for.body ], [ %from, %for.body.preheader ]
+  %to.addr.03 = phi float* [ %incdec.ptr1, %for.body ], [ %to, %for.body.preheader ]
+  %incdec.ptr = getelementptr inbounds float, float* %from.addr.04, i64 1
+  %0 = bitcast float* %from.addr.04 to i32*
+  %1 = load i32, i32* %0, align 4
+  %incdec.ptr1 = getelementptr inbounds float, float* %to.addr.03, i64 1
+  %2 = bitcast float* %to.addr.03 to i32*
+  store i32 %1, i32* %2, align 4
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %len
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}