Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1581,10 +1581,15 @@ /// to be vectorized. bool blockNeedsPredication(BasicBlock *BB); + /// Collect strides of pointers that are used in loads/store instructions. + void collectPtrStrides(); + /// Check if this pointer is consecutive when vectorizing. This happens /// when the last index of the GEP is the induction variable, or that the /// pointer itself is an induction variable. /// This check allows us to vectorize A[idx] into a wide load/store. + /// Requirements: pointer strides must have been collected before by + /// collectPtrStrides. /// Returns: /// 0 - Stride is unknown or non-consecutive. /// 1 - Address is consecutive. @@ -1786,6 +1791,10 @@ /// first-order recurrences. DenseMap SinkAfter; + /// Holds the stride of pointers that are used in load/store instructions. + /// It is populated by collectPtrStrides. + DenseMap PtrStrides; + /// Holds the widest induction type encountered. Type *WidestIndTy = nullptr; @@ -2780,11 +2789,30 @@ } } +void LoopVectorizationLegality::collectPtrStrides() { + if (!PtrStrides.empty()) + PtrStrides.clear(); + + for (auto *BB : TheLoop->blocks()) + for (auto &I : *BB) { + // If there's no pointer operand or it was visited before, there's + // nothing to do. + auto *Ptr = dyn_cast_or_null(getPointerOperand(&I)); + if (!Ptr || PtrStrides.count(Ptr)) + continue; + + const ValueToValueMap &Strides = + getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); + + PtrStrides[Ptr] = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false); + } +} + int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { - const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() : - ValueToValueMap(); + assert(!PtrStrides.empty() && "Pointer strides have not been collected yet."); + assert(PtrStrides.count(Ptr) && "Missing pointer stride."); - int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false); + int Stride = PtrStrides[Ptr]; if (Stride == 1 || Stride == -1) return Stride; return 0; @@ -5119,6 +5147,10 @@ DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() << '\n'); + // Collect strides of pointers used by memory instructions. To be used by + // subsequent canVectorize* functions. + collectPtrStrides(); + // Check if we can if-convert non-single-bb loops. unsigned NumBlocks = TheLoop->getNumBlocks(); if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { @@ -5853,6 +5885,9 @@ Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); PSE.addPredicate(LAI->getPSE().getUnionPredicate()); + // Runtime checks introduced in PSE may allow to compute more accurate pointer + // strides so we re-collect them to reflect this new information. + collectPtrStrides(); return true; } Index: test/Transforms/LoopVectorize/consecutive-ptr-cg-bug.ll =================================================================== --- test/Transforms/LoopVectorize/consecutive-ptr-cg-bug.ll +++ test/Transforms/LoopVectorize/consecutive-ptr-cg-bug.ll @@ -0,0 +1,52 @@ +; RUN: opt -loop-vectorize -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" +target triple = "x86_64-unknown-linux-gnu" + +; During LV code generation, after introducing middle and scalar.ph basic +; blocks, we expected Legal->isConsecutivePtr() to be consistent and return the +; same output as during legal/cost model phases. Unfortunately, there were some +; corner cases where that didn't happen due to a limitation in SE/SLEV. This +; test verifies that LV is able to handle those corner cases. + +; PR34965 + +; Verify that store is vectorized as stride-1 memory access. + +; CHECK: vector.body: +; CHECK: store <4 x i32> + +; Function Attrs: uwtable +define void @test() { + br label %.outer + +;