Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -522,6 +522,11 @@
       DenseMap<std::pair<BasicBlock *, BasicBlock *>, VectorParts>;
   using BlockMaskCacheTy = DenseMap<BasicBlock *, VectorParts>;
 
+  /// Collect the return value of Legal->isConsecutivePtr() for each pointer in
+  /// the loop. This is done before modifications to the loop which can affect
+  /// the return value of Legal->isConsecutivePtr().
+  void collectIsConsecutivePtr();
+
   /// Set up the values of the IVs correctly when exiting the vector loop.
   void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
                     Value *CountRoundDown, Value *EndValue,
@@ -757,6 +762,10 @@
   // Holds the end values for each induction variable. We save the end values
   // so we can later fix-up the external users of the induction variables.
   DenseMap<PHINode *, Value *> IVEndValues;
+
+  /// Holds the return value of Legal->isConsecutivePtr() for each pointer in
+  /// the loop, computed before the loop is modified.
+  DenseMap<Value *, int> IsConsecutivePtr;
 };
 
 class InnerLoopUnroller : public InnerLoopVectorizer {
@@ -3105,7 +3114,9 @@
 
   // Determine if the pointer operand of the access is either consecutive or
   // reverse consecutive.
-  int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+  assert(IsConsecutivePtr.count(Ptr) &&
+         "Missing IsConsecutivePtr information for this pointer.");
+  int ConsecutiveStride = IsConsecutivePtr.lookup(Ptr);
   bool Reverse = ConsecutiveStride < 0;
   bool CreateGatherScatter =
       (Decision == LoopVectorizationCostModel::CM_GatherScatter);
@@ -3521,6 +3532,10 @@
   assert(VectorPH && "Invalid loop structure");
   assert(ExitBlock && "Must have an exit block");
 
+  // Collect Legal->isConsecutivePtr() information for all the pointers in the
+  // loop before it's modified.
+  collectIsConsecutivePtr();
+
   // Some loops have a single integer induction variable, while other loops
   // don't. One example is c++ iterators that often have multiple pointer
   // induction variables. In the code below we also support a case where we
@@ -3667,6 +3682,22 @@
   return LoopVectorPreHeader;
 }
 
+void InnerLoopVectorizer::collectIsConsecutivePtr() {
+  assert(IsConsecutivePtr.empty() &&
+         "IsConsecutivePtr information has been already collected.");
+
+  for (auto *BB : OrigLoop->blocks())
+    for (auto &I : *BB) {
+      // If there's no pointer operand or it was visited before, there's
+      // nothing to do.
+      auto *Ptr = dyn_cast_or_null<Value>(getPointerOperand(&I));
+      if (!Ptr || IsConsecutivePtr.count(Ptr))
+        continue;
+
+      IsConsecutivePtr[Ptr] = Legal->isConsecutivePtr(Ptr);
+    }
+}
+
 // Fix up external users of the induction variable. At this point, we are
 // in LCSSA form, with all external PHIs that use the IV having one input value,
 // coming from the remainder loop. We need those PHIs to also have a correct
Index: test/Transforms/LoopVectorize/consecutive-ptr-cg-bug.ll
===================================================================
--- test/Transforms/LoopVectorize/consecutive-ptr-cg-bug.ll
+++ test/Transforms/LoopVectorize/consecutive-ptr-cg-bug.ll
@@ -0,0 +1,52 @@
+; RUN: opt -loop-vectorize -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+; During LV code generation, after introducing middle and scalar.ph basic
+; blocks, we expected Legal->isConsecutivePtr() to be consistent and return the
+; same output as during legal/cost model phases. Unfortunately, there were some
+; corner cases where that didn't happen due to a limitation in SE/SCEV. This
+; test verifies that LV is able to handle those corner cases.
+
+; PR34965
+
+; Verify that store is vectorized as stride-1 memory access.
+
+; CHECK: vector.body:
+; CHECK: store <4 x i32>
+
+; Function Attrs: uwtable
+define void @test() {
+  br label %.outer
+
+;