Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8259,19 +8259,24 @@
   // Determine if the pointer operand of the access is either consecutive or
   // reverse consecutive.
-  LoopVectorizationCostModel::InstWidening Decision =
-      CM.getWideningDecision(I, Range.Start);
-  bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;
-  bool Consecutive =
-      Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
+  const std::optional<int> Stride = [&]() -> std::optional<int> {
+    switch (CM.getWideningDecision(I, Range.Start)) {
+    default:
+      return std::nullopt;
+    case LoopVectorizationCostModel::CM_Widen_Reverse:
+      return -1;
+    case LoopVectorizationCostModel::CM_Widen:
+      return 1;
+    }
+  }();
 
   if (LoadInst *Load = dyn_cast<LoadInst>(I))
     return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask,
-                                              Consecutive, Reverse);
+                                              Stride);
 
   StoreInst *Store = cast<StoreInst>(I);
   return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0],
-                                            Mask, Consecutive, Reverse);
+                                            Mask, Stride);
 }
 
 /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
@@ -9664,7 +9669,7 @@
   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
-  bool CreateGatherScatter = !Consecutive;
+  bool CreateGatherScatter = !isConsecutive();
 
   auto &Builder = State.Builder;
   InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
@@ -9681,13 +9686,13 @@
     // or query DataLayout for a more suitable index type otherwise.
     const DataLayout &DL =
         Builder.GetInsertBlock()->getModule()->getDataLayout();
-    Type *IndexTy = State.VF.isScalable() && (Reverse || Part > 0)
+    Type *IndexTy = State.VF.isScalable() && (isReverse() || Part > 0)
                         ? DL.getIndexType(ScalarDataTy->getPointerTo())
                         : Builder.getInt32Ty();
     bool InBounds = false;
     if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
       InBounds = gep->isInBounds();
-    if (Reverse) {
+    if (isReverse()) {
       // If the address is consecutive but reversed, then the
       // wide store needs to start at the last vector element.
       // RunTimeVF = VScale * VF.getKnownMinValue()
@@ -9727,7 +9732,7 @@
       NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
                                           MaskPart);
     } else {
-      if (Reverse) {
+      if (isReverse()) {
         // If we store to reverse consecutive memory locations, then we need
         // to reverse the order of elements in the stored value.
         StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
@@ -9771,7 +9776,7 @@
     // Add metadata to the load, but setVectorValue to the reverse shuffle.
    State.addMetadata(NewLI, LI);
-    if (Reverse)
+    if (isReverse())
      NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
  }
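
The LoopVectorize.cpp hunks above replace the Consecutive/Reverse flag pair with a single optional stride, computed by an immediately-invoked lambda over the cost model's widening decision. As a standalone illustration, here is a minimal sketch of that mapping; the InstWidening enum below is a hypothetical stand-in for LoopVectorizationCostModel::InstWidening (the real enum has more states, all of which land in the default case):

#include <cstdio>
#include <optional>
#include <string>

// Hypothetical stand-in for LoopVectorizationCostModel::InstWidening; the
// real enum has additional states (interleave, scalarize, ...), all of
// which fall into the default case below.
enum InstWidening { CM_Unknown, CM_Widen, CM_Widen_Reverse, CM_GatherScatter };

// Mirrors the immediately-invoked lambda in the patch: only the two
// consecutive lowerings yield a known constant stride; everything else
// maps to std::nullopt and will later take the gather/scatter path.
static std::optional<int> decisionToStride(InstWidening Decision) {
  switch (Decision) {
  default:
    return std::nullopt;
  case CM_Widen_Reverse:
    return -1;
  case CM_Widen:
    return 1;
  }
}

int main() {
  for (InstWidening D : {CM_Widen, CM_Widen_Reverse, CM_GatherScatter}) {
    std::optional<int> Stride = decisionToStride(D);
    std::printf("decision %d -> stride %s\n", static_cast<int>(D),
                Stride ? std::to_string(*Stride).c_str() : "nullopt");
  }
  return 0;
}

Representing "no known constant stride" as std::nullopt rather than a sentinel integer is what lets the gather/scatter case fall out of the default label instead of needing a separate flag.
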
Index: llvm/lib/Transforms/Vectorize/VPlan.h
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlan.h
+++ llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1693,11 +1693,15 @@
 class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
   Instruction &Ingredient;
 
-  // Whether the loaded-from / stored-to addresses are consecutive.
-  bool Consecutive;
-
-  // Whether the consecutive loaded/stored addresses are in reverse order.
-  bool Reverse;
+  // The (constant) stride of the memory access as lowered. Note that this
+  // is a lowering choice; there can be strided accesses in the source which
+  // we've decided to lower via gather/scatter, and those will not have the
+  // stride set here. Interesting special cases are:
+  // * 1 -- the loaded-from / stored-to addresses are consecutive. Will be
+  //   lowered as a single wide access.
+  // * -1 -- the consecutive loaded/stored addresses are in reverse order.
+  //   Will be lowered as a wide access followed by a vector reverse.
+  std::optional<int> Stride;
 
   void setMask(VPValue *Mask) {
     if (!Mask)
@@ -1711,20 +1715,18 @@
 
 public:
   VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
-                                 bool Consecutive, bool Reverse)
+                                 std::optional<int> Stride)
       : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}),
-        Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {
-    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
+        Ingredient(Load), Stride(Stride) {
     new VPValue(this, &Load);
     setMask(Mask);
   }
 
   VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
                                  VPValue *StoredValue, VPValue *Mask,
-                                 bool Consecutive, bool Reverse)
+                                 std::optional<int> Stride)
       : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}),
-        Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
-    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
+        Ingredient(Store), Stride(Stride) {
     setMask(Mask);
   }
 
@@ -1752,11 +1754,13 @@
   }
 
   // Return whether the loaded-from / stored-to addresses are consecutive.
-  bool isConsecutive() const { return Consecutive; }
+  bool isConsecutive() const {
+    return Stride && (*Stride == 1 || *Stride == -1);
+  }
 
   // Return whether the consecutive loaded/stored addresses are in reverse
   // order.
-  bool isReverse() const { return Reverse; }
+  bool isReverse() const { return Stride && *Stride == -1; }
 
   /// Generate the wide load/store.
   void execute(VPTransformState &State) override;
Index: llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -69,12 +69,12 @@
       if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
         NewRecipe = new VPWidenMemoryInstructionRecipe(
             *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
-            nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/);
+            nullptr /*Mask*/, std::nullopt /*Stride*/);
       } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
         NewRecipe = new VPWidenMemoryInstructionRecipe(
             *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
             Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/,
-            false /*Consecutive*/, false /*Reverse*/);
+            std::nullopt /*Stride*/);
       } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
         NewRecipe = new VPWidenGEPRecipe(
             GEP, Plan->mapToVPValues(GEP->operands()));
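
With the VPlan.h change above, both predicates derive from the single Stride field, so the removed assertion's invariant ("Reverse implies consecutive") now holds by construction. A self-contained sketch of the two predicates, using a hypothetical StrideInfo struct that carries just the recipe's stride field:

#include <cassert>
#include <optional>

// Hypothetical stand-in carrying just the Stride field of
// VPWidenMemoryInstructionRecipe, to exercise the two predicates in
// isolation.
struct StrideInfo {
  std::optional<int> Stride;

  // Consecutive means a known unit stride, in either direction.
  bool isConsecutive() const {
    return Stride && (*Stride == 1 || *Stride == -1);
  }

  // Reverse is the negative-unit-stride special case of consecutive, so
  // "Reverse implies consecutive" cannot be violated.
  bool isReverse() const { return Stride && *Stride == -1; }
};

int main() {
  assert(StrideInfo{1}.isConsecutive() && !StrideInfo{1}.isReverse());
  assert(StrideInfo{-1}.isConsecutive() && StrideInfo{-1}.isReverse());

  // No known constant stride: the access was lowered as a gather/scatter,
  // and neither predicate holds.
  StrideInfo GatherScatter{std::nullopt};
  assert(!GatherScatter.isConsecutive() && !GatherScatter.isReverse());
  return 0;
}
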
Index: llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
===================================================================
--- llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -1002,7 +1002,7 @@
       new LoadInst(Int32, UndefValue::get(Int32Ptr), "", false, Align(1));
   VPValue Addr;
   VPValue Mask;
-  VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, true, false);
+  VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, 1);
   EXPECT_TRUE(isa<VPWidenMemoryInstructionRecipe>(&Recipe));
   VPRecipeBase *BaseR = &Recipe;
   EXPECT_TRUE(isa<VPWidenMemoryInstructionRecipe>(BaseR));
@@ -1097,7 +1097,7 @@
       new LoadInst(Int32, UndefValue::get(Int32Ptr), "", false, Align(1));
   VPValue Addr;
   VPValue Mask;
-  VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, true, false);
+  VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, 1);
   EXPECT_FALSE(Recipe.mayHaveSideEffects());
   EXPECT_TRUE(Recipe.mayReadFromMemory());
   EXPECT_FALSE(Recipe.mayWriteToMemory());
@@ -1111,8 +1111,8 @@
   VPValue Addr;
   VPValue Mask;
   VPValue StoredV;
-  VPWidenMemoryInstructionRecipe Recipe(*Store, &Addr, &StoredV, &Mask, false,
-                                        false);
+  VPWidenMemoryInstructionRecipe Recipe(*Store, &Addr, &StoredV, &Mask,
+                                        std::nullopt);
   EXPECT_TRUE(Recipe.mayHaveSideEffects());
   EXPECT_FALSE(Recipe.mayReadFromMemory());
   EXPECT_TRUE(Recipe.mayWriteToMemory());
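
For intuition about what the isReverse() paths in execute() emit, here is a scalar model of the two consecutive lowerings. loadWide is purely illustrative and not an LLVM API; it mimics the patch's behavior of starting the wide access at the last vector element and then reversing lane order (cf. the CreateVectorReverse calls):

#include <algorithm>
#include <cassert>
#include <vector>

// Hypothetical scalar model of the two consecutive lowerings. Lane0Addr is
// the address that lane 0 of the vectorized loop would access.
static std::vector<int> loadWide(const int *Lane0Addr, int VF, int Stride) {
  assert(Stride == 1 || Stride == -1);
  // For stride -1, the wide load starts VF-1 elements before lane 0's
  // address, mirroring "the wide store needs to start at the last vector
  // element" in the patch.
  const int *Base = Stride == 1 ? Lane0Addr : Lane0Addr - (VF - 1);
  std::vector<int> V(Base, Base + VF);
  if (Stride == -1)
    std::reverse(V.begin(), V.end()); // models CreateVectorReverse
  return V;
}

int main() {
  int Mem[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  // Forward (stride 1): one wide load of Mem[2..5].
  assert(loadWide(&Mem[2], 4, 1) == (std::vector<int>{2, 3, 4, 5}));
  // Reverse (stride -1): lanes read Mem[5], Mem[4], Mem[3], Mem[2].
  assert(loadWide(&Mem[5], 4, -1) == (std::vector<int>{5, 4, 3, 2}));
  return 0;
}
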