Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -544,7 +544,8 @@ /// vectorized loop. void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State, VPValue *Def, VPValue *Addr, - VPValue *StoredValue, VPValue *BlockInMask); + VPValue *StoredValue, VPValue *BlockInMask, + bool ConsecutiveStride, bool Reverse); /// Set the debug location in the builder \p Ptr using the debug location in /// \p V. If \p Ptr is None then it uses the class member's Builder. @@ -2900,7 +2901,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction( Instruction *Instr, VPTransformState &State, VPValue *Def, VPValue *Addr, - VPValue *StoredValue, VPValue *BlockInMask) { + VPValue *StoredValue, VPValue *BlockInMask, bool ConsecutiveStride, + bool Reverse) { // Attempt to issue a wide load. LoadInst *LI = dyn_cast<LoadInst>(Instr); StoreInst *SI = dyn_cast<StoreInst>(Instr); @@ -2909,13 +2911,6 @@ assert((!SI || StoredValue) && "No stored value provided for widened store"); assert((!LI || !StoredValue) && "Stored value provided for widened load"); - LoopVectorizationCostModel::InstWidening Decision = - Cost->getWideningDecision(Instr, VF); - assert((Decision == LoopVectorizationCostModel::CM_Widen || - Decision == LoopVectorizationCostModel::CM_Widen_Reverse || - Decision == LoopVectorizationCostModel::CM_GatherScatter) && - "CM decision is not to widen the memory instruction"); - Type *ScalarDataTy = getLoadStoreType(Instr); auto *DataTy = VectorType::get(ScalarDataTy, VF); @@ -2923,11 +2918,7 @@ // Determine if the pointer operand of the access is either consecutive or // reverse consecutive. 
- bool Reverse = (Decision == LoopVectorizationCostModel::CM_Widen_Reverse); - bool ConsecutiveStride = - Reverse || (Decision == LoopVectorizationCostModel::CM_Widen); - bool CreateGatherScatter = - (Decision == LoopVectorizationCostModel::CM_GatherScatter); + bool CreateGatherScatter = !ConsecutiveStride; // Either Ptr feeds a vector load/store, or a vector GEP should feed a vector // gather/scatter. Otherwise Decision should have been to Scalarize. @@ -8783,12 +8774,21 @@ if (Legal->isMaskRequired(I)) Mask = createBlockInMask(I->getParent(), Plan); + // Determine if the pointer operand of the access is either consecutive or + // reverse consecutive. + LoopVectorizationCostModel::InstWidening Decision = + CM.getWideningDecision(I, Range.Start); + bool Reverse = (Decision == LoopVectorizationCostModel::CM_Widen_Reverse); + bool Consecutive = + Reverse || (Decision == LoopVectorizationCostModel::CM_Widen); + if (LoadInst *Load = dyn_cast<LoadInst>(I)) - return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask); + return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask, + Consecutive, Reverse); StoreInst *Store = cast<StoreInst>(I); return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0], - Mask); + Mask, Consecutive, Reverse); } VPWidenIntOrFpInductionRecipe * @@ -9883,7 +9883,7 @@ VPValue *StoredValue = isStore() ? getStoredValue() : nullptr; State.ILV->vectorizeMemoryInstruction( &Ingredient, State, StoredValue ? 
nullptr : getVPSingleValue(), getAddr(), - StoredValue, getMask()); + StoredValue, getMask(), Consecutive, Reverse); } // Determine how to lower the scalar epilogue, which depends on 1) optimising Index: llvm/lib/Transforms/Vectorize/VPlan.h =================================================================== --- llvm/lib/Transforms/Vectorize/VPlan.h +++ llvm/lib/Transforms/Vectorize/VPlan.h @@ -1514,6 +1514,9 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase { Instruction &Ingredient; + bool Consecutive; + bool Reverse; + void setMask(VPValue *Mask) { if (!Mask) return; @@ -1525,16 +1528,19 @@ } public: - VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask) - : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load) { + VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, + bool Consecutive, bool Reverse) + : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load), + Consecutive(Consecutive), Reverse(Reverse) { new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this); setMask(Mask); } VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr, - VPValue *StoredValue, VPValue *Mask) + VPValue *StoredValue, VPValue *Mask, + bool Consecutive, bool Reverse) : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}), - Ingredient(Store) { + Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) { setMask(Mask); } @@ -1564,6 +1570,12 @@ return getOperand(1); // Stored value is the 2nd, mandatory operand. } + /// Return whether the loaded-from / stored-to addresses are consecutive. + bool isConsecutive() const { return Consecutive; } + + /// Return whether the loaded/stored addresses are in reverse order. + bool isReverse() const { return Reverse; } + /// Generate the wide load/store. 
void execute(VPTransformState &State) override; Index: llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -61,12 +61,12 @@ if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { NewRecipe = new VPWidenMemoryInstructionRecipe( *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)), - nullptr /*Mask*/); + nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/); } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) { NewRecipe = new VPWidenMemoryInstructionRecipe( *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)), - Plan->getOrAddVPValue(Store->getValueOperand()), - nullptr /*Mask*/); + Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/, + false /*Consecutive*/, false /*Reverse*/); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { NewRecipe = new VPWidenGEPRecipe( GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop); Index: llvm/unittests/Transforms/Vectorize/VPlanTest.cpp =================================================================== --- llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -936,7 +936,7 @@ new LoadInst(Int32, UndefValue::get(Int32Ptr), "", false, Align(1)); VPValue Addr; VPValue Mask; - VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask); + VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, true, false); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); @@ -1032,7 +1032,7 @@ new LoadInst(Int32, UndefValue::get(Int32Ptr), "", false, Align(1)); VPValue Addr; VPValue Mask; - VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask); + VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, true, false); EXPECT_TRUE(Recipe.mayHaveSideEffects()); EXPECT_TRUE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1046,7 +1046,8 @@ 
VPValue Addr; VPValue Mask; VPValue StoredV; - VPWidenMemoryInstructionRecipe Recipe(*Store, &Addr, &StoredV, &Mask); + VPWidenMemoryInstructionRecipe Recipe(*Store, &Addr, &StoredV, &Mask, false, + false); EXPECT_TRUE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_TRUE(Recipe.mayWriteToMemory());