diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -535,7 +535,7 @@ ArrayRef VPDefs, VPTransformState &State, VPValue *Addr, ArrayRef StoredValues, - VPValue *BlockInMask = nullptr); + VPValue *BlockInMask, bool NeedsMaskForGaps); /// Fix the non-induction PHIs in \p Plan. void fixNonInductionPHIs(VPlan &Plan, VPTransformState &State); @@ -2610,7 +2610,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( const InterleaveGroup *Group, ArrayRef VPDefs, VPTransformState &State, VPValue *Addr, ArrayRef StoredValues, - VPValue *BlockInMask) { + VPValue *BlockInMask, bool NeedsMaskForGaps) { Instruction *Instr = Group->getInsertPos(); const DataLayout &DL = Instr->getModule()->getDataLayout(); @@ -2671,7 +2671,7 @@ // Vectorize the interleaved load group. if (isa(Instr)) { Value *MaskForGaps = nullptr; - if (Group->requiresScalarEpilogue() && !Cost->isScalarEpilogueAllowed()) { + if (NeedsMaskForGaps) { MaskForGaps = createBitMaskForGaps(Builder, VF.getKnownMinValue(), *Group); assert(MaskForGaps && "Mask for Gaps is required but it is null"); @@ -9033,8 +9033,10 @@ StoredValues.push_back(StoreR->getStoredValue()); } + bool NeedsMaskForGaps = + IG->requiresScalarEpilogue() && !CM.isScalarEpilogueAllowed(); auto *VPIG = new VPInterleaveRecipe(IG, Recipe->getAddr(), StoredValues, - Recipe->getMask()); + Recipe->getMask(), NeedsMaskForGaps); VPIG->insertBefore(Recipe); unsigned J = 0; for (unsigned i = 0; i < IG->getFactor(); ++i) @@ -9487,7 +9489,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Interleave group being replicated."); State.ILV->vectorizeInterleaveGroup(IG, definedValues(), State, getAddr(), - getStoredValues(), getMask()); + getStoredValues(), getMask(), + NeedsMaskForGaps); } void VPReductionRecipe::execute(VPTransformState &State) { diff 
--git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1391,12 +1391,20 @@ class VPInterleaveRecipe : public VPRecipeBase { const InterleaveGroup *IG; + /// Indicates if the interleave group is in a conditional block and requires a + /// mask. bool HasMask = false; + /// Indicates if gaps between members of the group need to be masked out or if + /// unused gaps can be loaded speculatively. + bool NeedsMaskForGaps = false; + public: VPInterleaveRecipe(const InterleaveGroup *IG, VPValue *Addr, - ArrayRef StoredValues, VPValue *Mask) - : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG) { + ArrayRef StoredValues, VPValue *Mask, + bool NeedsMaskForGaps) + : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG), + NeedsMaskForGaps(NeedsMaskForGaps) { for (unsigned i = 0; i < IG->getFactor(); ++i) if (Instruction *I = IG->getMember(i)) { if (I->getType()->isVoidTy()) diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -960,7 +960,7 @@ VPValue Addr; VPValue Mask; InterleaveGroup IG(4, false, Align(4)); - VPInterleaveRecipe Recipe(&IG, &Addr, {}, &Mask); + VPInterleaveRecipe Recipe(&IG, &Addr, {}, &Mask, false); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR));