This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Transforms/Vectorize/
-
Transforms/
-
Vectorize/
4/4
LoopVectorize.cpp
2/2
VPlan.h
-
unittests/Transforms/Vectorize/
-
Transforms/
-
Vectorize/
-
VPlanTest.cpp

Differential D147467

[VPlan] Add VPInterleaveRecipe::NeedsMaskForGaps field (NFCI).
ClosedPublic

Authored by fhahn on Apr 3 2023, 1:37 PM.

Download Raw Diff

Details

Reviewers

Ayal
gilr
rengolin

Commits

rG11896357d413: [VPlan] Add VPInterleaveRecipe::NeedsMaskForGaps field (NFCI).

Summary

This patch adds a NeedsMaskForGaps field to VPInterleaveRecipe to record
whether a mask for gaps is needed. This removes a dependence on the cost
model in VPlan code-generation.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

fhahn created this revision. · Apr 3 2023, 1:37 PM

Herald added a project: Restricted Project. · View Herald Transcript · Apr 3 2023, 1:37 PM

Herald added subscribers: StephenFan, tschuett, psnobl and 3 others. · View Herald Transcript

fhahn requested review of this revision. · Apr 3 2023, 1:37 PM

Herald added a project: Restricted Project. · View Herald Transcript · Apr 3 2023, 1:37 PM

Herald added subscribers: pcwang-thead, vkmr. · View Herald Transcript

Harbormaster completed remote builds in B223420: Diff 510589. · Apr 3 2023, 3:49 PM

Whether masking of suffix gaps in interleaved loads can be optimized away or not should indeed be recorded during planning rather than IR generation.
Adding a couple of nits.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
2670–2671	nit (Independent of this patch): MaskForGaps is set here for loads and later reset for stores. Better to either set it here for both or place this setting inside the handling of loads.
9041	nit: may be clearer to first set `bool NeedsMaskForGaps = IG->requiresScalarEpilogue() && !CM.isScalarEpilogueAllowed();` and then use it, instead of setting it inline.
llvm/lib/Transforms/Vectorize/VPlan.h
1398	nit: would be good to document and clarify or rename the two masks: `HasMask` indicates if the interleave group is inside a conditional basic block, i.e., HasBlockMask or HasMaskForBlock. `NeedsMaskForGaps` indicates if the interleave group of loads is allowed to speculatively load absent unused members or must avoid doing so by using a mask, i.e., !CanSpeculativelyLoadWithoutMask, where the speculation relies on loading present members on both sides of missing ones - gaps.

This revision is now accepted and ready to land. · Apr 5 2023, 4:11 AM

fhahn mentioned this in rG3f36b9b456ac: [LV] Move conditional MaskForGaps construction to load case. · Apr 6 2023, 1:16 PM

Rebase and address nits, thanks! I am planning to land this soon.

Harbormaster completed remote builds in B224197: Diff 511657. · Apr 7 2023, 4:44 AM

Closed by commit rG11896357d413: [VPlan] Add VPInterleaveRecipe::NeedsMaskForGaps field (NFCI). (authored by fhahn). · Explain Why · Apr 7 2023, 5:11 AM

This revision was automatically updated to reflect the committed changes.

fhahn added a commit: rG11896357d413: [VPlan] Add VPInterleaveRecipe::NeedsMaskForGaps field (NFCI).

fhahn marked 3 inline comments as done. · Apr 7 2023, 5:12 AM

fhahn added inline comments.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
2670–2671	Moved inside the load handling: 3f36b9b456ac4bfa695e253926daa87cd9838550
9041	Should be adjusted, thanks!
llvm/lib/Transforms/Vectorize/VPlan.h
1398	Added comments, thanks!

Revision Contents

Path

Size

llvm/

lib/

Transforms/

Vectorize/

LoopVectorize.cpp

13 lines

VPlan.h

12 lines

unittests/

Transforms/

Vectorize/

VPlanTest.cpp

2 lines

Diff 511666

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 529 Lines • ▼ Show 20 Lines	public:
/// Try to vectorize interleaved access group \p Group with the base address		/// Try to vectorize interleaved access group \p Group with the base address
/// given in \p Addr, optionally masking the vector operations if \p		/// given in \p Addr, optionally masking the vector operations if \p
/// BlockInMask is non-null. Use \p State to translate given VPValues to IR		/// BlockInMask is non-null. Use \p State to translate given VPValues to IR
/// values in the vectorized loop.		/// values in the vectorized loop.
void vectorizeInterleaveGroup(const InterleaveGroup<Instruction> *Group,		void vectorizeInterleaveGroup(const InterleaveGroup<Instruction> *Group,
ArrayRef<VPValue *> VPDefs,		ArrayRef<VPValue *> VPDefs,
VPTransformState &State, VPValue *Addr,		VPTransformState &State, VPValue *Addr,
ArrayRef<VPValue *> StoredValues,		ArrayRef<VPValue *> StoredValues,
VPValue *BlockInMask = nullptr);		VPValue *BlockInMask, bool NeedsMaskForGaps);

/// Fix the non-induction PHIs in \p Plan.		/// Fix the non-induction PHIs in \p Plan.
void fixNonInductionPHIs(VPlan &Plan, VPTransformState &State);		void fixNonInductionPHIs(VPlan &Plan, VPTransformState &State);

/// Returns true if the reordering of FP operations is not allowed, but we are		/// Returns true if the reordering of FP operations is not allowed, but we are
/// able to vectorize with strict in-order reductions for the given RdxDesc.		/// able to vectorize with strict in-order reductions for the given RdxDesc.
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc);		bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc);

▲ Show 20 Lines • Show All 2,058 Lines • ▼ Show 20 Lines
// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>		// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>		// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,		// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements		// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B		// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
void InnerLoopVectorizer::vectorizeInterleaveGroup(		void InnerLoopVectorizer::vectorizeInterleaveGroup(
const InterleaveGroup<Instruction> Group, ArrayRef<VPValue > VPDefs,		const InterleaveGroup<Instruction> Group, ArrayRef<VPValue > VPDefs,
VPTransformState &State, VPValue Addr, ArrayRef<VPValue > StoredValues,		VPTransformState &State, VPValue Addr, ArrayRef<VPValue > StoredValues,
VPValue *BlockInMask) {		VPValue *BlockInMask, bool NeedsMaskForGaps) {
Instruction *Instr = Group->getInsertPos();		Instruction *Instr = Group->getInsertPos();
const DataLayout &DL = Instr->getModule()->getDataLayout();		const DataLayout &DL = Instr->getModule()->getDataLayout();

// Prepare for the vector type of the interleaved load/store.		// Prepare for the vector type of the interleaved load/store.
Type *ScalarTy = getLoadStoreType(Instr);		Type *ScalarTy = getLoadStoreType(Instr);
unsigned InterleaveFactor = Group->getFactor();		unsigned InterleaveFactor = Group->getFactor();
assert(!VF.isScalable() && "scalable vectors not yet supported.");		assert(!VF.isScalable() && "scalable vectors not yet supported.");
auto VecTy = VectorType::get(ScalarTy, VF InterleaveFactor);		auto VecTy = VectorType::get(ScalarTy, VF InterleaveFactor);
Show All 40 Lines	for (unsigned Part = 0; Part < UF; Part++) {
// Cast to the vector pointer type.		// Cast to the vector pointer type.
unsigned AddressSpace = AddrPart->getType()->getPointerAddressSpace();		unsigned AddressSpace = AddrPart->getType()->getPointerAddressSpace();
Type *PtrTy = VecTy->getPointerTo(AddressSpace);		Type *PtrTy = VecTy->getPointerTo(AddressSpace);
AddrParts.push_back(Builder.CreateBitCast(AddrPart, PtrTy));		AddrParts.push_back(Builder.CreateBitCast(AddrPart, PtrTy));
}		}

State.setDebugLocFromInst(Instr);		State.setDebugLocFromInst(Instr);
Value *PoisonVec = PoisonValue::get(VecTy);		Value *PoisonVec = PoisonValue::get(VecTy);

// Vectorize the interleaved load group.		// Vectorize the interleaved load group.
		AyalUnsubmitted Not Done Reply Inline Actions nit (Independent of this patch): MaskForGaps is set here for loads and later reset for stores. Better to either set it here for both or place this setting inside the handling of loads. Ayal: nit (Independent of this patch): MaskForGaps is set here for loads and later reset for stores.
		fhahnAuthorUnsubmitted Done Reply Inline Actions Moved inside the load handling: 3f36b9b456ac4bfa695e253926daa87cd9838550 fhahn: Moved inside the load handling: 3f36b9b456ac4bfa695e253926daa87cd9838550
if (isa<LoadInst>(Instr)) {		if (isa<LoadInst>(Instr)) {
Value *MaskForGaps = nullptr;		Value *MaskForGaps = nullptr;
if (Group->requiresScalarEpilogue() && !Cost->isScalarEpilogueAllowed()) {		if (NeedsMaskForGaps) {
MaskForGaps =		MaskForGaps =
createBitMaskForGaps(Builder, VF.getKnownMinValue(), *Group);		createBitMaskForGaps(Builder, VF.getKnownMinValue(), *Group);
assert(MaskForGaps && "Mask for Gaps is required but it is null");		assert(MaskForGaps && "Mask for Gaps is required but it is null");
}		}

// For each unroll part, create a wide load for the group.		// For each unroll part, create a wide load for the group.
SmallVector<Value *, 2> NewLoads;		SmallVector<Value *, 2> NewLoads;
for (unsigned Part = 0; Part < UF; Part++) {		for (unsigned Part = 0; Part < UF; Part++) {
▲ Show 20 Lines • Show All 6,345 Lines • ▼ Show 20 Lines	for (const auto *IG : InterleaveGroups) {
SmallVector<VPValue *, 4> StoredValues;		SmallVector<VPValue *, 4> StoredValues;
for (unsigned i = 0; i < IG->getFactor(); ++i)		for (unsigned i = 0; i < IG->getFactor(); ++i)
if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {		if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
auto *StoreR =		auto *StoreR =
cast<VPWidenMemoryInstructionRecipe>(RecipeBuilder.getRecipe(SI));		cast<VPWidenMemoryInstructionRecipe>(RecipeBuilder.getRecipe(SI));
StoredValues.push_back(StoreR->getStoredValue());		StoredValues.push_back(StoreR->getStoredValue());
}		}

		bool NeedsMaskForGaps =
		IG->requiresScalarEpilogue() && !CM.isScalarEpilogueAllowed();
auto *VPIG = new VPInterleaveRecipe(IG, Recipe->getAddr(), StoredValues,		auto *VPIG = new VPInterleaveRecipe(IG, Recipe->getAddr(), StoredValues,
Recipe->getMask());		Recipe->getMask(), NeedsMaskForGaps);
VPIG->insertBefore(Recipe);		VPIG->insertBefore(Recipe);
unsigned J = 0;		unsigned J = 0;
		AyalUnsubmitted Not Done Reply Inline Actions nit: may be clearer to first set `bool NeedsMaskForGaps = IG->requiresScalarEpilogue() && !CM.isScalarEpilogueAllowed();` and then use it, instead of setting it inline. Ayal: nit: may be clearer to first set `bool NeedsMaskForGaps = IG->requiresScalarEpilogue() && !CM.
		fhahnAuthorUnsubmitted Done Reply Inline Actions Should be adjusted, thanks! fhahn: Should be adjusted, thanks!
for (unsigned i = 0; i < IG->getFactor(); ++i)		for (unsigned i = 0; i < IG->getFactor(); ++i)
if (Instruction *Member = IG->getMember(i)) {		if (Instruction *Member = IG->getMember(i)) {
if (!Member->getType()->isVoidTy()) {		if (!Member->getType()->isVoidTy()) {
VPValue *OriginalV = Plan->getVPValue(Member);		VPValue *OriginalV = Plan->getVPValue(Member);
Plan->removeVPValueFor(Member);		Plan->removeVPValueFor(Member);
Plan->addVPValue(Member, VPIG->getVPValue(J));		Plan->addVPValue(Member, VPIG->getVPValue(J));
OriginalV->replaceAllUsesWith(VPIG->getVPValue(J));		OriginalV->replaceAllUsesWith(VPIG->getVPValue(J));
J++;		J++;
▲ Show 20 Lines • Show All 434 Lines • ▼ Show 20 Lines	void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
Value *Step = State.get(getStepValue(), VPIteration(0, 0));		Value *Step = State.get(getStepValue(), VPIteration(0, 0));

buildScalarSteps(BaseIV, Step, IndDesc, this, State);		buildScalarSteps(BaseIV, Step, IndDesc, this, State);
}		}

void VPInterleaveRecipe::execute(VPTransformState &State) {		void VPInterleaveRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Interleave group being replicated.");		assert(!State.Instance && "Interleave group being replicated.");
State.ILV->vectorizeInterleaveGroup(IG, definedValues(), State, getAddr(),		State.ILV->vectorizeInterleaveGroup(IG, definedValues(), State, getAddr(),
getStoredValues(), getMask());		getStoredValues(), getMask(),
		NeedsMaskForGaps);
}		}

void VPReductionRecipe::execute(VPTransformState &State) {		void VPReductionRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Reduction being replicated.");		assert(!State.Instance && "Reduction being replicated.");
Value *PrevInChain = State.get(getChainOp(), 0);		Value *PrevInChain = State.get(getChainOp(), 0);
RecurKind Kind = RdxDesc->getRecurrenceKind();		RecurKind Kind = RdxDesc->getRecurrenceKind();
bool IsOrdered = State.ILV->useOrderedReductions(*RdxDesc);		bool IsOrdered = State.ILV->useOrderedReductions(*RdxDesc);
// Propagate the fast-math flags carried by the underlying instruction.		// Propagate the fast-math flags carried by the underlying instruction.
▲ Show 20 Lines • Show All 1,117 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/VPlan.h

	Show First 20 Lines • Show All 1,385 Lines • ▼ Show 20 Lines

	/// VPInterleaveRecipe is a recipe for transforming an interleave group of load			/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
	/// or stores into one wide load/store and shuffles. The first operand of a			/// or stores into one wide load/store and shuffles. The first operand of a
	/// VPInterleave recipe is the address, followed by the stored values, followed			/// VPInterleave recipe is the address, followed by the stored values, followed
	/// by an optional mask.			/// by an optional mask.
	class VPInterleaveRecipe : public VPRecipeBase {			class VPInterleaveRecipe : public VPRecipeBase {
	const InterleaveGroup<Instruction> *IG;			const InterleaveGroup<Instruction> *IG;

				/// Indicates if the interleave group is in a conditional block and requires a
				/// mask.
	bool HasMask = false;			bool HasMask = false;

				/// Indicates if gaps between members of the group need to be masked out or if
				AyalUnsubmitted Not Done Reply Inline Actions nit: would be good to document and clarify or rename the two masks: `HasMask` indicates if the interleave group is inside a conditional basic block, i.e., HasBlockMask or HasMaskForBlock. `NeedsMaskForGaps` indicates if the interleave group of loads is allowed to speculatively load absent unused members or must avoid doing so by using a mask, i.e., !CanSpeculativelyLoadWithoutMask, where the speculation relies on loading present members on both sides of missing ones - gaps. Ayal: nit: would be good to document and clarify or rename the two masks: `HasMask` indicates if the…
				fhahnAuthorUnsubmitted Done Reply Inline Actions Added comments, thanks! fhahn: Added comments, thanks!
				/// unused gaps can be loaded speculatively.
				bool NeedsMaskForGaps = false;

	public:			public:
	VPInterleaveRecipe(const InterleaveGroup<Instruction> IG, VPValue Addr,			VPInterleaveRecipe(const InterleaveGroup<Instruction> IG, VPValue Addr,
	ArrayRef<VPValue > StoredValues, VPValue Mask)			ArrayRef<VPValue > StoredValues, VPValue Mask,
	: VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG) {			bool NeedsMaskForGaps)
				: VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
				NeedsMaskForGaps(NeedsMaskForGaps) {
	for (unsigned i = 0; i < IG->getFactor(); ++i)			for (unsigned i = 0; i < IG->getFactor(); ++i)
	if (Instruction *I = IG->getMember(i)) {			if (Instruction *I = IG->getMember(i)) {
	if (I->getType()->isVoidTy())			if (I->getType()->isVoidTy())
	continue;			continue;
	new VPValue(I, this);			new VPValue(I, this);
	}			}

	for (auto *SV : StoredValues)			for (auto *SV : StoredValues)
	▲ Show 20 Lines • Show All 1,328 Lines • Show Last 20 Lines

llvm/unittests/Transforms/Vectorize/VPlanTest.cpp

	Show First 20 Lines • Show All 954 Lines • ▼ Show 20 Lines
	}			}

	TEST(VPRecipeTest, CastVPInterleaveRecipeToVPUser) {			TEST(VPRecipeTest, CastVPInterleaveRecipeToVPUser) {
	LLVMContext C;			LLVMContext C;

	VPValue Addr;			VPValue Addr;
	VPValue Mask;			VPValue Mask;
	InterleaveGroup<Instruction> IG(4, false, Align(4));			InterleaveGroup<Instruction> IG(4, false, Align(4));
	VPInterleaveRecipe Recipe(&IG, &Addr, {}, &Mask);			VPInterleaveRecipe Recipe(&IG, &Addr, {}, &Mask, false);
	EXPECT_TRUE(isa<VPUser>(&Recipe));			EXPECT_TRUE(isa<VPUser>(&Recipe));
	VPRecipeBase *BaseR = &Recipe;			VPRecipeBase *BaseR = &Recipe;
	EXPECT_TRUE(isa<VPUser>(BaseR));			EXPECT_TRUE(isa<VPUser>(BaseR));
	EXPECT_EQ(&Recipe, BaseR);			EXPECT_EQ(&Recipe, BaseR);
	}			}

	TEST(VPRecipeTest, CastVPReplicateRecipeToVPUser) {			TEST(VPRecipeTest, CastVPReplicateRecipeToVPUser) {
	LLVMContext C;			LLVMContext C;
	▲ Show 20 Lines • Show All 349 Lines • Show Last 20 Lines