Diff 505441

llvm/include/llvm/Analysis/TargetTransformInfo.h

Show First 20 Lines • Show All 534 Lines • ▼ Show 20 Lines	public:
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,		bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *LibInfo,		AssumptionCache &AC, TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) const;		HardwareLoopInfo &HWLoopInfo) const;

/// Query the target whether it would be preferred to create a predicated		/// Query the target whether it would be preferred to create a predicated
/// vector loop, which can avoid the need to emit a scalar epilogue loop.		/// vector loop, which can avoid the need to emit a scalar epilogue loop.
bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI,		bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI) const;		InterleavedAccessInfo *IAI,
		bool NeedsReversePred) const;
		paulwalker-arm: If the number of factors needed to determine the predication strategy is going to increase, perhaps it's worth creating a descriptor class, much like IntrinsicCostAttributes, to keep the interface churn down. What do people think?

/// Query the target what the preferred style of tail folding is.		/// Query the target what the preferred style of tail folding is.
/// \param IVUpdateMayOverflow Tells whether it is known if the IV update		/// \param IVUpdateMayOverflow Tells whether it is known if the IV update
/// may (or will never) overflow for the suggested VF/UF in the given loop.		/// may (or will never) overflow for the suggested VF/UF in the given loop.
/// Targets can use this information to select a more optimal tail folding		/// Targets can use this information to select a more optimal tail folding
/// style. The value conservatively defaults to true, such that no assumptions		/// style. The value conservatively defaults to true, such that no assumptions
/// are made on overflow.		/// are made on overflow.
TailFoldingStyle		TailFoldingStyle
▲ Show 20 Lines • Show All 1,094 Lines • ▼ Show 20 Lines	public:
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,		virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) = 0;		PeelingPreferences &PP) = 0;
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,		virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,		AssumptionCache &AC,
TargetLibraryInfo *LibInfo,		TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) = 0;		HardwareLoopInfo &HWLoopInfo) = 0;
virtual bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI,		virtual bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI) = 0;		InterleavedAccessInfo *IAI,
		bool NeedsReversePred) = 0;
virtual TailFoldingStyle		virtual TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;		getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
virtual std::optional<Instruction *> instCombineIntrinsic(		virtual std::optional<Instruction *> instCombineIntrinsic(
InstCombiner &IC, IntrinsicInst &II) = 0;		InstCombiner &IC, IntrinsicInst &II) = 0;
virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(		virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,		InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
KnownBits & Known, bool &KnownBitsComputed) = 0;		KnownBits & Known, bool &KnownBitsComputed) = 0;
virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(		virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
▲ Show 20 Lines • Show All 381 Lines • ▼ Show 20 Lines	public:
}		}
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,		bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *LibInfo,		AssumptionCache &AC, TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) override {		HardwareLoopInfo &HWLoopInfo) override {
return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);		return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}		}
bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI,		bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI) override {		InterleavedAccessInfo *IAI,
return Impl.preferPredicateOverEpilogue(TLI, LVL, IAI);		bool NeedsReversePred) override {
		return Impl.preferPredicateOverEpilogue(TLI, LVL, IAI, NeedsReversePred);
}		}
TailFoldingStyle		TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {		getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);		return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
}		}
std::optional<Instruction *>		std::optional<Instruction *>
instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {		instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
return Impl.instCombineIntrinsic(IC, II);		return Impl.instCombineIntrinsic(IC, II);
▲ Show 20 Lines • Show All 671 Lines • Show Last 20 Lines

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Show First 20 Lines • Show All 159 Lines • ▼ Show 20 Lines	public:
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,		bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *LibInfo,		AssumptionCache &AC, TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) const {		HardwareLoopInfo &HWLoopInfo) const {
return false;		return false;
}		}

bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI,		bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI) const {		InterleavedAccessInfo *IAI,
		bool NeedsReversePred) const {
return false;		return false;
}		}

TailFoldingStyle		TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {		getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
return TailFoldingStyle::DataWithoutLaneMask;		return TailFoldingStyle::DataWithoutLaneMask;
}		}

▲ Show 20 Lines • Show All 1,127 Lines • Show Last 20 Lines

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Show First 20 Lines • Show All 618 Lines • ▼ Show 20 Lines	bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,		AssumptionCache &AC,
TargetLibraryInfo *LibInfo,		TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) {		HardwareLoopInfo &HWLoopInfo) {
return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);		return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}		}

bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI,		bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI) {		InterleavedAccessInfo *IAI,
return BaseT::preferPredicateOverEpilogue(TLI, LVL, IAI);		bool NeedsReversePred) {
		return BaseT::preferPredicateOverEpilogue(TLI, LVL, IAI, NeedsReversePred);
}		}

TailFoldingStyle		TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) {		getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) {
return BaseT::getPreferredTailFoldingStyle(IVUpdateMayOverflow);		return BaseT::getPreferredTailFoldingStyle(IVUpdateMayOverflow);
}		}

std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,		std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
▲ Show 20 Lines • Show All 1,830 Lines • Show Last 20 Lines

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Show First 20 Lines • Show All 341 Lines • ▼ Show 20 Lines	public:
/// Returns:		/// Returns:
/// 0 - Stride is unknown or non-consecutive.		/// 0 - Stride is unknown or non-consecutive.
/// 1 - Address is consecutive.		/// 1 - Address is consecutive.
/// -1 - Address is consecutive, and decreasing.		/// -1 - Address is consecutive, and decreasing.
/// NOTE: This method must only be used before modifying the original scalar		/// NOTE: This method must only be used before modifying the original scalar
/// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).		/// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
int isConsecutivePtr(Type AccessTy, Value Ptr) const;		int isConsecutivePtr(Type AccessTy, Value Ptr) const;

		/// Returns true if the loop contains a load or store whose address is
		/// consecutive and decreasing, i.e. an access for which isConsecutivePtr
		/// returns -1.
		bool containsDecreasingPointers();

/// Returns true if the value V is uniform within the loop.		/// Returns true if the value V is uniform within the loop.
bool isUniform(Value *V) const;		bool isUniform(Value *V) const;

/// A uniform memory op is a load or store which accesses the same memory		/// A uniform memory op is a load or store which accesses the same memory
/// location on all lanes.		/// location on all lanes.
bool isUniformMemOp(Instruction &I) const;		bool isUniformMemOp(Instruction &I) const;

/// Returns the information that we collected about runtime memory check.		/// Returns the information that we collected about runtime memory check.
▲ Show 20 Lines • Show All 208 Lines • Show Last 20 Lines

llvm/lib/Analysis/TargetTransformInfo.cpp

	Show First 20 Lines • Show All 301 Lines • ▼ Show 20 Lines
	bool TargetTransformInfo::isHardwareLoopProfitable(			bool TargetTransformInfo::isHardwareLoopProfitable(
	Loop *L, ScalarEvolution &SE, AssumptionCache &AC,			Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
	TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {			TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
	return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);			return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
	}			}

	bool TargetTransformInfo::preferPredicateOverEpilogue(			bool TargetTransformInfo::preferPredicateOverEpilogue(
	TargetLibraryInfo TLI, LoopVectorizationLegality LVL,			TargetLibraryInfo TLI, LoopVectorizationLegality LVL,
	InterleavedAccessInfo *IAI) const {			InterleavedAccessInfo *IAI, bool NeedsReversePred) const {
	return TTIImpl->preferPredicateOverEpilogue(TLI, LVL, IAI);			return TTIImpl->preferPredicateOverEpilogue(TLI, LVL, IAI, NeedsReversePred);
	}			}

	TailFoldingStyle TargetTransformInfo::getPreferredTailFoldingStyle(			TailFoldingStyle TargetTransformInfo::getPreferredTailFoldingStyle(
	bool IVUpdateMayOverflow) const {			bool IVUpdateMayOverflow) const {
	return TTIImpl->getPreferredTailFoldingStyle(IVUpdateMayOverflow);			return TTIImpl->getPreferredTailFoldingStyle(IVUpdateMayOverflow);
	}			}

	std::optional<Instruction *>			std::optional<Instruction *>
	▲ Show 20 Lines • Show All 931 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Show First 20 Lines • Show All 352 Lines • ▼ Show 20 Lines	if (ST->hasSVE())
? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck		? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
: TailFoldingStyle::DataAndControlFlow;		: TailFoldingStyle::DataAndControlFlow;

return TailFoldingStyle::DataWithoutLaneMask;		return TailFoldingStyle::DataWithoutLaneMask;
}		}

bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI,		bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI);		InterleavedAccessInfo *IAI,
		bool NeedsReversePred);

bool supportsScalableVectors() const { return ST->hasSVE(); }		bool supportsScalableVectors() const { return ST->hasSVE(); }

bool enableScalableVectorization() const { return ST->hasSVE(); }		bool enableScalableVectorization() const { return ST->hasSVE(); }

bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,		bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const;		ElementCount VF) const;

Show All 31 Lines

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Show First 20 Lines • Show All 43 Lines • ▼ Show 20 Lines
private:		private:
uint8_t Bits = 0; // Currently defaults to disabled.		uint8_t Bits = 0; // Currently defaults to disabled.

public:		public:
enum TailFoldingOpts {		enum TailFoldingOpts {
TFDisabled = 0x0,		TFDisabled = 0x0,
TFReductions = 0x01,		TFReductions = 0x01,
TFRecurrences = 0x02,		TFRecurrences = 0x02,
		TFReverse = 0x04,
TFSimple = 0x80,		TFSimple = 0x80,
TFAll = TFReductions \| TFRecurrences \| TFSimple		TFAll = TFReductions \| TFRecurrences \| TFReverse \| TFSimple
};		};

void operator=(const std::string &Val) {		void operator=(const std::string &Val) {
if (Val.empty())		if (Val.empty())
return;		return;
SmallVector<StringRef, 6> TailFoldTypes;		SmallVector<StringRef, 6> TailFoldTypes;
StringRef(Val).split(TailFoldTypes, '+', -1, false);		StringRef(Val).split(TailFoldTypes, '+', -1, false);
for (auto TailFoldType : TailFoldTypes) {		for (auto TailFoldType : TailFoldTypes) {
if (TailFoldType == "disabled")		if (TailFoldType == "disabled")
Bits = 0;		Bits = 0;
else if (TailFoldType == "all")		else if (TailFoldType == "all")
Bits = TFAll;		Bits = TFAll;
else if (TailFoldType == "default")		else if (TailFoldType == "default")
Bits = 0; // Currently defaults to never tail-folding.		Bits = 0; // Currently defaults to never tail-folding.
else if (TailFoldType == "simple")		else if (TailFoldType == "simple")
add(TFSimple);		add(TFSimple);
else if (TailFoldType == "reductions")		else if (TailFoldType == "reductions")
add(TFReductions);		add(TFReductions);
else if (TailFoldType == "recurrences")		else if (TailFoldType == "recurrences")
add(TFRecurrences);		add(TFRecurrences);
		else if (TailFoldType == "reverse")
		add(TFReverse);
else if (TailFoldType == "noreductions")		else if (TailFoldType == "noreductions")
remove(TFReductions);		remove(TFReductions);
else if (TailFoldType == "norecurrences")		else if (TailFoldType == "norecurrences")
remove(TFRecurrences);		remove(TFRecurrences);
		else if (TailFoldType == "noreverse")
		remove(TFReverse);
else {		else {
errs()		errs()
<< "invalid argument " << TailFoldType.str()		<< "invalid argument " << TailFoldType.str()
<< " to -sve-tail-folding=; each element must be one of: disabled, "		<< " to -sve-tail-folding=; each element must be one of: disabled, "
"all, default, simple, reductions, noreductions, recurrences, "		"all, default, simple, reductions, noreductions, recurrences, "
"norecurrences\n";		"norecurrences\n";
}		}
}		}
Show All 15 Lines	cl::desc(
"\ndisabled No loop types will vectorize using tail-folding"		"\ndisabled No loop types will vectorize using tail-folding"
"\ndefault Uses the default tail-folding settings for the target "		"\ndefault Uses the default tail-folding settings for the target "
"CPU"		"CPU"
"\nall All legal loop types will vectorize using tail-folding"		"\nall All legal loop types will vectorize using tail-folding"
"\nsimple Use tail-folding for simple loops (not reductions or "		"\nsimple Use tail-folding for simple loops (not reductions or "
"recurrences)"		"recurrences)"
"\nreductions Use tail-folding for loops containing reductions"		"\nreductions Use tail-folding for loops containing reductions"
"\nrecurrences Use tail-folding for loops containing fixed order "		"\nrecurrences Use tail-folding for loops containing fixed order "
"recurrences"),		"recurrences"
		"\nreverse Use tail-folding for loops requiring reversed "
		"predicates"),
cl::location(TailFoldingKindLoc));		cl::location(TailFoldingKindLoc));

// Experimental option that will only be fully functional when the		// Experimental option that will only be fully functional when the
// code-generator is changed to use SVE instead of NEON for all fixed-width		// code-generator is changed to use SVE instead of NEON for all fixed-width
// operations.		// operations.
static cl::opt<bool> EnableFixedwidthAutovecInStreamingMode(		static cl::opt<bool> EnableFixedwidthAutovecInStreamingMode(
"enable-fixedwidth-autovec-in-streaming-mode", cl::init(false), cl::Hidden);		"enable-fixedwidth-autovec-in-streaming-mode", cl::init(false), cl::Hidden);

▲ Show 20 Lines • Show All 3,258 Lines • ▼ Show 20 Lines	if (Kind == TTI::SK_InsertSubvector && LT.second.isFixedLengthVector() &&
}		}
}		}

return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);		return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
}		}

bool AArch64TTIImpl::preferPredicateOverEpilogue(TargetLibraryInfo *TLI,		bool AArch64TTIImpl::preferPredicateOverEpilogue(TargetLibraryInfo *TLI,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI) {		InterleavedAccessInfo *IAI,
		bool NeedsReversePred) {
if (!ST->hasSVE() \|\| TailFoldingKindLoc == TailFoldingKind::TFDisabled)		if (!ST->hasSVE() \|\| TailFoldingKindLoc == TailFoldingKind::TFDisabled)
return false;		return false;

// We don't currently support vectorisation with interleaving for SVE - with		// We don't currently support vectorisation with interleaving for SVE - with
// such loops we're better off not using tail-folding. This gives us a chance		// such loops we're better off not using tail-folding. This gives us a chance
// to fall back on fixed-width vectorisation using NEON's ld2/st2/etc.		// to fall back on fixed-width vectorisation using NEON's ld2/st2/etc.
if (IAI->hasGroups())		if (IAI->hasGroups())
return false;		return false;

TailFoldingKind Required; // Defaults to 0.		TailFoldingKind Required; // Defaults to 0.
		sdesmalen: Does it also need to return `true` for strides < -1?
		david-arm (author): In practice, if abs(Stride) > 1 we will always version the loop and only enter the vectorised loop if abs(Stride) == 1, but checking for all values < 0 doesn't do any harm.
		sdesmalen: (quoting david-arm: "In practice if abs(Stride) > 1 we will always version the loop and only enter the vectorised loop if abs(Stride)==1") I thought that's only the case when the stride of the induction variable is unknown. It's still possible to use LD2/3/4 for known strides > 1, or to vectorize a loop using gathers.
		dmgreen: I think it will use gathers for strides <= -2. At least that is what it does for MVE; it may depend on the costs.
		david-arm (author): Yeah, you're both right, @dmgreen and @sdesmalen; for some reason I was getting mixed up with unknown strides. :confused:
		sdesmalen: nit: you could do `if (getPtrStride(...).value_or(0) == -1)` directly?
if (LVL->getReductionVars().size())		if (LVL->getReductionVars().size())
Required.add(TailFoldingKind::TFReductions);		Required.add(TailFoldingKind::TFReductions);
if (LVL->getFixedOrderRecurrences().size())		if (LVL->getFixedOrderRecurrences().size())
Required.add(TailFoldingKind::TFRecurrences);		Required.add(TailFoldingKind::TFRecurrences);
		if (NeedsReversePred)
		Required.add(TailFoldingKind::TFReverse);
if (!Required)		if (!Required)
Required.add(TailFoldingKind::TFSimple);		Required.add(TailFoldingKind::TFSimple);

return (TailFoldingKindLoc & Required) == Required;		return (TailFoldingKindLoc & Required) == Required;
}		}

InstructionCost		InstructionCost
AArch64TTIImpl::getScalingFactorCost(Type Ty, GlobalValue BaseGV,		AArch64TTIImpl::getScalingFactorCost(Type Ty, GlobalValue BaseGV,
Show All 20 Lines

llvm/lib/Target/ARM/ARMTargetTransformInfo.h

Show First 20 Lines • Show All 299 Lines • ▼ Show 20 Lines	public:
bool maybeLoweredToCall(Instruction &I);		bool maybeLoweredToCall(Instruction &I);
bool isLoweredToCall(const Function *F);		bool isLoweredToCall(const Function *F);
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,		bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,		AssumptionCache &AC,
TargetLibraryInfo *LibInfo,		TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo);		HardwareLoopInfo &HWLoopInfo);
bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI,		bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI);		InterleavedAccessInfo *IAI,
		bool NeedsReversePred);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,		void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,		TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE);		OptimizationRemarkEmitter *ORE);

TailFoldingStyle		TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;		getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;

void getPeelingPreferences(Loop *L, ScalarEvolution &SE,		void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
▲ Show 20 Lines • Show All 48 Lines • Show Last 20 Lines

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Show First 20 Lines • Show All 2,234 Lines • ▼ Show 20 Lines	static bool canTailPredicateLoop(Loop L, LoopInfo LI, ScalarEvolution &SE,
}		}

LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");		LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
return true;		return true;
}		}

bool ARMTTIImpl::preferPredicateOverEpilogue(TargetLibraryInfo *TLI,		bool ARMTTIImpl::preferPredicateOverEpilogue(TargetLibraryInfo *TLI,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI) {		InterleavedAccessInfo *IAI,
		bool NeedsReversePred) {
if (!EnableTailPredication) {		if (!EnableTailPredication) {
LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n");		LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n");
return false;		return false;
}		}

// Creating a predicated vector loop is the first step for generating a		// Creating a predicated vector loop is the first step for generating a
// tail-predicated hardware loop, for which we need the MVE masked		// tail-predicated hardware loop, for which we need the MVE masked
// load/stores instructions:		// load/stores instructions:
▲ Show 20 Lines • Show All 212 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Show First 20 Lines • Show All 646 Lines • ▼ Show 20 Lines	for (ElementCount VF = ElementCount::getScalable(1);
ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)		ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);		Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
assert((WidestScalableVF.isZero() \|\| !Scalarize) &&		assert((WidestScalableVF.isZero() \|\| !Scalarize) &&
"Caller may decide to scalarize a variant using a scalable VF");		"Caller may decide to scalarize a variant using a scalable VF");
}		}
return Scalarize;		return Scalarize;
}		}

		bool LoopVectorizationLegality::containsDecreasingPointers() {
		for (BasicBlock *BB : TheLoop->blocks()) {
		// Scan the instructions in the block and look for addresses that are
		// consecutive and decreasing.
		for (Instruction &I : *BB) {
		if (isa<LoadInst>(&I) \|\| isa<StoreInst>(&I)) {
		Value *Ptr = getLoadStorePointerOperand(&I);
		Type *ScalarTy = getLoadStoreType(&I);
		if (isConsecutivePtr(ScalarTy, Ptr) == -1)
		return true;
		}
		}
		}
		return false;
		}

bool LoopVectorizationLegality::canVectorizeInstrs() {		bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *Header = TheLoop->getHeader();		BasicBlock *Header = TheLoop->getHeader();

// For each block in the loop.		// For each block in the loop.
for (BasicBlock *BB : TheLoop->blocks()) {		for (BasicBlock *BB : TheLoop->blocks()) {
// Scan the instructions in the block and look for hazards.		// Scan the instructions in the block and look for hazards.
for (Instruction &I : *BB) {		for (Instruction &I : *BB) {
if (auto *Phi = dyn_cast<PHINode>(&I)) {		if (auto *Phi = dyn_cast<PHINode>(&I)) {
▲ Show 20 Lines • Show All 787 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 9,785 Lines • ▼ Show 20 Lines	static ScalarEpilogueLowering getScalarEpilogueLowering(
// 3) If set, obey the hints		// 3) If set, obey the hints
switch (Hints.getPredicate()) {		switch (Hints.getPredicate()) {
case LoopVectorizeHints::FK_Enabled:		case LoopVectorizeHints::FK_Enabled:
return CM_ScalarEpilogueNotNeededUsePredicate;		return CM_ScalarEpilogueNotNeededUsePredicate;
case LoopVectorizeHints::FK_Disabled:		case LoopVectorizeHints::FK_Disabled:
return CM_ScalarEpilogueAllowed;		return CM_ScalarEpilogueAllowed;
};		};

		// We call this to discover whether any load/store pointers in the loop have
		// negative strides. This will require extra work to reverse the loop
		// predicate, which may be expensive.
		bool NeedsReversePred = LVL.containsDecreasingPointers();

// 4) if the TTI hook indicates this is profitable, request predication.		// 4) if the TTI hook indicates this is profitable, request predication.
if (TTI->preferPredicateOverEpilogue(TLI, &LVL, IAI))		if (TTI->preferPredicateOverEpilogue(TLI, &LVL, IAI, NeedsReversePred))
return CM_ScalarEpilogueNotNeededUsePredicate;		return CM_ScalarEpilogueNotNeededUsePredicate;

return CM_ScalarEpilogueAllowed;		return CM_ScalarEpilogueAllowed;
}		}

Value VPTransformState::get(VPValue Def, unsigned Part) {		Value VPTransformState::get(VPValue Def, unsigned Part) {
// If Values have been set for this Def return the one relevant for \p Part.		// If Values have been set for this Def return the one relevant for \p Part.
if (hasVectorValue(Def, Part))		if (hasVectorValue(Def, Part))
▲ Show 20 Lines • Show All 833 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll

; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=disabled -S | FileCheck %s -check-prefix=CHECK-NOTF		; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=disabled -S | FileCheck %s -check-prefix=CHECK-NOTF
; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=default -S | FileCheck %s -check-prefix=CHECK-NOTF		; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=default -S | FileCheck %s -check-prefix=CHECK-NOTF
; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=all -S | FileCheck %s -check-prefix=CHECK-TF		; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=all -S | FileCheck %s -check-prefix=CHECK-TF
; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=disabled+simple+reductions+recurrences -S | FileCheck %s -check-prefix=CHECK-TF		; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=disabled+simple+reductions+recurrences+reverse -S | FileCheck %s -check-prefix=CHECK-TF
; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=all+noreductions -S | FileCheck %s -check-prefix=CHECK-TF-NORED		; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=all+noreductions -S | FileCheck %s -check-prefix=CHECK-TF-NORED
; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=all+norecurrences -S | FileCheck %s -check-prefix=CHECK-TF-NOREC		; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=all+norecurrences -S | FileCheck %s -check-prefix=CHECK-TF-NOREC
		; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=all+noreverse -S | FileCheck %s -check-prefix=CHECK-TF-NOREV
; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=reductions -S | FileCheck %s -check-prefix=CHECK-TF-ONLYRED		; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize -sve-tail-folding=reductions -S | FileCheck %s -check-prefix=CHECK-TF-ONLYRED

target triple = "aarch64-unknown-linux-gnu"		target triple = "aarch64-unknown-linux-gnu"

define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 {		define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 {
; CHECK-NOTF-LABEL: @simple_memset(		; CHECK-NOTF-LABEL: @simple_memset(
; CHECK-NOTF: vector.ph:		; CHECK-NOTF: vector.ph:
; CHECK-NOTF: %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0		; CHECK-NOTF: %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
Show All 13 Lines
; CHECK-TF-NOREC-LABEL: @simple_memset(		; CHECK-TF-NOREC-LABEL: @simple_memset(
; CHECK-TF-NOREC: vector.ph:		; CHECK-TF-NOREC: vector.ph:
; CHECK-TF-NOREC: %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0		; CHECK-TF-NOREC: %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
; CHECK-TF-NOREC: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer		; CHECK-TF-NOREC: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-TF-NOREC: vector.body:		; CHECK-TF-NOREC: vector.body:
; CHECK-TF-NOREC: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>		; CHECK-TF-NOREC: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF-NOREC: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]		; CHECK-TF-NOREC: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]

		; CHECK-TF-NOREV-LABEL: @simple_memset(
		; CHECK-TF-NOREV: vector.ph:
		; CHECK-TF-NOREV: %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
		; CHECK-TF-NOREV: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
		; CHECK-TF-NOREV: vector.body:
		; CHECK-TF-NOREV: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
		; CHECK-TF-NOREV: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]

; CHECK-TF-LABEL: @simple_memset(		; CHECK-TF-LABEL: @simple_memset(
; CHECK-TF: vector.ph:		; CHECK-TF: vector.ph:
; CHECK-TF: %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0		; CHECK-TF: %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
; CHECK-TF: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer		; CHECK-TF: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-TF: vector.body:		; CHECK-TF: vector.body:
; CHECK-TF: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>		; CHECK-TF: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]		; CHECK-TF: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]

▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines
; CHECK-TF-NOREC: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>		; CHECK-TF-NOREC: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF-NOREC: %[[VEC_PHI:.*]] = phi <vscale x 4 x float>		; CHECK-TF-NOREC: %[[VEC_PHI:.*]] = phi <vscale x 4 x float>
; CHECK-TF-NOREC: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>{{.*}} %[[ACTIVE_LANE_MASK]]		; CHECK-TF-NOREC: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>{{.*}} %[[ACTIVE_LANE_MASK]]
; CHECK-TF-NOREC: %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]		; CHECK-TF-NOREC: %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
; CHECK-TF-NOREC: %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]		; CHECK-TF-NOREC: %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
; CHECK-TF-NOREC: middle.block:		; CHECK-TF-NOREC: middle.block:
; CHECK-TF-NOREC-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> %[[SEL]])		; CHECK-TF-NOREC-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> %[[SEL]])

		; CHECK-TF-NOREV-LABEL: @fadd_red_fast
		; CHECK-TF-NOREV: vector.body:
		; CHECK-TF-NOREV: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
		; CHECK-TF-NOREV: %[[VEC_PHI:.*]] = phi <vscale x 4 x float>
		; CHECK-TF-NOREV: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>{{.*}} %[[ACTIVE_LANE_MASK]]
		; CHECK-TF-NOREV: %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
		; CHECK-TF-NOREV: %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
		; CHECK-TF-NOREV: middle.block:
		; CHECK-TF-NOREV-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> %[[SEL]])

; CHECK-TF-LABEL: @fadd_red_fast		; CHECK-TF-LABEL: @fadd_red_fast
; CHECK-TF: vector.body:		; CHECK-TF: vector.body:
; CHECK-TF: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>		; CHECK-TF: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF: %[[VEC_PHI:.*]] = phi <vscale x 4 x float>		; CHECK-TF: %[[VEC_PHI:.*]] = phi <vscale x 4 x float>
; CHECK-TF: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>{{.*}} %[[ACTIVE_LANE_MASK]]		; CHECK-TF: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>{{.*}} %[[ACTIVE_LANE_MASK]]
; CHECK-TF: %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]		; CHECK-TF: %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
; CHECK-TF: %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]		; CHECK-TF: %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
; CHECK-TF: middle.block:		; CHECK-TF: middle.block:
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines
; CHECK-TF-NOREC: vector.body:		; CHECK-TF-NOREC: vector.body:
; CHECK-TF-NOREC-NOT: %{{.*}} = phi <vscale x 4 x i1>		; CHECK-TF-NOREC-NOT: %{{.*}} = phi <vscale x 4 x i1>
; CHECK-TF-NOREC: %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]		; CHECK-TF-NOREC: %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
; CHECK-TF-NOREC: %[[LOAD]] = load <vscale x 4 x i32>		; CHECK-TF-NOREC: %[[LOAD]] = load <vscale x 4 x i32>
; CHECK-TF-NOREC: %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)		; CHECK-TF-NOREC: %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
; CHECK-TF-NOREC: %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]		; CHECK-TF-NOREC: %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
; CHECK-TF-NOREC: store <vscale x 4 x i32> %[[ADD]]		; CHECK-TF-NOREC: store <vscale x 4 x i32> %[[ADD]]

		; CHECK-TF-NOREV-LABEL: @add_recur
		; CHECK-TF-NOREV: entry:
		; CHECK-TF-NOREV: %[[PRE:.*]] = load i32, i32* %src, align 4
		; CHECK-TF-NOREV: vector.ph:
		; CHECK-TF-NOREV: %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
		; CHECK-TF-NOREV: vector.body:
		; CHECK-TF-NOREV: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
		; CHECK-TF-NOREV: %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
		; CHECK-TF-NOREV: %[[LOAD]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>{{.*}} %[[ACTIVE_LANE_MASK]]
		; CHECK-TF-NOREV: %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
		; CHECK-TF-NOREV: %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
		; CHECK-TF-NOREV: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %[[ADD]], {{.*}} <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]])

; CHECK-TF-LABEL: @add_recur		; CHECK-TF-LABEL: @add_recur
; CHECK-TF: entry:		; CHECK-TF: entry:
; CHECK-TF: %[[PRE:.*]] = load i32, i32* %src, align 4		; CHECK-TF: %[[PRE:.*]] = load i32, i32* %src, align 4
; CHECK-TF: vector.ph:		; CHECK-TF: vector.ph:
; CHECK-TF: %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]		; CHECK-TF: %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
; CHECK-TF: vector.body:		; CHECK-TF: vector.body:
; CHECK-TF: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>		; CHECK-TF: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF: %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]		; CHECK-TF: %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines
; CHECK-TF-NORED: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>		; CHECK-TF-NORED: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

; CHECK-TF-NOREC-LABEL: @interleave(		; CHECK-TF-NOREC-LABEL: @interleave(
; CHECK-TF-NOREC: vector.body:		; CHECK-TF-NOREC: vector.body:
; CHECK-TF-NOREC: %[[LOAD:.*]] = load <8 x float>, <8 x float>		; CHECK-TF-NOREC: %[[LOAD:.*]] = load <8 x float>, <8 x float>
; CHECK-TF-NOREC: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>		; CHECK-TF-NOREC: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-TF-NOREC: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>		; CHECK-TF-NOREC: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

		; CHECK-TF-NOREV-LABEL: @interleave(
		; CHECK-TF-NOREV: vector.body:
		; CHECK-TF-NOREV: %[[LOAD:.*]] = load <8 x float>, <8 x float>
		; CHECK-TF-NOREV: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
		; CHECK-TF-NOREV: %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

entry:		entry:
br label %for.body		br label %for.body

for.body: ; preds = %entry, %for.body		for.body: ; preds = %entry, %for.body
%i.021 = phi i64 [ %inc, %for.body ], [ 0, %entry ]		%i.021 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%mul = shl nuw nsw i64 %i.021, 1		%mul = shl nuw nsw i64 %i.021, 1
%arrayidx = getelementptr inbounds float, float* %src, i64 %mul		%arrayidx = getelementptr inbounds float, float* %src, i64 %mul
%0 = load float, float* %arrayidx, align 4		%0 = load float, float* %arrayidx, align 4
Show All 12 Lines	for.body: ; preds = %entry, %for.body
%inc = add nuw nsw i64 %i.021, 1		%inc = add nuw nsw i64 %i.021, 1
%exitcond.not = icmp eq i64 %inc, %n		%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %for.end, label %for.body		br i1 %exitcond.not, label %for.end, label %for.body

for.end: ; preds = %for.body, %entry		for.end: ; preds = %for.body, %entry
ret void		ret void
}		}

		define void @reverse(double* noalias %dst, double* noalias %src) #0 {
		; CHECK-NOTF-LABEL: @reverse(
		; CHECK-NOTF: vector.body:
		; CHECK-NOTF-NOT: %{{.*}} = phi <vscale x 4 x i1>
		; CHECK-NOTF: %[[LOAD:.*]] = load <vscale x 2 x double>, <vscale x 2 x double>* %18, align 8
		; CHECK-NOTF: %{{.*}} = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> %[[LOAD]])

		; CHECK-TF-NOREV-LABEL: @reverse(
		; CHECK-TF-NOREV: vector.body:
		; CHECK-TF-NOREV-NOT: %{{.*}} = phi <vscale x 4 x i1>
		; CHECK-TF-NOREV: %[[LOAD:.*]] = load <vscale x 2 x double>, <vscale x 2 x double>* %18, align 8
		; CHECK-TF-NOREV: %{{.*}} = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> %[[LOAD]])

		; CHECK-TF-LABEL: @reverse(
		; CHECK-TF: vector.body:
		; CHECK-TF: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 2 x i1>
		; CHECK-TF: %[[REVERSE_MASK:.*]] = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %[[ACTIVE_LANE_MASK]])
		; CHECK-TF: %[[MASKED_LOAD:.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>{{.*}} <vscale x 2 x i1> %reverse

		; CHECK-TF-NORED-LABEL: @reverse(
		; CHECK-TF-NORED: vector.body:
		; CHECK-TF-NORED: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 2 x i1>
		; CHECK-TF-NORED: %[[REVERSE_MASK:.*]] = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %[[ACTIVE_LANE_MASK]])
		; CHECK-TF-NORED: %[[MASKED_LOAD:.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>{{.*}} <vscale x 2 x i1> %reverse

		; CHECK-TF-NOREC-LABEL: @reverse(
		; CHECK-TF-NOREC: vector.body:
		; CHECK-TF-NOREC: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 2 x i1>
		; CHECK-TF-NOREC: %[[REVERSE_MASK:.*]] = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %[[ACTIVE_LANE_MASK]])
		; CHECK-TF-NOREC: %[[MASKED_LOAD:.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>{{.*}} <vscale x 2 x i1> %reverse

		entry:
		br label %for.body

		for.body: ; preds = %entry, %for.body
		%indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ]
		%arrayidx = getelementptr inbounds double, double* %src, i64 %indvars.iv
		%0 = load double, double* %arrayidx, align 8
		%add = fadd double %0, 1.000000e+00
		%arrayidx2 = getelementptr inbounds double, double* %dst, i64 %indvars.iv
		store double %add, double* %arrayidx2, align 8
		%indvars.iv.next = add nsw i64 %indvars.iv, -1
		%cmp.not = icmp eq i64 %indvars.iv, 0
		br i1 %cmp.not, label %for.end, label %for.body

		for.end: ; preds = %for.body
		ret void
		}

attributes #0 = { "target-features"="+sve" }		attributes #0 = { "target-features"="+sve" }

!0 = distinct !{!0, !1, !2, !3, !4}		!0 = distinct !{!0, !1, !2, !3, !4}
!1 = !{!"llvm.loop.vectorize.width", i32 4}		!1 = !{!"llvm.loop.vectorize.width", i32 4}
!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}		!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!3 = !{!"llvm.loop.interleave.count", i32 1}		!3 = !{!"llvm.loop.interleave.count", i32 1}
!4 = !{!"llvm.loop.vectorize.enable", i1 true}		!4 = !{!"llvm.loop.vectorize.enable", i1 true}

This is an archive of the discontinued LLVM Phabricator instance.

[SVE][LoopVectorize] Add option to disable tail-folding for reverse loops
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 505441

llvm/include/llvm/Analysis/TargetTransformInfo.h

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

llvm/include/llvm/CodeGen/BasicTTIImpl.h

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

llvm/lib/Analysis/TargetTransformInfo.cpp

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/lib/Target/ARM/ARMTargetTransformInfo.h

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SVE][LoopVectorize] Add option to disable tail-folding for reverse loops
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 505441

llvm/include/llvm/Analysis/TargetTransformInfo.h

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

llvm/include/llvm/CodeGen/BasicTTIImpl.h

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

llvm/lib/Analysis/TargetTransformInfo.cpp

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/lib/Target/ARM/ARMTargetTransformInfo.h

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll

[SVE][LoopVectorize] Add option to disable tail-folding for reverse loops
ClosedPublic