Diff 366001

clang/include/clang/Basic/TargetInfo.h

Show First 20 Lines • Show All 865 Lines • ▼ Show 20 Lines	virtual void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const = 0;		MacroBuilder &Builder) const = 0;


/// Return information about target-specific builtins for		/// Return information about target-specific builtins for
/// the current primary target, and info about which builtins are non-portable		/// the current primary target, and info about which builtins are non-portable
/// across the current set of primary and secondary targets.		/// across the current set of primary and secondary targets.
virtual ArrayRef<Builtin::Info> getTargetBuiltins() const = 0;		virtual ArrayRef<Builtin::Info> getTargetBuiltins() const = 0;

		/// Returns target-specific min and max values VScale_Range.
		virtual Optional<std::pair<unsigned, unsigned>>
		getVScaleRange(const LangOptions &LangOpts) const {
		return None;
		}
/// The __builtin_clz* and __builtin_ctz* built-in		/// The __builtin_clz* and __builtin_ctz* built-in
/// functions are specified to have undefined results for zero inputs, but		/// functions are specified to have undefined results for zero inputs, but
/// on targets that support these operations in a way that provides		/// on targets that support these operations in a way that provides
/// well-defined results for zero without loss of performance, it is a good		/// well-defined results for zero without loss of performance, it is a good
/// idea to avoid optimizing based on that undef behavior.		/// idea to avoid optimizing based on that undef behavior.
virtual bool isCLZForZeroUndef() const { return true; }		virtual bool isCLZForZeroUndef() const { return true; }

/// Returns the kind of __builtin_va_list type that should be used		/// Returns the kind of __builtin_va_list type that should be used
▲ Show 20 Lines • Show All 707 Lines • Show Last 20 Lines

clang/lib/Basic/Targets/AArch64.h

Show First 20 Lines • Show All 90 Lines • ▼ Show 20 Lines	void getTargetDefinesARMV86A(const LangOptions &Opts,
MacroBuilder &Builder) const;		MacroBuilder &Builder) const;
void getTargetDefinesARMV87A(const LangOptions &Opts,		void getTargetDefinesARMV87A(const LangOptions &Opts,
MacroBuilder &Builder) const;		MacroBuilder &Builder) const;
void getTargetDefines(const LangOptions &Opts,		void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override;		MacroBuilder &Builder) const override;

ArrayRef<Builtin::Info> getTargetBuiltins() const override;		ArrayRef<Builtin::Info> getTargetBuiltins() const override;

		Optional<std::pair<unsigned, unsigned>>
		getVScaleRange(const LangOptions &LangOpts) const override;
		frasercrmckUnsubmitted Not Done Reply Inline Actions This clang-tidy warning needs satisfied. frasercrmck: This clang-tidy warning needs satisfied.

bool hasFeature(StringRef Feature) const override;		bool hasFeature(StringRef Feature) const override;
bool handleTargetFeatures(std::vector<std::string> &Features,		bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) override;		DiagnosticsEngine &Diags) override;

CallingConvCheckResult checkCallingConvention(CallingConv CC) const override;		CallingConvCheckResult checkCallingConvention(CallingConv CC) const override;

bool isCLZForZeroUndef() const override;		bool isCLZForZeroUndef() const override;

▲ Show 20 Lines • Show All 123 Lines • Show Last 20 Lines

clang/lib/Basic/Targets/AArch64.cpp

Show First 20 Lines • Show All 418 Lines • ▼ Show 20 Lines	void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
}		}
}		}

ArrayRef<Builtin::Info> AArch64TargetInfo::getTargetBuiltins() const {		ArrayRef<Builtin::Info> AArch64TargetInfo::getTargetBuiltins() const {
return llvm::makeArrayRef(BuiltinInfo, clang::AArch64::LastTSBuiltin -		return llvm::makeArrayRef(BuiltinInfo, clang::AArch64::LastTSBuiltin -
Builtin::FirstTSBuiltin);		Builtin::FirstTSBuiltin);
}		}

		Optional<std::pair<unsigned, unsigned>>
		AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
		if (LangOpts.ArmSveVectorBits) {
		unsigned VScale = LangOpts.ArmSveVectorBits / 128;
		return std::pair<unsigned, unsigned>(VScale, VScale);
		}
		if (hasFeature("sve"))
		sdesmalenUnsubmitted Not Done Reply Inline Actions nit: unnecessary curly braces. sdesmalen: nit: unnecessary curly braces.
		return std::pair<unsigned, unsigned>(0, 16);
		return None;
		}

bool AArch64TargetInfo::hasFeature(StringRef Feature) const {		bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
return Feature == "aarch64" \|\| Feature == "arm64" \|\| Feature == "arm" \|\|		return Feature == "aarch64" \|\| Feature == "arm64" \|\| Feature == "arm" \|\|
(Feature == "neon" && (FPU & NeonMode)) \|\|		(Feature == "neon" && (FPU & NeonMode)) \|\|
((Feature == "sve" \|\| Feature == "sve2" \|\| Feature == "sve2-bitperm" \|\|		((Feature == "sve" \|\| Feature == "sve2" \|\| Feature == "sve2-bitperm" \|\|
Feature == "sve2-aes" \|\| Feature == "sve2-sha3" \|\|		Feature == "sve2-aes" \|\| Feature == "sve2-sha3" \|\|
Feature == "sve2-sm4" \|\| Feature == "f64mm" \|\| Feature == "f32mm" \|\|		Feature == "sve2-sm4" \|\| Feature == "f64mm" \|\| Feature == "f32mm" \|\|
Feature == "i8mm" \|\| Feature == "bf16") &&		Feature == "i8mm" \|\| Feature == "bf16") &&
(FPU & SveMode)) \|\|		(FPU & SveMode)) \|\|
▲ Show 20 Lines • Show All 535 Lines • Show Last 20 Lines

clang/lib/CodeGen/CodeGenFunction.cpp

Show First 20 Lines • Show All 477 Lines • ▼ Show 20 Lines	void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
// Add the required-vector-width attribute. This contains the max width from:		// Add the required-vector-width attribute. This contains the max width from:
// 1. min-vector-width attribute used in the source program.		// 1. min-vector-width attribute used in the source program.
// 2. Any builtins used that have a vector width specified.		// 2. Any builtins used that have a vector width specified.
// 3. Values passed in and out of inline assembly.		// 3. Values passed in and out of inline assembly.
// 4. Width of vector arguments and return types for this function.		// 4. Width of vector arguments and return types for this function.
// 5. Width of vector aguments and return types for functions called by this		// 5. Width of vector aguments and return types for functions called by this
// function.		// function.
CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth));		CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth));

// Add vscale attribute if appropriate.		// Add vscale_range attribute if appropriate.
		sdesmalenUnsubmitted Not Done Reply Inline Actions nit: `vscale_range` sdesmalen: nit: `vscale_range`
if (getLangOpts().ArmSveVectorBits) {		Optional<std::pair<unsigned, unsigned>> VScaleRange =
		craig.topperUnsubmitted Not Done Reply Inline Actions Is there any way we can check that the target is ARM before checking an "sve" on a target independent interface? If any other target uses the name "sve" this will trigger. craig.topper: Is there any way we can check that the target is ARM before checking an "sve" on a target…
unsigned VScale = getLangOpts().ArmSveVectorBits / 128;		getContext().getTargetInfo().getVScaleRange(getLangOpts());
CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(),		if (VScaleRange) {
		bsmithUnsubmitted Not Done Reply Inline Actions Is this really what we want? Won't this enable fixed length codegen all of the time? bsmith: Is this really what we want? Won't this enable fixed length codegen all of the time?
		paulwalker-armUnsubmitted Not Done Reply Inline Actions Fixed length codegen is tied to the minimum `vscale` value, so by using `0` here means nothing is known about the minimum `vscale` and thus fixed length codegen will be restricted to 128bit as is the case when no attribute is specified. paulwalker-arm: Fixed length codegen is tied to the minimum `vscale` value, so by using `0` here means nothing…
		bsmithUnsubmitted Not Done Reply Inline Actions Ah right ok, I'd missed that detail. Ignore me then! bsmith: Ah right ok, I'd missed that detail. Ignore me then!
VScale, VScale));		CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
		craig.topperUnsubmitted Not Done Reply Inline Actions Thinking about this more, maybe these values should be returned from a new interface on TargetInfo? Maybe using Optional so you can default to None for other targets? This way you don't have AArch64 specific numbers hardcoded in a file that doesn't belong to AArch64? But maybe the damage was already done with the "/ 128" in the ArmSveVectorBits code above. Maybe you can unify both into a new TargetInfo function? Does TargetInfo have access to LangOpts? craig.topper: Thinking about this more, maybe these values should be returned from a new interface on…
		getLLVMContext(), VScaleRange.getValue().first,
		VScaleRange.getValue().second));
}		}

// If we generated an unreachable return block, delete it now.		// If we generated an unreachable return block, delete it now.
if (ReturnBlock.isValid() && ReturnBlock.getBlock()->use_empty()) {		if (ReturnBlock.isValid() && ReturnBlock.getBlock()->use_empty()) {
Builder.ClearInsertionPoint();		Builder.ClearInsertionPoint();
ReturnBlock.getBlock()->eraseFromParent();		ReturnBlock.getBlock()->eraseFromParent();
}		}
if (ReturnValue.isValid()) {		if (ReturnValue.isValid()) {
▲ Show 20 Lines • Show All 2,189 Lines • Show Last 20 Lines

clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c

	// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=128 -S -emit-llvm -o - %s \| FileCheck %s -D#VBITS=128			// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=128 -S -emit-llvm -o - %s \| FileCheck %s -D#VBITS=128
	// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=256 -S -emit-llvm -o - %s \| FileCheck %s -D#VBITS=256			// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=256 -S -emit-llvm -o - %s \| FileCheck %s -D#VBITS=256
	// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -S -emit-llvm -o - %s \| FileCheck %s -D#VBITS=512			// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -S -emit-llvm -o - %s \| FileCheck %s -D#VBITS=512
	// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=1024 -S -emit-llvm -o - %s \| FileCheck %s -D#VBITS=1024			// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=1024 -S -emit-llvm -o - %s \| FileCheck %s -D#VBITS=1024
	// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=2048 -S -emit-llvm -o - %s \| FileCheck %s -D#VBITS=2048			// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=2048 -S -emit-llvm -o - %s \| FileCheck %s -D#VBITS=2048
				// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=128 -S -emit-llvm -o - %s \| FileCheck %s -D#VBITS=128
				// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=256 -S -emit-llvm -o - %s \| FileCheck %s -D#VBITS=256
				// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=scalable -S -emit-llvm -o - %s \| FileCheck %s --check-prefix=CHECK-NONE
	// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=scalable -S -emit-llvm -o - %s \| FileCheck %s --check-prefix=CHECK-NONE			// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=scalable -S -emit-llvm -o - %s \| FileCheck %s --check-prefix=CHECK-NONE
	// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -emit-llvm -o - %s \| FileCheck %s --check-prefix=CHECK-NONE			// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -emit-llvm -o - %s \| FileCheck %s --check-prefix=CHECK-NONE

	// CHECK-LABEL: @func() #0			// CHECK-LABEL: @func() #0
	// CHECK: attributes #0 = { {{.*}} vscale_range([[#div(VBITS,128)]],[[#div(VBITS,128)]]) {{.*}} }			// CHECK: attributes #0 = { {{.*}} vscale_range([[#div(VBITS,128)]],[[#div(VBITS,128)]]) {{.*}} }
	// CHECK-NONE-NOT: vscale_range			// CHECK-NONE: attributes #0 = { {{.*}} vscale_range(0,16) {{.*}} }
	void func() {}			void func() {}

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Show First 20 Lines • Show All 119 Lines • ▼ Show 20 Lines	TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
}		}
llvm_unreachable("Unsupported register kind");		llvm_unreachable("Unsupported register kind");
}		}

unsigned getMinVectorRegisterBitWidth() const {		unsigned getMinVectorRegisterBitWidth() const {
return ST->getMinVectorRegisterBitWidth();		return ST->getMinVectorRegisterBitWidth();
}		}

Optional<unsigned> getMaxVScale() const {
if (ST->hasSVE())
return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
return BaseT::getMaxVScale();
}

/// Try to return an estimate cost factor that can be used as a multiplier		/// Try to return an estimate cost factor that can be used as a multiplier
/// when scalarizing an operation for a vector with ElementCount \p VF.		/// when scalarizing an operation for a vector with ElementCount \p VF.
/// For scalable vectors this currently takes the most pessimistic view based		/// For scalable vectors this currently takes the most pessimistic view based
/// upon the maximum possible value for vscale.		/// upon the maximum possible value for vscale.
unsigned getMaxNumElements(ElementCount VF) const {		unsigned getMaxNumElements(ElementCount VF,
		const Function *F = nullptr) const {
		paulwalker-armUnsubmitted Not Done Reply Inline Actions Can this parameter be a `Function*`, given there's no real link between this function and LLVM Instructions? paulwalker-arm: Can this parameter be a `Function*`, given there's no real link between this function and LLVM…
if (!VF.isScalable())		if (!VF.isScalable())
return VF.getFixedValue();		return VF.getFixedValue();
Optional<unsigned> MaxNumVScale = getMaxVScale();
assert(MaxNumVScale && "Expected valid max vscale value");		unsigned MaxNumVScale = 16;
return *MaxNumVScale * VF.getKnownMinValue();		if (F && F->hasFnAttribute(Attribute::VScaleRange)) {
		unsigned VScaleMax =
		F->getFnAttribute(Attribute::VScaleRange).getVScaleRangeArgs().second;
		if (VScaleMax > 0)
		MaxNumVScale = VScaleMax;
		paulwalker-armUnsubmitted Not Done Reply Inline Actions This can return `0` implying there is no known maximum. With the current code this means `0` will be returned instead of a sensible default. paulwalker-arm: This can return `0` implying there is no known maximum. With the current code this means `0`…
		}

		return MaxNumVScale * VF.getKnownMinValue();
}		}

unsigned getMaxInterleaveFactor(unsigned VF);		unsigned getMaxInterleaveFactor(unsigned VF);

InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,		InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment, unsigned AddressSpace,		Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind);		TTI::TargetCostKind CostKind);

▲ Show 20 Lines • Show All 181 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Show First 20 Lines • Show All 1,580 Lines • ▼ Show 20 Lines	InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
// The code-generator is currently not able to handle scalable vectors		// The code-generator is currently not able to handle scalable vectors
// of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting		// of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
// it. This change will be removed when code-generation for these types is		// it. This change will be removed when code-generation for these types is
// sufficiently reliable.		// sufficiently reliable.
if (cast<VectorType>(DataTy)->getElementCount() ==		if (cast<VectorType>(DataTy)->getElementCount() ==
ElementCount::getScalable(1))		ElementCount::getScalable(1))
return InstructionCost::getInvalid();		return InstructionCost::getInvalid();

ElementCount LegalVF = LT.second.getVectorElementCount();		ElementCount LegalVF = LT.second.getVectorElementCount();
InstructionCost MemOpCost =		InstructionCost MemOpCost =
		sdesmalenUnsubmitted Not Done Reply Inline Actions This can drop the Optional now. I also think asserting the attribute must be set is a bit of a strong requirement? Maybe we can return an Invalid instead if the attribute is not set. sdesmalen: This can drop the Optional now. I also think asserting the attribute must be set is a bit of a…
getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);		getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
return LT.first * MemOpCost * getMaxNumElements(LegalVF);		return LT.first * MemOpCost * getMaxNumElements(LegalVF, I->getFunction());
}		}

bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {		bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors();		return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors();
}		}

InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,		InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
MaybeAlign Alignment,		MaybeAlign Alignment,
▲ Show 20 Lines • Show All 647 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,634 Lines • ▼ Show 20 Lines	if (any_of(ElementTypesInLoop, [&](Type *Ty) {
return ElementCount::getScalable(0);		return ElementCount::getScalable(0);
}		}

if (Legal->isSafeForAnyVectorWidth())		if (Legal->isSafeForAnyVectorWidth())
return MaxScalableVF;		return MaxScalableVF;

// Limit MaxScalableVF by the maximum safe dependence distance.		// Limit MaxScalableVF by the maximum safe dependence distance.
Optional<unsigned> MaxVScale = TTI.getMaxVScale();		Optional<unsigned> MaxVScale = TTI.getMaxVScale();
		if (!MaxVScale && TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
		unsigned VScaleMax = TheFunction->getFnAttribute(Attribute::VScaleRange)
		.getVScaleRangeArgs()
		.second;
		paulwalker-armUnsubmitted Not Done Reply Inline Actions I think you only want to set `MaxVScale` when `VScaleRangeAttr.getVScaleRangeArgs().second` is non-zero. Given this and the above similar comment perhaps there's need for extra tests that cover `vscale_range(2,0)` for example. paulwalker-arm: I think you only want to set `MaxVScale` when `VScaleRangeAttr.getVScaleRangeArgs().second` is…
		if (VScaleMax > 0)
		MaxVScale = VScaleMax;
		}
MaxScalableVF = ElementCount::getScalable(		MaxScalableVF = ElementCount::getScalable(
MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);		MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
if (!MaxScalableVF)		if (!MaxScalableVF)
reportVectorizationInfo(		reportVectorizationInfo(
"Max legal vector width too small, scalable vectorization "		"Max legal vector width too small, scalable vectorization "
"unfeasible.",		"unfeasible.",
"ScalableVFUnfeasible", ORE, TheLoop);		"ScalableVFUnfeasible", ORE, TheLoop);

▲ Show 20 Lines • Show All 4,911 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/AArch64/sve-gather.ll

	; Check getIntrinsicInstrCost in BasicTTIImpl.h for masked gather			; Check getIntrinsicInstrCost in BasicTTIImpl.h for masked gather

	; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s \| FileCheck %s			; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s \| FileCheck %s

	define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {			define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
	; CHECK-LABEL: 'masked_gathers'			; CHECK-LABEL: 'masked_gathers'
	; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32			; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32
	; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32			; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32
	; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %res.v4i32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32			; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %res.v4i32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
	; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res.v1i128 = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128			; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res.v1i128 = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128
	; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64			; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64
	%res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)			%res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
	%res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)			%res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
	Show All 11 Lines

llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll

	; Check getIntrinsicInstrCost in BasicTTIImpl.h with for masked scatter			; Check getIntrinsicInstrCost in BasicTTIImpl.h with for masked scatter

	; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s \| FileCheck %s			; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s \| FileCheck %s

	define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {			define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
	; CHECK-LABEL: 'masked_scatters'			; CHECK-LABEL: 'masked_scatters'
	; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32			; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32
	; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32			; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32
	; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32			; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32
	; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v1i128.v1p0i128			; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v1i128.v1p0i128
	; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64			; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64
	call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)			call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
	call void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)			call void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
	Show All 11 Lines

llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll

Show All 12 Lines
; double f = b[d]; // Scalarize to form insertelements		; double f = b[d]; // Scalarize to form insertelements
; if (j * f)		; if (j * f)
; a++;		; a++;
; j = f;		; j = f;
; }		; }
; return a;		; return a;
; }		; }
;		;
define i32 @PR33613(double* %b, double %j, i32 %d) {		define i32 @PR33613(double* %b, double %j, i32 %d) #0 {
; CHECK-VF4UF2-LABEL: @PR33613		; CHECK-VF4UF2-LABEL: @PR33613
; CHECK-VF4UF2: vector.body		; CHECK-VF4UF2: vector.body
; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x double> [ {{.*}}, %vector.ph ], [ {{.*}}, %vector.body ]		; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x double> [ {{.*}}, %vector.ph ], [ {{.*}}, %vector.body ]
; CHECK-VF4UF2: %[[SPLICE1:.*]] = call <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double> %[[VEC_RECUR]], <vscale x 4 x double> {{.*}}, i32 -1)		; CHECK-VF4UF2: %[[SPLICE1:.*]] = call <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double> %[[VEC_RECUR]], <vscale x 4 x double> {{.*}}, i32 -1)
; CHECK-VF4UF2-NEXT: %[[SPLICE2:.*]] = call <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double> %{{.*}}, <vscale x 4 x double> %{{.*}}, i32 -1)		; CHECK-VF4UF2-NEXT: %[[SPLICE2:.*]] = call <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double> %{{.*}}, <vscale x 4 x double> %{{.*}}, i32 -1)
; CHECK-VF4UF2-NOT: insertelement <vscale x 4 x double>		; CHECK-VF4UF2-NOT: insertelement <vscale x 4 x double>
; CHECK-VF4UF2: middle.block		; CHECK-VF4UF2: middle.block
entry:		entry:
Show All 31 Lines
; void PR34711(short[2] a, int b, int *c, int n) {		; void PR34711(short[2] a, int b, int *c, int n) {
; for(int i = 0; i < n; i++) {		; for(int i = 0; i < n; i++) {
; c[i] = 7;		; c[i] = 7;
; b[i] = (a[i][0] * a[i][1]);		; b[i] = (a[i][0] * a[i][1]);
; }		; }
; }		; }
;		;
; Check that the sext sank after the load in the vector loop.		; Check that the sext sank after the load in the vector loop.
define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) {		define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) #0 {
; CHECK-VF4UF1-LABEL: @PR34711		; CHECK-VF4UF1-LABEL: @PR34711
; CHECK-VF4UF1: vector.body		; CHECK-VF4UF1: vector.body
; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[MGATHER:.*]], %vector.body ]		; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[MGATHER:.*]], %vector.body ]
; CHECK-VF4UF1: %[[MGATHER]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> {{.*}}, i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> undef)		; CHECK-VF4UF1: %[[MGATHER]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> {{.*}}, i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> undef)
; CHECK-VF4UF1-NEXT: %[[SPLICE:.*]] = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> %[[VEC_RECUR]], <vscale x 4 x i16> %[[MGATHER]], i32 -1)		; CHECK-VF4UF1-NEXT: %[[SPLICE:.*]] = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> %[[VEC_RECUR]], <vscale x 4 x i16> %[[MGATHER]], i32 -1)
; CHECK-VF4UF1-NEXT: %[[SXT1:.*]] = sext <vscale x 4 x i16> %[[SPLICE]] to <vscale x 4 x i32>		; CHECK-VF4UF1-NEXT: %[[SXT1:.*]] = sext <vscale x 4 x i16> %[[SPLICE]] to <vscale x 4 x i32>
; CHECK-VF4UF1-NEXT: %[[SXT2:.*]] = sext <vscale x 4 x i16> %[[MGATHER]] to <vscale x 4 x i32>		; CHECK-VF4UF1-NEXT: %[[SXT2:.*]] = sext <vscale x 4 x i16> %[[MGATHER]] to <vscale x 4 x i32>
; CHECK-VF4UF1-NEXT: mul nsw <vscale x 4 x i32> %[[SXT2]], %[[SXT1]]		; CHECK-VF4UF1-NEXT: mul nsw <vscale x 4 x i32> %[[SXT2]], %[[SXT1]]
Show All 17 Lines	for.body:
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1		%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %n		%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0		br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0

for.end:		for.end:
ret void		ret void
}		}

		attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !1}		!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}		!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=false -S 2>%t \| FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED		; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=false -S 2>%t \| FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=true -S 2>%t \| FileCheck %s --check-prefix=CHECK-UNORDERED		; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=true -S 2>%t \| FileCheck %s --check-prefix=CHECK-UNORDERED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=false -S 2>%t \| FileCheck %s --check-prefix=CHECK-ORDERED		; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=false -S 2>%t \| FileCheck %s --check-prefix=CHECK-ORDERED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t \| FileCheck %s --check-prefix=CHECK-UNORDERED		; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t \| FileCheck %s --check-prefix=CHECK-UNORDERED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t \| FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED		; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t \| FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED

define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {		define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_strict		; CHECK-ORDERED-LABEL: @fadd_strict
; CHECK-ORDERED: vector.body:		; CHECK-ORDERED: vector.body:
; CHECK-ORDERED: %[[VEC_PHI:.]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.]], %vector.body ]		; CHECK-ORDERED: %[[VEC_PHI:.]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.]], %vector.body ]
; CHECK-ORDERED: %[[LOAD:.]] = load <vscale x 8 x float>, <vscale x 8 x float>		; CHECK-ORDERED: %[[LOAD:.]] = load <vscale x 8 x float>, <vscale x 8 x float>
; CHECK-ORDERED: %[[RDX]] = call float @llvm.vector.reduce.fadd.nxv8f32(float %[[VEC_PHI]], <vscale x 8 x float> %[[LOAD]])		; CHECK-ORDERED: %[[RDX]] = call float @llvm.vector.reduce.fadd.nxv8f32(float %[[VEC_PHI]], <vscale x 8 x float> %[[LOAD]])
; CHECK-ORDERED: for.end		; CHECK-ORDERED: for.end
; CHECK-ORDERED: %[[PHI:.]] = phi float [ %[[SCALAR:.]], %for.body ], [ %[[RDX]], %middle.block ]		; CHECK-ORDERED: %[[PHI:.]] = phi float [ %[[SCALAR:.]], %for.body ], [ %[[RDX]], %middle.block ]
; CHECK-ORDERED: ret float %[[PHI]]		; CHECK-ORDERED: ret float %[[PHI]]
Show All 28 Lines	for.body:
%iv.next = add nuw nsw i64 %iv, 1		%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n		%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0		br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:		for.end:
ret float %add		ret float %add
}		}

define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) {		define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_strict_unroll		; CHECK-ORDERED-LABEL: @fadd_strict_unroll
; CHECK-ORDERED: vector.body:		; CHECK-ORDERED: vector.body:
; CHECK-ORDERED: %[[VEC_PHI1:.]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX4:.]], %vector.body ]		; CHECK-ORDERED: %[[VEC_PHI1:.]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX4:.]], %vector.body ]
; CHECK-ORDERED-NOT: phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX4]], %vector.body ]		; CHECK-ORDERED-NOT: phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX4]], %vector.body ]
; CHECK-ORDERED: %[[LOAD1:.]] = load <vscale x 8 x float>, <vscale x 8 x float>		; CHECK-ORDERED: %[[LOAD1:.]] = load <vscale x 8 x float>, <vscale x 8 x float>
; CHECK-ORDERED: %[[LOAD2:.]] = load <vscale x 8 x float>, <vscale x 8 x float>		; CHECK-ORDERED: %[[LOAD2:.]] = load <vscale x 8 x float>, <vscale x 8 x float>
; CHECK-ORDERED: %[[LOAD3:.]] = load <vscale x 8 x float>, <vscale x 8 x float>		; CHECK-ORDERED: %[[LOAD3:.]] = load <vscale x 8 x float>, <vscale x 8 x float>
; CHECK-ORDERED: %[[LOAD4:.]] = load <vscale x 8 x float>, <vscale x 8 x float>		; CHECK-ORDERED: %[[LOAD4:.]] = load <vscale x 8 x float>, <vscale x 8 x float>
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines	for.body:
%iv.next = add nuw nsw i64 %iv, 1		%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n		%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1		br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1

for.end:		for.end:
ret float %add		ret float %add
}		}

define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {		define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_strict_interleave		; CHECK-ORDERED-LABEL: @fadd_strict_interleave
; CHECK-ORDERED: entry		; CHECK-ORDERED: entry
; CHECK-ORDERED: %[[ARRAYIDX:.]] = getelementptr inbounds float, float %a, i64 1		; CHECK-ORDERED: %[[ARRAYIDX:.]] = getelementptr inbounds float, float %a, i64 1
; CHECK-ORDERED: %[[LOAD1:.]] = load float, float %a		; CHECK-ORDERED: %[[LOAD1:.]] = load float, float %a
; CHECK-ORDERED: %[[LOAD2:.]] = load float, float %[[ARRAYIDX]]		; CHECK-ORDERED: %[[LOAD2:.]] = load float, float %[[ARRAYIDX]]
; CHECK-ORDERED: vector.ph		; CHECK-ORDERED: vector.ph
; CHECK-ORDERED: %[[STEPVEC1:.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()		; CHECK-ORDERED: %[[STEPVEC1:.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
; CHECK-ORDERED: %[[STEPVEC_ADD1:.*]] = add <vscale x 4 x i64> %[[STEPVEC1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 0, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)		; CHECK-ORDERED: %[[STEPVEC_ADD1:.*]] = add <vscale x 4 x i64> %[[STEPVEC1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 0, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines	for.body:
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !2		br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !2

for.end:		for.end:
store float %add1, float* %a, align 4		store float %add1, float* %a, align 4
store float %add2, float* %arrayidxa, align 4		store float %add2, float* %arrayidxa, align 4
ret void		ret void
}		}

define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {		define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_of_sum		; CHECK-ORDERED-LABEL: @fadd_of_sum
; CHECK-ORDERED: vector.body		; CHECK-ORDERED: vector.body
; CHECK-ORDERED: %[[VEC_PHI1:.]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.]], %vector.body ]		; CHECK-ORDERED: %[[VEC_PHI1:.]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.]], %vector.body ]
; CHECK-ORDERED: %[[LOAD1:.]] = load <vscale x 4 x float>, <vscale x 4 x float>		; CHECK-ORDERED: %[[LOAD1:.]] = load <vscale x 4 x float>, <vscale x 4 x float>
; CHECK-ORDERED: %[[LOAD2:.]] = load <vscale x 4 x float>, <vscale x 4 x float>		; CHECK-ORDERED: %[[LOAD2:.]] = load <vscale x 4 x float>, <vscale x 4 x float>
; CHECK-ORDERED: %[[ADD:.*]] = fadd <vscale x 4 x float> %[[LOAD1]], %[[LOAD2]]		; CHECK-ORDERED: %[[ADD:.*]] = fadd <vscale x 4 x float> %[[LOAD1]], %[[LOAD2]]
; CHECK-ORDERED: %[[RDX]] = call float @llvm.vector.reduce.fadd.nxv4f32(float %[[VEC_PHI1]], <vscale x 4 x float> %[[ADD]])		; CHECK-ORDERED: %[[RDX]] = call float @llvm.vector.reduce.fadd.nxv4f32(float %[[VEC_PHI1]], <vscale x 4 x float> %[[ADD]])
; CHECK-ORDERED: for.end.loopexit		; CHECK-ORDERED: for.end.loopexit
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines	for.body: ; preds = %for.body
%exitcond.not = icmp eq i64 %iv.next, %n		%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !2		br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !2

for.end: ; preds = %for.body, %entry		for.end: ; preds = %for.body, %entry
%res = phi float [ 0.000000e+00, %entry ], [ %rdx, %for.body ]		%res = phi float [ 0.000000e+00, %entry ], [ %rdx, %for.body ]
ret float %res		ret float %res
}		}

define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {		define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_conditional		; CHECK-ORDERED-LABEL: @fadd_conditional
; CHECK-ORDERED: vector.body		; CHECK-ORDERED: vector.body
; CHECK-ORDERED: %[[VEC_PHI:.]] = phi float [ 1.000000e+00, %vector.ph ], [ %[[RDX:.]], %vector.body ]		; CHECK-ORDERED: %[[VEC_PHI:.]] = phi float [ 1.000000e+00, %vector.ph ], [ %[[RDX:.]], %vector.body ]
; CHECK-ORDERED: %[[LOAD:.]] = load <vscale x 4 x float>, <vscale x 4 x float>		; CHECK-ORDERED: %[[LOAD:.]] = load <vscale x 4 x float>, <vscale x 4 x float>
; CHECK-ORDERED: %[[FCMP:.*]] = fcmp une <vscale x 4 x float> %[[LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)		; CHECK-ORDERED: %[[FCMP:.*]] = fcmp une <vscale x 4 x float> %[[LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-ORDERED: %[[MASKED_LOAD:.]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float> {{.*}}, i32 4, <vscale x 4 x i1> %[[FCMP]], <vscale x 4 x float> poison)		; CHECK-ORDERED: %[[MASKED_LOAD:.]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float> {{.*}}, i32 4, <vscale x 4 x i1> %[[FCMP]], <vscale x 4 x float> poison)
; CHECK-ORDERED: %[[XOR:.*]] = xor <vscale x 4 x i1> %[[FCMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)		; CHECK-ORDERED: %[[XOR:.*]] = xor <vscale x 4 x i1> %[[FCMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-ORDERED: %[[SELECT:.*]] = select <vscale x 4 x i1> %[[XOR]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> %[[MASKED_LOAD]]		; CHECK-ORDERED: %[[SELECT:.*]] = select <vscale x 4 x i1> %[[XOR]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> %[[MASKED_LOAD]]
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines	for.inc:
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !2		br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !2

for.end:		for.end:
%rdx = phi float [ %fadd, %for.inc ]		%rdx = phi float [ %fadd, %for.inc ]
ret float %rdx		ret float %rdx
}		}

; Negative test - loop contains multiple fadds which we cannot safely reorder		; Negative test - loop contains multiple fadds which we cannot safely reorder
define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) {		define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_multiple		; CHECK-ORDERED-LABEL: @fadd_multiple
; CHECK-ORDERED-NOT: vector.body		; CHECK-ORDERED-NOT: vector.body

; CHECK-UNORDERED-LABEL: @fadd_multiple		; CHECK-UNORDERED-LABEL: @fadd_multiple
; CHECK-UNORDERED: vector.body		; CHECK-UNORDERED: vector.body
; CHECK-UNORDERED: %[[PHI:.]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float -0.000000e+00, i32 0), %vector.ph ], [ %[[VEC_FADD2:.]], %vector.body ]		; CHECK-UNORDERED: %[[PHI:.]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float -0.000000e+00, i32 0), %vector.ph ], [ %[[VEC_FADD2:.]], %vector.body ]
; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <vscale x 8 x float>, <vscale x 8 x float>		; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <vscale x 8 x float>, <vscale x 8 x float>
; CHECK-UNORDERED: %[[VEC_FADD1:.*]] = fadd <vscale x 8 x float> %[[PHI]], %[[VEC_LOAD1]]		; CHECK-UNORDERED: %[[VEC_FADD1:.*]] = fadd <vscale x 8 x float> %[[PHI]], %[[VEC_LOAD1]]
Show All 30 Lines	for.body: ; preds = %entry, %for.body
%exitcond.not = icmp eq i64 %iv.next, %n		%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0		br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end: ; preds = %for.body		for.end: ; preds = %for.body
%rdx = phi float [ %add3, %for.body ]		%rdx = phi float [ %add3, %for.body ]
ret float %rdx		ret float %rdx
}		}

		attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !3, !6, !8}		!0 = distinct !{!0, !3, !6, !8}
!1 = distinct !{!1, !3, !7, !8}		!1 = distinct !{!1, !3, !7, !8}
!2 = distinct !{!2, !4, !6, !8}		!2 = distinct !{!2, !4, !6, !8}
!3 = !{!"llvm.loop.vectorize.width", i32 8}		!3 = !{!"llvm.loop.vectorize.width", i32 8}
!4 = !{!"llvm.loop.vectorize.width", i32 4}		!4 = !{!"llvm.loop.vectorize.width", i32 4}
!5 = !{!"llvm.loop.vectorize.width", i32 2}		!5 = !{!"llvm.loop.vectorize.width", i32 2}
!6 = !{!"llvm.loop.interleave.count", i32 1}		!6 = !{!"llvm.loop.interleave.count", i32 1}
!7 = !{!"llvm.loop.interleave.count", i32 4}		!7 = !{!"llvm.loop.interleave.count", i32 4}
!8 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}		!8 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll

; REQUIRES: asserts		; REQUIRES: asserts
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on < %s 2>&1 \| FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON		; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on < %s 2>&1 \| FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=preferred < %s 2>&1 \| FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED		; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=preferred < %s 2>&1 \| FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off < %s 2>&1 \| FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED		; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off < %s 2>&1 \| FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=preferred < %s 2>&1 \| FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED_MAXBW		; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=preferred < %s 2>&1 \| FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED_MAXBW

; Test that the MaxVF for the following loop, that has no dependence distances,		; Test that the MaxVF for the following loop, that has no dependence distances,
; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16		; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
; (maximized bandwidth for i8 in the loop).		; (maximized bandwidth for i8 in the loop).
define void @test0(i32* %a, i8* %b, i32* %c) {		define void @test0(i32* %a, i8* %b, i32* %c) #0 {
; CHECK: LV: Checking a loop in "test0"		; CHECK: LV: Checking a loop in "test0"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4		; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 4		; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4		; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4		; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF		; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4		; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 16		; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 16
Show All 16 Lines	loop:
br i1 %exitcond.not, label %exit, label %loop		br i1 %exitcond.not, label %exit, label %loop

exit:		exit:
ret void		ret void
}		}

; Test that the MaxVF for the following loop, with a dependence distance		; Test that the MaxVF for the following loop, with a dependence distance
; of 64 elements, is calculated as (maxvscale = 16) * 4.		; of 64 elements, is calculated as (maxvscale = 16) * 4.
define void @test1(i32* %a, i8* %b) {		define void @test1(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test1"		; CHECK: LV: Checking a loop in "test1"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4		; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 4		; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4		; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4		; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF		; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4		; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 4		; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 4
Show All 17 Lines	loop:
br i1 %exitcond.not, label %exit, label %loop		br i1 %exitcond.not, label %exit, label %loop

exit:		exit:
ret void		ret void
}		}

; Test that the MaxVF for the following loop, with a dependence distance		; Test that the MaxVF for the following loop, with a dependence distance
; of 32 elements, is calculated as (maxvscale = 16) * 2.		; of 32 elements, is calculated as (maxvscale = 16) * 2.
define void @test2(i32* %a, i8* %b) {		define void @test2(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test2"		; CHECK: LV: Checking a loop in "test2"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2		; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_ON: LV: Selecting VF: 4		; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 2		; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4		; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF		; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4		; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 2		; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 2
Show All 17 Lines	loop:
br i1 %exitcond.not, label %exit, label %loop		br i1 %exitcond.not, label %exit, label %loop

exit:		exit:
ret void		ret void
}		}

; Test that the MaxVF for the following loop, with a dependence distance		; Test that the MaxVF for the following loop, with a dependence distance
; of 16 elements, is calculated as (maxvscale = 16) * 1.		; of 16 elements, is calculated as (maxvscale = 16) * 1.
define void @test3(i32* %a, i8* %b) {		define void @test3(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test3"		; CHECK: LV: Checking a loop in "test3"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1		; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_ON: LV: Selecting VF: 4		; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 1		; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4		; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF		; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4		; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 1		; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 1
Show All 17 Lines	loop:
br i1 %exitcond.not, label %exit, label %loop		br i1 %exitcond.not, label %exit, label %loop

exit:		exit:
ret void		ret void
}		}

; Test the fallback mechanism when scalable vectors are not feasible due		; Test the fallback mechanism when scalable vectors are not feasible due
; to e.g. dependence distance.		; to e.g. dependence distance.
define void @test4(i32* %a, i32* %b) {		define void @test4(i32* %a, i32* %b) #0 {
; CHECK: LV: Checking a loop in "test4"		; CHECK: LV: Checking a loop in "test4"
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF		; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_ON: LV: Selecting VF: 4		; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED-NOT: LV: Found feasible scalable VF		; CHECK_SCALABLE_PREFERRED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4		; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF		; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4		; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW-NOT: LV: Found feasible scalable VF		; CHECK_SCALABLE_PREFERRED_MAXBW-NOT: LV: Found feasible scalable VF
Show All 13 Lines	loop:
store i32 %add, i32* %arrayidx5, align 4		store i32 %add, i32* %arrayidx5, align 4
%iv.next = add nuw nsw i64 %iv, 1		%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024		%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %exit, label %loop		br i1 %exitcond.not, label %exit, label %loop

exit:		exit:
ret void		ret void
}		}

		attributes #0 = { vscale_range(0, 16) }

llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll

	Show All 38 Lines

	; CHECK-DBG: LV: Checking a loop in "test1"			; CHECK-DBG: LV: Checking a loop in "test1"
	; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible.			; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible.
	; CHECK-DBG: remark: <unknown>:0:0: Max legal vector width too small, scalable vectorization unfeasible.			; CHECK-DBG: remark: <unknown>:0:0: Max legal vector width too small, scalable vectorization unfeasible.
	; CHECK-DBG: LV: The max safe fixed VF is: 8.			; CHECK-DBG: LV: The max safe fixed VF is: 8.
	; CHECK-DBG: LV: Selecting VF: 4.			; CHECK-DBG: LV: Selecting VF: 4.
	; CHECK-LABEL: @test1			; CHECK-LABEL: @test1
	; CHECK: <4 x i32>			; CHECK: <4 x i32>
	define void @test1(i32* %a, i32* %b) {			define void @test1(i32* %a, i32* %b) #0 {
	entry:			entry:
	br label %loop			br label %loop

	loop:			loop:
	%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]			%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
	%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv			%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
	%0 = load i32, i32* %arrayidx, align 4			%0 = load i32, i32* %arrayidx, align 4
	%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv			%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
	Show All 27 Lines

	; CHECK-DBG: LV: Checking a loop in "test2"			; CHECK-DBG: LV: Checking a loop in "test2"
	; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible.			; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible.
	; CHECK-DBG: LV: The max safe fixed VF is: 4.			; CHECK-DBG: LV: The max safe fixed VF is: 4.
	; CHECK-DBG: LV: User VF=vscale x 8 is unsafe. Ignoring scalable UserVF.			; CHECK-DBG: LV: User VF=vscale x 8 is unsafe. Ignoring scalable UserVF.
	; CHECK-DBG: LV: Selecting VF: 4.			; CHECK-DBG: LV: Selecting VF: 4.
	; CHECK-LABEL: @test2			; CHECK-LABEL: @test2
	; CHECK: <4 x i32>			; CHECK: <4 x i32>
	define void @test2(i32* %a, i32* %b) {			define void @test2(i32* %a, i32* %b) #0 {
	entry:			entry:
	br label %loop			br label %loop

	loop:			loop:
	%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]			%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
	%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv			%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
	%0 = load i32, i32* %arrayidx, align 4			%0 = load i32, i32* %arrayidx, align 4
	%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv			%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
	Show All 30 Lines
	;			;
	; Max fixed VF=32, Max scalable VF=2, safe to vectorize.			; Max fixed VF=32, Max scalable VF=2, safe to vectorize.

	; CHECK-DBG-LABEL: LV: Checking a loop in "test3"			; CHECK-DBG-LABEL: LV: Checking a loop in "test3"
	; CHECK-DBG: LV: The max safe scalable VF is: vscale x 2.			; CHECK-DBG: LV: The max safe scalable VF is: vscale x 2.
	; CHECK-DBG: LV: Using user VF vscale x 2.			; CHECK-DBG: LV: Using user VF vscale x 2.
	; CHECK-LABEL: @test3			; CHECK-LABEL: @test3
	; CHECK: <vscale x 2 x i32>			; CHECK: <vscale x 2 x i32>
	define void @test3(i32* %a, i32* %b) {			define void @test3(i32* %a, i32* %b) #0 {
	entry:			entry:
	br label %loop			br label %loop

	loop:			loop:
	%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]			%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
	%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv			%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
	%0 = load i32, i32* %arrayidx, align 4			%0 = load i32, i32* %arrayidx, align 4
	%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv			%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
	Show All 34 Lines
	; CHECK-DBG-LABEL: LV: Checking a loop in "test4"			; CHECK-DBG-LABEL: LV: Checking a loop in "test4"
	; CHECK-DBG: LV: The max safe scalable VF is: vscale x 2.			; CHECK-DBG: LV: The max safe scalable VF is: vscale x 2.
	; CHECK-DBG: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.			; CHECK-DBG: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.
	; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 4 is unsafe. Ignoring the hint to let the compiler pick a suitable VF.			; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 4 is unsafe. Ignoring the hint to let the compiler pick a suitable VF.
	; CHECK-DBG: Found feasible scalable VF = vscale x 2			; CHECK-DBG: Found feasible scalable VF = vscale x 2
	; CHECK-DBG: LV: Selecting VF: 4.			; CHECK-DBG: LV: Selecting VF: 4.
	; CHECK-LABEL: @test4			; CHECK-LABEL: @test4
	; CHECK: <4 x i32>			; CHECK: <4 x i32>
	define void @test4(i32* %a, i32* %b) {			define void @test4(i32* %a, i32* %b) #0 {
	entry:			entry:
	br label %loop			br label %loop

	loop:			loop:
	%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]			%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
	%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv			%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
	%0 = load i32, i32* %arrayidx, align 4			%0 = load i32, i32* %arrayidx, align 4
	%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv			%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
	Show All 30 Lines
	;			;
	; Max fixed VF=128, Max scalable VF=8, safe to vectorize.			; Max fixed VF=128, Max scalable VF=8, safe to vectorize.

	; CHECK-DBG-LABEL: LV: Checking a loop in "test5"			; CHECK-DBG-LABEL: LV: Checking a loop in "test5"
	; CHECK-DBG: LV: The max safe scalable VF is: vscale x 8.			; CHECK-DBG: LV: The max safe scalable VF is: vscale x 8.
	; CHECK-DBG: LV: Using user VF vscale x 4			; CHECK-DBG: LV: Using user VF vscale x 4
	; CHECK-LABEL: @test5			; CHECK-LABEL: @test5
	; CHECK: <vscale x 4 x i32>			; CHECK: <vscale x 4 x i32>
	define void @test5(i32* %a, i32* %b) {			define void @test5(i32* %a, i32* %b) #0 {
	entry:			entry:
	br label %loop			br label %loop

	loop:			loop:
	%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]			%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
	%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv			%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
	%0 = load i32, i32* %arrayidx, align 4			%0 = load i32, i32* %arrayidx, align 4
	%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv			%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
	Show All 33 Lines
	; CHECK-DBG-LABEL: LV: Checking a loop in "test6"			; CHECK-DBG-LABEL: LV: Checking a loop in "test6"
	; CHECK-DBG: LV: The max safe scalable VF is: vscale x 8.			; CHECK-DBG: LV: The max safe scalable VF is: vscale x 8.
	; CHECK-DBG: LV: User VF=vscale x 16 is unsafe. Ignoring scalable UserVF.			; CHECK-DBG: LV: User VF=vscale x 16 is unsafe. Ignoring scalable UserVF.
	; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 16 is unsafe. Ignoring the hint to let the compiler pick a suitable VF.			; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 16 is unsafe. Ignoring the hint to let the compiler pick a suitable VF.
	; CHECK-DBG: LV: Found feasible scalable VF = vscale x 4			; CHECK-DBG: LV: Found feasible scalable VF = vscale x 4
	; CHECK-DBG: Selecting VF: 4.			; CHECK-DBG: Selecting VF: 4.
	; CHECK-LABEL: @test6			; CHECK-LABEL: @test6
	; CHECK: <4 x i32>			; CHECK: <4 x i32>
	define void @test6(i32* %a, i32* %b) {			define void @test6(i32* %a, i32* %b) #0 {
	entry:			entry:
	br label %loop			br label %loop

	loop:			loop:
	%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]			%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
	%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv			%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
	%0 = load i32, i32* %arrayidx, align 4			%0 = load i32, i32* %arrayidx, align 4
	%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv			%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
	Show All 17 Lines
	; CHECK-NO-SVE-REMARKS-LABEL: LV: Checking a loop in "test_no_sve"			; CHECK-NO-SVE-REMARKS-LABEL: LV: Checking a loop in "test_no_sve"
	; CHECK-NO-SVE-REMARKS: LV: Disabling scalable vectorization, because target does not support scalable vectors.			; CHECK-NO-SVE-REMARKS: LV: Disabling scalable vectorization, because target does not support scalable vectors.
	; CHECK-NO-SVE-REMARKS: remark: <unknown>:0:0: Disabling scalable vectorization, because target does not support scalable vectors.			; CHECK-NO-SVE-REMARKS: remark: <unknown>:0:0: Disabling scalable vectorization, because target does not support scalable vectors.
	; CHECK-NO-SVE-REMARKS: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.			; CHECK-NO-SVE-REMARKS: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.
	; CHECK-NO-SVE-REMARKS: LV: Selecting VF: 4.			; CHECK-NO-SVE-REMARKS: LV: Selecting VF: 4.
	; CHECK-NO-SVE-LABEL: @test_no_sve			; CHECK-NO-SVE-LABEL: @test_no_sve
	; CHECK-NO-SVE: <4 x i32>			; CHECK-NO-SVE: <4 x i32>
	; CHECK-NO-SVE-NOT: <vscale x 4 x i32>			; CHECK-NO-SVE-NOT: <vscale x 4 x i32>
	define void @test_no_sve(i32* %a, i32* %b) {			define void @test_no_sve(i32* %a, i32* %b) #0 {
	entry:			entry:
	br label %loop			br label %loop

	loop:			loop:
	%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]			%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
	%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv			%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
	%0 = load i32, i32* %arrayidx, align 4			%0 = load i32, i32* %arrayidx, align 4
	%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv			%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
	Show All 16 Lines
	; supported but max vscale is undefined.			; supported but max vscale is undefined.
	;			;
	; CHECK-NO-SVE-REMARKS-LABEL: LV: Checking a loop in "test_no_max_vscale"			; CHECK-NO-SVE-REMARKS-LABEL: LV: Checking a loop in "test_no_max_vscale"
	; CHECK-NO-SVE-REMARKS: The max safe fixed VF is: 4.			; CHECK-NO-SVE-REMARKS: The max safe fixed VF is: 4.
	; CHECK-NO-SVE-REMARKS: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.			; CHECK-NO-SVE-REMARKS: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.
	; CHECK-NO-SVE-REMARKS: LV: Selecting VF: 4.			; CHECK-NO-SVE-REMARKS: LV: Selecting VF: 4.
	; CHECK-NO-SVE-LABEL: @test_no_max_vscale			; CHECK-NO-SVE-LABEL: @test_no_max_vscale
	; CHECK-NO-SVE: <4 x i32>			; CHECK-NO-SVE: <4 x i32>
	define void @test_no_max_vscale(i32* %a, i32* %b) {			define void @test_no_max_vscale(i32* %a, i32* %b) #0 {
	entry:			entry:
	br label %loop			br label %loop

	loop:			loop:
	%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]			%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
	%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv			%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
	%0 = load i32, i32* %arrayidx, align 4			%0 = load i32, i32* %arrayidx, align 4
	%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv			%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
	%1 = load i32, i32* %arrayidx2, align 4			%1 = load i32, i32* %arrayidx2, align 4
	%add = add nsw i32 %1, %0			%add = add nsw i32 %1, %0
	%2 = add nuw nsw i64 %iv, 4			%2 = add nuw nsw i64 %iv, 4
	%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2			%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
	store i32 %add, i32* %arrayidx5, align 4			store i32 %add, i32* %arrayidx5, align 4
	%iv.next = add nuw nsw i64 %iv, 1			%iv.next = add nuw nsw i64 %iv, 1
	%exitcond.not = icmp eq i64 %iv.next, 1024			%exitcond.not = icmp eq i64 %iv.next, 1024
	br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !21			br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !21

	exit:			exit:
	ret void			ret void
	}			}

				attributes #0 = { vscale_range(0, 16) }
	!21 = !{!21, !22, !23}			!21 = !{!21, !22, !23}
	!22 = !{!"llvm.loop.vectorize.width", i32 4}			!22 = !{!"llvm.loop.vectorize.width", i32 4}
	!23 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}			!23 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll

; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s		; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s

define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) {		define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) #0 {
; CHECK-LABEL: @cond_inv_load_i32i32i16		; CHECK-LABEL: @cond_inv_load_i32i32i16
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK: %[[INVINS:.*]] = insertelement <vscale x 4 x i16*> poison, i16* %inv, i32 0		; CHECK: %[[INVINS:.*]] = insertelement <vscale x 4 x i16*> poison, i16* %inv, i32 0
; CHECK: %[[INVSPLAT:.*]] = shufflevector <vscale x 4 x i16*> %[[INVINS]], <vscale x 4 x i16*> poison, <vscale x 4 x i32> zeroinitializer		; CHECK: %[[INVSPLAT:.*]] = shufflevector <vscale x 4 x i16*> %[[INVINS]], <vscale x 4 x i16*> poison, <vscale x 4 x i32> zeroinitializer
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK: %[[GEPCOND:.*]] = getelementptr inbounds i32, i32* %cond, i64 %index		; CHECK: %[[GEPCOND:.*]] = getelementptr inbounds i32, i32* %cond, i64 %index
; CHECK-NEXT: %[[GEPCOND2:.*]] = bitcast i32* %[[GEPCOND]] to <vscale x 4 x i32>*		; CHECK-NEXT: %[[GEPCOND2:.*]] = bitcast i32* %[[GEPCOND]] to <vscale x 4 x i32>*
; CHECK-NEXT: %[[CONDVALS:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* %[[GEPCOND2]], align 4		; CHECK-NEXT: %[[CONDVALS:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* %[[GEPCOND2]], align 4
Show All 22 Lines	for.inc: ; preds = %for.body, %if.then
%inc = add nuw nsw i64 %i.07, 1		%inc = add nuw nsw i64 %i.07, 1
%exitcond.not = icmp eq i64 %inc, %n		%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %exit, label %for.body, !llvm.loop !0		br i1 %exitcond.not, label %exit, label %for.body, !llvm.loop !0

exit: ; preds = %for.inc		exit: ; preds = %for.inc
ret void		ret void
}		}

define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) {		define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) #0 {
; CHECK-LABEL: @cond_inv_load_f64f64f64		; CHECK-LABEL: @cond_inv_load_f64f64f64
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK: %[[INVINS:.*]] = insertelement <vscale x 4 x double*> poison, double* %inv, i32 0		; CHECK: %[[INVINS:.*]] = insertelement <vscale x 4 x double*> poison, double* %inv, i32 0
; CHECK: %[[INVSPLAT:.*]] = shufflevector <vscale x 4 x double*> %[[INVINS]], <vscale x 4 x double*> poison, <vscale x 4 x i32> zeroinitializer		; CHECK: %[[INVSPLAT:.*]] = shufflevector <vscale x 4 x double*> %[[INVINS]], <vscale x 4 x double*> poison, <vscale x 4 x i32> zeroinitializer
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK: %[[GEPCOND:.*]] = getelementptr inbounds double, double* %cond, i64 %index		; CHECK: %[[GEPCOND:.*]] = getelementptr inbounds double, double* %cond, i64 %index
; CHECK-NEXT: %[[GEPCOND2:.*]] = bitcast double* %[[GEPCOND]] to <vscale x 4 x double>*		; CHECK-NEXT: %[[GEPCOND2:.*]] = bitcast double* %[[GEPCOND]] to <vscale x 4 x double>*
; CHECK-NEXT: %[[CONDVALS:.*]] = load <vscale x 4 x double>, <vscale x 4 x double>* %[[GEPCOND2]], align 8		; CHECK-NEXT: %[[CONDVALS:.*]] = load <vscale x 4 x double>, <vscale x 4 x double>* %[[GEPCOND2]], align 8
Show All 20 Lines	for.inc: ; preds = %for.body, %if.then
%inc = add nuw nsw i64 %i.08, 1		%inc = add nuw nsw i64 %i.08, 1
%exitcond.not = icmp eq i64 %inc, %n		%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %exit, label %for.body, !llvm.loop !0		br i1 %exitcond.not, label %exit, label %for.body, !llvm.loop !0

exit: ; preds = %for.inc		exit: ; preds = %for.inc
ret void		ret void
}		}

define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) {		define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) #0 {
; CHECK-LABEL: @invariant_load_cond		; CHECK-LABEL: @invariant_load_cond
; CHECK: vector.body		; CHECK: vector.body
; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42		; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42
; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %[[GEP]], i32 0		; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %[[GEP]], i32 0
; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32*> %[[SPLATINS]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer		; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32*> %[[SPLATINS]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
; CHECK: %[[LOAD:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>*		; CHECK: %[[LOAD:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>*
; CHECK-NEXT: %[[ICMP:.*]] = icmp ne <vscale x 4 x i32> %[[LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)		; CHECK-NEXT: %[[ICMP:.*]] = icmp ne <vscale x 4 x i32> %[[LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK: %[[MASKED_LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %[[BITCAST:.*]], i32 4, <vscale x 4 x i1> %[[ICMP]], <vscale x 4 x i32> poison)		; CHECK: %[[MASKED_LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %[[BITCAST:.*]], i32 4, <vscale x 4 x i1> %[[ICMP]], <vscale x 4 x i32> poison)
Show All 24 Lines	for.inc:
%iv.next = add nuw nsw i64 %iv, 1		%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n		%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0		br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:		for.end:
ret void		ret void
}		}

		attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !1, !2, !3, !4, !5}		!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}		!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}		!2 = !{!"llvm.loop.vectorize.width", i32 4}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}		!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!4 = !{!"llvm.loop.interleave.count", i32 1}		!4 = !{!"llvm.loop.interleave.count", i32 1}
!5 = !{!"llvm.loop.vectorize.enable", i1 true}		!5 = !{!"llvm.loop.vectorize.enable", i1 true}

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=preferred -force-target-instruction-cost=1 -o - | FileCheck %s		; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=preferred -force-target-instruction-cost=1 -o - | FileCheck %s

define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) {		define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {
; CHECK-LABEL: @gather_nxv4i32_ind64		; CHECK-LABEL: @gather_nxv4i32_ind64
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK: %[[IND:.*]] = load <vscale x 4 x i64>, <vscale x 4 x i64>*		; CHECK: %[[IND:.*]] = load <vscale x 4 x i64>, <vscale x 4 x i64>*
; CHECK: %[[PTRS:.*]] = getelementptr inbounds float, float* %a, <vscale x 4 x i64> %[[IND]]		; CHECK: %[[PTRS:.*]] = getelementptr inbounds float, float* %a, <vscale x 4 x i64> %[[IND]]
; CHECK: %[[GLOAD:.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %[[PTRS]]		; CHECK: %[[GLOAD:.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %[[PTRS]]
; CHECK: store <vscale x 4 x float> %[[GLOAD]], <vscale x 4 x float>*		; CHECK: store <vscale x 4 x float> %[[GLOAD]], <vscale x 4 x float>*
entry:		entry:
br label %for.body		br label %for.body
Show All 12 Lines

for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry		for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret void		ret void
}		}

; NOTE: I deliberately chose '%b' as an array of i32 indices, since the		; NOTE: I deliberately chose '%b' as an array of i32 indices, since the
; additional 'sext' in the for.body loop exposes additional code paths		; additional 'sext' in the for.body loop exposes additional code paths
; during vectorisation.		; during vectorisation.
define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) {		define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) #0 {
; CHECK-LABEL: @scatter_nxv4i32_ind32		; CHECK-LABEL: @scatter_nxv4i32_ind32
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK: %[[VALS:.*]] = load <vscale x 4 x float>		; CHECK: %[[VALS:.*]] = load <vscale x 4 x float>
; CHECK: %[[IND:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* %7, align 4		; CHECK: %[[IND:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* %7, align 4
; CHECK: %[[EXTIND:.*]] = sext <vscale x 4 x i32> %[[IND]] to <vscale x 4 x i64>		; CHECK: %[[EXTIND:.*]] = sext <vscale x 4 x i32> %[[IND]] to <vscale x 4 x i64>
; CHECK: %[[PTRS:.*]] = getelementptr inbounds float, float* %a, <vscale x 4 x i64> %[[EXTIND]]		; CHECK: %[[PTRS:.*]] = getelementptr inbounds float, float* %a, <vscale x 4 x i64> %[[EXTIND]]
; CHECK: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %[[VALS]], <vscale x 4 x float*> %[[PTRS]]		; CHECK: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %[[VALS]], <vscale x 4 x float*> %[[PTRS]]
entry:		entry:
Show All 11 Lines	for.body: ; preds = %entry, %for.body
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1		%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %n		%exitcond.not = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0		br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0

for.cond.cleanup: ; preds = %for.body, %entry		for.cond.cleanup: ; preds = %for.body, %entry
ret void		ret void
}		}

define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) {		define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-LABEL: @scatter_inv_nxv4i32		; CHECK-LABEL: @scatter_inv_nxv4i32
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK: %[[INS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %inv, i32 0		; CHECK: %[[INS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %inv, i32 0
; CHECK: %[[PTRSPLAT:.*]] = shufflevector <vscale x 4 x i32*> %[[INS]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer		; CHECK: %[[PTRSPLAT:.*]] = shufflevector <vscale x 4 x i32*> %[[INS]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK: %[[VALS:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* %5, align 4		; CHECK: %[[VALS:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* %5, align 4
; CHECK: %[[MASK:.*]] = icmp ne <vscale x 4 x i32> %[[VALS]],		; CHECK: %[[MASK:.*]] = icmp ne <vscale x 4 x i32> %[[VALS]],
; CHECK: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32({{.*}}, <vscale x 4 x i32*> %[[PTRSPLAT]], i32 4, <vscale x 4 x i1> %[[MASK]])		; CHECK: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32({{.*}}, <vscale x 4 x i32*> %[[PTRSPLAT]], i32 4, <vscale x 4 x i1> %[[MASK]])
Show All 15 Lines	for.inc: ; preds = %for.body, %if.then
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1		%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %n		%exitcond.not = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0		br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0

for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry		for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret void		ret void
}		}

define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) {		define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) #0 {
; CHECK-LABEL: @gather_inv_nxv4i32		; CHECK-LABEL: @gather_inv_nxv4i32
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK: %[[INS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %inv, i32 0		; CHECK: %[[INS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %inv, i32 0
; CHECK: %[[PTRSPLAT:.*]] = shufflevector <vscale x 4 x i32*> %[[INS]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer		; CHECK: %[[PTRSPLAT:.*]] = shufflevector <vscale x 4 x i32*> %[[INS]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK: %[[VALS:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* %5, align 4		; CHECK: %[[VALS:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* %5, align 4
; CHECK: %[[MASK:.*]] = icmp sgt <vscale x 4 x i32> %[[VALS]],		; CHECK: %[[MASK:.*]] = icmp sgt <vscale x 4 x i32> %[[VALS]],
; CHECK: %{{.*}} = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %[[PTRSPLAT]], i32 4, <vscale x 4 x i1> %[[MASK]]		; CHECK: %{{.*}} = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %[[PTRSPLAT]], i32 4, <vscale x 4 x i1> %[[MASK]]
Show All 18 Lines	for.inc: ; preds = %for.body, %if.then
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0		br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0

for.cond.cleanup: ; preds = %for.inc, %entry		for.cond.cleanup: ; preds = %for.inc, %entry
ret void		ret void
}		}



define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {		define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-LABEL: @gather_nxv4i32_ind64_stride2		; CHECK-LABEL: @gather_nxv4i32_ind64_stride2
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK: %[[IDX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]		; CHECK: %[[IDX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]
; CHECK-DAG: %[[STEP:.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()		; CHECK-DAG: %[[STEP:.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
; CHECK-DAG: %[[IDXSPLATINS:.*]] = insertelement <vscale x 4 x i64> poison, i64 %[[IDX]], i32 0		; CHECK-DAG: %[[IDXSPLATINS:.*]] = insertelement <vscale x 4 x i64> poison, i64 %[[IDX]], i32 0
; CHECK-DAG: %[[IDXSPLAT:.*]] = shufflevector <vscale x 4 x i64> %[[IDXSPLATINS]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer		; CHECK-DAG: %[[IDXSPLAT:.*]] = shufflevector <vscale x 4 x i64> %[[IDXSPLATINS]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK: %[[ADD:.*]] = add <vscale x 4 x i64> %[[IDXSPLAT]], %[[STEP]]		; CHECK: %[[ADD:.*]] = add <vscale x 4 x i64> %[[IDXSPLAT]], %[[STEP]]
; CHECK: %[[MUL:.*]] = shl <vscale x 4 x i64> %[[ADD]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)		; CHECK: %[[MUL:.*]] = shl <vscale x 4 x i64> %[[ADD]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
Show All 12 Lines	for.body: ; preds = %entry, %for.body
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1		%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %n		%exitcond.not = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body		br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry		for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret void		ret void
}		}

		attributes #0 = { vscale_range(0, 16) }

!0 = distinct !{!0, !1, !2, !3, !4, !5}		!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}		!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}		!2 = !{!"llvm.loop.vectorize.width", i32 4}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}		!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!4 = !{!"llvm.loop.interleave.count", i32 1}		!4 = !{!"llvm.loop.interleave.count", i32 1}
!5 = !{!"llvm.loop.vectorize.enable", i1 true}		!5 = !{!"llvm.loop.vectorize.enable", i1 true}

llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll

Show First 20 Lines • Show All 53 Lines • ▼ Show 20 Lines	for.inc: ; preds = %for.body, %if.then
%inc = add nuw nsw i64 %i.09, 1		%inc = add nuw nsw i64 %i.09, 1
%exitcond.not = icmp eq i64 %inc, %N		%exitcond.not = icmp eq i64 %inc, %N
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0		br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end: ; preds = %for.inc, %entry		for.end: ; preds = %for.inc, %entry
ret void		ret void
}		}

attributes #0 = { "target-features"="+neon,+sve" }		attributes #0 = { "target-features"="+neon,+sve" vscale_range(0, 16) }

!0 = distinct !{!0, !1, !2, !3, !4, !5}		!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}		!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}		!2 = !{!"llvm.loop.vectorize.width", i32 4}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}		!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!4 = !{!"llvm.loop.vectorize.enable", i1 true}		!4 = !{!"llvm.loop.vectorize.enable", i1 true}
!5 = !{!"llvm.loop.interleave.count", i32 1}		!5 = !{!"llvm.loop.interleave.count", i32 1}

llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll

; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S <%s | FileCheck %s		; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S <%s | FileCheck %s

define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) {		define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) #0 {
; CHECK-LABEL: @stride7_i32(		; CHECK-LABEL: @stride7_i32(
; CHECK: vector.body		; CHECK: vector.body
; CHECK: %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]		; CHECK: %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 4 x i64> %[[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 7, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)		; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 4 x i64> %[[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 7, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds i32, i32* %dst, <vscale x 4 x i64> %[[PTR_INDICES]]		; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds i32, i32* %dst, <vscale x 4 x i64> %[[PTR_INDICES]]
; CHECK-NEXT: %[[GLOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %[[PTRS]]		; CHECK-NEXT: %[[GLOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %[[PTRS]]
; CHECK-NEXT: %[[VALS:.*]] = add nsw <vscale x 4 x i32> %[[GLOAD]],		; CHECK-NEXT: %[[VALS:.*]] = add nsw <vscale x 4 x i32> %[[GLOAD]],
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %[[VALS]], <vscale x 4 x i32*> %[[PTRS]]		; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %[[VALS]], <vscale x 4 x i32*> %[[PTRS]]
Show All 10 Lines	for.body: ; preds = %entry, %for.body
%inc = add nuw nsw i64 %i.05, 1		%inc = add nuw nsw i64 %i.05, 1
%exitcond.not = icmp eq i64 %inc, %n		%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0		br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end: ; preds = %for.end.loopexit, %entry		for.end: ; preds = %for.end.loopexit, %entry
ret void		ret void
}		}

define void @stride7_f64(double* noalias nocapture %dst, i64 %n) {		define void @stride7_f64(double* noalias nocapture %dst, i64 %n) #0 {
; CHECK-LABEL: @stride7_f64(		; CHECK-LABEL: @stride7_f64(
; CHECK: vector.body		; CHECK: vector.body
; CHECK: %[[VEC_IND:.*]] = phi <vscale x 2 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]		; CHECK: %[[VEC_IND:.*]] = phi <vscale x 2 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 2 x i64> %[[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 7, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)		; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 2 x i64> %[[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 7, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds double, double* %dst, <vscale x 2 x i64> %[[PTR_INDICES]]		; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds double, double* %dst, <vscale x 2 x i64> %[[PTR_INDICES]]
; CHECK-NEXT: %[[GLOAD:.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %[[PTRS]],		; CHECK-NEXT: %[[GLOAD:.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %[[PTRS]],
; CHECK-NEXT: %[[VALS:.*]] = fadd <vscale x 2 x double> %[[GLOAD]],		; CHECK-NEXT: %[[VALS:.*]] = fadd <vscale x 2 x double> %[[GLOAD]],
; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %[[VALS]], <vscale x 2 x double*> %[[PTRS]],		; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %[[VALS]], <vscale x 2 x double*> %[[PTRS]],
Show All 11 Lines	for.body: ; preds = %entry, %for.body
%exitcond.not = icmp eq i64 %inc, %n		%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !6		br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !6

for.end: ; preds = %for.end.loopexit, %entry		for.end: ; preds = %for.end.loopexit, %entry
ret void		ret void
}		}


define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) {		define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) #0 {
; CHECK-LABEL: @cond_stride7_f64(		; CHECK-LABEL: @cond_stride7_f64(
; CHECK: vector.body		; CHECK: vector.body
; CHECK: %[[MASK:.*]] = icmp ne <vscale x 2 x i64>		; CHECK: %[[MASK:.*]] = icmp ne <vscale x 2 x i64>
; CHECK: %[[PTRS:.*]] = getelementptr inbounds double, double* %dst, <vscale x 2 x i64> %{{.*}}		; CHECK: %[[PTRS:.*]] = getelementptr inbounds double, double* %dst, <vscale x 2 x i64> %{{.*}}
; CHECK-NEXT: %[[GLOAD:.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %[[PTRS]], i32 8, <vscale x 2 x i1> %[[MASK]]		; CHECK-NEXT: %[[GLOAD:.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %[[PTRS]], i32 8, <vscale x 2 x i1> %[[MASK]]
; CHECK-NEXT: %[[VALS:.*]] = fadd <vscale x 2 x double> %[[GLOAD]],		; CHECK-NEXT: %[[VALS:.*]] = fadd <vscale x 2 x double> %[[GLOAD]],
; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %[[VALS]], <vscale x 2 x double*> %[[PTRS]], i32 8, <vscale x 2 x i1> %[[MASK]])		; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %[[VALS]], <vscale x 2 x double*> %[[PTRS]], i32 8, <vscale x 2 x i1> %[[MASK]])
entry:		entry:
Show All 18 Lines	for.inc: ; preds = %for.body, %if.then
%inc = add nuw nsw i64 %i.07, 1		%inc = add nuw nsw i64 %i.07, 1
%exitcond.not = icmp eq i64 %inc, %n		%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !6		br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !6

for.end: ; preds = %for.end.loopexit, %entry		for.end: ; preds = %for.end.loopexit, %entry
ret void		ret void
}		}

		attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !1, !2, !3, !4, !5}		!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}		!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}		!2 = !{!"llvm.loop.vectorize.width", i32 4}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}		!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!4 = !{!"llvm.loop.interleave.count", i32 1}		!4 = !{!"llvm.loop.interleave.count", i32 1}
!5 = !{!"llvm.loop.vectorize.enable", i1 true}		!5 = !{!"llvm.loop.vectorize.enable", i1 true}
!6 = distinct !{!6, !1, !7, !3, !4, !5}		!6 = distinct !{!6, !1, !7, !3, !4, !5}
!7 = !{!"llvm.loop.vectorize.width", i32 2}		!7 = !{!"llvm.loop.vectorize.width", i32 2}

llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll

Show First 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	for.body:
%iv.next = add nuw nsw i64 %iv, 1		%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n		%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0		br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:		for.end:
ret double %add		ret double %add
}		}

attributes #0 = { "target-features"="+sve" }		attributes #0 = { "target-features"="+sve" vscale_range(0, 16) }

!0 = distinct !{!0, !1}		!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}		!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll

; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S < %s | FileCheck %s		; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S < %s | FileCheck %s

; Ensure that we can vectorize loops such as:		; Ensure that we can vectorize loops such as:
; int *ptr = c;		; int *ptr = c;
; for (long long i = 0; i < n; i++) {		; for (long long i = 0; i < n; i++) {
; int X1 = *ptr++;		; int X1 = *ptr++;
; int X2 = *ptr++;		; int X2 = *ptr++;
; a[i] = X1 + 1;		; a[i] = X1 + 1;
; b[i] = X2 + 1;		; b[i] = X2 + 1;
; }		; }
; with scalable vectors, including unrolling. The test below makes sure		; with scalable vectors, including unrolling. The test below makes sure
; that we can use gather instructions with the correct offsets, taking		; that we can use gather instructions with the correct offsets, taking
; vscale into account.		; vscale into account.

define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* nocapture readonly %c, i64 %n) {		define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* nocapture readonly %c, i64 %n) #0 {
; CHECK-LABEL: @widen_ptr_phi_unrolled(		; CHECK-LABEL: @widen_ptr_phi_unrolled(
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ %c, %vector.ph ], [ %[[PTR_IND:.*]], %vector.body ]		; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ %c, %vector.ph ], [ %[[PTR_IND:.*]], %vector.body ]
; CHECK: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()		; CHECK: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2		; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP5]], 4		; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP5]], 4
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()		; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl <vscale x 4 x i64> [[TMP8]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)		; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl <vscale x 4 x i64> [[TMP8]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines

;		;
; Check multiple pointer induction variables where only one is recognized as		; Check multiple pointer induction variables where only one is recognized as
; uniform and remains uniform after vectorization. The other pointer induction		; uniform and remains uniform after vectorization. The other pointer induction
; variable is not recognized as uniform and is not uniform after vectorization		; variable is not recognized as uniform and is not uniform after vectorization
; because it is stored to memory.		; because it is stored to memory.
;		;

define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) {		define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 {
; CHECK-LABEL: @pointer_iv_mixed(		; CHECK-LABEL: @pointer_iv_mixed(
; CHECK: vector.body		; CHECK: vector.body
; CHECK: %[[IDX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]		; CHECK: %[[IDX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]
; CHECK: %[[STEPVEC:.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()		; CHECK: %[[STEPVEC:.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
; CHECK-NEXT: %[[TMP1:.*]] = insertelement <vscale x 2 x i64> poison, i64 %[[IDX]], i32 0		; CHECK-NEXT: %[[TMP1:.*]] = insertelement <vscale x 2 x i64> poison, i64 %[[IDX]], i32 0
; CHECK-NEXT: %[[TMP2:.*]] = shufflevector <vscale x 2 x i64> %[[TMP1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer		; CHECK-NEXT: %[[TMP2:.*]] = shufflevector <vscale x 2 x i64> %[[TMP1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: %[[VECIND1:.*]] = add <vscale x 2 x i64> %[[TMP2]], %[[STEPVEC]]		; CHECK-NEXT: %[[VECIND1:.*]] = add <vscale x 2 x i64> %[[TMP2]], %[[STEPVEC]]
; CHECK-NEXT: %[[APTRS1:.*]] = getelementptr i32, i32* %a, <vscale x 2 x i64> %[[VECIND1]]		; CHECK-NEXT: %[[APTRS1:.*]] = getelementptr i32, i32* %a, <vscale x 2 x i64> %[[VECIND1]]
Show All 31 Lines	for.body:
%cond = icmp slt i64 %i.next, %n		%cond = icmp slt i64 %i.next, %n
br i1 %cond, label %for.body, label %for.end, !llvm.loop !6		br i1 %cond, label %for.body, label %for.end, !llvm.loop !6

for.end:		for.end:
%tmp5 = phi i32 [ %tmp2, %for.body ]		%tmp5 = phi i32 [ %tmp2, %for.body ]
ret i32 %tmp5		ret i32 %tmp5
}		}

		attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !1, !2, !3, !4, !5}		!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}		!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}		!2 = !{!"llvm.loop.vectorize.width", i32 4}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}		!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!4 = !{!"llvm.loop.vectorize.enable", i1 true}		!4 = !{!"llvm.loop.vectorize.enable", i1 true}
!5 = !{!"llvm.loop.interleave.count", i32 2}		!5 = !{!"llvm.loop.interleave.count", i32 2}
!6 = distinct !{!6, !1, !7, !3, !4, !5}		!6 = distinct !{!6, !1, !7, !3, !4, !5}
!7 = !{!"llvm.loop.vectorize.width", i32 2}		!7 = !{!"llvm.loop.vectorize.width", i32 2}

This is an archive of the discontinued LLVM Phabricator instance.

[SVE] Remove usage of getMaxVScale for AArch64, in favour of IR Attribute
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 366001

clang/include/clang/Basic/TargetInfo.h

clang/lib/Basic/Targets/AArch64.h

clang/lib/Basic/Targets/AArch64.cpp

clang/lib/CodeGen/CodeGenFunction.cpp

clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/test/Analysis/CostModel/AArch64/sve-gather.ll

llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll

llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SVE] Remove usage of getMaxVScale for AArch64, in favour of IR Attribute
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 366001

clang/include/clang/Basic/TargetInfo.h

clang/lib/Basic/Targets/AArch64.h

clang/lib/Basic/Targets/AArch64.cpp

clang/lib/CodeGen/CodeGenFunction.cpp

clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/test/Analysis/CostModel/AArch64/sve-gather.ll

llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll

llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll

[SVE] Remove usage of getMaxVScale for AArch64, in favour of IR Attribute
ClosedPublic