Diff 271674

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 901 Lines • ▼ Show 20 Lines	void ReplaceExtractSubVectorResults(SDNode *N,
SelectionDAG &DAG) const;		SelectionDAG &DAG) const;

bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;		bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

void finalizeLowering(MachineFunction &MF) const override;		void finalizeLowering(MachineFunction &MF) const override;

bool shouldLocalize(const MachineInstr &MI,		bool shouldLocalize(const MachineInstr &MI,
const TargetTransformInfo *TTI) const override;		const TargetTransformInfo *TTI) const override;

		bool useSVEForFixedLengthVectors() const;
		bool useSVEForFixedLengthVectorVT(MVT VT) const;
};		};

namespace AArch64 {		namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,		FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);		const TargetLibraryInfo *libInfo);
} // end namespace AArch64		} // end namespace AArch64

} // end namespace llvm		} // end namespace llvm

#endif		#endif

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 178 Lines • ▼ Show 20 Lines	if (Subtarget->hasSVE()) {
addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);		addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);		addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);		addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);		addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);		addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);		addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);		addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);

		if (useSVEForFixedLengthVectors()) {
		for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
		if (useSVEForFixedLengthVectorVT(VT))
		addRegisterClass(VT, &AArch64::ZPRRegClass);

		for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
		if (useSVEForFixedLengthVectorVT(VT))
		addRegisterClass(VT, &AArch64::ZPRRegClass);
		}

for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {		for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);		setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);		setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);		setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);		setOperationAction(ISD::USUBSAT, VT, Legal);
}		}

for (auto VT :		for (auto VT :
▲ Show 20 Lines • Show All 3,274 Lines • ▼ Show 20 Lines	case ISD::ATOMIC_LOAD_AND:
return LowerATOMIC_LOAD_AND(Op, DAG);		return LowerATOMIC_LOAD_AND(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:		case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);		return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VSCALE:		case ISD::VSCALE:
return LowerVSCALE(Op, DAG);		return LowerVSCALE(Op, DAG);
}		}
}		}

		// Reports whether fixed length vectors should be lowered using SVE at all.
		bool AArch64TargetLowering::useSVEForFixedLengthVectors() const {
		  // Nothing to decide when the subtarget has no SVE; this early exit also
		  // keeps the vector size query below from running without SVE.
		  if (!Subtarget->hasSVE())
		    return false;

		  // Prefer NEON unless larger SVE registers are available.
		  return Subtarget->getMinSVEVectorSizeInBits() >= 256;
		}

		// Returns true if the fixed length vector type VT should be lowered using
		// SVE registers. VT must be a fixed length vector (asserted). The answer is
		// false unless all of the following hold:
		//   - useSVEForFixedLengthVectors() is true;
		//   - the element type is one of i8, i16, i32, i64, f16, f32 or f64 (i1 and
		//     every other element type are rejected);
		//   - VT is wider than 128 bits;
		//   - VT is no wider than Subtarget->getMinSVEVectorSizeInBits();
		//   - VT.isPow2VectorType() holds.
		bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(MVT VT) const {
		assert(VT.isFixedLengthVector());
		if (!useSVEForFixedLengthVectors())
		return false;

		// Fixed length predicates should be promoted to i8.
		// NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
		if (VT.getVectorElementType() == MVT::i1)
		return false;
		efriedmaUnsubmitted Done Reply Inline Actions Restricting i1 types like this seems a little weird to me. We don't need to pretend to be NEON. We want these types to be legal for similar reasons we want the other fixed vector types to be legal. I guess we could leave the check for now if it makes it easier to get simple testcases working, but I don't think this is what we want long-term. For non-i1 types, should we check the element type is legal? integer_fixedlen_vector_valuetypes() and fp_fixedlen_vector_valuetypes() can return some exotic types. efriedma: Restricting i1 types like this seems a little weird to me. We don't need to pretend to be NEON.
		paulwalker-armAuthorUnsubmitted Done Reply Inline Actions Part of this is me not looking for trouble at this stage and wanting to wait to see if there's a performance reason to make them legal. I think the majority of the cases can be handled after the conversion to scalable vector types, which just leaves the cross-block scenarios. However, from a functional point of view I'm trying not to make any ABI related changes. Currently the handling of i1 based vectors (well all fixed length vectors) has already been decided for NEON and so I cannot just change it in isolation without breaking compatibility with existing libraries (which don't know about SVE). In the future we'll need a function attribute that corresponds to an as yet undefined ABI. When that is in place we can relax the i1 restriction. For now though my focus is purely on block level code generation. paulwalker-arm: Part of this is me not looking for trouble at this stage and wanting to wait to see if there's…
		efriedmaUnsubmitted Not Done Reply Inline Actions We can mess with the ABI separately if we need to say the values are sign-extended or whatever. I'm concerned that if we don't make this legal now, we're going to end up writing a bunch of code to work around the fact that we can't produce i1 vectors after legalization. efriedma: We can mess with the ABI separately if we need to say the values are sign-extended or whatever.
		paulwalker-armAuthorUnsubmitted Done Reply Inline Actions Do you have any specific examples. I ask because my experience is the opposite. By keeping fixed length i1 vectors as illegal I am able to remove a lot of work that just isn't necessary to achieve my objective of supporting larger fixed length vectors. I'll admit that here I'm potentially trading a bit on performance but there's nothing binding here, it's just the order of implementation I prefer. None of this affects scalable vectors so it doesn't affect the post operation legalisation side of things. paulwalker-arm: Do you have any specific examples. I ask because my experience is the opposite. By keeping…
		efriedmaUnsubmitted Not Done Reply Inline Actions Say we have something like `(a==b\|c==d)? e : f`. In NEON, we do something with blending in vector registers. In SVE, we do not want the condition to ever be moved from predicate registers to vector registers. So now you're pattern-matching some combination of logic operators and blends to try to recover the obvious SVE code. If you aren't really doing that sort of optimization in the first iteration, transitioning later might not be a big deal. efriedma: Say we have something like `(a==b\|c==d)? e : f`. In NEON, we do something with blending in…

		// Don't use SVE for vectors we cannot scalarize if required.
		switch (VT.getVectorElementType().SimpleTy) {
		efriedmaUnsubmitted Done Reply Inline Actions `isTypeLegal(VT.getVectorElementType())` probably doesn't do what you want; i8 is not a legal type. efriedma: `isTypeLegal(VT.getVectorElementType())` probably doesn't do what you want; i8 is not a legal…
		paulwalker-armAuthorUnsubmitted Done Reply Inline Actions That'll teach me for taking things too literally without thinking about what I'm writing :) I've changed this to an explicit list and deliberately kept it minimal as bf and 128bit element types are things I don't really care about at this stage. paulwalker-arm: That'll teach me for taking things too literally without thinking about what I'm writing :)…
		default:
		return false;
		case MVT::i8:
		case MVT::i16:
		case MVT::i32:
		case MVT::i64:
		case MVT::f16:
		case MVT::f32:
		case MVT::f64:
		efriedmaUnsubmitted Not Done Reply Inline Actions Non-power-of-two types are an extra complication, yes; better to leave it out for now. efriedma: Non-power-of-two types are an extra complication, yes; better to leave it out for now.
		break;
		}

		// Ensure NEON MVTs only belong to a single register class.
		if (VT.getSizeInBits() <= 128)
		return false;

		// Don't use SVE for types that don't fit.
		if (VT.getSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
		return false;

		// TODO: Perhaps an artificial restriction, but worth having whilst getting
		// the base fixed length SVE support in place.
		if (!VT.isPow2VectorType())
		return false;

		return true;
		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Calling Convention Implementation		// Calling Convention Implementation
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

/// Selects the correct CCAssignFn for a given CallingConvention value.		/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,		CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) const {		bool IsVarArg) const {
switch (CC) {		switch (CC) {
▲ Show 20 Lines • Show All 11,108 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64Subtarget.h

Show First 20 Lines • Show All 528 Lines • ▼ Show 20 Lines	bool isCallingConvWin64(CallingConv::ID CC) const {
case CallingConv::Win64:		case CallingConv::Win64:
return true;		return true;
default:		default:
return false;		return false;
}		}
}		}

void mirFileLoaded(MachineFunction &MF) const override;		void mirFileLoaded(MachineFunction &MF) const override;

		// Return the known range for the bit length of SVE data registers. A value
		// of 0 means nothing is known about that particular limit beyond what's
		// implied by the architecture.
		unsigned getMaxSVEVectorSizeInBits() const;
		unsigned getMinSVEVectorSizeInBits() const;
};		};
} // End llvm namespace		} // End llvm namespace

#endif		#endif

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Show First 20 Lines • Show All 41 Lines • ▼ Show 20 Lines
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "		UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
"an address is ignored"), cl::init(false), cl::Hidden);		"an address is ignored"), cl::init(false), cl::Hidden);

static cl::opt<bool>		static cl::opt<bool>
UseNonLazyBind("aarch64-enable-nonlazybind",		UseNonLazyBind("aarch64-enable-nonlazybind",
cl::desc("Call nonlazybind functions via direct GOT load"),		cl::desc("Call nonlazybind functions via direct GOT load"),
cl::init(false), cl::Hidden);		cl::init(false), cl::Hidden);

		// Hidden command line option giving an assumed upper bound, in bits, for the
		// size of SVE vector registers. It defaults to 0, which the description
		// defines as no maximum being assumed. getMaxSVEVectorSizeInBits() asserts
		// that the value is a multiple of 128.
		static cl::opt<unsigned> SVEVectorBitsMax(
		"aarch64-sve-vector-bits-max",
		cl::desc("Assume SVE vector registers are at most this big, "
		"with zero meaning no maximum size is assumed."),
		cl::init(0), cl::Hidden);

		// Hidden command line option giving an assumed lower bound, in bits, for the
		// size of SVE vector registers. It defaults to 0, which the description
		// defines as no minimum being assumed. getMinSVEVectorSizeInBits() asserts
		// that the value is a multiple of 128.
		static cl::opt<unsigned> SVEVectorBitsMin(
		"aarch64-sve-vector-bits-min",
		cl::desc("Assume SVE vector registers are at least this big, "
		"with zero meaning no minimum size is assumed."),
		cl::init(0), cl::Hidden);

AArch64Subtarget &		AArch64Subtarget &
AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,		AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
StringRef CPUString) {		StringRef CPUString) {
// Determine default and user-specified characteristics		// Determine default and user-specified characteristics

if (CPUString.empty())		if (CPUString.empty())
CPUString = "generic";		CPUString = "generic";

▲ Show 20 Lines • Show All 266 Lines • ▼ Show 20 Lines	void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
// We usually compute max call frame size after ISel. Do the computation now		// We usually compute max call frame size after ISel. Do the computation now
// if the .mir file didn't specify it. Note that this will probably give you		// if the .mir file didn't specify it. Note that this will probably give you
// bogus values after PEI has eliminated the callframe setup/destroy pseudo		// bogus values after PEI has eliminated the callframe setup/destroy pseudo
// instructions, specify explicitly if you need it to be correct.		// instructions, specify explicitly if you need it to be correct.
MachineFrameInfo &MFI = MF.getFrameInfo();		MachineFrameInfo &MFI = MF.getFrameInfo();
if (!MFI.isMaxCallFrameSizeComputed())		if (!MFI.isMaxCallFrameSizeComputed())
MFI.computeMaxCallFrameSize(MF);		MFI.computeMaxCallFrameSize(MF);
}		}

		unsigned AArch64Subtarget::getMaxSVEVectorSizeInBits() const {
		efriedmaUnsubmitted Done Reply Inline Actions We probably want function attributes? It looks like this patch doesn't use the max size; what do you anticipate using it for? efriedma: We probably want function attributes? It looks like this patch doesn't use the max size; what…
		paulwalker-armAuthorUnsubmitted Done Reply Inline Actions When creating the patch I did spot "min-legal-vector-width" exists as a function attribute but as mentioned above I'm trying to avoid establishing anything ABI related at this stage. As part of D80385 where I've added the createPredicateForFixedVector function there's a comment that mentions the use case for the max size. Essentially when min==max we can avoid the need to create explicitly sized predicates and instead use "PTRUE ALL" as a route to select unpredicated instructions when available. paulwalker-arm: When creating the patch I did spot "min-legal-vector-width" exists as a function attribute but…
		efriedmaUnsubmitted Done Reply Inline Actions I guess we can leave out function attributes for now, but it's a temporary solution at best; LLVM flags are not properly usable from clang. efriedma: I guess we can leave out function attributes for now, but it's a temporary solution at best…
		paulwalker-armAuthorUnsubmitted Done Reply Inline Actions Yep, I understand. The flags allow me to write more concise tests so even when we nail down the way front-ends enables this feature I suspect the flags will still have value. paulwalker-arm: Yep, I understand. The flags allow me to write more concise tests so even when we nail down…
		assert(HasSVE && "Tried to get SVE vector length without SVE support!");
		assert(SVEVectorBitsMax % 128 == 0 &&
		"SVE requires vector length in multiples of 128!");
		assert((SVEVectorBitsMax >= SVEVectorBitsMin \|\| SVEVectorBitsMax == 0) &&
		"Minimum SVE vector size should not be larger than its maximum!");
		if (SVEVectorBitsMax == 0)
		return 0;
		return (std::max(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
		}

		// Returns the assumed lower bound, in bits, for the size of an SVE data
		// register, derived from the aarch64-sve-vector-bits-min option and rounded
		// down to a multiple of 128. When a non-zero maximum is set, the smaller of
		// the min and max option values is used instead.
		//
		// Asserts that SVE is available, that the minimum is a multiple of 128 and
		// that a non-zero maximum is not smaller than the minimum.
		unsigned AArch64Subtarget::getMinSVEVectorSizeInBits() const {
		  assert(HasSVE && "Tried to get SVE vector length without SVE support!");
		  assert(SVEVectorBitsMin % 128 == 0 &&
		         "SVE requires vector length in multiples of 128!");
		  assert((SVEVectorBitsMax >= SVEVectorBitsMin || SVEVectorBitsMax == 0) &&
		         "Minimum SVE vector size should not be larger than its maximum!");
		  // A zero maximum means "no maximum assumed", so only the minimum applies.
		  if (SVEVectorBitsMax == 0)
		    return (SVEVectorBitsMin / 128) * 128;
		  // The divide/multiply pair rounds down to a whole number of 128-bit units.
		  return (std::min(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
		}

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Show First 20 Lines • Show All 92 Lines • ▼ Show 20 Lines	if (Vector) {
return 32;		return 32;
return 0;		return 0;
}		}
return 31;		return 31;
}		}

unsigned getRegisterBitWidth(bool Vector) const {		unsigned getRegisterBitWidth(bool Vector) const {
if (Vector) {		if (Vector) {
		if (ST->hasSVE())
		return std::max(ST->getMinSVEVectorSizeInBits(), 128u);
if (ST->hasNEON())		if (ST->hasNEON())
return 128;		return 128;
return 0;		return 0;
}		}
return 64;		return 64;
}		}

unsigned getMinVectorRegisterBitWidth() {		unsigned getMinVectorRegisterBitWidth() {
▲ Show 20 Lines • Show All 136 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll

This file was added.

				; RUN: opt < %s -cost-model -analyze \| FileCheck %s -D#VBITS=128
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=128 \| FileCheck %s -D#VBITS=128
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=256 \| FileCheck %s -D#VBITS=256
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=384 \| FileCheck %s -D#VBITS=256
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=512 \| FileCheck %s -D#VBITS=512
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=640 \| FileCheck %s -D#VBITS=512
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=768 \| FileCheck %s -D#VBITS=512
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=896 \| FileCheck %s -D#VBITS=512
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=1024 \| FileCheck %s -D#VBITS=1024
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=1152 \| FileCheck %s -D#VBITS=1024
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=1280 \| FileCheck %s -D#VBITS=1024
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=1408 \| FileCheck %s -D#VBITS=1024
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=1536 \| FileCheck %s -D#VBITS=1024
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=1664 \| FileCheck %s -D#VBITS=1024
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=1792 \| FileCheck %s -D#VBITS=1024
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=1920 \| FileCheck %s -D#VBITS=1024
				; RUN: opt < %s -cost-model -analyze -aarch64-sve-vector-bits-min=2048 \| FileCheck %s -D#VBITS=2048

				; VBITS represents the useful bit size of a vector register from the code
				; generator's point of view. It is clamped to power-of-2 values because
				; only power-of-2 vector lengths are considered legal, regardless of the
				; user specified vector length.

				target triple = "aarch64-unknown-linux-gnu"

				; Ensure the cost of legalisation is removed as the vector length grows.
				; NOTE: Assumes BaseCost_add=1, BaseCost_fadd=2.
				; Each expected cost below is written as div(bits-1,VBITS)+1, i.e. the
				; vector width in bits divided by VBITS and rounded up. The fadd
				; expectations multiply that count by 2.
				define void @add() #0 {
				; CHECK-LABEL: Printing analysis 'Cost Model Analysis' for function 'add':
				; CHECK: cost of [[#div(127,VBITS)+1]] for instruction: %add128 = add <4 x i32> undef, undef
				; CHECK: cost of [[#div(255,VBITS)+1]] for instruction: %add256 = add <8 x i32> undef, undef
				; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512 = add <16 x i32> undef, undef
				; CHECK: cost of [[#div(1023,VBITS)+1]] for instruction: %add1024 = add <32 x i32> undef, undef
				; CHECK: cost of [[#div(2047,VBITS)+1]] for instruction: %add2048 = add <64 x i32> undef, undef
				%add128 = add <4 x i32> undef, undef
				%add256 = add <8 x i32> undef, undef
				%add512 = add <16 x i32> undef, undef
				%add1024 = add <32 x i32> undef, undef
				%add2048 = add <64 x i32> undef, undef

				; Using a single vector length, ensure all element types are recognised.
				; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i8 = add <64 x i8> undef, undef
				; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i16 = add <32 x i16> undef, undef
				; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i32 = add <16 x i32> undef, undef
				; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i64 = add <8 x i64> undef, undef
				; CHECK: cost of [[#mul(div(511,VBITS)+1,2)]] for instruction: %add512.f16 = fadd <32 x half> undef, undef
				; CHECK: cost of [[#mul(div(511,VBITS)+1,2)]] for instruction: %add512.f32 = fadd <16 x float> undef, undef
				; CHECK: cost of [[#mul(div(511,VBITS)+1,2)]] for instruction: %add512.f64 = fadd <8 x double> undef, undef
				%add512.i8 = add <64 x i8> undef, undef
				%add512.i16 = add <32 x i16> undef, undef
				%add512.i32 = add <16 x i32> undef, undef
				%add512.i64 = add <8 x i64> undef, undef
				%add512.f16 = fadd <32 x half> undef, undef
				%add512.f32 = fadd <16 x float> undef, undef
				%add512.f64 = fadd <8 x double> undef, undef

				ret void
				}

				attributes #0 = { "target-features"="+sve" }

This is an archive of the discontinued LLVM Phabricator instance.

[SVE] Add flag to specify SVE register size, using this to calculate legal vector types.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 271674

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64Subtarget.h

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SVE] Add flag to specify SVE register size, using this to calculate legal vector types.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 271674

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64Subtarget.h

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll

[SVE] Add flag to specify SVE register size, using this to calculate legal vector types.
ClosedPublic