Diff 555336

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,453 Lines • ▼ Show 20 Lines	for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
setOperationAction(ISD::FABS, VT, Custom);		setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);		setOperationAction(ISD::FP_EXTEND, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);		setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);		setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);		setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);		setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);		setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);		setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
		if (Subtarget->isSVEAvailable())
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);		setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
		CarolineConcattoUnsubmitted Done Reply Inline Actions Here you are also changing the logic. Before for SVE FADD was custom, now it can be legal. So it means we can lower to an assembly, but I don't see that anywhere. I believe it will crash. Is that the expected behaviour? I have the same question for line 1508. I believe it will crash. Is that the expected behaviour? CarolineConcatto: Here you are also changing the logic. Before for SVE FADD was custom, now it can be legal. So…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions By default VECREDUCE_SEQ_FADD is marked as 'Expand', which means for fixed-length vectors that it will expand the operation to a sequence of scalar operations to do the vector reduction. For scalable vectors, no such scalarisation exists (because it would require SelectionDAG to generate a loop to do this), so the compiler will fail to compile. That is the expected behaviour, because if the target can't use these instructions due to the selected streaming-mode, then the intrinsic/operation should not have been formed in the LLVM IR in the first place. sdesmalen: By default VECREDUCE_SEQ_FADD is marked as 'Expand', which means for fixed-length vectors that…
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);		setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);		setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);		setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

setOperationAction(ISD::SELECT_CC, VT, Expand);		setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);		setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);		setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);		setOperationAction(ISD::FPOWI, VT, Expand);
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
setOperationAction(ISD::SDIV, VT, Custom);		setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);		setOperationAction(ISD::UDIV, VT, Custom);
}		}

// NEON doesn't support 64-bit vector integer muls, but SVE does.		// NEON doesn't support 64-bit vector integer muls, but SVE does.
setOperationAction(ISD::MUL, MVT::v1i64, Custom);		setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);		setOperationAction(ISD::MUL, MVT::v2i64, Custom);

		if (Subtarget->isSVEAvailable()) {
// NEON doesn't support across-vector reductions, but SVE does.		// NEON doesn't support across-vector reductions, but SVE does.
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})		for (auto VT :
		{MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);		setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
		}

if (!Subtarget->isNeonAvailable()) {		if (!Subtarget->isNeonAvailable()) {
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Custom);		setTruncStoreAction(MVT::v2f32, MVT::v2f16, Custom);
setTruncStoreAction(MVT::v4f32, MVT::v4f16, Custom);		setTruncStoreAction(MVT::v4f32, MVT::v4f16, Custom);
setTruncStoreAction(MVT::v8f32, MVT::v8f16, Custom);		setTruncStoreAction(MVT::v8f32, MVT::v8f16, Custom);
setTruncStoreAction(MVT::v1f64, MVT::v1f16, Custom);		setTruncStoreAction(MVT::v1f64, MVT::v1f16, Custom);
setTruncStoreAction(MVT::v2f64, MVT::v2f16, Custom);		setTruncStoreAction(MVT::v2f64, MVT::v2f16, Custom);
setTruncStoreAction(MVT::v4f64, MVT::v4f16, Custom);		setTruncStoreAction(MVT::v4f64, MVT::v4f16, Custom);
▲ Show 20 Lines • Show All 341 Lines • ▼ Show 20 Lines	void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);		setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);		setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);		setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);		setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);		setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);		setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);		setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);		setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);		setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT,
		StreamingSVE ? Expand : Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);		setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);		setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);		setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);		setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);		setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);		setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);		setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);		setOperationAction(ISD::VSELECT, VT, Custom);
▲ Show 20 Lines • Show All 17,691 Lines • ▼ Show 20 Lines	return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),		N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETNE));		N->getOperand(3), DAG.getCondCode(ISD::SETNE));
break;		break;
case Intrinsic::aarch64_sve_fcmpuo:		case Intrinsic::aarch64_sve_fcmpuo:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),		return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),		N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUO));		N->getOperand(3), DAG.getCondCode(ISD::SETUO));
break;		break;
case Intrinsic::aarch64_sve_fadda:		case Intrinsic::aarch64_sve_fadda:
		CarolineConcattoUnsubmitted Done Reply Inline Actions This does not look right. What will happen when isFullSVEAvailable() is not available? Should it crash? CarolineConcatto: This does not look right. What will happen when isFullSVEAvailable() is not available? Should it…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions Answered in my other comment. sdesmalen: Answered in my other comment.
return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);		return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
case Intrinsic::aarch64_sve_faddv:		case Intrinsic::aarch64_sve_faddv:
		paulwalker-armUnsubmitted Done Reply Inline Actions As discussed this shouldn't be necessary because we require users of the target specific SVE intrinsics to know what they're doing. There are several cases where incorrect usage of these intrinsics can lead to unexpected behaviour and we make no attempt to guard against them. paulwalker-arm: As discussed this shouldn't be necessary because we require users of the target specific SVE…
return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);		return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
case Intrinsic::aarch64_sve_fmaxnmv:		case Intrinsic::aarch64_sve_fmaxnmv:
return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);		return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
case Intrinsic::aarch64_sve_fmaxv:		case Intrinsic::aarch64_sve_fmaxv:
return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);		return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_fminnmv:		case Intrinsic::aarch64_sve_fminnmv:
return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);		return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
case Intrinsic::aarch64_sve_fminv:		case Intrinsic::aarch64_sve_fminv:
▲ Show 20 Lines • Show All 6,561 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64Subtarget.h

Show First 20 Lines • Show All 198 Lines • ▼ Show 20 Lines	#include "AArch64GenSubtargetInfo.inc"
/// and preferably modeled with SubtargetFeatures or properties in		/// and preferably modeled with SubtargetFeatures or properties in
/// initializeProperties().		/// initializeProperties().
ARMProcFamilyEnum getProcFamily() const {		ARMProcFamilyEnum getProcFamily() const {
return ARMProcFamily;		return ARMProcFamily;
}		}

bool isXRaySupported() const override { return true; }		bool isXRaySupported() const override { return true; }

/// Returns true if the function has the streaming attribute.		/// Returns true if the function has a streaming body.
bool isStreaming() const { return StreamingSVEMode; }		bool isStreaming() const { return StreamingSVEMode; }

/// Returns true if the function has the streaming-compatible attribute.		/// Returns true if the function has a streaming-compatible body.
bool isStreamingCompatible() const { return StreamingCompatibleSVEMode; }		bool isStreamingCompatible() const;

/// Returns true if the target has NEON and the function at runtime is known		/// Returns true if the target has NEON and the function at runtime is known
/// to have NEON enabled (e.g. the function is known not to be in streaming-SVE		/// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
/// mode, which disables NEON instructions).		/// mode, which disables NEON instructions).
bool isNeonAvailable() const;		bool isNeonAvailable() const;

		/// Returns true if the target has SVE and can use the full range of SVE
		/// instructions, for example because the function is known not to be
		CarolineConcattoUnsubmitted Done Reply Inline Actions s/becuase/because/ CarolineConcatto: s/becuase/because/
		/// in streaming-SVE mode or when the target has FEAT_FA64 enabled.
		bool isSVEAvailable() const;
		MattDevereauUnsubmitted Done Reply Inline Actions Maybe it's best just to mention the feature instead of explaining it here since there will likely be a more accurate description later that diverges from this text? MattDevereau: Maybe it's best just to mention the feature instead of explaining it here since there will…
		paulwalker-armUnsubmitted Done Reply Inline Actions I don't like this name. FullSVE isn't really a thing, you either have SVE or you don't, much like with NEON. Can this just be `isSVEAvailable()`? paulwalker-arm: I don't like this name. FullSVE isn't really a thing, you either have SVE or you don't, much…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions I'm happy to change it to `isSVEAvailable()`, but it's perhaps a bit confusing in the context of other interfaces such as `useSVEforFixedLengthVectors`, where `SVE` has a meaning where it allows for the streaming-compatible subset of SVE, whereas for `isSVEAvailable` it strictly relates to the full set of SVE instructions. sdesmalen: I'm happy to change it to `isSVEAvailable()`, but it's perhaps a bit confusing in the context…
		paulwalker-armUnsubmitted Done Reply Inline Actions For clarity I interpret `useSVEforFixedLengthVectors` as just "use SVE instructions for fixed length vectors". It makes no judgement whether such instructions are streaming-compatible, it's just that most are and the few that are not use different code paths based on a more specific query like isSVEAvailable or isStreamingCompatible after this top level decision. That's not to say the `useSVEforFixedLengthVectors` interface is perfect when considering this newer requirement but for now it seems to be working ok. paulwalker-arm: For clarity I interpret `useSVEforFixedLengthVectors` as just "use SVE instructions for fixed…

unsigned getMinVectorRegisterBitWidth() const {		unsigned getMinVectorRegisterBitWidth() const {
// Don't assume any minimum vector size when PSTATE.SM may not be 0.		// Don't assume any minimum vector size when PSTATE.SM may not be 0, because
if (StreamingSVEMode \|\| StreamingCompatibleSVEMode)		// we don't yet have streaming-compatible codegen support that we trust
		// is safe for functions that may be executed in streaming-SVE mode.
		// By returning '0' here, we disable vectorization.
		if (!isSVEAvailable() && !isNeonAvailable())
		david-armUnsubmitted Done Reply Inline Actions Do we need to also check the same get-out clause that we do in AArch64TargetTransformInfo.cpp, i.e. `EnableScalableAutovecInStreamingMode`? david-arm: Do we need to also check the same get-out clause that we do in AArch64TargetTransformInfo.cpp…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions Possibly, but that requires changing the interface for getMinVectorRegisterBitWidth to also take the RegisterKind. We'd rather be conservative at first and disable the automatic use of vector instructions. Changing this interface would be a step towards being 'less conservative'. sdesmalen: Possibly, but that requires changing the interface for getMinVectorRegisterBitWidth to also…
return 0;		return 0;
return MinVectorRegisterBitWidth;		return MinVectorRegisterBitWidth;
}		}

bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }		bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; }		bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; }
unsigned getNumXRegisterReserved() const {		unsigned getNumXRegisterReserved() const {
BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs());		BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs());
▲ Show 20 Lines • Show All 202 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Show First 20 Lines • Show All 471 Lines • ▼ Show 20 Lines	void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
// instructions, specify explicitly if you need it to be correct.		// instructions, specify explicitly if you need it to be correct.
MachineFrameInfo &MFI = MF.getFrameInfo();		MachineFrameInfo &MFI = MF.getFrameInfo();
if (!MFI.isMaxCallFrameSizeComputed())		if (!MFI.isMaxCallFrameSizeComputed())
MFI.computeMaxCallFrameSize(MF);		MFI.computeMaxCallFrameSize(MF);
}		}

bool AArch64Subtarget::useAA() const { return UseAA; }		bool AArch64Subtarget::useAA() const { return UseAA; }

bool AArch64Subtarget::isNeonAvailable() const {		bool AArch64Subtarget::isStreamingCompatible() const {
if (!hasNEON())		return StreamingCompatibleSVEMode \|\| ForceStreamingCompatibleSVE;
return false;		}

// The 'force-streaming-comaptible-sve' flag overrides the streaming		bool AArch64Subtarget::isNeonAvailable() const {
// function attributes.		return hasNEON() && !isStreaming() && !isStreamingCompatible();
if (ForceStreamingCompatibleSVE.getNumOccurrences() > 0)		}
return !ForceStreamingCompatibleSVE;

return !isStreaming() && !isStreamingCompatible();		bool AArch64Subtarget::isSVEAvailable() const{
		// FIXME: Also return true if FEAT_FA64 is set, but we can't do this yet
		// as we don't yet support the feature in LLVM.
		return hasSVE() && !isStreaming() && !isStreamingCompatible();
		david-armUnsubmitted Done Reply Inline Actions Perhaps it's worth being consistent with `isNeonAvailable` and using the `isStreaming` and `isStreamingCompatible` interfaces? david-arm: Perhaps it's worth being consistent with `isNeonAvailable` and using the `isStreaming` and…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions Thanks, good point. sdesmalen: Thanks, good point.
		paulwalker-armUnsubmitted Done Reply Inline Actions Is the presence of SME relevant to this question? Just `hasSVE()` seems more fitting. paulwalker-arm: Is the presence of SME relevant to this question? Just `hasSVE()` seems more fitting.
}		}
		MattDevereauUnsubmitted Done Reply Inline Actions Typo, comaptible -> compatible. It might be nice to include the "-" on the front as well. MattDevereau: Typo, comaptible -> compatible. It might be nice to include the "-" on the front as well.
		MattDevereauUnsubmitted Done Reply Inline Actions Using just `if (ForceStreamingCompatibleSVE)` is passing check-llvm for me, so either this can be simplified or a test is missing. If a test is missing, this check is copy/pasted from `isNeonAvailable` above. We can separate this into a new function such as bool AArch64Subtarget::isForceStreamingCompatibleSVE() const{ return ForceStreamingCompatibleSVE.getNumOccurrences() > 0; } Then we can simplify things to bool AArch64Subtarget::isFullSVEAvailable() const{ return hasSVEorSME() && !StreamingSVEMode && !StreamingCompatibleSVEMode && !isForceStreamingCompatibleSVE(); } And provide a nice check from the subtarget. If `ForceStreamingCompatibleSVE.getNumOccurrences() > 0;` and `ForceStreamingCompatibleSVE` are equivalent then you can just do `return hasSVEorSME() && !StreamingSVEMode && !StreamingCompatibleSVEMode && !ForceStreamingCompatibleSVEMode;` MattDevereau: Using just `if (ForceStreamingCompatibleSVE)` is passing check-llvm for me, so either this can…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions You're right, in this case they are equivalent. sdesmalen: You're right, in this case they are equivalent.

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Show First 20 Lines • Show All 1,937 Lines • ▼ Show 20 Lines	if (!ST->isNeonAvailable() && !EnableFixedwidthAutovecInStreamingMode)
return TypeSize::getFixed(0);		return TypeSize::getFixed(0);

if (ST->hasSVE())		if (ST->hasSVE())
return TypeSize::getFixed(		return TypeSize::getFixed(
std::max(ST->getMinSVEVectorSizeInBits(), 128u));		std::max(ST->getMinSVEVectorSizeInBits(), 128u));

return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);		return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
case TargetTransformInfo::RGK_ScalableVector:		case TargetTransformInfo::RGK_ScalableVector:
if ((ST->isStreaming() \|\| ST->isStreamingCompatible()) &&		if (!ST->isSVEAvailable() && !EnableScalableAutovecInStreamingMode)
!EnableScalableAutovecInStreamingMode)
return TypeSize::getScalable(0);		return TypeSize::getScalable(0);

return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);		return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
}		}
llvm_unreachable("Unsupported register kind");		llvm_unreachable("Unsupported register kind");
}		}

bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,		bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
▲ Show 20 Lines • Show All 1,918 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mattr=+sve < %s \| FileCheck %s			; RUN: llc -mattr=+sve < %s \| FileCheck %s

	; FIXME: Streaming-compatible SVE doesn't include FADDA, so this shouldn't compile!			; Streaming-compatible SVE doesn't include FADDA, so this shouldn't compile!
	; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s \| FileCheck %s			; RUN: not --crash llc -mattr=+sve -force-streaming-compatible-sve < %s

				paulwalker-armUnsubmitted Done Reply Inline Actions I'm not sure it's worth having RUN lines that we know will crash? If we care about ensuring such IR isn't code generated then we should update the IR Verifier to generate a clean failure rather than require a compiler crash. paulwalker-arm: I'm not sure it's worth having RUN lines that we know will crash? If we care about ensuring such…
				sdesmalenAuthorUnsubmitted Done Reply Inline Actions I'd rather keep the RUN line to make sure the code I added in the patch is protected by a test and therefore cannot be removed without breaking something. sdesmalen: I'd rather keep the RUN line to make sure the code I added in the patch is protected by a test…
				paulwalker-armUnsubmitted Done Reply Inline Actions Fair enough. paulwalker-arm: Fair enough.
	target triple = "aarch64-linux-gnu"			target triple = "aarch64-linux-gnu"

	; FADD			; FADD

	define half @fadda_nxv2f16(half %init, <vscale x 2 x half> %a) {			define half @fadda_nxv2f16(half %init, <vscale x 2 x half> %a) {
	; CHECK-LABEL: fadda_nxv2f16:			; CHECK-LABEL: fadda_nxv2f16:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.d			; CHECK-NEXT: ptrue p0.d
	▲ Show 20 Lines • Show All 151 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s \| FileCheck %s			; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s \| FileCheck %s

	target triple = "aarch64-unknown-linux-gnu"			target triple = "aarch64-unknown-linux-gnu"

	;			;
	; FADDA			; FADDA
	;			;

	define half @fadda_v4f16(half %start, <4 x half> %a) {			define half @fadda_v4f16(half %start, <4 x half> %a) {
	; CHECK-LABEL: fadda_v4f16:			; CHECK-LABEL: fadda_v4f16:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.h, vl4
	; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
	; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1			; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
	; CHECK-NEXT: fadda h0, p0, h0, z1.h			; CHECK-NEXT: fadd h0, h0, h1
	; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0			; CHECK-NEXT: mov z2.h, z1.h[1]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[2]
				; CHECK-NEXT: mov z1.h, z1.h[3]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: fadd h0, h0, h1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)			%res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
	ret half %res			ret half %res
	}			}

	define half @fadda_v8f16(half %start, <8 x half> %a) {			define half @fadda_v8f16(half %start, <8 x half> %a) {
	; CHECK-LABEL: fadda_v8f16:			; CHECK-LABEL: fadda_v8f16:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.h, vl8
	; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
	; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1			; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
	; CHECK-NEXT: fadda h0, p0, h0, z1.h			; CHECK-NEXT: fadd h0, h0, h1
	; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0			; CHECK-NEXT: mov z2.h, z1.h[1]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[2]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[3]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[4]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[5]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[6]
				; CHECK-NEXT: mov z1.h, z1.h[7]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: fadd h0, h0, h1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)			%res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
	ret half %res			ret half %res
	}			}

	define half @fadda_v16f16(half %start, ptr %a) {			define half @fadda_v16f16(half %start, ptr %a) {
	; CHECK-LABEL: fadda_v16f16:			; CHECK-LABEL: fadda_v16f16:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.h, vl8
	; CHECK-NEXT: ldr q1, [x0]			; CHECK-NEXT: ldr q1, [x0]
	; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0			; CHECK-NEXT: fadd h0, h0, h1
	; CHECK-NEXT: fadda h0, p0, h0, z1.h			; CHECK-NEXT: mov z2.h, z1.h[1]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[2]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[3]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[4]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[5]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[6]
				; CHECK-NEXT: mov z1.h, z1.h[7]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: fadd h0, h0, h1
	; CHECK-NEXT: ldr q1, [x0, #16]			; CHECK-NEXT: ldr q1, [x0, #16]
	; CHECK-NEXT: fadda h0, p0, h0, z1.h			; CHECK-NEXT: mov z2.h, z1.h[1]
	; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0			; CHECK-NEXT: fadd h0, h0, h1
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[2]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[3]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[4]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[5]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: mov z2.h, z1.h[6]
				; CHECK-NEXT: mov z1.h, z1.h[7]
				; CHECK-NEXT: fadd h0, h0, h2
				; CHECK-NEXT: fadd h0, h0, h1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%op = load <16 x half>, ptr %a			%op = load <16 x half>, ptr %a
	%res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)			%res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
	ret half %res			ret half %res
	}			}

	define float @fadda_v2f32(float %start, <2 x float> %a) {			define float @fadda_v2f32(float %start, <2 x float> %a) {
	; CHECK-LABEL: fadda_v2f32:			; CHECK-LABEL: fadda_v2f32:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.s, vl2
	; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
	; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1			; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
	; CHECK-NEXT: fadda s0, p0, s0, z1.s			; CHECK-NEXT: fadd s0, s0, s1
	; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0			; CHECK-NEXT: mov z1.s, z1.s[1]
				; CHECK-NEXT: fadd s0, s0, s1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)			%res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
	ret float %res			ret float %res
	}			}

	define float @fadda_v4f32(float %start, <4 x float> %a) {			define float @fadda_v4f32(float %start, <4 x float> %a) {
	; CHECK-LABEL: fadda_v4f32:			; CHECK-LABEL: fadda_v4f32:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.s, vl4
	; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
	; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1			; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
	; CHECK-NEXT: fadda s0, p0, s0, z1.s			; CHECK-NEXT: fadd s0, s0, s1
	; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0			; CHECK-NEXT: mov z2.s, z1.s[1]
				; CHECK-NEXT: fadd s0, s0, s2
				; CHECK-NEXT: mov z2.s, z1.s[2]
				; CHECK-NEXT: mov z1.s, z1.s[3]
				; CHECK-NEXT: fadd s0, s0, s2
				; CHECK-NEXT: fadd s0, s0, s1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)			%res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
	ret float %res			ret float %res
	}			}

	define float @fadda_v8f32(float %start, ptr %a) {			define float @fadda_v8f32(float %start, ptr %a) {
	; CHECK-LABEL: fadda_v8f32:			; CHECK-LABEL: fadda_v8f32:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.s, vl4
	; CHECK-NEXT: ldr q1, [x0]			; CHECK-NEXT: ldr q1, [x0]
	; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0			; CHECK-NEXT: fadd s0, s0, s1
	; CHECK-NEXT: fadda s0, p0, s0, z1.s			; CHECK-NEXT: mov z2.s, z1.s[1]
				; CHECK-NEXT: fadd s0, s0, s2
				; CHECK-NEXT: mov z2.s, z1.s[2]
				; CHECK-NEXT: mov z1.s, z1.s[3]
				; CHECK-NEXT: fadd s0, s0, s2
				; CHECK-NEXT: fadd s0, s0, s1
	; CHECK-NEXT: ldr q1, [x0, #16]			; CHECK-NEXT: ldr q1, [x0, #16]
	; CHECK-NEXT: fadda s0, p0, s0, z1.s			; CHECK-NEXT: mov z2.s, z1.s[1]
	; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0			; CHECK-NEXT: fadd s0, s0, s1
				; CHECK-NEXT: fadd s0, s0, s2
				; CHECK-NEXT: mov z2.s, z1.s[2]
				; CHECK-NEXT: mov z1.s, z1.s[3]
				; CHECK-NEXT: fadd s0, s0, s2
				; CHECK-NEXT: fadd s0, s0, s1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%op = load <8 x float>, ptr %a			%op = load <8 x float>, ptr %a
	%res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)			%res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
	ret float %res			ret float %res
	}			}

	define double @fadda_v1f64(double %start, <1 x double> %a) {			define double @fadda_v1f64(double %start, <1 x double> %a) {
	; CHECK-LABEL: fadda_v1f64:			; CHECK-LABEL: fadda_v1f64:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1			; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
	; CHECK-NEXT: fadd d0, d0, d1			; CHECK-NEXT: fadd d0, d0, d1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)			%res = call double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
	ret double %res			ret double %res
	}			}

	define double @fadda_v2f64(double %start, <2 x double> %a) {			define double @fadda_v2f64(double %start, <2 x double> %a) {
	; CHECK-LABEL: fadda_v2f64:			; CHECK-LABEL: fadda_v2f64:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.d, vl2
	; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
	; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1			; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
	; CHECK-NEXT: fadda d0, p0, d0, z1.d			; CHECK-NEXT: fadd d0, d0, d1
	; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0			; CHECK-NEXT: mov z1.d, z1.d[1]
				; CHECK-NEXT: fadd d0, d0, d1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)			%res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
	ret double %res			ret double %res
	}			}

	define double @fadda_v4f64(double %start, ptr %a) {			define double @fadda_v4f64(double %start, ptr %a) {
	; CHECK-LABEL: fadda_v4f64:			; CHECK-LABEL: fadda_v4f64:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.d, vl2
	; CHECK-NEXT: ldr q1, [x0]			; CHECK-NEXT: ldr q1, [x0]
	; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0			; CHECK-NEXT: fadd d0, d0, d1
	; CHECK-NEXT: fadda d0, p0, d0, z1.d			; CHECK-NEXT: mov z1.d, z1.d[1]
				; CHECK-NEXT: fadd d0, d0, d1
	; CHECK-NEXT: ldr q1, [x0, #16]			; CHECK-NEXT: ldr q1, [x0, #16]
	; CHECK-NEXT: fadda d0, p0, d0, z1.d			; CHECK-NEXT: fadd d0, d0, d1
	; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0			; CHECK-NEXT: mov z1.d, z1.d[1]
				; CHECK-NEXT: fadd d0, d0, d1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%op = load <4 x double>, ptr %a			%op = load <4 x double>, ptr %a
	%res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)			%res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
	ret double %res			ret double %res
	}			}

	;			;
	; FADDV			; FADDV
	▲ Show 20 Lines • Show All 637 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SME] Create new interface for isSVEAvailable.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 555336

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64Subtarget.h

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SME] Create new interface for isSVEAvailable.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 555336

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64Subtarget.h

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll

[AArch64][SME] Create new interface for isSVEAvailable.
ClosedPublic