Diff 247584

llvm/include/llvm/IR/IntrinsicsAArch64.td

	Show First 20 Lines • Show All 1,270 Lines • ▼ Show 20 Lines

	//			//
	// Prefetch			// Prefetch
	//			//

	def int_aarch64_sve_prf : Intrinsic<[], [llvm_anyvector_ty,			def int_aarch64_sve_prf : Intrinsic<[], [llvm_anyvector_ty,
	llvm_ptr_ty,			llvm_ptr_ty,
	llvm_i32_ty], [IntrArgMemOnly]>;			llvm_i32_ty], [IntrArgMemOnly]>;

				sdesmalenUnsubmitted Not Done Reply Inline Actions Can you derive from AdvSIMD_GatherLoad_VectorBase_Intrinsic instead? (and something similar for the scatter store) This also makes it more clear that these have the exact same form as the normal gathers. sdesmalen: Can you derive from AdvSIMD_GatherLoad_VectorBase_Intrinsic instead? (and something similar for…
	//			//
	// Scalar to vector operations			// Scalar to vector operations
	//			//

	def int_aarch64_sve_dup : AdvSIMD_SVE_DUP_Intrinsic;			def int_aarch64_sve_dup : AdvSIMD_SVE_DUP_Intrinsic;

	def int_aarch64_sve_index : AdvSIMD_SVE_Index_Intrinsic;			def int_aarch64_sve_index : AdvSIMD_SVE_Index_Intrinsic;

	▲ Show 20 Lines • Show All 468 Lines • ▼ Show 20 Lines
	//			//
	// First-faulting gather loads: vector base + scalar offset			// First-faulting gather loads: vector base + scalar offset
	//			//

	def int_aarch64_sve_ldff1_gather_scalar_offset : AdvSIMD_GatherLoad_VS_Intrinsic;			def int_aarch64_sve_ldff1_gather_scalar_offset : AdvSIMD_GatherLoad_VS_Intrinsic;


	//			//
				// Non-temporal gather loads: scalar base + vector offsets
				//

				// 64 bit unscaled offsets
				def int_aarch64_sve_ldnt1_gather : AdvSIMD_GatherLoad_SV_64b_Offsets_Intrinsic;

				// 32 bit unscaled offsets, zero (uxtw) extended to 64 bits
				def int_aarch64_sve_ldnt1_gather_uxtw : AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic;

				//
				// Non-temporal gather loads: vector base + scalar offset
				//

				def int_aarch64_sve_ldnt1_gather_scalar_offset : AdvSIMD_GatherLoad_VS_Intrinsic;

				//
	// Scatter stores: scalar base + vector offsets			// Scatter stores: scalar base + vector offsets
	//			//

	// 64 bit unscaled offsets			// 64 bit unscaled offsets
	def int_aarch64_sve_st1_scatter : AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic;			def int_aarch64_sve_st1_scatter : AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic;

	// 64 bit scaled offsets			// 64 bit scaled offsets
	def int_aarch64_sve_st1_scatter_index			def int_aarch64_sve_st1_scatter_index
	Show All 15 Lines

	//			//
	// Scatter stores: vector base + scalar offset			// Scatter stores: vector base + scalar offset
	//			//

	def int_aarch64_sve_st1_scatter_scalar_offset : AdvSIMD_ScatterStore_VS_Intrinsic;			def int_aarch64_sve_st1_scatter_scalar_offset : AdvSIMD_ScatterStore_VS_Intrinsic;

	//			//
				// Non-temporal scatter stores: scalar base + vector offsets
				//

				// 64 bit unscaled offsets
				def int_aarch64_sve_stnt1_scatter : AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic;

				// 32 bit unscaled offsets, zero (uxtw) extended to 64 bits
				def int_aarch64_sve_stnt1_scatter_uxtw : AdvSIMD_ScatterStore_SV_32b_Offsets_Intrinsic;

				//
				// Non-temporal scatter stores: vector base + scalar offset
				//

				def int_aarch64_sve_stnt1_scatter_scalar_offset : AdvSIMD_ScatterStore_VS_Intrinsic;

				//
	// SVE2 - Uniform DSP operations			// SVE2 - Uniform DSP operations
	//			//

	def int_aarch64_sve_saba : AdvSIMD_3VectorArg_Intrinsic;			def int_aarch64_sve_saba : AdvSIMD_3VectorArg_Intrinsic;
	def int_aarch64_sve_shadd : AdvSIMD_Pred2VectorArg_Intrinsic;			def int_aarch64_sve_shadd : AdvSIMD_Pred2VectorArg_Intrinsic;
	def int_aarch64_sve_shsub : AdvSIMD_Pred2VectorArg_Intrinsic;			def int_aarch64_sve_shsub : AdvSIMD_Pred2VectorArg_Intrinsic;
	def int_aarch64_sve_shsubr : AdvSIMD_Pred2VectorArg_Intrinsic;			def int_aarch64_sve_shsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
	def int_aarch64_sve_sli : AdvSIMD_2VectorArgIndexed_Intrinsic;			def int_aarch64_sve_sli : AdvSIMD_2VectorArgIndexed_Intrinsic;
	▲ Show 20 Lines • Show All 322 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 255 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
GLDFF1S,		GLDFF1S,
GLDFF1S_SCALED,		GLDFF1S_SCALED,
GLDFF1S_UXTW,		GLDFF1S_UXTW,
GLDFF1S_SXTW,		GLDFF1S_SXTW,
GLDFF1S_UXTW_SCALED,		GLDFF1S_UXTW_SCALED,
GLDFF1S_SXTW_SCALED,		GLDFF1S_SXTW_SCALED,
GLDFF1S_IMM,		GLDFF1S_IMM,

		// Non-temporal gather loads
		GLDNT1,
		GLDNT1S,

// Scatter store		// Scatter store
SST1,		SST1,
SST1_SCALED,		SST1_SCALED,
SST1_UXTW,		SST1_UXTW,
SST1_SXTW,		SST1_SXTW,
SST1_UXTW_SCALED,		SST1_UXTW_SCALED,
SST1_SXTW_SCALED,		SST1_SXTW_SCALED,
SST1_IMM,		SST1_IMM,

		// Non-temporal scatter store
		SSTNT1,

// Strict (exception-raising) floating point comparison		// Strict (exception-raising) floating point comparison
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,		STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCMPE,		STRICT_FCMPE,

// NEON Load/Store with post-increment base updates		// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,		LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,		LD3post,
LD4post,		LD4post,
▲ Show 20 Lines • Show All 582 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,432 Lines • ▼ Show 20 Lines	const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::GLDFF1S_SCALED: return "AArch64ISD::GLDFF1S_SCALED";		case AArch64ISD::GLDFF1S_SCALED: return "AArch64ISD::GLDFF1S_SCALED";
case AArch64ISD::GLDFF1S_SXTW: return "AArch64ISD::GLDFF1S_SXTW";		case AArch64ISD::GLDFF1S_SXTW: return "AArch64ISD::GLDFF1S_SXTW";
case AArch64ISD::GLDFF1S_UXTW: return "AArch64ISD::GLDFF1S_UXTW";		case AArch64ISD::GLDFF1S_UXTW: return "AArch64ISD::GLDFF1S_UXTW";
case AArch64ISD::GLDFF1S_SXTW_SCALED:		case AArch64ISD::GLDFF1S_SXTW_SCALED:
return "AArch64ISD::GLDFF1S_SXTW_SCALED";		return "AArch64ISD::GLDFF1S_SXTW_SCALED";
case AArch64ISD::GLDFF1S_UXTW_SCALED:		case AArch64ISD::GLDFF1S_UXTW_SCALED:
return "AArch64ISD::GLDFF1S_UXTW_SCALED";		return "AArch64ISD::GLDFF1S_UXTW_SCALED";
case AArch64ISD::GLDFF1S_IMM: return "AArch64ISD::GLDFF1S_IMM";		case AArch64ISD::GLDFF1S_IMM: return "AArch64ISD::GLDFF1S_IMM";

		case AArch64ISD::GLDNT1: return "AArch64ISD::GLDNT1";
		case AArch64ISD::GLDNT1S: return "AArch64ISD::GLDNT1S";

case AArch64ISD::SST1: return "AArch64ISD::SST1";		case AArch64ISD::SST1: return "AArch64ISD::SST1";
case AArch64ISD::SST1_SCALED: return "AArch64ISD::SST1_SCALED";		case AArch64ISD::SST1_SCALED: return "AArch64ISD::SST1_SCALED";
case AArch64ISD::SST1_SXTW: return "AArch64ISD::SST1_SXTW";		case AArch64ISD::SST1_SXTW: return "AArch64ISD::SST1_SXTW";
case AArch64ISD::SST1_UXTW: return "AArch64ISD::SST1_UXTW";		case AArch64ISD::SST1_UXTW: return "AArch64ISD::SST1_UXTW";
case AArch64ISD::SST1_SXTW_SCALED: return "AArch64ISD::SST1_SXTW_SCALED";		case AArch64ISD::SST1_SXTW_SCALED: return "AArch64ISD::SST1_SXTW_SCALED";
case AArch64ISD::SST1_UXTW_SCALED: return "AArch64ISD::SST1_UXTW_SCALED";		case AArch64ISD::SST1_UXTW_SCALED: return "AArch64ISD::SST1_UXTW_SCALED";
case AArch64ISD::SST1_IMM: return "AArch64ISD::SST1_IMM";		case AArch64ISD::SST1_IMM: return "AArch64ISD::SST1_IMM";

		case AArch64ISD::SSTNT1: return "AArch64ISD::SSTNT1";

case AArch64ISD::LDP: return "AArch64ISD::LDP";		case AArch64ISD::LDP: return "AArch64ISD::LDP";
case AArch64ISD::STP: return "AArch64ISD::STP";		case AArch64ISD::STP: return "AArch64ISD::STP";
case AArch64ISD::STNP: return "AArch64ISD::STNP";		case AArch64ISD::STNP: return "AArch64ISD::STNP";
case AArch64ISD::DUP_PRED: return "AArch64ISD::DUP_PRED";		case AArch64ISD::DUP_PRED: return "AArch64ISD::DUP_PRED";
case AArch64ISD::INDEX_VECTOR: return "AArch64ISD::INDEX_VECTOR";		case AArch64ISD::INDEX_VECTOR: return "AArch64ISD::INDEX_VECTOR";
}		}
return nullptr;		return nullptr;
}		}
▲ Show 20 Lines • Show All 8,996 Lines • ▼ Show 20 Lines	static SDValue performSVEAndCombine(SDNode *N,
case AArch64ISD::GLD1_IMM:		case AArch64ISD::GLD1_IMM:
case AArch64ISD::GLDFF1:		case AArch64ISD::GLDFF1:
case AArch64ISD::GLDFF1_SCALED:		case AArch64ISD::GLDFF1_SCALED:
case AArch64ISD::GLDFF1_SXTW:		case AArch64ISD::GLDFF1_SXTW:
case AArch64ISD::GLDFF1_SXTW_SCALED:		case AArch64ISD::GLDFF1_SXTW_SCALED:
case AArch64ISD::GLDFF1_UXTW:		case AArch64ISD::GLDFF1_UXTW:
case AArch64ISD::GLDFF1_UXTW_SCALED:		case AArch64ISD::GLDFF1_UXTW_SCALED:
case AArch64ISD::GLDFF1_IMM:		case AArch64ISD::GLDFF1_IMM:
		case AArch64ISD::GLDNT1:
MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();		MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
break;		break;
default:		default:
return SDValue();		return SDValue();
}		}

if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))		if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))
return Src;		return Src;
▲ Show 20 Lines • Show All 2,171 Lines • ▼ Show 20 Lines	static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,

// Depending on the addressing mode, this is either a pointer or a vector of		// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)		// pointers (that fits into one register)
SDValue Base = N->getOperand(4);		SDValue Base = N->getOperand(4);
// Depending on the addressing mode, this is either a single offset or a		// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)		// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(5);		SDValue Offset = N->getOperand(5);

		// In the case of non-temporal scatter stores there's only one SVE instruction
		// per data-size: "vector + scalar", i.e.
		// * stnt1{b\|h\|w\|d} { z0.s }, p0, [z0.s, x0]
		// Since we do have intrinsics that allow the arguments to be in a different
		// order, we may need to swap them to match the spec.
		if (Opcode == AArch64ISD::SSTNT1 && Offset.getValueType().isVector())
		sdesmalenUnsubmitted Not Done Reply Inline Actions Have you experimented with moving this code out of this function (same for the `SST1_IMM` case below), and passing in the Base and Offset (and possibly Chain/PG) as operands to `performScatterStoreCombine`? In the case-statement for `aarch64_sve_st1_scatter_scalar_offset` and `aarch64_sve_stnt1_scatter_scalar_offset` you can then do the swap. That seems a bit better than special-handling these cases in this combine itself. sdesmalen: Have you experimented with moving this code out of this function (same for the `SST1_IMM` case…
		andwarAuthorUnsubmitted Done Reply Inline Actions Yes, but currently: it is very explicit that `AArch64ISD::SSTNT1` requires some special treatment, which IMO is a bit counterintuitive (hopefully the comments make it clear!) I'm only passing one argument (`N`), instead of 3 (`N`, `SDValue`, `SDValue`) to `performScatterStoreCombine` (not counting the other arguments), so the call-site is cleaner if we keep things as they are. Also, `SST1_IMM` requires 2 conditions to be checked and the opcode to be updated (and there are 2 possibilities here, either `SST1_UXTW` or `SST1`). Swapping `Base` and `Offset` when calling `performScatterStoreCombine` wouldn't be enough to replicate this. Having said that, I've been looking at `performScatterStoreCombine`/ `performGatherLoadCombine` for a while now and I wouldn't be surprised if I'm over-engineering this :) andwar: Yes, but currently: * it is very explicit that `AArch64ISD::SSTNT1` requires some special…
		sdesmalenUnsubmitted Not Done Reply Inline Actions Fair enough. If this function ever needs to be extended for more intrinsics, we may want to reconsider generalising this, but this is fine for now then. sdesmalen: Fair enough. If this function ever needs to be extended for more intrinsics, we may want to…
		std::swap(Base, Offset);

// SST1_IMM requires that the offset is an immediate that is:		// SST1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,		// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],		// * in the range [0, 31 x #SizeInBytes],
// where #SizeInBytes is the size in bytes of the stored items. For		// where #SizeInBytes is the size in bytes of the stored items. For
// immediates outside that range and non-immediate scalar offsets use SST1 or		// immediates outside that range and non-immediate scalar offsets use SST1 or
// SST1_UXTW instead.		// SST1_UXTW instead.
if (Opcode == AArch64ISD::SST1_IMM) {		if (Opcode == AArch64ISD::SST1_IMM) {
uint64_t MaxIndex = 31;		uint64_t MaxIndex = 31;
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines	static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,

// Depending on the addressing mode, this is either a pointer or a vector of		// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)		// pointers (that fits into one register)
SDValue Base = N->getOperand(3);		SDValue Base = N->getOperand(3);
// Depending on the addressing mode, this is either a single offset or a		// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)		// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(4);		SDValue Offset = N->getOperand(4);

		// In the case of non-temporal gather loads there's only one SVE instruction
		// per data-size: "vector + scalar", i.e.
		// * ldnt1{b\|h\|w\|d} { z0.s }, p0/z, [z0.s, x0]
		// Since we do have intrinsics that allow the arguments to be in a different
		// order, we may need to swap them to match the spec.
		if (Opcode == AArch64ISD::GLDNT1 && Offset.getValueType().isVector())
		std::swap(Base, Offset);

// GLD{FF}1_IMM requires that the offset is an immediate that is:		// GLD{FF}1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,		// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],		// * in the range [0, 31 x #SizeInBytes],
// where #SizeInBytes is the size in bytes of the loaded items. For		// where #SizeInBytes is the size in bytes of the loaded items. For
// immediates outside that range and non-immediate scalar offsets use GLD1 or		// immediates outside that range and non-immediate scalar offsets use GLD1 or
// GLD1_UXTW instead.		// GLD1_UXTW instead.
if (Opcode == AArch64ISD::GLD1_IMM \|\| Opcode == AArch64ISD::GLDFF1_IMM) {		if (Opcode == AArch64ISD::GLD1_IMM \|\| Opcode == AArch64ISD::GLDFF1_IMM) {
uint64_t MaxIndex = 31;		uint64_t MaxIndex = 31;
▲ Show 20 Lines • Show All 113 Lines • ▼ Show 20 Lines	case AArch64ISD::GLDFF1_UXTW:
NewOpc = AArch64ISD::GLDFF1S_UXTW;		NewOpc = AArch64ISD::GLDFF1S_UXTW;
break;		break;
case AArch64ISD::GLDFF1_UXTW_SCALED:		case AArch64ISD::GLDFF1_UXTW_SCALED:
NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED;		NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED;
break;		break;
case AArch64ISD::GLDFF1_IMM:		case AArch64ISD::GLDFF1_IMM:
NewOpc = AArch64ISD::GLDFF1S_IMM;		NewOpc = AArch64ISD::GLDFF1S_IMM;
break;		break;
		case AArch64ISD::GLDNT1:
		NewOpc = AArch64ISD::GLDNT1S;
		break;
default:		default:
return SDValue();		return SDValue();
}		}

EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();		EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();		EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();

if ((SignExtSrcVT != SrcMemVT) \|\| !Src.hasOneUse())		if ((SignExtSrcVT != SrcMemVT) \|\| !Src.hasOneUse())
▲ Show 20 Lines • Show All 97 Lines • ▼ Show 20 Lines	case ISD::INTRINSIC_W_CHAIN:
case Intrinsic::aarch64_neon_st1x3:		case Intrinsic::aarch64_neon_st1x3:
case Intrinsic::aarch64_neon_st1x4:		case Intrinsic::aarch64_neon_st1x4:
case Intrinsic::aarch64_neon_st2lane:		case Intrinsic::aarch64_neon_st2lane:
case Intrinsic::aarch64_neon_st3lane:		case Intrinsic::aarch64_neon_st3lane:
case Intrinsic::aarch64_neon_st4lane:		case Intrinsic::aarch64_neon_st4lane:
return performNEONPostLDSTCombine(N, DCI, DAG);		return performNEONPostLDSTCombine(N, DCI, DAG);
case Intrinsic::aarch64_sve_ldnt1:		case Intrinsic::aarch64_sve_ldnt1:
return performLDNT1Combine(N, DAG);		return performLDNT1Combine(N, DAG);
		case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
		return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1);
		case Intrinsic::aarch64_sve_ldnt1_gather:
		return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1);
		case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
		return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1);
case Intrinsic::aarch64_sve_ldnf1:		case Intrinsic::aarch64_sve_ldnf1:
return performLDNF1Combine(N, DAG, AArch64ISD::LDNF1);		return performLDNF1Combine(N, DAG, AArch64ISD::LDNF1);
case Intrinsic::aarch64_sve_ldff1:		case Intrinsic::aarch64_sve_ldff1:
return performLDNF1Combine(N, DAG, AArch64ISD::LDFF1);		return performLDNF1Combine(N, DAG, AArch64ISD::LDFF1);
case Intrinsic::aarch64_sve_stnt1:		case Intrinsic::aarch64_sve_stnt1:
return performSTNT1Combine(N, DAG);		return performSTNT1Combine(N, DAG);
		case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
		return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1);
		case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
		return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1);
		case Intrinsic::aarch64_sve_stnt1_scatter:
		return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1);
case Intrinsic::aarch64_sve_ld1_gather:		case Intrinsic::aarch64_sve_ld1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1);		return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1);
case Intrinsic::aarch64_sve_ld1_gather_index:		case Intrinsic::aarch64_sve_ld1_gather_index:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SCALED);		return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SCALED);
case Intrinsic::aarch64_sve_ld1_gather_sxtw:		case Intrinsic::aarch64_sve_ld1_gather_sxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW,		return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW,
/*OnlyPackedOffsets=*/false);		/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw:		case Intrinsic::aarch64_sve_ld1_gather_uxtw:
▲ Show 20 Lines • Show All 764 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Show All 21 Lines
def AArch64ldnf1 : SDNode<"AArch64ISD::LDNF1", SDT_AArch64_LDNF1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ldnf1 : SDNode<"AArch64ISD::LDNF1", SDT_AArch64_LDNF1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1 : SDNode<"AArch64ISD::LDFF1", SDT_AArch64_LDNF1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ldff1 : SDNode<"AArch64ISD::LDFF1", SDT_AArch64_LDNF1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;

def AArch64ldnf1s : SDNode<"AArch64ISD::LDNF1S", SDT_AArch64_LDNF1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ldnf1s : SDNode<"AArch64ISD::LDNF1S", SDT_AArch64_LDNF1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1s : SDNode<"AArch64ISD::LDFF1S", SDT_AArch64_LDNF1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ldff1s : SDNode<"AArch64ISD::LDFF1S", SDT_AArch64_LDNF1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;

// Gather loads - node definitions		// Gather loads - node definitions
//		//
def SDT_AArch64_GATHER_SV : SDTypeProfile<1, 4, [		def SDT_AArch64_GATHER_SV : SDTypeProfile<1, 4, [
		andwarAuthorUnsubmitted Done Reply Inline Actions This should be `SDT` instead of `STD`. I will fix this. andwar: This should be `SDT` instead of `STD`. I will fix this.
SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>,		SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>,
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>		SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;		]>;

def SDT_AArch64_GATHER_VS : SDTypeProfile<1, 4, [		def SDT_AArch64_GATHER_VS : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>,		SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>,
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>		SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;		]>;
Show All 25 Lines
def AArch64ldff1s_gather : SDNode<"AArch64ISD::GLDFF1S", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ldff1s_gather : SDNode<"AArch64ISD::GLDFF1S", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1s_gather_scaled : SDNode<"AArch64ISD::GLDFF1S_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ldff1s_gather_scaled : SDNode<"AArch64ISD::GLDFF1S_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1s_gather_uxtw : SDNode<"AArch64ISD::GLDFF1S_UXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ldff1s_gather_uxtw : SDNode<"AArch64ISD::GLDFF1S_UXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1s_gather_sxtw : SDNode<"AArch64ISD::GLDFF1S_SXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ldff1s_gather_sxtw : SDNode<"AArch64ISD::GLDFF1S_SXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1s_gather_uxtw_scaled : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ldff1s_gather_uxtw_scaled : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1s_gather_sxtw_scaled : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ldff1s_gather_sxtw_scaled : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1s_gather_imm : SDNode<"AArch64ISD::GLDFF1S_IMM", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ldff1s_gather_imm : SDNode<"AArch64ISD::GLDFF1S_IMM", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;

		def AArch64ldnt1_gather : SDNode<"AArch64ISD::GLDNT1", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
		def AArch64ldnt1s_gather : SDNode<"AArch64ISD::GLDNT1S", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;

// Scatter stores - node definitions		// Scatter stores - node definitions
//		//
def SDT_AArch64_SCATTER_SV : SDTypeProfile<0, 5, [		def SDT_AArch64_SCATTER_SV : SDTypeProfile<0, 5, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>,		SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>,
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>		SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;		]>;

def SDT_AArch64_SCATTER_VS : SDTypeProfile<0, 5, [		def SDT_AArch64_SCATTER_VS : SDTypeProfile<0, 5, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>,		SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>,
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>		SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;		]>;

def AArch64st1_scatter : SDNode<"AArch64ISD::SST1", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;		def AArch64st1_scatter : SDNode<"AArch64ISD::SST1", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;		def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;		def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
def AArch64st1_scatter_sxtw : SDNode<"AArch64ISD::SST1_SXTW", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;		def AArch64st1_scatter_sxtw : SDNode<"AArch64ISD::SST1_SXTW", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;		def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;		def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;		def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;

		def AArch64stnt1_scatter : SDNode<"AArch64ISD::SSTNT1", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>;

// AArch64 SVE/SVE2 - the remaining node definitions		// AArch64 SVE/SVE2 - the remaining node definitions
//		//

// SVE CNT/INC/RDVL		// SVE CNT/INC/RDVL
def sve_rdvl_imm : ComplexPattern<i32, 1, "SelectRDVLImm<-32, 31, 16>">;		def sve_rdvl_imm : ComplexPattern<i32, 1, "SelectRDVLImm<-32, 31, 16>">;
def sve_cnth_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 8>">;		def sve_cnth_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 8>">;
def sve_cntw_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 4>">;		def sve_cntw_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 4>">;
def sve_cntd_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 2>">;		def sve_cntd_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 2>">;
▲ Show 20 Lines • Show All 1,804 Lines • ▼ Show 20 Lines	let Predicates = [HasSVE2] in {

// SVE2 bitwise xor and rotate right by immediate		// SVE2 bitwise xor and rotate right by immediate
defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">;		defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">;

// SVE2 extract vector (immediate offset, constructive)		// SVE2 extract vector (immediate offset, constructive)
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;		def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;

// SVE2 non-temporal gather loads		// SVE2 non-temporal gather loads
defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;		defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00000, "ldnt1sb", AArch64ldnt1s_gather, nxv4i8>;
defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;		defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00001, "ldnt1b", AArch64ldnt1_gather, nxv4i8>;
defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;		defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00100, "ldnt1sh", AArch64ldnt1s_gather, nxv4i16>;
defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;		defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00101, "ldnt1h", AArch64ldnt1_gather, nxv4i16>;
defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;		defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b01001, "ldnt1w", AArch64ldnt1_gather, nxv4i32>;

defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;		defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10000, "ldnt1sb", AArch64ldnt1s_gather, nxv2i8>;
defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;		defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10010, "ldnt1b", AArch64ldnt1_gather, nxv2i8>;
defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;		defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10100, "ldnt1sh", AArch64ldnt1s_gather, nxv2i16>;
defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;		defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10110, "ldnt1h", AArch64ldnt1_gather, nxv2i16>;
defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;		defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11000, "ldnt1sw", AArch64ldnt1s_gather, nxv2i32>;
defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;		defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11010, "ldnt1w", AArch64ldnt1_gather, nxv2i32>;
defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;		defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11110, "ldnt1d", AArch64ldnt1_gather, nxv2i64>;

// SVE2 vector splice (constructive)		// SVE2 vector splice (constructive)
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;		defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;

// SVE2 non-temporal scatter stores		// SVE2 non-temporal scatter stores
defm STNT1B_ZZR_S : sve2_mem_sstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;		defm STNT1B_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b001, "stnt1b", AArch64stnt1_scatter, nxv4i8>;
defm STNT1H_ZZR_S : sve2_mem_sstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;		defm STNT1H_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b011, "stnt1h", AArch64stnt1_scatter, nxv4i16>;
defm STNT1W_ZZR_S : sve2_mem_sstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;		defm STNT1W_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b101, "stnt1w", AArch64stnt1_scatter, nxv4i32>;

defm STNT1B_ZZR_D : sve2_mem_sstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;		defm STNT1B_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b000, "stnt1b", AArch64stnt1_scatter, nxv2i8>;
defm STNT1H_ZZR_D : sve2_mem_sstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;		defm STNT1H_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b010, "stnt1h", AArch64stnt1_scatter, nxv2i16>;
defm STNT1W_ZZR_D : sve2_mem_sstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;		defm STNT1W_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b100, "stnt1w", AArch64stnt1_scatter, nxv2i32>;
defm STNT1D_ZZR_D : sve2_mem_sstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;		defm STNT1D_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b110, "stnt1d", AArch64stnt1_scatter, nxv2i64>;

// SVE2 table lookup (three sources)		// SVE2 table lookup (three sources)
defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl", int_aarch64_sve_tbl2>;		defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl", int_aarch64_sve_tbl2>;
defm TBX_ZZZ : sve2_int_perm_tbx<"tbx", int_aarch64_sve_tbx>;		defm TBX_ZZZ : sve2_int_perm_tbx<"tbx", int_aarch64_sve_tbx>;

// SVE2 integer compare scalar count and limit		// SVE2 integer compare scalar count and limit
defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege>;		defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege>;
defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt>;		defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt>;
▲ Show 20 Lines • Show All 47 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/SVEInstrFormats.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,065 Lines • ▼ Show 20 Lines	: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
let Inst{15-13} = 0b001;		let Inst{15-13} = 0b001;
let Inst{12-10} = Pg;		let Inst{12-10} = Pg;
let Inst{9-5} = Zn;		let Inst{9-5} = Zn;
let Inst{4-0} = Zt;		let Inst{4-0} = Zt;

let mayStore = 1;		let mayStore = 1;
}		}

multiclass sve2_mem_sstnt_vs<bits<3> opc, string asm,		multiclass sve2_mem_sstnt_vs_32_ptrs<bits<3> opc, string asm,
RegisterOperand listty, ZPRRegOp zprty> {		SDPatternOperator op,
def _REAL : sve2_mem_sstnt_vs_base<opc, asm, listty, zprty>;		ValueType vt> {
		def _REAL : sve2_mem_sstnt_vs_base<opc, asm, Z_s, ZPR32>;
		sdesmalenUnsubmitted Done Reply Inline Actions nit: vec_32b_ptrs ? sdesmalen: nit: vec_32b_ptrs ?
		andwarAuthorUnsubmitted Done Reply Inline Actions I'll use `vec_32_ptrs` instead. That's consistent with regular gather loads: https://github.com/llvm/llvm-project/blob/cff90c938b7be43de482ffb7a8a7fdbdf57c32a3/llvm/lib/Target/AArch64/SVEInstrFormats.td#L6257 andwar: I'll use `vec_32_ptrs` instead. That's consistent with regular gather loads: https://github.
		sdesmalenUnsubmitted Not Done Reply Inline Actions If you change the name of this class, you may want to update the parent class as well. sdesmalen: If you change the name of this class, you may want to update the parent class as well.
		andwarAuthorUnsubmitted Done Reply Inline Actions What about setting the name of this multi class to `sve2_mem_gldnt_vec_vs_32_ptrs` instead? This way updating the name of the base class is no longer needed. andwar: What about setting the name of this multi class to `sve2_mem_gldnt_vec_vs_32_ptrs` instead?

		def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
		(!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>;
		def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
		(!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>;
		def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
		(!cast<Instruction>(NAME # _REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>;

		def : Pat <(op (nxv4i32 ZPR32:$Zt), (nxv4i1 PPR3bAny:$Pg), (nxv4i32 ZPR32:$Zn), (i64 GPR64:$Rm), vt),
		(!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm)>;
		}

		multiclass sve2_mem_sstnt_vs_64_ptrs<bits<3> opc, string asm,
		SDPatternOperator op,
		ValueType vt> {
		def _REAL : sve2_mem_sstnt_vs_base<opc, asm, Z_d, ZPR64>;

def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",		def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;		(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",		def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;		(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",		def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;		(!cast<Instruction>(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;

		def : Pat <(op (nxv2i64 ZPR64:$Zt), (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64:$Rm), vt),
		(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
}		}

class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,		class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,
RegisterOperand VecList, RegisterOperand zprext>		RegisterOperand VecList, RegisterOperand zprext>
: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),		: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),
asm, "\t$Zt, $Pg, [$Rn, $Zm]",		asm, "\t$Zt, $Pg, [$Rn, $Zm]",
"",		"",
[]>, Sched<[]> {		[]>, Sched<[]> {
▲ Show 20 Lines • Show All 1,432 Lines • ▼ Show 20 Lines	: I<(outs VecList:$Zt), iops,
let Inst{14-13} = opc{1-0};		let Inst{14-13} = opc{1-0};
let Inst{12-10} = Pg;		let Inst{12-10} = Pg;
let Inst{9-5} = Zn;		let Inst{9-5} = Zn;
let Inst{4-0} = Zt;		let Inst{4-0} = Zt;

let mayLoad = 1;		let mayLoad = 1;
}		}

multiclass sve2_mem_gldnt_vs<bits<5> opc, string asm,		multiclass sve2_mem_gldnt_vs_32_ptrs<bits<5> opc, string asm,
RegisterOperand listty, ZPRRegOp zprty> {		SDPatternOperator op,
def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),		ValueType vt> {
asm, listty>;		def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm),
		asm, Z_s>;

def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",		def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;		(!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",		def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;		(!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",		def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;		(!cast<Instruction>(NAME # _REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>;

		def : Pat <(nxv4i32 (op (nxv4i1 PPR3bAny:$Pg), (nxv4i32 ZPR32:$Zd), (i64 GPR64:$Rm), vt)),
		(!cast<Instruction>(NAME # _REAL) PPR3bAny:$Pg, ZPR32:$Zd, GPR64:$Rm)>;
		}

		multiclass sve2_mem_gldnt_vs_64_ptrs<bits<5> opc, string asm,
		SDPatternOperator op,
		ValueType vt> {
		def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm),
		asm, Z_d>;

		def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
		(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>;
		def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
		(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>;
		def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
		(!cast<Instruction>(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;

		def : Pat <(nxv2i64 (op (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zd), (i64 GPR64:$Rm), vt)),
		(!cast<Instruction>(NAME # _REAL) PPR3bAny:$Pg, ZPR64:$Zd, GPR64:$Rm)>;
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// SVE Memory - 64-bit Gather Group		// SVE Memory - 64-bit Gather Group
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

// bit xs is '1' if offsets are signed		// bit xs is '1' if offsets are signed
// bit scaled is '1' if the offsets are scaled		// bit scaled is '1' if the offsets are scaled
▲ Show 20 Lines • Show All 722 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-32bit-unscaled-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s \| FileCheck %s

				;
				; LDNT1B, LDNT1H, LDNT1W: base + 32-bit unscaled offsets, zero (uxtw)
				; extended to 64 bits.
				; e.g. ldnt1h { z0.s }, p0/z, [z0.s, x0]
				;

				; LDNT1B
				define <vscale x 4 x i32> @gldnt1b_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
				; CHECK-LABEL: gldnt1b_s_uxtw:
				; CHECK: ldnt1b { z0.s }, p0/z, [z0.s, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
				i8* %base,
				<vscale x 4 x i32> %b)
				%res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; LDNT1H
				define <vscale x 4 x i32> @gldnt1h_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
				; CHECK-LABEL: gldnt1h_s_uxtw:
				; CHECK: ldnt1h { z0.s }, p0/z, [z0.s, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
				i16* %base,
				<vscale x 4 x i32> %b)
				%res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; LDNT1W
				define <vscale x 4 x i32> @gldnt1w_s_uxtw(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
				; CHECK-LABEL: gldnt1w_s_uxtw:
				; CHECK: ldnt1w { z0.s }, p0/z, [z0.s, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> %pg,
				i32* %base,
				<vscale x 4 x i32> %b)
				ret <vscale x 4 x i32> %load
				}

				define <vscale x 4 x float> @gldnt1w_s_uxtw_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
				; CHECK-LABEL: gldnt1w_s_uxtw_float:
				; CHECK: ldnt1w { z0.s }, p0/z, [z0.s, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32(<vscale x 4 x i1> %pg,
				float* %base,
				<vscale x 4 x i32> %b)
				ret <vscale x 4 x float> %load
				}

				;
				; LDNT1SB, LDNT1SH: base + 32-bit unscaled offsets, zero (uxtw)
				; extended to 64 bits.
				; e.g. ldnt1sh { z0.s }, p0/z, [z0.s, x0]
				;

				; LDNT1SB
				define <vscale x 4 x i32> @gldnt1sb_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
				; CHECK-LABEL: gldnt1sb_s_uxtw:
				; CHECK: ldnt1sb { z0.s }, p0/z, [z0.s, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
				i8* %base,
				<vscale x 4 x i32> %b)
				%res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; LDNT1SH
				define <vscale x 4 x i32> @gldnt1sh_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
				; CHECK-LABEL: gldnt1sh_s_uxtw:
				; CHECK: ldnt1sh { z0.s }, p0/z, [z0.s, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
				i16* %base,
				<vscale x 4 x i32> %b)
				%res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; LDNT1B/LDNT1SB
				declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
				declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4i8(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>)

				; LDNT1H/LDNT1SH
				declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
				declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)

				; LDNT1W
				declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
				declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)

				declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
				declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-unscaled-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s \| FileCheck %s

				;
				; LDNT1B, LDNT1W, LDNT1H, LDNT1D: base + 64-bit unscaled offsets
				; e.g. ldnt1h { z0.d }, p0/z, [z0.d, x0]
				;

				define <vscale x 2 x i64> @gldnt1b_d(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: gldnt1b_d:
				; CHECK: ldnt1b { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> %pg,
				i8* %base,
				<vscale x 2 x i64> %b)
				%res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				define <vscale x 2 x i64> @gldnt1h_d(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: gldnt1h_d:
				; CHECK: ldnt1h { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> %pg,
				i16* %base,
				<vscale x 2 x i64> %b)
				%res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				define <vscale x 2 x i64> @gldnt1w_d(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
				; CHECK-LABEL: gldnt1w_d:
				; CHECK: ldnt1w { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> %pg,
				i32* %base,
				<vscale x 2 x i64> %offsets)
				%res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				define <vscale x 2 x i64> @gldnt1d_d(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: gldnt1d_d:
				; CHECK: ldnt1d { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> %pg,
				i64* %base,
				<vscale x 2 x i64> %b)
				ret <vscale x 2 x i64> %load
				}

				define <vscale x 2 x double> @gldnt1d_d_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: gldnt1d_d_double:
				; CHECK: ldnt1d { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.nxv2f64(<vscale x 2 x i1> %pg,
				double* %base,
				<vscale x 2 x i64> %b)
				ret <vscale x 2 x double> %load
				}

				;
				; LDNT1SB, LDNT1SW, LDNT1SH: base + 64-bit unscaled offsets
				; e.g. ldnt1sh { z0.d }, p0/z, [z0.d, x0]
				;

				define <vscale x 2 x i64> @gldnt1sb_d(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: gldnt1sb_d:
				; CHECK: ldnt1sb { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> %pg,
				i8* %base,
				<vscale x 2 x i64> %b)
				%res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				define <vscale x 2 x i64> @gldnt1sh_d(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: gldnt1sh_d:
				; CHECK: ldnt1sh { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> %pg,
				i16* %base,
				<vscale x 2 x i64> %b)
				%res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				define <vscale x 2 x i64> @gldnt1sw_d(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
				; CHECK-LABEL: gldnt1sw_d:
				; CHECK: ldnt1sw { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> %pg,
				i32* %base,
				<vscale x 2 x i64> %offsets)
				%res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				declare <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1>, i8*, <vscale x 2 x i64>)
				declare <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i64>)
				declare <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i64>)
				declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i64>)
				declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i64>)

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-vector-base-scalar-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s \| FileCheck %s

				;
				; LDNT1B, LDNT1W, LDNT1H, LDNT1D: vector base + scalar offset
				; ldnt1b { z0.s }, p0/z, [z0.s, x0]
				;

				; LDNT1B
				define <vscale x 4 x i32> @gldnt1b_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gldnt1b_s:
				; CHECK: ldnt1b { z0.s }, p0/z, [z0.s, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				%res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				define <vscale x 2 x i64> @gldnt1b_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gldnt1b_d:
				; CHECK: ldnt1b { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				%res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LDNT1H
				define <vscale x 4 x i32> @gldnt1h_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gldnt1h_s:
				; CHECK: ldnt1h { z0.s }, p0/z, [z0.s, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv416.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				%res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				define <vscale x 2 x i64> @gldnt1h_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gldnt1h_d:
				; CHECK: ldnt1h { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				%res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LDNT1W
				define <vscale x 4 x i32> @gldnt1w_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gldnt1w_s:
				; CHECK: ldnt1w { z0.s }, p0/z, [z0.s, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				ret <vscale x 4 x i32> %load
				}

				define <vscale x 4 x float> @gldnt1w_s_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gldnt1w_s_float:
				; CHECK: ldnt1w { z0.s }, p0/z, [z0.s, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				ret <vscale x 4 x float> %load
				}

				define <vscale x 2 x i64> @gldnt1w_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gldnt1w_d:
				; CHECK: ldnt1w { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				%res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LDNT1D
				define <vscale x 2 x i64> @gldnt1d_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gldnt1d_d:
				; CHECK: ldnt1d { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				ret <vscale x 2 x i64> %load
				}

				; LDNT1D
				define <vscale x 2 x double> @gldnt1d_d_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gldnt1d_d_double:
				; CHECK: ldnt1d { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				ret <vscale x 2 x double> %load
				}

				;
				; LDNT1SB, LDNT1SH, LDNT1SW: vector base + scalar offset
				; ldnt1sb { z0.s }, p0/z, [z0.s, x0]
				;

				; LDNT1SB
				define <vscale x 4 x i32> @gldnt1sb_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gldnt1sb_s:
				; CHECK: ldnt1sb { z0.s }, p0/z, [z0.s, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				%res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				define <vscale x 2 x i64> @gldnt1sb_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gldnt1sb_d:
				; CHECK: ldnt1sb { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				%res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LDNT1SH
				define <vscale x 4 x i32> @gldnt1sh_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gldnt1sh_s:
				; CHECK: ldnt1sh { z0.s }, p0/z, [z0.s, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv416.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				%res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				define <vscale x 2 x i64> @gldnt1sh_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gldnt1sh_d:
				; CHECK: ldnt1sh { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				%res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LDNT1SW
				define <vscale x 2 x i64> @gldnt1sw_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gldnt1sw_d:
				; CHECK: ldnt1sw { z0.d }, p0/z, [z0.d, x0]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				%res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LDNT1B/LDNT1SB
				declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; LDNT1H/LDNT1SH
				declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv416.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; LDNT1W/LDNT1SW
				declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

				; LDNT1D
				declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-32bit-unscaled-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s \| FileCheck %s

				;
				; STNT1B, STNT1H, STNT1W: base + 32-bit unscaled offset, zero (uxtw)
				; extended to 64 bits.
				; e.g. stnt1h { z0.s }, p0, [z1.s, x0]
				;

				; STNT1B
				define void @sstnt1b_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %offsets) {
				; CHECK-LABEL: sstnt1b_s_uxtw:
				; CHECK: stnt1b { z0.s }, p0, [z1.s, x0]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
				call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> %data_trunc,
				<vscale x 4 x i1> %pg,
				i8* %base,
				<vscale x 4 x i32> %offsets)
				ret void
				}

				; STNT1H
				define void @sstnt1h_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %offsets) {
				; CHECK-LABEL: sstnt1h_s_uxtw:
				; CHECK: stnt1h { z0.s }, p0, [z1.s, x0]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
				call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> %data_trunc,
				<vscale x 4 x i1> %pg,
				i16* %base,
				<vscale x 4 x i32> %offsets)
				ret void
				}

				; STNT1W
				define void @sstnt1w_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %offsets) {
				; CHECK-LABEL: sstnt1w_s_uxtw:
				; CHECK: stnt1w { z0.s }, p0, [z1.s, x0]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> %data,
				<vscale x 4 x i1> %pg,
				i32* %base,
				<vscale x 4 x i32> %offsets)
				ret void
				}

				define void @sstnt1w_s_uxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %offsets) {
				; CHECK-LABEL: sstnt1w_s_uxtw_float:
				; CHECK: stnt1w { z0.s }, p0, [z1.s, x0]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4f32(<vscale x 4 x float> %data,
				<vscale x 4 x i1> %pg,
				float* %base,
				<vscale x 4 x i32> %offsets)
				ret void
				}

				; STNT1B
				declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i8*, <vscale x 2 x i32>)
				declare void @llvm.aarch64.sve.stnt1.scatter.sxtw.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.stnt1.scatter.sxtw.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i8*, <vscale x 2 x i32>)

				; STNT1H
				declare void @llvm.aarch64.sve.stnt1.scatter.sxtw.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.stnt1.scatter.sxtw.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
				declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i32>)

				; STNT1W
				declare void @llvm.aarch64.sve.stnt1.scatter.sxtw.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.stnt1.scatter.sxtw.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
				declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i32>)

				declare void @llvm.aarch64.sve.stnt1.scatter.sxtw.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*, <vscale x 4 x i32>)

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-64bit-unscaled-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s \| FileCheck %s

				;
				; STNT1B, STNT1W, STNT1H, STNT1D: base + 64-bit unscaled offset
				; e.g. stnt1h { z0.d }, p0, [z1.d, x0]
				;

				define void @sstnt1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: sstnt1b_d:
				; CHECK: stnt1b { z0.d }, p0, [z1.d, x0]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
				call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> %data_trunc,
				<vscale x 2 x i1> %pg,
				i8* %base,
				<vscale x 2 x i64> %b)
				ret void
				}

				define void @sstnt1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: sstnt1h_d:
				; CHECK: stnt1h { z0.d }, p0, [z1.d, x0]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> %data_trunc,
				<vscale x 2 x i1> %pg,
				i16* %base,
				<vscale x 2 x i64> %b)
				ret void
				}

				define void @sstnt1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: sstnt1w_d:
				; CHECK: stnt1w { z0.d }, p0, [z1.d, x0]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> %data_trunc,
				<vscale x 2 x i1> %pg,
				i32* %base,
				<vscale x 2 x i64> %b)
				ret void
				}

				define void @sstnt1d_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: sstnt1d_d:
				; CHECK: stnt1d { z0.d }, p0, [z1.d, x0]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> %data,
				<vscale x 2 x i1> %pg,
				i64* %base,
				<vscale x 2 x i64> %b)
				ret void
				}

				define void @sstnt1d_d_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: sstnt1d_d_double:
				; CHECK: stnt1d { z0.d }, p0, [z1.d, x0]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.stnt1.scatter.nxv2f64(<vscale x 2 x double> %data,
				<vscale x 2 x i1> %pg,
				double* %base,
				<vscale x 2 x i64> %b)
				ret void
				}

				declare void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i8*, <vscale x 2 x i64>)
				declare void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i64>)
				declare void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i64>)
				declare void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*, <vscale x 2 x i64>)
				declare void @llvm.aarch64.sve.stnt1.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i64>)

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-vector-base-scalar-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s \| FileCheck %s

				;
				; STNT1B, STNT1W, STNT1H, STNT1D: vector base + scalar offset
				; e.g. stnt1b { z0.s }, p0, [z1.s, x0]
				;

				; STNT1B
				define void @stnt1b_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: stnt1b_s:
				; CHECK: stnt1b { z0.s }, p0, [z1.s, x0]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
				call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> %data_trunc,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				ret void
				}

				define void @stnt1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: stnt1b_d:
				; CHECK: stnt1b { z0.d }, p0, [z1.d, x0]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
				call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> %data_trunc,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				ret void
				}

				; STNT1H
				; 32-bit lanes: each element of %data is narrowed to i16 and handed to the
				; nxv4i16.nxv4i32 scatter intrinsic together with the predicate, the vector
				; of 32-bit bases and the scalar offset.
				define void @stnt1h_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: stnt1h_s:
				; CHECK: stnt1h { z0.s }, p0, [z1.s, x0]
				; CHECK-NEXT: ret
				  %narrow = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
				  call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> %narrow, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
				  ret void
				}

				; 64-bit lanes: each element of %data is narrowed to i16 and handed to the
				; nxv2i16.nxv2i64 scatter intrinsic together with the predicate, the vector
				; of 64-bit bases and the scalar offset.
				define void @stnt1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: stnt1h_d:
				; CHECK: stnt1h { z0.d }, p0, [z1.d, x0]
				; CHECK-NEXT: ret
				  %narrow = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				  call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> %narrow, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  ret void
				}

				; STNT1W
				; 32-bit lanes, no narrowing: %data is passed unchanged to the
				; nxv4i32.nxv4i32 scatter intrinsic with the predicate, the vector of
				; 32-bit bases and the scalar offset.
				define void @stnt1w_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: stnt1w_s:
				; CHECK: stnt1w { z0.s }, p0, [z1.s, x0]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
				  ret void
				}

				; Single-precision variant: the float vector %data is passed unchanged to
				; the nxv4f32.nxv4i32 scatter intrinsic with the predicate, the vector of
				; 32-bit bases and the scalar offset.
				define void @stnt1w_f32_s(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: stnt1w_f32_s:
				; CHECK: stnt1w { z0.s }, p0, [z1.s, x0]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
				  ret void
				}

				; 64-bit lanes: each element of %data is narrowed to i32 and handed to the
				; nxv2i32.nxv2i64 scatter intrinsic together with the predicate, the vector
				; of 64-bit bases and the scalar offset.
				define void @stnt1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: stnt1w_d:
				; CHECK: stnt1w { z0.d }, p0, [z1.d, x0]
				; CHECK-NEXT: ret
				  %narrow = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				  call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> %narrow, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  ret void
				}

				; STNT1D
				; 64-bit lanes, no narrowing: %data is passed unchanged to the
				; nxv2i64.nxv2i64 scatter intrinsic with the predicate, the vector of
				; 64-bit bases and the scalar offset.
				define void @stnt1d_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: stnt1d_d:
				; CHECK: stnt1d { z0.d }, p0, [z1.d, x0]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  ret void
				}

				; Double-precision variant: the double vector %data is passed unchanged to
				; the nxv2f64.nxv2i64 scatter intrinsic with the predicate, the vector of
				; 64-bit bases and the scalar offset.
				define void @stnt1d_f64_d(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: stnt1d_f64_d:
				; CHECK: stnt1d { z0.d }, p0, [z1.d, x0]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  ret void
				}

				; Intrinsic declarations, listed in the order the tests in this file call
				; them (32-bit-lane form first, then 64-bit-lane form).

				; STNT1B
				declare void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; STNT1H
				declare void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; STNT1W
				declare void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; STNT1D
				declare void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)
				declare void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; Declared but not called by any test function in this file.
				declare void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2f32.nxv2i64(<vscale x 2 x float>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add intrinsics for non-temporal gather-loads/scatter-stores
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 247584

llvm/include/llvm/IR/IntrinsicsAArch64.td

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-32bit-unscaled-offset.ll

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-unscaled-offset.ll

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-vector-base-scalar-offset.ll

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-32bit-unscaled-offset.ll

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-64bit-unscaled-offset.ll

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-vector-base-scalar-offset.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add intrinsics for non-temporal gather-loads/scatter-stores
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 247584

llvm/include/llvm/IR/IntrinsicsAArch64.td

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-32bit-unscaled-offset.ll

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-unscaled-offset.ll

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-vector-base-scalar-offset.ll

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-32bit-unscaled-offset.ll

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-64bit-unscaled-offset.ll

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-vector-base-scalar-offset.ll

[AArch64][SVE] Add intrinsics for non-temporal gather-loads/scatter-stores
ClosedPublic