Diff 234022

llvm/include/llvm/IR/IntrinsicsAArch64.td

Show First 20 Lines • Show All 986 Lines • ▼ Show 20 Lines	: Intrinsic<[llvm_anyvector_ty],
LLVMSubdivide2VectorType<0>,		LLVMSubdivide2VectorType<0>,
llvm_i32_ty],		llvm_i32_ty],
[IntrNoMem]>;		[IntrNoMem]>;

class SVE2_1VectorArg_Narrowing_Intrinsic		class SVE2_1VectorArg_Narrowing_Intrinsic
: Intrinsic<[LLVMSubdivide2VectorType<0>],		: Intrinsic<[LLVMSubdivide2VectorType<0>],
[llvm_anyvector_ty],		[llvm_anyvector_ty],
[IntrNoMem]>;		[IntrNoMem]>;

		sdesmalenUnsubmitted Not Done Reply Inline Actions nit: The formatting for these intrinsics is odd. sdesmalen: nit: The formatting for these intrinsics is odd.
		andwarAuthorUnsubmitted Done Reply Inline Actions Fixed - I try to use clang-format (which IMHO does a good job here), but sadly the result is inconsistent with the rest of the file. andwar: Fixed - I try to use clang-format (which IMHO does a good job here), but sadly the result is…
class SVE2_Merged1VectorArg_Narrowing_Intrinsic		class SVE2_Merged1VectorArg_Narrowing_Intrinsic
: Intrinsic<[LLVMSubdivide2VectorType<0>],		: Intrinsic<[LLVMSubdivide2VectorType<0>],
[LLVMSubdivide2VectorType<0>,		[LLVMSubdivide2VectorType<0>,
llvm_anyvector_ty],		llvm_anyvector_ty],
[IntrNoMem]>;		[IntrNoMem]>;

// NOTE: There is no relationship between these intrinsics beyond an attempt		// NOTE: There is no relationship between these intrinsics beyond an attempt
// to reuse currently identical class definitions.		// to reuse currently identical class definitions.
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines	: Intrinsic<[llvm_anyvector_ty],
[IntrReadMem, IntrArgMemOnly]>;		[IntrReadMem, IntrArgMemOnly]>;

class AdvSIMD_1VectorArg_Imm_Intrinsic		class AdvSIMD_1VectorArg_Imm_Intrinsic
: Intrinsic<[llvm_anyvector_ty],		: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,		[LLVMMatchType<0>,
llvm_i32_ty],		llvm_i32_ty],
[IntrNoMem, ImmArg<1>]>;		[IntrNoMem, ImmArg<1>]>;

		class AdvSIMD_ScatterStore_64bitOffset_Intrinsic
		: Intrinsic<[],
		[
		llvm_anyvector_ty,
		LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
		LLVMPointerToElt<0>,
		LLVMScalarOrSameVectorWidth<0, llvm_i64_ty>
		],
		[IntrWriteMem, IntrArgMemOnly]>;

		class AdvSIMD_ScatterStore_32bitOffset_Intrinsic
		: Intrinsic<[],
		[
		llvm_anyvector_ty,
		LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
		LLVMPointerToElt<0>,
		LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>
		],
		[IntrWriteMem, IntrArgMemOnly]>;

		efriedmaUnsubmitted Not Done Reply Inline Actions ImmArg? efriedma: ImmArg?
		andwarAuthorUnsubmitted Done Reply Inline Actions Sorry, you mentioned that earlier and I missed that. I've updated this patch accordingly. I also had to make sure that the affected instruction multiclasses in _AArch64SVEInstrInfo.td_ use `TImmLeaf` instead of `ImmLeaf` (this came up in other patches similar to this one). To this end, I duplicated the following definitions that use `ImmLeaf`: `imm0_31` `uimm5s2` `uimm5s4` `uimm5s8` and defined equivalents using `TImmLeaf`: `timm0_31` `tuimm5s2` `tuimm5s4` `tuimm5s8` We may implement this later with a `ComplexPattern`, instead of duplicating these. I think that @kmclaughlin might already be looking into it. andwar: Sorry, you mentioned that earlier and I missed that. I've updated this patch accordingly. I…
		class AdvSIMD_ScatterStore_VectorBase_Intrinsic
		: Intrinsic<[],
		[
		llvm_anyvector_ty,
		LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
		llvm_anyvector_ty, llvm_i64_ty
		],
		[IntrWriteMem, IntrArgMemOnly, ImmArg<3>]>;

//		//
// Loads		// Loads
//		//

def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic;		def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic;

//		//
// Stores		// Stores
▲ Show 20 Lines • Show All 327 Lines • ▼ Show 20 Lines
// to 64 bits		// to 64 bits
def int_aarch64_sve_ld1_gather_sxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;		def int_aarch64_sve_ld1_gather_sxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;
def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;		def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;

// vector base + immediate index		// vector base + immediate index
def int_aarch64_sve_ld1_gather_imm : AdvSIMD_GatherLoad_VecTorBase_Intrinsic;		def int_aarch64_sve_ld1_gather_imm : AdvSIMD_GatherLoad_VecTorBase_Intrinsic;

//		//
		// Scatter stores:
		//

		// scalar + vector, 64 bit unscaled offsets
		def int_aarch64_sve_st1_scatter : AdvSIMD_ScatterStore_64bitOffset_Intrinsic;

		// scalar + vector, 64 bit scaled offsets
		def int_aarch64_sve_st1_scatter_index
		: AdvSIMD_ScatterStore_64bitOffset_Intrinsic;

		// scalar + vector, 32 bit unscaled offsets, sign (sxtw) or zero (uxtw)
		// extended to 64 bits
		def int_aarch64_sve_st1_scatter_sxtw
		: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;

		def int_aarch64_sve_st1_scatter_uxtw
		: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;

		// scalar + vector, 32 bit scaled offsets, sign (sxtw) or zero (uxtw) extended
		// to 64 bits
		def int_aarch64_sve_st1_scatter_sxtw_index
		: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;

		def int_aarch64_sve_st1_scatter_uxtw_index
		: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;

		// vector base + immediate index
		def int_aarch64_sve_st1_scatter_imm : AdvSIMD_ScatterStore_VectorBase_Intrinsic;

		//
// SVE2 - Non-widening pairwise arithmetic		// SVE2 - Non-widening pairwise arithmetic
//		//

def int_aarch64_sve_faddp : AdvSIMD_Pred2VectorArg_Intrinsic;		def int_aarch64_sve_faddp : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmaxp : AdvSIMD_Pred2VectorArg_Intrinsic;		def int_aarch64_sve_fmaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmaxnmp : AdvSIMD_Pred2VectorArg_Intrinsic;		def int_aarch64_sve_fmaxnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fminp : AdvSIMD_Pred2VectorArg_Intrinsic;		def int_aarch64_sve_fminp : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fminnmp : AdvSIMD_Pred2VectorArg_Intrinsic;		def int_aarch64_sve_fminnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
Show All 11 Lines
def int_aarch64_sve_fmlslt : SVE2_3VectorArg_Long_Intrinsic;		def int_aarch64_sve_fmlslt : SVE2_3VectorArg_Long_Intrinsic;
def int_aarch64_sve_fmlslt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;		def int_aarch64_sve_fmlslt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;

//		//
// SVE2 - Floating-point integer binary logarithm		// SVE2 - Floating-point integer binary logarithm
//		//

def int_aarch64_sve_flogb : AdvSIMD_SVE_LOGB_Intrinsic;		def int_aarch64_sve_flogb : AdvSIMD_SVE_LOGB_Intrinsic;

		sdesmalenUnsubmitted Done Reply Inline Actions nit: unrelated whitespace change. sdesmalen: nit: unrelated whitespace change.
//		//
// SVE2 - Unary narrowing operations		// SVE2 - Unary narrowing operations
//		//

def int_aarch64_sve_sqxtnb : SVE2_1VectorArg_Narrowing_Intrinsic;		def int_aarch64_sve_sqxtnb : SVE2_1VectorArg_Narrowing_Intrinsic;
def int_aarch64_sve_sqxtnt : SVE2_Merged1VectorArg_Narrowing_Intrinsic;		def int_aarch64_sve_sqxtnt : SVE2_Merged1VectorArg_Narrowing_Intrinsic;
def int_aarch64_sve_sqxtunb : SVE2_1VectorArg_Narrowing_Intrinsic;		def int_aarch64_sve_sqxtunb : SVE2_1VectorArg_Narrowing_Intrinsic;
def int_aarch64_sve_sqxtunt : SVE2_Merged1VectorArg_Narrowing_Intrinsic;		def int_aarch64_sve_sqxtunt : SVE2_Merged1VectorArg_Narrowing_Intrinsic;
def int_aarch64_sve_uqxtnb : SVE2_1VectorArg_Narrowing_Intrinsic;		def int_aarch64_sve_uqxtnb : SVE2_1VectorArg_Narrowing_Intrinsic;
def int_aarch64_sve_uqxtnt : SVE2_Merged1VectorArg_Narrowing_Intrinsic;		def int_aarch64_sve_uqxtnt : SVE2_Merged1VectorArg_Narrowing_Intrinsic;
}		}

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 217 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
// Signed gather loads		// Signed gather loads
GLD1S,		GLD1S,
GLD1S_SCALED,		GLD1S_SCALED,
GLD1S_UXTW,		GLD1S_UXTW,
GLD1S_SXTW,		GLD1S_SXTW,
GLD1S_UXTW_SCALED,		GLD1S_UXTW_SCALED,
GLD1S_SXTW_SCALED,		GLD1S_SXTW_SCALED,
GLD1S_IMM,		GLD1S_IMM,
		// Scatter store
		SST1,
		SST1_SCALED,
		SST1_UXTW,
		SST1_SXTW,
		SST1_UXTW_SCALED,
		SST1_SXTW_SCALED,
		SST1_IMM,

// NEON Load/Store with post-increment base updates		// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,		LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,		LD3post,
LD4post,		LD4post,
ST2post,		ST2post,
ST3post,		ST3post,
ST4post,		ST4post,
▲ Show 20 Lines • Show All 569 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,351 Lines • ▼ Show 20 Lines	const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::GLD1_IMM: return "AArch64ISD::GLD1_IMM";		case AArch64ISD::GLD1_IMM: return "AArch64ISD::GLD1_IMM";
case AArch64ISD::GLD1S: return "AArch64ISD::GLD1S";		case AArch64ISD::GLD1S: return "AArch64ISD::GLD1S";
case AArch64ISD::GLD1S_SCALED: return "AArch64ISD::GLD1S_SCALED";		case AArch64ISD::GLD1S_SCALED: return "AArch64ISD::GLD1S_SCALED";
case AArch64ISD::GLD1S_SXTW: return "AArch64ISD::GLD1S_SXTW";		case AArch64ISD::GLD1S_SXTW: return "AArch64ISD::GLD1S_SXTW";
case AArch64ISD::GLD1S_UXTW: return "AArch64ISD::GLD1S_UXTW";		case AArch64ISD::GLD1S_UXTW: return "AArch64ISD::GLD1S_UXTW";
case AArch64ISD::GLD1S_SXTW_SCALED: return "AArch64ISD::GLD1S_SXTW_SCALED";		case AArch64ISD::GLD1S_SXTW_SCALED: return "AArch64ISD::GLD1S_SXTW_SCALED";
case AArch64ISD::GLD1S_UXTW_SCALED: return "AArch64ISD::GLD1S_UXTW_SCALED";		case AArch64ISD::GLD1S_UXTW_SCALED: return "AArch64ISD::GLD1S_UXTW_SCALED";
case AArch64ISD::GLD1S_IMM: return "AArch64ISD::GLD1S_IMM";		case AArch64ISD::GLD1S_IMM: return "AArch64ISD::GLD1S_IMM";
		case AArch64ISD::SST1: return "AArch64ISD::SST1";
		case AArch64ISD::SST1_SCALED: return "AArch64ISD::SST1_SCALED";
		case AArch64ISD::SST1_SXTW: return "AArch64ISD::SST1_SXTW";
		case AArch64ISD::SST1_UXTW: return "AArch64ISD::SST1_UXTW";
		case AArch64ISD::SST1_SXTW_SCALED: return "AArch64ISD::SST1_SXTW_SCALED";
		case AArch64ISD::SST1_UXTW_SCALED: return "AArch64ISD::SST1_UXTW_SCALED";
		case AArch64ISD::SST1_IMM: return "AArch64ISD::SST1_IMM";
}		}
return nullptr;		return nullptr;
}		}

MachineBasicBlock *		MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,		AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
MachineBasicBlock *MBB) const {		MachineBasicBlock *MBB) const {
// We materialise the F128CSEL pseudo-instruction as some control flow and a		// We materialise the F128CSEL pseudo-instruction as some control flow and a
▲ Show 20 Lines • Show All 10,707 Lines • ▼ Show 20 Lines	static MVT getSVEContainerType(EVT ContentTy) {
case MVT::nxv4i8:		case MVT::nxv4i8:
case MVT::nxv4i16:		case MVT::nxv4i16:
case MVT::nxv4i32:		case MVT::nxv4i32:
case MVT::nxv4f32:		case MVT::nxv4f32:
return MVT::nxv4i32;		return MVT::nxv4i32;
}		}
}		}

		static SDValue performST1ScatterCombine(SDNode *N, SelectionDAG &DAG,
		unsigned Opcode,
		bool OnlyPackedOffsets = true) {
		sdesmalenUnsubmitted Not Done Reply Inline Actions It is unclear what `N->getOperand(2)` is: EVT SrcVT = N->getOperand(2)->getValueType(0); const SDValue Dst = N->getOperand(2); sdesmalen: It is unclear what `N->getOperand(2)` is: ```EVT SrcVT = N->getOperand(2)->getValueType(0)…
		andwarAuthorUnsubmitted Done Reply Inline Actions That's a copy & paste leftover, sorry! I've renamed `Dst` as well as few other variables for consistency. andwar: That's a copy & paste leftover, sorry! I've renamed `Dst` as well as few other variables for…
		const SDValue Src = N->getOperand(2);
		const EVT SrcVT = Src->getValueType(0);
		assert(SrcVT.isScalableVector() &&
		"Scatter stores are only possible for SVE vectors");

		SDLoc DL(N);
		MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();

		// Make sure that source data will fit into an SVE register
		if (SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
		sdesmalenUnsubmitted Done Reply Inline Actions Should we simply compare: SrcVT.getSizeInBits().getScalableSize() > AArch64::SVEBitsPerBlock ? sdesmalen: Should we simply compare: ```SrcVT.getSizeInBits().getScalableSize() > AArch64…
		sdesmalenUnsubmitted Done Reply Inline Actions Sorry, I meant: SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock sdesmalen: Sorry, I meant: ```SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock```
		return SDValue();
		efriedmaUnsubmitted Not Done Reply Inline Actions Is the getKnownMinSize comparison trying to reject illegal types? Or something else? efriedma: Is the getKnownMinSize comparison trying to reject illegal types? Or something else?
		andwarAuthorUnsubmitted Done Reply Inline Actions It's meant to reject illegal types that won't fit into an SVE register (i.e. `nxv8i32`). I've added a comment to clarify this. andwar: It's meant to reject illegal types that won't fit into an SVE register (i.e. `nxv8i32`). I've…

		// For FPs, ACLE only supports _packed_ single and double precision types.
		if (SrcElVT.isFloatingPoint())
		if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
		return SDValue();

		// Depending on the addressing mode, this is either a pointer or a vector of
		// pointers (that fits into one register)
		const SDValue Base = N->getOperand(4);
		// Depending on the addressing mode, this is either a single offset or a
		// vector of offsets (that fits into one register)
		SDValue Offset = N->getOperand(5);

		auto &TLI = DAG.getTargetLoweringInfo();
		if (!TLI.isTypeLegal(Base.getValueType()))
		return SDValue();

		// Some scatter store variants allow unpacked offsets, but only as nxv2i32
		// vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
		// nxv2i64. Legalize accordingly.
		if (!OnlyPackedOffsets &&
		Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
		Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);

		if (!TLI.isTypeLegal(Offset.getValueType()))
		return SDValue();

		// Source value type that is representable in hardware
		efriedmaUnsubmitted Not Done Reply Inline Actions Does this work correctly for nxv2f32? It looks like we're missing test coverage, also. efriedma: Does this work correctly for nxv2f32? It looks like we're missing test coverage, also.
		andwarAuthorUnsubmitted Done Reply Inline Actions That's a good catch, thanks! For scatter stores ACLE only supports _packed_ vectors of single or double precision floats. I will add a check for this. I also missed this when implementing `performLD1GatherLoad` (which is very similar to this method, but it felt worthwhile to keep them seperate). When this patch is approved I'll prepare an update for `performLD1GatherLoad`. andwar: That's a good catch, thanks! For scatter stores ACLE only supports _packed_ vectors of single…
		efriedmaUnsubmitted Not Done Reply Inline Actions We could support this in the backend even if clang can't generate it... but I guess it's fine to leave it out. efriedma: We could support this in the backend even if clang can't generate it... but I guess it's fine…
		EVT HwSrcVt = getSVEContainerType(SrcVT);

		// Keep the original type of the input data to store - this is needed to
		// differentiate between ST1B, ST1H, ST1W and ST1D. For FP values we want the
		// integer equivalent, so just use HwSrcVt.
		SDValue InputVT = DAG.getValueType(SrcVT);
		if (SrcVT.isFloatingPoint())
		InputVT = DAG.getValueType(HwSrcVt);

		SDVTList VTs = DAG.getVTList(MVT::Other);
		SDValue SrcNew;

		efriedmaUnsubmitted Done Reply Inline Actions INTRINSIC_VOID has one result: the chain. You don't need to MergeValues here. efriedma: INTRINSIC_VOID has one result: the chain. You don't need to MergeValues here.
		if (Src.getValueType().isFloatingPoint())
		SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
		else
		SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);

		SDValue Ops[] = {N->getOperand(0), // Chain
		SrcNew,
		N->getOperand(3), // Pg
		Base,
		Offset,
		InputVT};

		return DAG.getNode(Opcode, DL, VTs, Ops);
		}

static SDValue performLD1GatherCombine(SDNode *N, SelectionDAG &DAG,		static SDValue performLD1GatherCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode) {		unsigned Opcode) {
EVT RetVT = N->getValueType(0);		EVT RetVT = N->getValueType(0);
assert(RetVT.isScalableVector() &&		assert(RetVT.isScalableVector() &&
"Gather loads are only possible for SVE vectors");		"Gather loads are only possible for SVE vectors");

SDLoc DL(N);		SDLoc DL(N);
MVT RetElVT = RetVT.getVectorElementType().getSimpleVT();		MVT RetElVT = RetVT.getVectorElementType().getSimpleVT();
▲ Show 20 Lines • Show All 204 Lines • ▼ Show 20 Lines	case ISD::INTRINSIC_W_CHAIN:
case Intrinsic::aarch64_sve_ld1_gather_uxtw:		case Intrinsic::aarch64_sve_ld1_gather_uxtw:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW);		return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW);
case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:		case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED);		return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED);
case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:		case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED);		return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED);
case Intrinsic::aarch64_sve_ld1_gather_imm:		case Intrinsic::aarch64_sve_ld1_gather_imm:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM);		return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM);
		case Intrinsic::aarch64_sve_st1_scatter:
		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1);
		case Intrinsic::aarch64_sve_st1_scatter_index:
		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SCALED);
		case Intrinsic::aarch64_sve_st1_scatter_sxtw:
		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW,
		/*OnlyPackedOffsets=*/false);
		case Intrinsic::aarch64_sve_st1_scatter_uxtw:
		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW,
		/*OnlyPackedOffsets=*/false);
		case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW_SCALED,
		/*OnlyPackedOffsets=*/false);
		case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW_SCALED,
		/*OnlyPackedOffsets=*/false);
		case Intrinsic::aarch64_sve_st1_scatter_imm:
		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_IMM);
default:		default:
break;		break;
}		}
break;		break;
case ISD::GlobalAddress:		case ISD::GlobalAddress:
return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());		return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
}		}
return SDValue();		return SDValue();
▲ Show 20 Lines • Show All 645 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64InstrFormats.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 387 Lines • ▼ Show 20 Lines
	}			}
	def uimm5s8 : Operand<i64>, ImmLeaf<i64,			def uimm5s8 : Operand<i64>, ImmLeaf<i64,
	[{ return Imm >= 0 && Imm < (32*8) && ((Imm % 8) == 0); }],			[{ return Imm >= 0 && Imm < (32*8) && ((Imm % 8) == 0); }],
	UImmS8XForm> {			UImmS8XForm> {
	let ParserMatchClass = UImm5s8Operand;			let ParserMatchClass = UImm5s8Operand;
	let PrintMethod = "printImmScale<8>";			let PrintMethod = "printImmScale<8>";
	}			}

				// tuimm5sN predicate - similar to uimm5sN, but uses TImmLeaf (TargetConstant)
				// instead of ImmLeaf (Constant)
				def tuimm5s2 : Operand<i64>, TImmLeaf<i64,
				[{ return Imm >= 0 && Imm < (32*2) && ((Imm % 2) == 0); }],
				UImmS2XForm> {
				let ParserMatchClass = UImm5s2Operand;
				let PrintMethod = "printImmScale<2>";
				}
				def tuimm5s4 : Operand<i64>, TImmLeaf<i64,
				[{ return Imm >= 0 && Imm < (32*4) && ((Imm % 4) == 0); }],
				UImmS4XForm> {
				let ParserMatchClass = UImm5s4Operand;
				let PrintMethod = "printImmScale<4>";
				}
				def tuimm5s8 : Operand<i64>, TImmLeaf<i64,
				[{ return Imm >= 0 && Imm < (32*8) && ((Imm % 8) == 0); }],
				UImmS8XForm> {
				let ParserMatchClass = UImm5s8Operand;
				let PrintMethod = "printImmScale<8>";
				}

	// uimm6sN predicate - True if the immediate is a multiple of N in the range			// uimm6sN predicate - True if the immediate is a multiple of N in the range
	// [0 * N, 64 * N].			// [0 * N, 64 * N].
	def UImm6s1Operand : UImmScaledMemoryIndexed<6, 1>;			def UImm6s1Operand : UImmScaledMemoryIndexed<6, 1>;
	def UImm6s2Operand : UImmScaledMemoryIndexed<6, 2>;			def UImm6s2Operand : UImmScaledMemoryIndexed<6, 2>;
	def UImm6s4Operand : UImmScaledMemoryIndexed<6, 4>;			def UImm6s4Operand : UImmScaledMemoryIndexed<6, 4>;
	def UImm6s8Operand : UImmScaledMemoryIndexed<6, 8>;			def UImm6s8Operand : UImmScaledMemoryIndexed<6, 8>;
	def UImm6s16Operand : UImmScaledMemoryIndexed<6, 16>;			def UImm6s16Operand : UImmScaledMemoryIndexed<6, 16>;

	▲ Show 20 Lines • Show All 341 Lines • ▼ Show 20 Lines

	// imm0_31 predicate - True if the immediate is in the range [0,31]			// imm0_31 predicate - True if the immediate is in the range [0,31]
	def imm0_31 : Operand<i64>, ImmLeaf<i64, [{			def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
	return ((uint64_t)Imm) < 32;			return ((uint64_t)Imm) < 32;
	}]> {			}]> {
	let ParserMatchClass = Imm0_31Operand;			let ParserMatchClass = Imm0_31Operand;
	}			}

				// timm0_31 predicate - same as imm0_31, but uses TargetConstant (TImmLeaf)
				// instead of Constant (ImmLeaf)
				def timm0_31 : Operand<i64>, TImmLeaf<i64, [{
				return ((uint64_t)Imm) < 32;
				}]> {
				let ParserMatchClass = Imm0_31Operand;
				}

	// True if the 32-bit immediate is in the range [0,31]			// True if the 32-bit immediate is in the range [0,31]
	def imm32_0_31 : Operand<i32>, ImmLeaf<i32, [{			def imm32_0_31 : Operand<i32>, ImmLeaf<i32, [{
	return ((uint64_t)Imm) < 32;			return ((uint64_t)Imm) < 32;
	}]> {			}]> {
	let ParserMatchClass = Imm0_31Operand;			let ParserMatchClass = Imm0_31Operand;
	}			}

	// imm0_1 predicate - True if the immediate is in the range [0,1]			// imm0_1 predicate - True if the immediate is in the range [0,1]
	▲ Show 20 Lines • Show All 10,024 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Show All 14 Lines	def SDT_AArch64_GLD1 : SDTypeProfile<1, 4, [
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>		SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;		]>;

def SDT_AArch64_GLD1_IMM : SDTypeProfile<1, 4, [		def SDT_AArch64_GLD1_IMM : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>,		SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>,
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>		SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;		]>;

		def SDT_AArch64_SST1 : SDTypeProfile<0, 5, [
		SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>,
		SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
		]>;

		def SDT_AArch64_SST1_IMM : SDTypeProfile<0, 5, [
		SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>,
		SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
		]>;

		def AArch64st1_scatter : SDNode<"AArch64ISD::SST1", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
		def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
		def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
		def AArch64st1_scatter_sxtw : SDNode<"AArch64ISD::SST1_SXTW", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
		def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
		def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
		def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM", SDT_AArch64_SST1_IMM, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;

def AArch64ld1_gather : SDNode<"AArch64ISD::GLD1", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ld1_gather : SDNode<"AArch64ISD::GLD1", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ld1_gather_scaled : SDNode<"AArch64ISD::GLD1_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ld1_gather_scaled : SDNode<"AArch64ISD::GLD1_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ld1_gather_uxtw : SDNode<"AArch64ISD::GLD1_UXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ld1_gather_uxtw : SDNode<"AArch64ISD::GLD1_UXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ld1_gather_sxtw : SDNode<"AArch64ISD::GLD1_SXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ld1_gather_sxtw : SDNode<"AArch64ISD::GLD1_SXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ld1_gather_uxtw_scaled : SDNode<"AArch64ISD::GLD1_UXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ld1_gather_uxtw_scaled : SDNode<"AArch64ISD::GLD1_UXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ld1_gather_sxtw_scaled : SDNode<"AArch64ISD::GLD1_SXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ld1_gather_sxtw_scaled : SDNode<"AArch64ISD::GLD1_SXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ld1_gather_imm : SDNode<"AArch64ISD::GLD1_IMM", SDT_AArch64_GLD1_IMM, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;		def AArch64ld1_gather_imm : SDNode<"AArch64ISD::GLD1_IMM", SDT_AArch64_GLD1_IMM, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;

▲ Show 20 Lines • Show All 548 Lines • ▼ Show 20 Lines	let Predicates = [HasSVE] in {
defm ST1B_D : sve_mem_cst_ss<0b0011, "st1b", Z_d, ZPR64, GPR64NoXZRshifted8>;		defm ST1B_D : sve_mem_cst_ss<0b0011, "st1b", Z_d, ZPR64, GPR64NoXZRshifted8>;
defm ST1H : sve_mem_cst_ss<0b0101, "st1h", Z_h, ZPR16, GPR64NoXZRshifted16>;		defm ST1H : sve_mem_cst_ss<0b0101, "st1h", Z_h, ZPR16, GPR64NoXZRshifted16>;
defm ST1H_S : sve_mem_cst_ss<0b0110, "st1h", Z_s, ZPR32, GPR64NoXZRshifted16>;		defm ST1H_S : sve_mem_cst_ss<0b0110, "st1h", Z_s, ZPR32, GPR64NoXZRshifted16>;
defm ST1H_D : sve_mem_cst_ss<0b0111, "st1h", Z_d, ZPR64, GPR64NoXZRshifted16>;		defm ST1H_D : sve_mem_cst_ss<0b0111, "st1h", Z_d, ZPR64, GPR64NoXZRshifted16>;
defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>;		defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>;
defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>;		defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>;
defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>;		defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>;

// Scatters using unscaled 32-bit offsets, e.g.		// Scatters using unpacked, unscaled 32-bit offsets, e.g.
		efriedmaUnsubmitted Done Reply Inline Actions "unpacked" efriedma: "unpacked"
// st1h z0.s, p0, [x0, z0.s, uxtw]
// and unpacked:
// st1h z0.d, p0, [x0, z0.d, uxtw]		// st1h z0.d, p0, [x0, z0.d, uxtw]
defm SST1B_D : sve_mem_sst_sv_32_unscaled<0b000, "st1b", Z_d, ZPR64, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;		defm SST1B_D : sve_mem_64b_sst_sv_32_unscaled<0b000, "st1b", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
defm SST1B_S : sve_mem_sst_sv_32_unscaled<0b001, "st1b", Z_s, ZPR32, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;		defm SST1H_D : sve_mem_64b_sst_sv_32_unscaled<0b010, "st1h", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
defm SST1H_D : sve_mem_sst_sv_32_unscaled<0b010, "st1h", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;		defm SST1W_D : sve_mem_64b_sst_sv_32_unscaled<0b100, "st1w", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8,nxv2i32>;
defm SST1H_S : sve_mem_sst_sv_32_unscaled<0b011, "st1h", Z_s, ZPR32, ZPR32ExtSXTW8, ZPR32ExtUXTW8>;		defm SST1D : sve_mem_64b_sst_sv_32_unscaled<0b110, "st1d", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
defm SST1W_D : sve_mem_sst_sv_32_unscaled<0b100, "st1w", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm SST1W : sve_mem_sst_sv_32_unscaled<0b101, "st1w", Z_s, ZPR32, ZPR32ExtSXTW8, ZPR32ExtUXTW8>;		// Scatters using packed, unscaled 32-bit offsets, e.g.
defm SST1D : sve_mem_sst_sv_32_unscaled<0b110, "st1d", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;		// st1h z0.s, p0, [x0, z0.s, uxtw]
		defm SST1B_S : sve_mem_32b_sst_sv_32_unscaled<0b001, "st1b", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
		defm SST1H_S : sve_mem_32b_sst_sv_32_unscaled<0b011, "st1h", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
		defm SST1W : sve_mem_32b_sst_sv_32_unscaled<0b101, "st1w", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;

// Scatters using scaled 32-bit offsets, e.g.		// Scatters using packed, scaled 32-bit offsets, e.g.
// st1h z0.s, p0, [x0, z0.s, uxtw #1]		// st1h z0.s, p0, [x0, z0.s, uxtw #1]
// and unpacked:		defm SST1H_S : sve_mem_32b_sst_sv_32_scaled<0b011, "st1h", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
		defm SST1W : sve_mem_32b_sst_sv_32_scaled<0b101, "st1w", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;

		// Scatters using unpacked, scaled 32-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d, uxtw #1]		// st1h z0.d, p0, [x0, z0.d, uxtw #1]
defm SST1H_D : sve_mem_sst_sv_32_scaled<0b010, "st1h", Z_d, ZPR64, ZPR64ExtSXTW16, ZPR64ExtUXTW16>;		defm SST1H_D : sve_mem_64b_sst_sv_32_scaled<0b010, "st1h", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
defm SST1H_S : sve_mem_sst_sv_32_scaled<0b011, "st1h", Z_s, ZPR32, ZPR32ExtSXTW16, ZPR32ExtUXTW16>;		defm SST1W_D : sve_mem_64b_sst_sv_32_scaled<0b100, "st1w", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
defm SST1W_D : sve_mem_sst_sv_32_scaled<0b100, "st1w", Z_d, ZPR64, ZPR64ExtSXTW32, ZPR64ExtUXTW32>;		defm SST1D : sve_mem_64b_sst_sv_32_scaled<0b110, "st1d", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
defm SST1W : sve_mem_sst_sv_32_scaled<0b101, "st1w", Z_s, ZPR32, ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
defm SST1D : sve_mem_sst_sv_32_scaled<0b110, "st1d", Z_d, ZPR64, ZPR64ExtSXTW64, ZPR64ExtUXTW64>;

// Scatters using 32/64-bit pointers with offset, e.g.		// Scatters using 32/64-bit pointers with offset, e.g.
// st1h z0.s, p0, [z0.s, #16]		// st1h z0.s, p0, [z0.s, #16]
		defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", timm0_31, AArch64st1_scatter_imm, nxv4i8>;
		defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv4i16>;
		defm SST1W : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv4i32>;

		// Scatters using 32/64-bit pointers with offset, e.g.
// st1h z0.d, p0, [z0.d, #16]		// st1h z0.d, p0, [z0.d, #16]
defm SST1B_D : sve_mem_sst_vi_ptrs<0b000, "st1b", Z_d, ZPR64, imm0_31>;		defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", timm0_31, AArch64st1_scatter_imm, nxv2i8>;
defm SST1B_S : sve_mem_sst_vi_ptrs<0b001, "st1b", Z_s, ZPR32, imm0_31>;		defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv2i16>;
defm SST1H_D : sve_mem_sst_vi_ptrs<0b010, "st1h", Z_d, ZPR64, uimm5s2>;		defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv2i32>;
defm SST1H_S : sve_mem_sst_vi_ptrs<0b011, "st1h", Z_s, ZPR32, uimm5s2>;		defm SST1D : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", tuimm5s8, AArch64st1_scatter_imm, nxv2i64>;
defm SST1W_D : sve_mem_sst_vi_ptrs<0b100, "st1w", Z_d, ZPR64, uimm5s4>;
defm SST1W : sve_mem_sst_vi_ptrs<0b101, "st1w", Z_s, ZPR32, uimm5s4>;
defm SST1D : sve_mem_sst_vi_ptrs<0b110, "st1d", Z_d, ZPR64, uimm5s8>;

// Scatters using unscaled 64-bit offsets, e.g.		// Scatters using unscaled 64-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d]		// st1h z0.d, p0, [x0, z0.d]
defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b">;		defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b", AArch64st1_scatter, nxv2i8>;
defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h">;		defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h", AArch64st1_scatter, nxv2i16>;
defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w">;		defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w", AArch64st1_scatter, nxv2i32>;
defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d">;		defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d", AArch64st1_scatter, nxv2i64>;

// Scatters using scaled 64-bit offsets, e.g.		// Scatters using scaled 64-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d, lsl #1]		// st1h z0.d, p0, [x0, z0.d, lsl #1]
defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", ZPR64ExtLSL16>;		defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", AArch64st1_scatter_scaled, ZPR64ExtLSL16, nxv2i16>;
defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", ZPR64ExtLSL32>;		defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", AArch64st1_scatter_scaled, ZPR64ExtLSL32, nxv2i32>;
defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", ZPR64ExtLSL64>;		defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", AArch64st1_scatter_scaled, ZPR64ExtLSL64, nxv2i64>;

// ST(2\|3\|4) structured stores (register + immediate)		// ST(2\|3\|4) structured stores (register + immediate)
defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>;		defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>;
defm ST3B_IMM : sve_mem_est_si<0b00, 0b10, ZZZ_b, "st3b", simm4s3>;		defm ST3B_IMM : sve_mem_est_si<0b00, 0b10, ZZZ_b, "st3b", simm4s3>;
defm ST4B_IMM : sve_mem_est_si<0b00, 0b11, ZZZZ_b, "st4b", simm4s4>;		defm ST4B_IMM : sve_mem_est_si<0b00, 0b11, ZZZZ_b, "st4b", simm4s4>;
defm ST2H_IMM : sve_mem_est_si<0b01, 0b01, ZZ_h, "st2h", simm4s2>;		defm ST2H_IMM : sve_mem_est_si<0b01, 0b01, ZZ_h, "st2h", simm4s2>;
defm ST3H_IMM : sve_mem_est_si<0b01, 0b10, ZZZ_h, "st3h", simm4s3>;		defm ST3H_IMM : sve_mem_est_si<0b01, 0b10, ZZZ_h, "st3h", simm4s3>;
defm ST4H_IMM : sve_mem_est_si<0b01, 0b11, ZZZZ_h, "st4h", simm4s4>;		defm ST4H_IMM : sve_mem_est_si<0b01, 0b11, ZZZZ_h, "st4h", simm4s4>;
▲ Show 20 Lines • Show All 959 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/SVEInstrFormats.td

Show First 20 Lines • Show All 4,558 Lines • ▼ Show 20 Lines	: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),
let Inst{13} = 0;		let Inst{13} = 0;
let Inst{12-10} = Pg;		let Inst{12-10} = Pg;
let Inst{9-5} = Rn;		let Inst{9-5} = Rn;
let Inst{4-0} = Zt;		let Inst{4-0} = Zt;

let mayStore = 1;		let mayStore = 1;
}		}

multiclass sve_mem_sst_sv_32_scaled<bits<3> opc, string asm,		multiclass sve_mem_32b_sst_sv_32_scaled<bits<3> opc, string asm,
RegisterOperand listty,		SDPatternOperator sxtw_op,
ZPRRegOp zprty,		SDPatternOperator uxtw_op,
RegisterOperand sxtw_opnd,		RegisterOperand sxtw_opnd,
RegisterOperand uxtw_opnd > {		RegisterOperand uxtw_opnd,
def _UXTW_SCALED : sve_mem_sst_sv<opc, 0, 1, asm, listty, uxtw_opnd>;		ValueType vt > {
def _SXTW_SCALED : sve_mem_sst_sv<opc, 1, 1, asm, listty, sxtw_opnd>;		def _UXTW_SCALED : sve_mem_sst_sv<opc, 0, 1, asm, Z_s, uxtw_opnd>;
		def _SXTW_SCALED : sve_mem_sst_sv<opc, 1, 1, asm, Z_s, sxtw_opnd>;

def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",		def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _UXTW_SCALED) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;		(!cast<Instruction>(NAME # _UXTW_SCALED) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",		def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SXTW_SCALED) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;		(!cast<Instruction>(NAME # _SXTW_SCALED) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;

		def : Pat<(uxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt),
		(!cast<Instruction>(NAME # _UXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
		def : Pat<(sxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt),
		(!cast<Instruction>(NAME # _SXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}		}

multiclass sve_mem_sst_sv_32_unscaled<bits<3> opc, string asm,		multiclass sve_mem_64b_sst_sv_32_scaled<bits<3> opc, string asm,
RegisterOperand listty,		SDPatternOperator sxtw_op,
ZPRRegOp zprty,		SDPatternOperator uxtw_op,
RegisterOperand sxtw_opnd,		RegisterOperand sxtw_opnd,
RegisterOperand uxtw_opnd> {		RegisterOperand uxtw_opnd,
def _UXTW : sve_mem_sst_sv<opc, 0, 0, asm, listty, uxtw_opnd>;		ValueType vt > {
def _SXTW : sve_mem_sst_sv<opc, 1, 0, asm, listty, sxtw_opnd>;		def _UXTW_SCALED : sve_mem_sst_sv<opc, 0, 1, asm, Z_d, uxtw_opnd>;
		def _SXTW_SCALED : sve_mem_sst_sv<opc, 1, 1, asm, Z_d, sxtw_opnd>;

		def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
		(!cast<Instruction>(NAME # _UXTW_SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
		def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
		(!cast<Instruction>(NAME # _SXTW_SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;

		def : Pat<(uxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
		(!cast<Instruction>(NAME # _UXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
		def : Pat<(sxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
		(!cast<Instruction>(NAME # _SXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
		}

		// Scatter stores of a Z_d data list addressed as "[$Rn, $Zm]", with one
		// instruction per offset operand kind (uxtw_opnd / sxtw_opnd).
		//   opc, asm            - forwarded unchanged to sve_mem_sst_sv.
		//   sxtw_op, uxtw_op    - selection nodes matched by the patterns below.
		//   sxtw_opnd, uxtw_opnd - register operands used for $Zm.
		//   vt                  - value type passed as the last operand of the matched node.
		multiclass sve_mem_64b_sst_sv_32_unscaled<bits<3> opc, string asm,
		SDPatternOperator sxtw_op,
		SDPatternOperator uxtw_op,
		RegisterOperand sxtw_opnd,
		RegisterOperand uxtw_opnd,
		ValueType vt> {
		// The two defs differ only in the second argument (0 for _UXTW, 1 for _SXTW).
		// The third argument is 0 here and 1 in the *_scaled multiclasses.
		// NOTE(review): presumably these are the extend-select and scaled bits of
		// sve_mem_sst_sv - confirm against the class header.
		def _UXTW : sve_mem_sst_sv<opc, 0, 0, asm, Z_d, uxtw_opnd>;
		def _SXTW : sve_mem_sst_sv<opc, 1, 0, asm, Z_d, sxtw_opnd>;

		// Aliases that take a plain ZPR64 register for $Zt in place of the Z_d list operand.
		def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
		(!cast<Instruction>(NAME # _UXTW) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
		def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
		(!cast<Instruction>(NAME # _SXTW) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;

		// Select uxtw_op / sxtw_op nodes (nxv2i64 data, nxv2i1 predicate, GPR64sp base,
		// nxv2i64 offsets, vt) to the instruction with the matching extend.
		def : Pat<(uxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
		(!cast<Instruction>(NAME # _UXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
		def : Pat<(sxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
		(!cast<Instruction>(NAME # _SXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
		}

		multiclass sve_mem_32b_sst_sv_32_unscaled<bits<3> opc, string asm,
		SDPatternOperator sxtw_op,
		SDPatternOperator uxtw_op,
		RegisterOperand sxtw_opnd,
		RegisterOperand uxtw_opnd,
		ValueType vt> {
		def _UXTW : sve_mem_sst_sv<opc, 0, 0, asm, Z_s, uxtw_opnd>;
		def _SXTW : sve_mem_sst_sv<opc, 1, 0, asm, Z_s, sxtw_opnd>;

def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",		def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _UXTW) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;		(!cast<Instruction>(NAME # _UXTW) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",		def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SXTW) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;		(!cast<Instruction>(NAME # _SXTW) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;

		def : Pat<(uxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt),
		(!cast<Instruction>(NAME # _UXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
		def : Pat<(sxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt),
		(!cast<Instruction>(NAME # _SXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}		}

class sve_mem_sst_sv2<bits<2> msz, bit scaled, string asm,		class sve_mem_sst_sv2<bits<2> msz, bit scaled, string asm,
RegisterOperand zprext>		RegisterOperand zprext>
: I<(outs), (ins Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),		: I<(outs), (ins Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),
asm, "\t$Zt, $Pg, [$Rn, $Zm]",		asm, "\t$Zt, $Pg, [$Rn, $Zm]",
"",		"",
[]>, Sched<[]> {		[]>, Sched<[]> {
Show All 10 Lines	: I<(outs), (ins Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),
let Inst{12-10} = Pg;		let Inst{12-10} = Pg;
let Inst{9-5} = Rn;		let Inst{9-5} = Rn;
let Inst{4-0} = Zt;		let Inst{4-0} = Zt;

let mayStore = 1;		let mayStore = 1;
}		}

multiclass sve_mem_sst_sv_64_scaled<bits<2> msz, string asm,		multiclass sve_mem_sst_sv_64_scaled<bits<2> msz, string asm,
RegisterOperand zprext> {		SDPatternOperator op,
def "" : sve_mem_sst_sv2<msz, 1, asm, zprext>;		RegisterOperand zprext,
		ValueType vt> {
		def _SCALED_REAL : sve_mem_sst_sv2<msz, 1, asm, zprext>;

def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",		def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;		(!cast<Instruction>(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;

		def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt),
		(!cast<Instruction>(NAME # _SCALED_REAL) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
}		}

multiclass sve_mem_sst_sv_64_unscaled<bits<2> msz, string asm> {		multiclass sve_mem_sst_sv_64_unscaled<bits<2> msz, string asm,
def "" : sve_mem_sst_sv2<msz, 0, asm, ZPR64ExtLSL8>;		SDPatternOperator op,
		ValueType vt> {
		def _REAL : sve_mem_sst_sv2<msz, 0, asm, ZPR64ExtLSL8>;

def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",		def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;		(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;

		def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
		(!cast<Instruction>(NAME # _REAL) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}		}

class sve_mem_sst_vi<bits<3> opc, string asm, ZPRRegOp zprty,		class sve_mem_sst_vi<bits<3> opc, string asm, ZPRRegOp zprty,
RegisterOperand VecList, Operand imm_ty>		RegisterOperand VecList, Operand imm_ty>
: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, zprty:$Zn, imm_ty:$imm5),		: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, zprty:$Zn, imm_ty:$imm5),
asm, "\t$Zt, $Pg, [$Zn, $imm5]",		asm, "\t$Zt, $Pg, [$Zn, $imm5]",
"",		"",
[]>, Sched<[]> {		[]>, Sched<[]> {
Show All 9 Lines	: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, zprty:$Zn, imm_ty:$imm5),
let Inst{15-13} = 0b101;		let Inst{15-13} = 0b101;
let Inst{12-10} = Pg;		let Inst{12-10} = Pg;
let Inst{9-5} = Zn;		let Inst{9-5} = Zn;
let Inst{4-0} = Zt;		let Inst{4-0} = Zt;

let mayStore = 1;		let mayStore = 1;
}		}

multiclass sve_mem_sst_vi_ptrs<bits<3> opc, string asm, RegisterOperand listty,		multiclass sve_mem_32b_sst_vi_ptrs<bits<3> opc, string asm,
ZPRRegOp zprty, Operand imm_ty> {		Operand imm_ty,
def _IMM : sve_mem_sst_vi<opc, asm, zprty, listty, imm_ty>;		SDPatternOperator op,
		ValueType vt> {
		def _IMM : sve_mem_sst_vi<opc, asm, ZPR32, Z_s, imm_ty>;

		def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
		(!cast<Instruction>(NAME # _IMM) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 0>;
		def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $imm5]",
		(!cast<Instruction>(NAME # _IMM) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), 0>;
		def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
		(!cast<Instruction>(NAME # _IMM) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>;

		def : Pat<(op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), (nxv4i32 ZPR:$ptrs), imm_ty:$index, vt),
		(!cast<Instruction>(NAME # _IMM) ZPR:$data, PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
		}

		// Scatter stores through a vector of 64-bit pointers plus an immediate,
		// written "[$Zn, $imm5]". Defines NAME # _IMM with ZPR64 pointers and a Z_d
		// data list, its assembly aliases, and the selection pattern for op.
		//   opc, asm - forwarded unchanged to sve_mem_sst_vi.
		//   imm_ty   - operand type of the immediate ($imm5 / $index).
		//   op       - selection node matched by the pattern below.
		//   vt       - value type passed as the last operand of the matched node.
		multiclass sve_mem_64b_sst_vi_ptrs<bits<3> opc, string asm,
		Operand imm_ty,
		SDPatternOperator op,
		ValueType vt> {
		def _IMM : sve_mem_sst_vi<opc, asm, ZPR64, Z_d, imm_ty>;

def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",		def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _IMM) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, 0), 0>;		(!cast<Instruction>(NAME # _IMM) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $imm5]",		def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $imm5]",
(!cast<Instruction>(NAME # _IMM) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, imm_ty:$imm5), 0>;		(!cast<Instruction>(NAME # _IMM) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>;
		// The list-form alias must name Z_d, the list operand _IMM is instantiated
		// with; Z_s belongs to the 32-bit variant, which pairs it with ZPR32.
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",		def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _IMM) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, 0), 1>;		(!cast<Instruction>(NAME # _IMM) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;

		// Select op (nxv2i64 data, nxv2i1 predicate, nxv2i64 pointers, immediate
		// index, vt) to the _IMM instruction.
		def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), (nxv2i64 ZPR:$ptrs), imm_ty:$index, vt),
		(!cast<Instruction>(NAME # _IMM) ZPR:$data, PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
}		}

class sve_mem_z_spill<string asm>		class sve_mem_z_spill<string asm>
: I<(outs), (ins ZPRAny:$Zt, GPR64sp:$Rn, simm9:$imm9),		: I<(outs), (ins ZPRAny:$Zt, GPR64sp:$Rn, simm9:$imm9),
asm, "\t$Zt, [$Rn, $imm9, mul vl]",		asm, "\t$Zt, [$Rn, $imm9, mul vl]",
"",		"",
[]>, Sched<[]> {		[]>, Sched<[]> {
bits<5> Rn;		bits<5> Rn;
▲ Show 20 Lines • Show All 1,767 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-scaled-offsets.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s \| FileCheck %s

				;
				; ST1H, ST1W, ST1D: base + 32-bit scaled offset, sign (sxtw) or zero
				; (uxtw) extended to 64 bits.
				; e.g. st1h { z0.d }, p0, [x0, z1.d, uxtw #1]
				;

				; ST1H
				define void @sst1h_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %indices) {
				; CHECK-LABEL: sst1h_s_uxtw:
				; CHECK: st1h { z0.s }, p0, [x0, z1.s, uxtw #1]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
				call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i16(<vscale x 4 x i16> %data_trunc,
				<vscale x 4 x i1> %pg,
				i16* %base,
				<vscale x 4 x i32> %indices)
				ret void
				}

				define void @sst1h_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %indices) {
				; CHECK-LABEL: sst1h_s_sxtw:
				; CHECK: st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
				call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i16(<vscale x 4 x i16> %data_trunc,
				<vscale x 4 x i1> %pg,
				i16* %base,
				<vscale x 4 x i32> %indices)
				ret void
				}

				define void @sst1h_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %indices) {
				; CHECK-LABEL: sst1h_d_uxtw:
				; CHECK: st1h { z0.d }, p0, [x0, z1.d, uxtw #1]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i16(<vscale x 2 x i16> %data_trunc,
				<vscale x 2 x i1> %pg,
				i16* %base,
				<vscale x 2 x i32> %indices)
				ret void
				}

				define void @sst1h_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %indices) {
				; CHECK-LABEL: sst1h_d_sxtw:
				; CHECK: st1h { z0.d }, p0, [x0, z1.d, sxtw #1]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i16(<vscale x 2 x i16> %data_trunc,
				<vscale x 2 x i1> %pg,
				i16* %base,
				<vscale x 2 x i32> %indices)
				ret void
				}

				; ST1W
				define void @sst1w_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %indices) {
				; CHECK-LABEL: sst1w_s_uxtw:
				; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i32(<vscale x 4 x i32> %data,
				<vscale x 4 x i1> %pg,
				i32* %base,
				<vscale x 4 x i32> %indices)
				ret void
				}

				define void @sst1w_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %indices) {
				; CHECK-LABEL: sst1w_s_sxtw:
				; CHECK: st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32(<vscale x 4 x i32> %data,
				<vscale x 4 x i1> %pg,
				i32* %base,
				<vscale x 4 x i32> %indices)
				ret void
				}

				define void @sst1w_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %indices) {
				; CHECK-LABEL: sst1w_d_uxtw:
				; CHECK: st1w { z0.d }, p0, [x0, z1.d, uxtw #2]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i32(<vscale x 2 x i32> %data_trunc,
				<vscale x 2 x i1> %pg,
				i32* %base,
				<vscale x 2 x i32> %indices)
				ret void
				}

				define void @sst1w_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %indices) {
				; CHECK-LABEL: sst1w_d_sxtw:
				; CHECK: st1w { z0.d }, p0, [x0, z1.d, sxtw #2]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i32(<vscale x 2 x i32> %data_trunc,
				<vscale x 2 x i1> %pg,
				i32* %base,
				<vscale x 2 x i32> %indices)
				ret void
				}

				define void @sst1w_s_uxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %indices) {
				; CHECK-LABEL: sst1w_s_uxtw_float:
				; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4f32(<vscale x 4 x float> %data,
				<vscale x 4 x i1> %pg,
				float* %base,
				<vscale x 4 x i32> %indices)
				ret void
				}

				define void @sst1w_s_sxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %indices) {
				; CHECK-LABEL: sst1w_s_sxtw_float:
				; CHECK: st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4f32(<vscale x 4 x float> %data,
				<vscale x 4 x i1> %pg,
				float* %base,
				<vscale x 4 x i32> %indices)
				ret void
				}

				; ST1D
				define void @sst1d_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %indices) {
				; CHECK-LABEL: sst1d_d_uxtw:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d, uxtw #3]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i64(<vscale x 2 x i64> %data,
				<vscale x 2 x i1> %pg,
				i64* %base,
				<vscale x 2 x i32> %indices)
				ret void
				}

				define void @sst1d_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %indices) {
				; CHECK-LABEL: sst1d_d_sxtw:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d, sxtw #3]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i64(<vscale x 2 x i64> %data,
				<vscale x 2 x i1> %pg,
				i64* %base,
				<vscale x 2 x i32> %indices)
				ret void
				}

				define void @sst1d_d_uxtw_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %indices) {
				; CHECK-LABEL: sst1d_d_uxtw_double:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d, uxtw #3]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2f64(<vscale x 2 x double> %data,
				<vscale x 2 x i1> %pg,
				double* %base,
				<vscale x 2 x i32> %indices)
				ret void
				efriedmaUnsubmitted Not Done Reply Inline Actions Why are the offsets here using `<vscale x 2 x i64>`? `<vscale x 2 x i32>` seems more natural. efriedma: Why are the offsets here using `<vscale x 2 x i64>`? `<vscale x 2 x i32>` seems more natural.
				andwarAuthorUnsubmitted Done Reply Inline Actions Although these _are_ 32-bit wide offsets, they are stored in 64-bit registers (and are implicitly sign- or zero-extended). By using `<vscale x 2 x i64>` we can rely on `LLVMScalarOrSameVectorWidth` to catch errors (i.e. `incorrect argument type`). This is consistent with the implementation for gather loads. Do you see any disadvantages of using `<vscale x 2 x i64>` instead of `<vscale x 2 x i32>`? andwar: * Although these _are_ 32-bit wide offsets, they are stored in 64-bit registers (and are…
				efriedmaUnsubmitted Not Done Reply Inline Actions The biggest advantage of using `<vscale x 2 x i32>` is that it's obvious the high bits are unused, so you don't end up with extra mask/64-bit multiply/etc. That said, we can recover this in other ways. I'm mostly concerned that we should try to be consistent here. We use `<vscale x 2 x i32>` for the stored value in `llvm.aarch64.sve.st1.scatter.index.nxv2i32`. efriedma: The biggest advantage of using `<vscale x 2 x i32>` is that it's obvious the high bits are…
				sdesmalenUnsubmitted Not Done Reply Inline Actions We envisioned the type for the indices generated by Clang to be `<vscale x 2 x i64>`, since the ACLE does not have any knowledge of unpacked types (like `<vscale x 2 x i32>`). The uxtw in the intrinsic would make it clear that the indices will need to be zero-extended from word to double-word. However, I agree this isn't very clear when reading/writing the intrinsics in IR and it should be simple to change the patch to take `<vscale x 2 x i32>` indices instead and generate a `truncate` in Clang. If the `scatterSt1Combine` then ANY_EXTENDs the data, the truncate will likely fall away. If we change this, we should be consistent and update the loads as well. sdesmalen: We envisioned the type for the indices generated by Clang to be `<vscale x 2 x i64>`, since the…
				andwarAuthorUnsubmitted Done Reply Inline Actions Thank you both, I'll update accordingly. I suggest that gather loads are updated in a separate patch. andwar: Thank you both, I'll update accordingly. I suggest that gather loads are updated in a separate…
				}

				define void @sst1d_d_sxtw_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %indices) {
				; CHECK-LABEL: sst1d_d_sxtw_double:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d, sxtw #3]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2f64(<vscale x 2 x double> %data,
				<vscale x 2 x i1> %pg,
				double* %base,
				<vscale x 2 x i32> %indices)
				ret void
				}


				; ST1H
				declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i32>)

				; ST1W
				declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i32>)

				declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*, <vscale x 4 x i32>)

				; ST1D
				declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*, <vscale x 2 x i32>)

				declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i32>)

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-unscaled-offsets.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s \| FileCheck %s

				;
				; ST1B, ST1H, ST1W, ST1D: base + 32-bit unscaled offset, sign (sxtw) or zero
				; (uxtw) extended to 64 bits.
				; e.g. st1h { z0.d }, p0, [x0, z1.d, uxtw]
				;

				; ST1B
				define void @sst1b_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %offsets) {
				; CHECK-LABEL: sst1b_s_uxtw:
				; CHECK: st1b { z0.s }, p0, [x0, z1.s, uxtw]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
				call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> %data_trunc,
				<vscale x 4 x i1> %pg,
				i8* %base,
				<vscale x 4 x i32> %offsets)
				ret void
				}

				define void @sst1b_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %offsets) {
				; CHECK-LABEL: sst1b_s_sxtw:
				; CHECK: st1b { z0.s }, p0, [x0, z1.s, sxtw]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
				call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i8(<vscale x 4 x i8> %data_trunc,
				<vscale x 4 x i1> %pg,
				i8* %base,
				<vscale x 4 x i32> %offsets)
				ret void
				}

				define void @sst1b_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %offsets) {
				; CHECK-LABEL: sst1b_d_uxtw:
				; CHECK: st1b { z0.d }, p0, [x0, z1.d, uxtw]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
				call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i8(<vscale x 2 x i8> %data_trunc,
				<vscale x 2 x i1> %pg,
				i8* %base,
				<vscale x 2 x i32> %offsets)
				ret void
				}

				define void @sst1b_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %offsets) {
				; CHECK-LABEL: sst1b_d_sxtw:
				; CHECK: st1b { z0.d }, p0, [x0, z1.d, sxtw]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
				call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i8(<vscale x 2 x i8> %data_trunc,
				<vscale x 2 x i1> %pg,
				i8* %base,
				<vscale x 2 x i32> %offsets)
				ret void
				}

				; ST1H
				define void @sst1h_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %offsets) {
				; CHECK-LABEL: sst1h_s_uxtw:
				; CHECK: st1h { z0.s }, p0, [x0, z1.s, uxtw]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
				call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> %data_trunc,
				<vscale x 4 x i1> %pg,
				i16* %base,
				<vscale x 4 x i32> %offsets)
				ret void
				}

				define void @sst1h_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %offsets) {
				; CHECK-LABEL: sst1h_s_sxtw:
				; CHECK: st1h { z0.s }, p0, [x0, z1.s, sxtw]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
				call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i16(<vscale x 4 x i16> %data_trunc,
				<vscale x 4 x i1> %pg,
				i16* %base,
				<vscale x 4 x i32> %offsets)
				ret void
				}

				define void @sst1h_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %offsets) {
				; CHECK-LABEL: sst1h_d_uxtw:
				; CHECK: st1h { z0.d }, p0, [x0, z1.d, uxtw]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i16(<vscale x 2 x i16> %data_trunc,
				<vscale x 2 x i1> %pg,
				i16* %base,
				<vscale x 2 x i32> %offsets)
				ret void
				}

				define void @sst1h_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %offsets) {
				; CHECK-LABEL: sst1h_d_sxtw:
				; CHECK: st1h { z0.d }, p0, [x0, z1.d, sxtw]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i16(<vscale x 2 x i16> %data_trunc,
				<vscale x 2 x i1> %pg,
				i16* %base,
				<vscale x 2 x i32> %offsets)
				ret void
				}

				; ST1W
				define void @sst1w_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %offsets) {
				; CHECK-LABEL: sst1w_s_uxtw:
				; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> %data,
				<vscale x 4 x i1> %pg,
				i32* %base,
				<vscale x 4 x i32> %offsets)
				ret void
				}

				define void @sst1w_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %offsets) {
				; CHECK-LABEL: sst1w_s_sxtw:
				; CHECK: st1w { z0.s }, p0, [x0, z1.s, sxtw]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i32(<vscale x 4 x i32> %data,
				<vscale x 4 x i1> %pg,
				i32* %base,
				<vscale x 4 x i32> %offsets)
				ret void
				}

				define void @sst1w_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %offsets) {
				; CHECK-LABEL: sst1w_d_uxtw:
				; CHECK: st1w { z0.d }, p0, [x0, z1.d, uxtw]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i32(<vscale x 2 x i32> %data_trunc,
				<vscale x 2 x i1> %pg,
				i32* %base,
				<vscale x 2 x i32> %offsets)
				ret void
				}

				define void @sst1w_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %offsets) {
				; CHECK-LABEL: sst1w_d_sxtw:
				; CHECK: st1w { z0.d }, p0, [x0, z1.d, sxtw]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i32(<vscale x 2 x i32> %data_trunc,
				<vscale x 2 x i1> %pg,
				i32* %base,
				<vscale x 2 x i32> %offsets)
				ret void
				}

				define void @sst1w_s_uxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %offsets) {
				; CHECK-LABEL: sst1w_s_uxtw_float:
				; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4f32(<vscale x 4 x float> %data,
				<vscale x 4 x i1> %pg,
				float* %base,
				<vscale x 4 x i32> %offsets)
				ret void
				}

				; Floating-point variant: st1.scatter.sxtw on <vscale x 4 x float> data with a
				; float* base. The expected output is the same st1w/.s/sxtw form as the i32 case.
				define void @sst1w_s_sxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %offsets) {
				; CHECK-LABEL: sst1w_s_sxtw_float:
				; CHECK: st1w { z0.s }, p0, [x0, z1.s, sxtw]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4f32(<vscale x 4 x float> %data,
				<vscale x 4 x i1> %pg,
				float* %base,
				<vscale x 4 x i32> %offsets)
				ret void
				}

				; ST1D
				; st1.scatter.uxtw on <vscale x 2 x i64> data with an i64* base and
				; <vscale x 2 x i32> offsets: the expected output is st1d on .d elements using
				; the uxtw offset modifier.
				define void @sst1d_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %offsets) {
				; CHECK-LABEL: sst1d_d_uxtw:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d, uxtw]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i64(<vscale x 2 x i64> %data,
				<vscale x 2 x i1> %pg,
				i64* %base,
				<vscale x 2 x i32> %offsets)
				ret void
				}

				; st1.scatter.sxtw on <vscale x 2 x i64> data with an i64* base and
				; <vscale x 2 x i32> offsets: the expected output is st1d on .d elements using
				; the sxtw offset modifier.
				define void @sst1d_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %offsets) {
				; CHECK-LABEL: sst1d_d_sxtw:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d, sxtw]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i64(<vscale x 2 x i64> %data,
				<vscale x 2 x i1> %pg,
				i64* %base,
				<vscale x 2 x i32> %offsets)
				ret void
				}

				; Floating-point variant: st1.scatter.uxtw on <vscale x 2 x double> data with a
				; double* base. The expected output is the same st1d/.d/uxtw form as the i64 case.
				define void @sst1d_d_uxtw_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %offsets) {
				; CHECK-LABEL: sst1d_d_uxtw_double:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d, uxtw]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2f64(<vscale x 2 x double> %data,
				<vscale x 2 x i1> %pg,
				double* %base,
				<vscale x 2 x i32> %offsets)
				ret void
				}

				; Floating-point variant: st1.scatter.sxtw on <vscale x 2 x double> data with a
				; double* base. The expected output is the same st1d/.d/sxtw form as the i64 case.
				define void @sst1d_d_sxtw_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %offsets) {
				; CHECK-LABEL: sst1d_d_sxtw_double:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d, sxtw]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2f64(<vscale x 2 x double> %data,
				<vscale x 2 x i1> %pg,
				double* %base,
				<vscale x 2 x i32> %offsets)
				ret void
				}


				; Declarations of the llvm.aarch64.sve.st1.scatter.uxtw / .sxtw intrinsics,
				; grouped by the store instruction they are expected to map to. Every
				; declaration takes (data vector, predicate, scalar base pointer, vector of
				; 32-bit offsets); the type suffix names the data vector type.

				; ST1B
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i8*, <vscale x 2 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i8*, <vscale x 2 x i32>)

				; ST1H
				declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i32>)

				; ST1W
				declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i32>)

				; ST1W, floating-point data
				declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*, <vscale x 4 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*, <vscale x 4 x i32>)

				; ST1D
				declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*, <vscale x 2 x i32>)

				; ST1D, floating-point data
				declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i32>)
				declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i32>)

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-scaled-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

				;
				; ST1H, ST1W, ST1D: base + 64-bit scaled offset
				; e.g. st1h { z0.d }, p0, [x0, z1.d, lsl #1]
				;

				; st1.scatter.index on data truncated from <vscale x 2 x i64> to
				; <vscale x 2 x i16>, with 64-bit offsets: the expected output is st1h on .d
				; elements with the offset vector scaled by lsl #1.
				; The label pattern ends in ':' so that it anchors on the function's label line.
				define void @sst1h_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %offsets) {
				; CHECK-LABEL: sst1h_index:
				; CHECK: st1h { z0.d }, p0, [x0, z1.d, lsl #1]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				call void @llvm.aarch64.sve.st1.scatter.index.nxv2i16(<vscale x 2 x i16> %data_trunc,
				<vscale x 2 x i1> %pg,
				i16* %base,
				<vscale x 2 x i64> %offsets)
				ret void
				}

				; st1.scatter.index on data truncated from <vscale x 2 x i64> to
				; <vscale x 2 x i32>, with 64-bit offsets: the expected output is st1w on .d
				; elements with the offset vector scaled by lsl #2.
				; The label pattern ends in ':' so that it anchors on the function's label line.
				define void @sst1w_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
				; CHECK-LABEL: sst1w_index:
				; CHECK: st1w { z0.d }, p0, [x0, z1.d, lsl #2]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				call void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32> %data_trunc,
				<vscale x 2 x i1> %pg,
				i32* %base,
				<vscale x 2 x i64> %offsets)
				ret void
				}

				; st1.scatter.index on <vscale x 2 x i64> data with 64-bit offsets: the
				; expected output is st1d on .d elements with the offset vector scaled by lsl #3.
				; The label pattern ends in ':' so that it anchors on this function's label
				; line and cannot also match the longer name sst1d_index_double.
				define void @sst1d_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %offsets) {
				; CHECK-LABEL: sst1d_index:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d, lsl #3]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> %data,
				<vscale x 2 x i1> %pg,
				i64* %base,
				<vscale x 2 x i64> %offsets)
				ret void
				}

				; Floating-point variant: st1.scatter.index on <vscale x 2 x double> data with
				; 64-bit offsets. The expected output is the same st1d/.d/lsl #3 form as the
				; i64 case.
				; The label pattern ends in ':' so that it anchors on the function's label line.
				define void @sst1d_index_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %offsets) {
				; CHECK-LABEL: sst1d_index_double:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d, lsl #3]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> %data,
				<vscale x 2 x i1> %pg,
				double* %base,
				<vscale x 2 x i64> %offsets)
				ret void
				}


				; Declarations of the llvm.aarch64.sve.st1.scatter.index intrinsics called
				; above: (data vector, predicate, scalar base pointer, vector of 64-bit offsets).
				declare void @llvm.aarch64.sve.st1.scatter.index.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i64>)
				declare void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i64>)
				declare void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*, <vscale x 2 x i64>)
				declare void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i64>)

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-unscaled-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

				;
				; ST1B, ST1W, ST1H, ST1D: base + 64-bit unscaled offset
				; e.g. st1h { z0.d }, p0, [x0, z1.d]
				;

				; st1.scatter on data truncated from <vscale x 2 x i64> to <vscale x 2 x i8>,
				; with 64-bit offsets: the expected output is st1b on .d elements with no
				; extend or shift on the offset vector.
				define void @sst1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: sst1b_d:
				; CHECK: st1b { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
				call void @llvm.aarch64.sve.st1.scatter.nxv2i8(<vscale x 2 x i8> %data_trunc,
				<vscale x 2 x i1> %pg,
				i8* %base,
				<vscale x 2 x i64> %b)
				ret void
				}

				; st1.scatter on data truncated from <vscale x 2 x i64> to <vscale x 2 x i16>,
				; with 64-bit offsets: the expected output is st1h on .d elements with no
				; extend or shift on the offset vector.
				define void @sst1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: sst1h_d:
				; CHECK: st1h { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				call void @llvm.aarch64.sve.st1.scatter.nxv2i16(<vscale x 2 x i16> %data_trunc,
				<vscale x 2 x i1> %pg,
				i16* %base,
				<vscale x 2 x i64> %b)
				ret void
				}

				; st1.scatter on data truncated from <vscale x 2 x i64> to <vscale x 2 x i32>,
				; with 64-bit offsets: the expected output is st1w on .d elements with no
				; extend or shift on the offset vector.
				define void @sst1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: sst1w_d:
				; CHECK: st1w { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				call void @llvm.aarch64.sve.st1.scatter.nxv2i32(<vscale x 2 x i32> %data_trunc,
				<vscale x 2 x i1> %pg,
				i32* %base,
				<vscale x 2 x i64> %b)
				ret void
				}

				; st1.scatter on <vscale x 2 x i64> data with 64-bit offsets: the expected
				; output is st1d on .d elements with no extend or shift on the offset vector.
				define void @sst1d_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: sst1d_d:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> %data,
				<vscale x 2 x i1> %pg,
				i64* %base,
				<vscale x 2 x i64> %b)
				ret void
				}

				; Floating-point variant: st1.scatter on <vscale x 2 x double> data with
				; 64-bit offsets. The expected output is the same st1d/.d form as the i64 case.
				define void @sst1d_d_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
				; CHECK-LABEL: sst1d_d_double:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.nxv2f64(<vscale x 2 x double> %data,
				<vscale x 2 x i1> %pg,
				double* %base,
				<vscale x 2 x i64> %b)
				ret void
				}

				; Declarations of the llvm.aarch64.sve.st1.scatter intrinsics called above:
				; (data vector, predicate, scalar base pointer, vector of 64-bit offsets).
				declare void @llvm.aarch64.sve.st1.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i8*, <vscale x 2 x i64>)
				declare void @llvm.aarch64.sve.st1.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i64>)
				declare void @llvm.aarch64.sve.st1.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i64>)
				declare void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*, <vscale x 2 x i64>)
				declare void @llvm.aarch64.sve.st1.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i64>)

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

				;
				; ST1B, ST1W, ST1H, ST1D: vector + immediate (index)
				; e.g. st1h { z0.s }, p0, [z1.s, #16]
				;

				; ST1B
				; st1.scatter.imm with a <vscale x 4 x i32> vector base and immediate 16, on
				; data truncated from i32 to i8 lanes: the expected output is st1b on .s
				; elements using the [z1.s, #16] addressing form.
				define void @sst1b_s_imm(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1b_s_imm:
				; CHECK: st1b { z0.s }, p0, [z1.s, #16]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
				call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i8.nxv4i32(<vscale x 4 x i8> %data_trunc,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				ret void
				}

				; st1.scatter.imm with a <vscale x 2 x i64> vector base and immediate 16, on
				; data truncated from i64 to i8 lanes: the expected output is st1b on .d
				; elements using the [z1.d, #16] addressing form.
				define void @sst1b_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1b_d_imm:
				; CHECK: st1b { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
				call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i8.nxv2i64(<vscale x 2 x i8> %data_trunc,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				ret void
				}

				; ST1H
				; st1.scatter.imm with a <vscale x 4 x i32> vector base and immediate 16, on
				; data truncated from i32 to i16 lanes: the expected output is st1h on .s
				; elements using the [z1.s, #16] addressing form.
				define void @sst1h_s_imm(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1h_s_imm:
				; CHECK: st1h { z0.s }, p0, [z1.s, #16]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
				call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i16.nxv4i32(<vscale x 4 x i16> %data_trunc,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				ret void
				}

				; st1.scatter.imm with a <vscale x 2 x i64> vector base and immediate 16, on
				; data truncated from i64 to i16 lanes: the expected output is st1h on .d
				; elements using the [z1.d, #16] addressing form.
				define void @sst1h_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1h_d_imm:
				; CHECK: st1h { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i16.nxv2i64(<vscale x 2 x i16> %data_trunc,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				ret void
				}

				; ST1W
				; st1.scatter.imm with a <vscale x 4 x i32> vector base and immediate 16, on
				; <vscale x 4 x i32> data (no truncation): the expected output is st1w on .s
				; elements using the [z1.s, #16] addressing form.
				define void @sst1w_s_imm(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1w_s_imm:
				; CHECK: st1w { z0.s }, p0, [z1.s, #16]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i32.nxv4i32(<vscale x 4 x i32> %data,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				ret void
				}

				; st1.scatter.imm with a <vscale x 2 x i64> vector base and immediate 16, on
				; data truncated from i64 to i32 lanes: the expected output is st1w on .d
				; elements using the [z1.d, #16] addressing form.
				define void @sst1w_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1w_d_imm:
				; CHECK: st1w { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i32.nxv2i64(<vscale x 2 x i32> %data_trunc,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				ret void
				}

				; Floating-point variant: st1.scatter.imm on <vscale x 4 x float> data with a
				; <vscale x 4 x i32> vector base and immediate 16. The expected output is the
				; same st1w/.s form as the i32 case.
				define void @sst1w_s_imm_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1w_s_imm_float:
				; CHECK: st1w { z0.s }, p0, [z1.s, #16]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.imm.nxv4f32.nxv4i32(<vscale x 4 x float> %data,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				ret void
				}

				; ST1D
				; st1.scatter.imm with a <vscale x 2 x i64> vector base and immediate 16, on
				; <vscale x 2 x i64> data (no truncation): the expected output is st1d on .d
				; elements using the [z1.d, #16] addressing form.
				define void @sst1d_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1d_d_imm:
				; CHECK: st1d { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i64.nxv2i64(<vscale x 2 x i64> %data,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				ret void
				}

				; Floating-point variant: st1.scatter.imm on <vscale x 2 x double> data with a
				; <vscale x 2 x i64> vector base and immediate 16. The expected output is the
				; same st1d/.d form as the i64 case.
				define void @sst1d_d_imm_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1d_d_imm_double:
				; CHECK: st1d { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.imm.nxv2f64.nxv2i64(<vscale x 2 x double> %data,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				ret void
				}

				; Declarations of the llvm.aarch64.sve.st1.scatter.imm intrinsics called above:
				; (data vector, predicate, vector of base addresses, i64 immediate). The name
				; carries two type suffixes: the data vector type, then the base vector type.

				; ST1B
				declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i8.nxv4i32(<vscale x 4 x i8>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i8.nxv2i64(<vscale x 2 x i8>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1H
				declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i16.nxv4i32(<vscale x 4 x i16>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i16.nxv2i64(<vscale x 2 x i16>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1W
				declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i32.nxv2i64(<vscale x 2 x i32>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1W, floating-point data
				declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4f32.nxv4i32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)

				; ST1D
				declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1D, floating-point data
				declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2f64.nxv2i64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

This is an archive of the discontinued LLVM Phabricator instance.

[Aarch64][SVE] Add intrinsics for scatter stores
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 234022

llvm/include/llvm/IR/IntrinsicsAArch64.td

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64InstrFormats.td

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-scaled-offsets.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-unscaled-offsets.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-scaled-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-unscaled-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll

This is an archive of the discontinued LLVM Phabricator instance.

[Aarch64][SVE] Add intrinsics for scatter stores
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 234022

llvm/include/llvm/IR/IntrinsicsAArch64.td

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64InstrFormats.td

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-scaled-offsets.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-unscaled-offsets.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-scaled-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-unscaled-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll

[Aarch64][SVE] Add intrinsics for scatter stores
ClosedPublic