Diff 239071

llvm/include/llvm/IR/IntrinsicsAArch64.td

Show First 20 Lines • Show All 1,119 Lines • ▼ Show 20 Lines	class AdvSIMD_GatherLoad_32bitOffset_Intrinsic
: Intrinsic<[llvm_anyvector_ty],		: Intrinsic<[llvm_anyvector_ty],
[		[
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,		LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMPointerToElt<0>,		LLVMPointerToElt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>		LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>
],		],
[IntrReadMem, IntrArgMemOnly]>;		[IntrReadMem, IntrArgMemOnly]>;

class AdvSIMD_GatherLoad_VecTorBase_Intrinsic		class AdvSIMD_GatherLoad_VectorBase_Intrinsic
: Intrinsic<[llvm_anyvector_ty],		: Intrinsic<[llvm_anyvector_ty],
[		[
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,		LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty,		llvm_anyvector_ty,
llvm_i64_ty		llvm_i64_ty
],		],
[IntrReadMem, IntrArgMemOnly]>;		[IntrReadMem, IntrArgMemOnly]>;

Show All 19 Lines

class AdvSIMD_ScatterStore_VectorBase_Intrinsic		class AdvSIMD_ScatterStore_VectorBase_Intrinsic
: Intrinsic<[],		: Intrinsic<[],
[		[
llvm_anyvector_ty,		llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,		LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty, llvm_i64_ty		llvm_anyvector_ty, llvm_i64_ty
],		],
[IntrWriteMem, IntrArgMemOnly, ImmArg<3>]>;		[IntrWriteMem, IntrArgMemOnly]>;

//		//
// Loads		// Loads
//		//

def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic;		def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic;

//		//
▲ Show 20 Lines • Show All 396 Lines • ▼ Show 20 Lines
// Testing predicates		// Testing predicates
//		//

def int_aarch64_sve_ptest_any : AdvSIMD_SVE_PTEST_Intrinsic;		def int_aarch64_sve_ptest_any : AdvSIMD_SVE_PTEST_Intrinsic;
def int_aarch64_sve_ptest_first : AdvSIMD_SVE_PTEST_Intrinsic;		def int_aarch64_sve_ptest_first : AdvSIMD_SVE_PTEST_Intrinsic;
def int_aarch64_sve_ptest_last : AdvSIMD_SVE_PTEST_Intrinsic;		def int_aarch64_sve_ptest_last : AdvSIMD_SVE_PTEST_Intrinsic;

//		//
// Gather loads:		// Gather loads: scalar base + vector offsets
//		//

// scalar + vector, 64 bit unscaled offsets		// 64 bit unscaled offsets
def int_aarch64_sve_ld1_gather : AdvSIMD_GatherLoad_64bitOffset_Intrinsic;		def int_aarch64_sve_ld1_gather : AdvSIMD_GatherLoad_64bitOffset_Intrinsic;

// scalar + vector, 64 bit scaled offsets		// 64 bit scaled offsets
def int_aarch64_sve_ld1_gather_index : AdvSIMD_GatherLoad_64bitOffset_Intrinsic;		def int_aarch64_sve_ld1_gather_index : AdvSIMD_GatherLoad_64bitOffset_Intrinsic;

// scalar + vector, 32 bit unscaled offsets, sign (sxtw) or zero (uxtw)		// 32 bit unscaled offsets, sign (sxtw) or zero (uxtw) extended to 64 bits
// extended to 64 bits
def int_aarch64_sve_ld1_gather_sxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;		def int_aarch64_sve_ld1_gather_sxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;
def int_aarch64_sve_ld1_gather_uxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;		def int_aarch64_sve_ld1_gather_uxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;

// scalar + vector, 32 bit scaled offsets, sign (sxtw) or zero (uxtw) extended		// 32 bit scaled offsets, sign (sxtw) or zero (uxtw) extended to 64 bits
// to 64 bits
def int_aarch64_sve_ld1_gather_sxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;		def int_aarch64_sve_ld1_gather_sxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;
def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;		def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;

// vector base + immediate index		//
def int_aarch64_sve_ld1_gather_imm : AdvSIMD_GatherLoad_VecTorBase_Intrinsic;		// Gather loads: vector base + scalar offset
		//

		def int_aarch64_sve_ld1_gather_scalar_offset : AdvSIMD_GatherLoad_VectorBase_Intrinsic;

//		//
// Scatter stores:		// Scatter stores: scalar base + vector offsets
//		//

// scalar + vector, 64 bit unscaled offsets		// 64 bit unscaled offsets
def int_aarch64_sve_st1_scatter : AdvSIMD_ScatterStore_64bitOffset_Intrinsic;		def int_aarch64_sve_st1_scatter : AdvSIMD_ScatterStore_64bitOffset_Intrinsic;

// scalar + vector, 64 bit scaled offsets		// 64 bit scaled offsets
def int_aarch64_sve_st1_scatter_index		def int_aarch64_sve_st1_scatter_index
: AdvSIMD_ScatterStore_64bitOffset_Intrinsic;		: AdvSIMD_ScatterStore_64bitOffset_Intrinsic;

// scalar + vector, 32 bit unscaled offsets, sign (sxtw) or zero (uxtw)		// 32 bit unscaled offsets, sign (sxtw) or zero (uxtw) extended to 64 bits
// extended to 64 bits
def int_aarch64_sve_st1_scatter_sxtw		def int_aarch64_sve_st1_scatter_sxtw
: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;		: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;

def int_aarch64_sve_st1_scatter_uxtw		def int_aarch64_sve_st1_scatter_uxtw
: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;		: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;

// scalar + vector, 32 bit scaled offsets, sign (sxtw) or zero (uxtw) extended		// 32 bit scaled offsets, sign (sxtw) or zero (uxtw) extended to 64 bits
// to 64 bits
def int_aarch64_sve_st1_scatter_sxtw_index		def int_aarch64_sve_st1_scatter_sxtw_index
: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;		: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;

def int_aarch64_sve_st1_scatter_uxtw_index		def int_aarch64_sve_st1_scatter_uxtw_index
: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;		: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;

// vector base + immediate index		//
def int_aarch64_sve_st1_scatter_imm : AdvSIMD_ScatterStore_VectorBase_Intrinsic;		// Scatter stores: vector base + scalar offset
		//

		def int_aarch64_sve_st1_scatter_scalar_offset : AdvSIMD_ScatterStore_VectorBase_Intrinsic;

//		//
// SVE2 - Non-widening pairwise arithmetic		// SVE2 - Non-widening pairwise arithmetic
//		//

def int_aarch64_sve_faddp : AdvSIMD_Pred2VectorArg_Intrinsic;		def int_aarch64_sve_faddp : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmaxp : AdvSIMD_Pred2VectorArg_Intrinsic;		def int_aarch64_sve_fmaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmaxnmp : AdvSIMD_Pred2VectorArg_Intrinsic;		def int_aarch64_sve_fmaxnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
▲ Show 20 Lines • Show All 76 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 12,297 Lines • ▼ Show 20 Lines	static SDValue performST1ScatterCombine(SDNode *N, SelectionDAG &DAG,

// For FPs, ACLE only supports _packed_ single and double precision types.		// For FPs, ACLE only supports _packed_ single and double precision types.
if (SrcElVT.isFloatingPoint())		if (SrcElVT.isFloatingPoint())
if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))		if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
return SDValue();		return SDValue();

// Depending on the addressing mode, this is either a pointer or a vector of		// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)		// pointers (that fits into one register)
const SDValue Base = N->getOperand(4);		SDValue Base = N->getOperand(4);
// Depending on the addressing mode, this is either a single offset or a		// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)		// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(5);		SDValue Offset = N->getOperand(5);

		// SST1_IMM requires that the offset is an immediate:
		// * multiple of #SizeInBytes
		// * in the range [0, 31 x #SizeInBytes]
		// where #SizeInBytes is the size in bytes of the stored
		// items. For immediates outside that range and non-immediate scalar offsets use
		// SST1 or SST1_UXTW instead.
		if (Opcode == AArch64ISD::SST1_IMM) {
		uint64_t MaxIndex = 31;
		uint64_t SrcElSize = SrcElVT.getStoreSize().getKnownMinSize();
		sdesmalen (Unsubmitted, Not Done): nit: `SrcElVT` is fixed-sized, so you can ask for `getFixedSize()` here.

		ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
		if (nullptr == OffsetConst ||
		sdesmalen (Unsubmitted, Not Done): nit: `!OffsetConst`
		OffsetConst->getZExtValue() > MaxIndex * SrcElSize ||
		OffsetConst->getZExtValue() % SrcElSize) {
		if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
		Opcode = AArch64ISD::SST1_UXTW;
		else
		Opcode = AArch64ISD::SST1;
		sdesmalen (Unsubmitted, Not Done): nit: perhaps add an assert here that `Base.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i64`?

		std::swap(Base, Offset);
		}
		}

auto &TLI = DAG.getTargetLoweringInfo();		auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))		if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();		return SDValue();

// Some scatter store variants allow unpacked offsets, but only as nxv2i32		// Some scatter store variants allow unpacked offsets, but only as nxv2i32
// vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to		// vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
// nxv2i64. Legalize accordingly.		// nxv2i64. Legalize accordingly.
if (!OnlyPackedOffsets &&		if (!OnlyPackedOffsets &&
Show All 39 Lines	assert(RetVT.isScalableVector() &&
"Gather loads are only possible for SVE vectors");		"Gather loads are only possible for SVE vectors");
SDLoc DL(N);		SDLoc DL(N);

if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)		if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();		return SDValue();

// Depending on the addressing mode, this is either a pointer or a vector of		// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)		// pointers (that fits into one register)
const SDValue Base = N->getOperand(3);		SDValue Base = N->getOperand(3);
// Depending on the addressing mode, this is either a single offset or a		// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)		// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(4);		SDValue Offset = N->getOperand(4);

		// GLD1_IMM requires that the offset is an immediate:
		efriedma (Unsubmitted, Done): "GLD1_IMM requires that the offset is an immediate in the range 0-31" is not correct, in general; the immediate is multiplied by the element size.
		sdesmalen (Unsubmitted, Not Done): This code should not interpret the offset as a byte offset and then scale the offset limit, but rather scale the offset with the element size before swapping Base and Offset. Perhaps we should rename the intrinsic to `llvm.aarch64.sve.ld1.gather.scalar.index` to clarify the distinction between it being a scaled and unscaled offset (this also gives it the same name as in the ACLE and the architecture spec).
		efriedma (Unsubmitted, Not Done): Does it actually make sense to expose both `llvm.aarch64.sve.ld1.gather.scalar.index` and `llvm.aarch64.sve.ld1.gather.scalar`? Yes, the ACLE has both "(vector base, scalar index)" and "(vector base, scalar offset in bytes)" intrinsics, but the "(vector base, scalar index)" intrinsics don't map to any single instruction if the index isn't a small constant. You have to emit a separate shift instruction.
		sdesmalen (Unsubmitted, Not Done): You're absolutely right; when I checked the ACLE document, I completely overlooked the intrinsics for scalar byte offsets. I agree it makes little sense to support two intrinsics in that case. Sorry for the confusion!
		// * multiple of #SizeInBytes
		// * in the range [0, 31 x #SizeInBytes]
		efriedma (Unsubmitted, Done): Do you need to handle the possibility that Offset is an immediate, but can't be encoded into a GLD1_IMM?
		// where #SizeInBytes is the size in bytes of the loaded items. For immediates
		// outside that range and non-immediate scalar offsets use GLD1 or GLD1_UXTW
		// instead.
		if (Opcode == AArch64ISD::GLD1_IMM) {
		uint64_t MaxIndex = 31;
		uint64_t RetElSize = RetVT.getVectorElementType()
		.getSimpleVT()
		.getStoreSize()
		.getKnownMinSize();

		ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
		if (nullptr == OffsetConst ||
		OffsetConst->getZExtValue() > MaxIndex * RetElSize ||
		OffsetConst->getZExtValue() % RetElSize) {
		if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
		Opcode = AArch64ISD::GLD1_UXTW;
		else
		Opcode = AArch64ISD::GLD1;

		std::swap(Base, Offset);
		}
		}

auto &TLI = DAG.getTargetLoweringInfo();		auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))		if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();		return SDValue();

// Some gather load variants allow unpacked offsets, but only as nxv2i32		// Some gather load variants allow unpacked offsets, but only as nxv2i32
// vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to		// vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
// nxv2i64. Legalize accordingly.		// nxv2i64. Legalize accordingly.
if (!OnlyPackedOffsets &&		if (!OnlyPackedOffsets &&
▲ Show 20 Lines • Show All 189 Lines • ▼ Show 20 Lines	case Intrinsic::aarch64_sve_ld1_gather_uxtw:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW,		return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW,
/*OnlyPackedOffsets=*/false);		/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:		case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED,		return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED,
/*OnlyPackedOffsets=*/false);		/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:		case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED,		return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED,
/*OnlyPackedOffsets=*/false);		/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_imm:		case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM);		return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM);
case Intrinsic::aarch64_sve_st1_scatter:		case Intrinsic::aarch64_sve_st1_scatter:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1);		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1);
case Intrinsic::aarch64_sve_st1_scatter_index:		case Intrinsic::aarch64_sve_st1_scatter_index:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SCALED);		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SCALED);
case Intrinsic::aarch64_sve_st1_scatter_sxtw:		case Intrinsic::aarch64_sve_st1_scatter_sxtw:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW,		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW,
/*OnlyPackedOffsets=*/false);		/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw:		case Intrinsic::aarch64_sve_st1_scatter_uxtw:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW,		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW,
/*OnlyPackedOffsets=*/false);		/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:		case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW_SCALED,		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW_SCALED,
/*OnlyPackedOffsets=*/false);		/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:		case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW_SCALED,		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW_SCALED,
/*OnlyPackedOffsets=*/false);		/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_imm:		case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_IMM);		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_IMM);
default:		default:
break;		break;
}		}
break;		break;
case ISD::GlobalAddress:		case ISD::GlobalAddress:
return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());		return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
}		}
▲ Show 20 Lines • Show All 709 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Show First 20 Lines • Show All 634 Lines • ▼ Show 20 Lines	let Predicates = [HasSVE] in {
// Scatters using unpacked, scaled 32-bit offsets, e.g.		// Scatters using unpacked, scaled 32-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d, uxtw #1]		// st1h z0.d, p0, [x0, z0.d, uxtw #1]
defm SST1H_D : sve_mem_64b_sst_sv_32_scaled<0b010, "st1h", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;		defm SST1H_D : sve_mem_64b_sst_sv_32_scaled<0b010, "st1h", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
defm SST1W_D : sve_mem_64b_sst_sv_32_scaled<0b100, "st1w", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;		defm SST1W_D : sve_mem_64b_sst_sv_32_scaled<0b100, "st1w", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
defm SST1D : sve_mem_64b_sst_sv_32_scaled<0b110, "st1d", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;		defm SST1D : sve_mem_64b_sst_sv_32_scaled<0b110, "st1d", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;

// Scatters using 32/64-bit pointers with offset, e.g.		// Scatters using 32/64-bit pointers with offset, e.g.
// st1h z0.s, p0, [z0.s, #16]		// st1h z0.s, p0, [z0.s, #16]
defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", timm0_31, AArch64st1_scatter_imm, nxv4i8>;		defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", imm0_31, AArch64st1_scatter_imm, nxv4i8>;
defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv4i16>;		defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", uimm5s2, AArch64st1_scatter_imm, nxv4i16>;
defm SST1W : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv4i32>;		defm SST1W : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", uimm5s4, AArch64st1_scatter_imm, nxv4i32>;

// Scatters using 32/64-bit pointers with offset, e.g.		// Scatters using 32/64-bit pointers with offset, e.g.
// st1h z0.d, p0, [z0.d, #16]		// st1h z0.d, p0, [z0.d, #16]
defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", timm0_31, AArch64st1_scatter_imm, nxv2i8>;		defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", imm0_31, AArch64st1_scatter_imm, nxv2i8>;
defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv2i16>;		defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", uimm5s2, AArch64st1_scatter_imm, nxv2i16>;
defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv2i32>;		defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", uimm5s4, AArch64st1_scatter_imm, nxv2i32>;
defm SST1D : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", tuimm5s8, AArch64st1_scatter_imm, nxv2i64>;		defm SST1D : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", uimm5s8, AArch64st1_scatter_imm, nxv2i64>;

// Scatters using unscaled 64-bit offsets, e.g.		// Scatters using unscaled 64-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d]		// st1h z0.d, p0, [x0, z0.d]
defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b", AArch64st1_scatter, nxv2i8>;		defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b", AArch64st1_scatter, nxv2i8>;
defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h", AArch64st1_scatter, nxv2i16>;		defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h", AArch64st1_scatter, nxv2i16>;
defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w", AArch64st1_scatter, nxv2i32>;		defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w", AArch64st1_scatter, nxv2i32>;
defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d", AArch64st1_scatter, nxv2i64>;		defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d", AArch64st1_scatter, nxv2i64>;

▲ Show 20 Lines • Show All 981 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll

	; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s			; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

	; Verify that DAG combine rules for LD1 + sext/zext don't apply when the			; Verify that DAG combine rules for LD1 + sext/zext don't apply when the
	; result of LD1 has multiple uses			; result of LD1 has multiple uses

	define <vscale x 2 x i64> @no_dag_combine_zext_sext(<vscale x 2 x i1> %pg,			define <vscale x 2 x i64> @no_dag_combine_zext_sext(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,			<vscale x 2 x i64> %base,
	<vscale x 2 x i8>* %res_out,			<vscale x 2 x i8>* %res_out,
	<vscale x 2 x i1> %pred) {			<vscale x 2 x i1> %pred) {
	; CHECK-LABEL: no_dag_combine_zext_sext			; CHECK-LABEL: no_dag_combine_zext_sext
	; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16]			; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: st1b { z0.d }, p1, [x0]			; CHECK-NEXT: st1b { z0.d }, p1, [x0]
	; CHECK-NEXT: and z0.d, z0.d, #0xff			; CHECK-NEXT: and z0.d, z0.d, #0xff
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,			%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,			<vscale x 2 x i64> %base,
	i64 16)			i64 16)
	%res1 = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>			%res1 = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
	%res2 = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>			%res2 = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
	call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %load,			call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %load,
	<vscale x 2 x i8> *%res_out,			<vscale x 2 x i8> *%res_out,
	i32 8,			i32 8,
	<vscale x 2 x i1> %pred)			<vscale x 2 x i1> %pred)

	ret <vscale x 2 x i64> %res1			ret <vscale x 2 x i64> %res1
	}			}

	define <vscale x 2 x i64> @no_dag_combine_sext(<vscale x 2 x i1> %pg,			define <vscale x 2 x i64> @no_dag_combine_sext(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,			<vscale x 2 x i64> %base,
	<vscale x 2 x i8>* %res_out,			<vscale x 2 x i8>* %res_out,
	<vscale x 2 x i1> %pred) {			<vscale x 2 x i1> %pred) {
	; CHECK-LABEL: no_dag_combine_sext			; CHECK-LABEL: no_dag_combine_sext
	; CHECK: ld1b { z1.d }, p0/z, [z0.d, #16]			; CHECK: ld1b { z1.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ptrue p0.d			; CHECK-NEXT: ptrue p0.d
	; CHECK-NEXT: sxtb z0.d, p0/m, z1.d			; CHECK-NEXT: sxtb z0.d, p0/m, z1.d
	; CHECK-NEXT: st1b { z1.d }, p1, [x0]			; CHECK-NEXT: st1b { z1.d }, p1, [x0]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,			%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,			<vscale x 2 x i64> %base,
	i64 16)			i64 16)
	%res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>			%res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
	call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %load,			call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %load,
	<vscale x 2 x i8> *%res_out,			<vscale x 2 x i8> *%res_out,
	i32 8,			i32 8,
	<vscale x 2 x i1> %pred)			<vscale x 2 x i1> %pred)

	ret <vscale x 2 x i64> %res			ret <vscale x 2 x i64> %res
	}			}

	define <vscale x 2 x i64> @no_dag_combine_zext(<vscale x 2 x i1> %pg,			define <vscale x 2 x i64> @no_dag_combine_zext(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,			<vscale x 2 x i64> %base,
	<vscale x 2 x i8>* %res_out,			<vscale x 2 x i8>* %res_out,
	<vscale x 2 x i1> %pred) {			<vscale x 2 x i1> %pred) {
	; CHECK-LABEL: no_dag_combine_zext			; CHECK-LABEL: no_dag_combine_zext
	; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16]			; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: st1b { z0.d }, p1, [x0]			; CHECK-NEXT: st1b { z0.d }, p1, [x0]
	; CHECK-NEXT: and z0.d, z0.d, #0xff			; CHECK-NEXT: and z0.d, z0.d, #0xff
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,			%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,			<vscale x 2 x i64> %base,
	i64 16)			i64 16)
	%res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>			%res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
	call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %load,			call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %load,
	<vscale x 2 x i8> *%res_out,			<vscale x 2 x i8> *%res_out,
	i32 8,			i32 8,
	<vscale x 2 x i1> %pred)			<vscale x 2 x i1> %pred)

	ret <vscale x 2 x i64> %res			ret <vscale x 2 x i64> %res
	}			}

	declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)			declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
	declare void @llvm.masked.store.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>*, i32, <vscale x 2 x i1>)			declare void @llvm.masked.store.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>*, i32, <vscale x 2 x i1>)

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

				;
				; LD1B, LD1W, LD1H, LD1D: vector base + immediate offset (index)
				; e.g. ld1h { z0.s }, p0/z, [z0.s, #16]
				;

				; LD1B
				define <vscale x 4 x i32> @gld1b_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1b_s_imm_offset:
				; CHECK: ld1b { z0.s }, p0/z, [z0.s, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				%res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				define <vscale x 2 x i64> @gld1b_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1b_d_imm_offset:
				; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				%res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LD1H
				define <vscale x 4 x i32> @gld1h_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1h_s_imm_offset:
				; CHECK: ld1h { z0.s }, p0/z, [z0.s, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				%res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				define <vscale x 2 x i64> @gld1h_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1h_d_imm_offset:
				; CHECK: ld1h { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				%res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LD1W
				define <vscale x 4 x i32> @gld1w_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1w_s_imm_offset:
				; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				ret <vscale x 4 x i32> %load
				}

				define <vscale x 2 x i64> @gld1w_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1w_d_imm_offset:
				; CHECK: ld1w { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				%res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				define <vscale x 4 x float> @gld1w_s_imm_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1w_s_imm_offset_float:
				; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				ret <vscale x 4 x float> %load
				}

				; LD1D
				define <vscale x 2 x i64> @gld1d_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1d_d_imm_offset:
				; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				ret <vscale x 2 x i64> %load
				}

				define <vscale x 2 x double> @gld1d_d_imm_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1d_d_imm_offset_double:
				; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				ret <vscale x 2 x double> %load
				}

				;
				; LD1SB, LD1SW, LD1SH: vector base + immediate offset (index)
				; e.g. ld1sh { z0.s }, p0/z, [z0.s, #16]
				;

				; LD1SB
				define <vscale x 4 x i32> @gld1sb_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1sb_s_imm_offset:
				; CHECK: ld1sb { z0.s }, p0/z, [z0.s, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				%res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				define <vscale x 2 x i64> @gld1sb_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1sb_d_imm_offset:
				; CHECK: ld1sb { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				%res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LD1SH
				; i16 gather from 32-bit vector bases with the constant offset 16; the sext of
				; the result is expected to select the sign-extending ld1sh immediate form.
				define <vscale x 4 x i32> @gld1sh_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1sh_s_imm_offset:
				; CHECK: ld1sh { z0.s }, p0/z, [z0.s, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				%res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; i16 gather from 64-bit vector bases with the constant offset 16; the sext to
				; i64 is expected to select ld1sh on .d elements with the immediate form.
				define <vscale x 2 x i64> @gld1sh_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1sh_d_imm_offset:
				; CHECK: ld1sh { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				%res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LD1SW
				; i32 gather from 64-bit vector bases with the constant offset 16; the sext to
				; i64 is expected to select the sign-extending ld1sw immediate form.
				define <vscale x 2 x i64> @gld1sw_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1sw_d_imm_offset:
				; CHECK: ld1sw { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				%res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				;
				; LD1B, LD1W, LD1H, LD1D: vector base + out of range immediate offset
				; e.g. ld1b { z0.d }, p0/z, [x0, z0.d]
				;

				; LD1B
				; Constant offset 32 is treated as out of range for the ld1b immediate form:
				; it is expected to be moved into w8 and used as the scalar base, with the
				; 32-bit base vector becoming the uxtw-extended vector offset.
				define <vscale x 4 x i32> @gld1b_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1b_s_imm_offset_out_of_range:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1b { z0.s }, p0/z, [x8, z0.s, uxtw]
				sdesmalenUnsubmitted Done Reply Inline Actions this needs to be a `mov w8, #128` (scaled by the element size, 4) instead. sdesmalen: this needs to be a `mov w8, #128` (scaled by the element size, 4) instead.
				sdesmalenUnsubmitted Done Reply Inline Actions Sorry, I pasted this with the wrong example. My comment related to `llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32`. sdesmalen: Sorry, I pasted this with the wrong example. My comment related to `llvm.aarch64.sve.ld1.gather.
				andwarAuthorUnsubmitted Done Reply Inline Actions The offset in `llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32` represent bytes, so it doesn't require scaling. andwar: The offset in `llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32` represent bytes, so…
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 32)
				%res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; Constant offset 32 is treated as out of range for the ld1b immediate form:
				; it is expected to be moved into w8 and used as the scalar base, with the
				; 64-bit base vector becoming the vector offset (no extension needed).
				define <vscale x 2 x i64> @gld1b_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1b_d_imm_offset_out_of_range:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1b { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 32)
				%res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LD1H
				; Constant offset 63 is treated as out of range for the ld1h immediate form:
				; it is expected to be moved into w8 and used as the scalar base, with the
				; 32-bit base vector becoming the uxtw-extended vector offset.
				define <vscale x 4 x i32> @gld1h_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1h_s_imm_offset_out_of_range:
				; CHECK: mov w8, #63
				; CHECK-NEXT: ld1h { z0.s }, p0/z, [x8, z0.s, uxtw]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 63)
				%res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; Constant offset 63 is treated as out of range for the ld1h immediate form:
				; it is expected to be moved into w8 and used as the scalar base, with the
				; 64-bit base vector becoming the vector offset.
				define <vscale x 2 x i64> @gld1h_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1h_d_imm_offset_out_of_range:
				; CHECK: mov w8, #63
				; CHECK-NEXT: ld1h { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 63)
				%res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LD1W
				; Constant offset 125 is treated as out of range for the ld1w immediate form:
				; it is expected to be moved into w8 unscaled and used as the scalar base,
				; with the 32-bit base vector becoming the uxtw-extended vector offset.
				define <vscale x 4 x i32> @gld1w_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1w_s_imm_offset_out_of_range:
				; CHECK: mov w8, #125
				; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, z0.s, uxtw]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 125)
				ret <vscale x 4 x i32> %load
				}

				; Constant offset 125 is treated as out of range for the ld1w immediate form:
				; it is expected to be moved into w8 and used as the scalar base, with the
				; 64-bit base vector becoming the vector offset; the zext folds into ld1w.
				define <vscale x 2 x i64> @gld1w_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1w_d_imm_offset_out_of_range:
				; CHECK: mov w8, #125
				; CHECK-NEXT: ld1w { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 125)
				%res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; Floating-point variant of the out-of-range ld1w case: offset 125 is expected
				; in w8 as the scalar base, with the base vector as the uxtw vector offset.
				define <vscale x 4 x float> @gld1w_s_imm_offset_out_of_range_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1w_s_imm_offset_out_of_range_float:
				; CHECK: mov w8, #125
				; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, z0.s, uxtw]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 125)
				ret <vscale x 4 x float> %load
				}

				; LD1D
				; Constant offset 249 is treated as out of range for the ld1d immediate form:
				; it is expected to be moved into w8 and used as the scalar base, with the
				; 64-bit base vector becoming the vector offset.
				define <vscale x 2 x i64> @gld1d_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1d_d_imm_offset_out_of_range:
				; CHECK: mov w8, #249
				; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 249)
				ret <vscale x 2 x i64> %load
				}

				; Floating-point variant of the out-of-range ld1d case: offset 249 is expected
				; in w8 as the scalar base, with the base vector as the vector offset.
				define <vscale x 2 x double> @gld1d_d_imm_offset_out_of_range_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1d_d_imm_offset_out_of_range_double:
				; CHECK: mov w8, #249
				; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 249)
				ret <vscale x 2 x double> %load
				}

				;
				; LD1SB, LD1SW, LD1SH: vector base + out of range immediate offset
				; e.g. ld1sb { z0.s }, p0/z, [x8, z0.s, uxtw]
				;

				; LD1SB
				; Sign-extending i8 gather with the out-of-range constant offset 32: the
				; constant is expected in w8 as the scalar base of ld1sb, with the 32-bit
				; base vector as the uxtw-extended vector offset.
				define <vscale x 4 x i32> @gld1sb_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1sb_s_imm_offset_out_of_range:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x8, z0.s, uxtw]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 32)
				%res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; Sign-extending i8 gather with the out-of-range constant offset 32: the
				; constant is expected in w8 as the scalar base of ld1sb, with the 64-bit
				; base vector as the vector offset.
				define <vscale x 2 x i64> @gld1sb_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1sb_d_imm_offset_out_of_range:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 32)
				%res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LD1SH
				; Sign-extending i16 gather with the out-of-range constant offset 63: the
				; constant is expected in w8 as the scalar base of ld1sh, with the 32-bit
				; base vector as the uxtw-extended vector offset.
				define <vscale x 4 x i32> @gld1sh_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1sh_s_imm_offset_out_of_range:
				; CHECK: mov w8, #63
				; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x8, z0.s, uxtw]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 63)
				%res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; Sign-extending i16 gather with the out-of-range constant offset 63: the
				; constant is expected in w8 as the scalar base of ld1sh, with the 64-bit
				; base vector as the vector offset.
				define <vscale x 2 x i64> @gld1sh_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1sh_d_imm_offset_out_of_range:
				; CHECK: mov w8, #63
				; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 63)
				%res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LD1SW
				; Sign-extending i32 gather with the out-of-range constant offset 125: the
				; constant is expected in w8 as the scalar base of ld1sw, with the 64-bit
				; base vector as the vector offset.
				define <vscale x 2 x i64> @gld1sw_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1sw_d_imm_offset_out_of_range:
				; CHECK: mov w8, #125
				; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 125)
				%res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; Declarations of the intrinsic overloads called by the tests above. Each
				; takes (predicate, vector of bases, i64 offset). The two name suffixes
				; mirror the signature: the first matches the returned element vector type,
				; the second the type of the base vector. Signed and unsigned tests share a
				; declaration; the extension is expressed by the caller's sext/zext.

				; LD1B/LD1SB
				declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; LD1H/LD1SH
				declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; LD1W/LD1SW
				declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; Floating-point overload of the 32-bit element gather.
				declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

				; LD1D
				declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; Floating-point overload of the 64-bit element gather.
				declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

				;
				; LD1B, LD1W, LD1H, LD1D: vector base + scalar offset (index)
				; e.g. ld1b { z0.d }, p0/z, [x0, z0.d]
				;

				; LD1B
				; i8 gather from 32-bit vector bases with a run-time i64 offset: the scalar
				; (x0 in the expected output) becomes the base register and the base vector
				; the uxtw-extended vector offset; the zext folds into ld1b.
				define <vscale x 4 x i32> @gld1b_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gld1b_s_scalar_offset:
				; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				%res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; i8 gather from 64-bit vector bases with a run-time i64 offset: the scalar
				; (x0 in the expected output) becomes the base register and the base vector
				; the vector offset; the zext folds into ld1b.
				define <vscale x 2 x i64> @gld1b_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1b_d_scalar_offset:
				; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				%res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LD1H
				; i16 gather from 32-bit vector bases with a run-time i64 offset: the scalar
				; (x0 in the expected output) becomes the base register and the base vector
				; the uxtw-extended vector offset; the zext folds into ld1h.
				define <vscale x 4 x i32> @gld1h_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gld1h_s_scalar_offset:
				; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				%res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; i16 gather from 64-bit vector bases with a run-time i64 offset: the scalar
				; (x0 in the expected output) becomes the base register and the base vector
				; the vector offset; the zext folds into ld1h.
				define <vscale x 2 x i64> @gld1h_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1h_d_scalar_offset:
				; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				%res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LD1W
				; Full-width i32 gather from 32-bit vector bases with a run-time i64 offset:
				; the scalar (x0 in the expected output) becomes the base register and the
				; base vector the uxtw-extended vector offset.
				define <vscale x 4 x i32> @gld1w_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gld1w_s_scalar_offset:
				; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				ret <vscale x 4 x i32> %load
				}

				; i32 gather from 64-bit vector bases with a run-time i64 offset: the scalar
				; (x0 in the expected output) becomes the base register and the base vector
				; the vector offset; the zext folds into ld1w.
				define <vscale x 2 x i64> @gld1w_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1w_d_scalar_offset:
				; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				%res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; Floating-point variant: float elements gathered from 32-bit vector bases
				; with a run-time i64 offset; the same ld1w scalar-base form is expected.
				define <vscale x 4 x float> @gld1w_s_scalar_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gld1w_s_scalar_offset_float:
				; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				ret <vscale x 4 x float> %load
				}

				; LD1D
				; Full-width i64 gather from 64-bit vector bases with a run-time i64 offset:
				; the scalar (x0 in the expected output) becomes the base register and the
				; base vector the vector offset.
				define <vscale x 2 x i64> @gld1d_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1d_d_scalar_offset:
				; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				ret <vscale x 2 x i64> %load
				}

				; Floating-point variant: double elements gathered from 64-bit vector bases
				; with a run-time i64 offset; the same ld1d scalar-base form is expected.
				define <vscale x 2 x double> @gld1d_d_scalar_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1d_d_scalar_offset_double:
				; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				ret <vscale x 2 x double> %load
				}

				;
				; LD1SB, LD1SW, LD1SH: vector base + scalar offset (index)
				; e.g. ld1sb { z0.d }, p0/z, [x0, z0.d]
				;

				; LD1SB
				; Sign-extending i8 gather from 32-bit vector bases with a run-time i64
				; offset: the scalar (x0 in the expected output) becomes the base register of
				; ld1sb and the base vector the uxtw-extended vector offset.
				define <vscale x 4 x i32> @gld1sb_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gld1sb_s_scalar_offset:
				; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				%res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; Sign-extending i8 gather from 64-bit vector bases with a run-time i64
				; offset: the scalar (x0 in the expected output) becomes the base register of
				; ld1sb and the base vector the vector offset.
				define <vscale x 2 x i64> @gld1sb_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1sb_d_scalar_offset:
				; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				%res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LD1SH
				; Sign-extending i16 gather from 32-bit vector bases with a run-time i64
				; offset: the scalar (x0 in the expected output) becomes the base register of
				; ld1sh and the base vector the uxtw-extended vector offset.
				define <vscale x 4 x i32> @gld1sh_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gld1sh_s_scalar_offset:
				; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				%res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; Sign-extending i16 gather from 64-bit vector bases with a run-time i64
				; offset: the scalar (x0 in the expected output) becomes the base register of
				; ld1sh and the base vector the vector offset.
				define <vscale x 2 x i64> @gld1sh_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1sh_d_scalar_offset:
				; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				%res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LD1SW
				; Sign-extending i32 gather from 64-bit vector bases with a run-time i64
				; offset: the scalar (x0 in the expected output) becomes the base register of
				; ld1sw and the base vector the vector offset.
				define <vscale x 2 x i64> @gld1sw_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1sw_d_scalar_offset:
				; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				%res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; Declarations of the intrinsic overloads called by the tests above. Each
				; takes (predicate, vector of bases, i64 offset). The two name suffixes
				; mirror the signature: the first matches the returned element vector type,
				; the second the type of the base vector. Signed and unsigned tests share a
				; declaration; the extension is expressed by the caller's sext/zext.

				; LD1B/LD1SB
				declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; LD1H/LD1SH
				declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; LD1W/LD1SW
				declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; Floating-point overload of the 32-bit element gather.
				declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

				; LD1D
				declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; Floating-point overload of the 64-bit element gather.
				declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll

This file was deleted.

	; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s \| FileCheck %s

	;
	; LD1B, LD1W, LD1H, LD1D: vector + immediate (index)
	; e.g. ld1h { z0.s }, p0/z, [z0.s, #16]
	;

	; LD1B
	define <vscale x 4 x i32> @gld1b_s_imm(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: gld1b_s_imm:
	; CHECK: ld1b { z0.s }, p0/z, [z0.s, #16]
	; CHECK-NEXT: ret
	%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
	<vscale x 4 x i32> %base,
	i64 16)
	%res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
	ret <vscale x 4 x i32> %res
	}

	define <vscale x 2 x i64> @gld1b_d_imm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1b_d_imm:
	; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,
	i64 16)
	%res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
	ret <vscale x 2 x i64> %res
	}

	; LD1H
	define <vscale x 4 x i32> @gld1h_s_imm(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: gld1h_s_imm:
	; CHECK: ld1h { z0.s }, p0/z, [z0.s, #16]
	; CHECK-NEXT: ret
	%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
	<vscale x 4 x i32> %base,
	i64 16)
	%res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
	ret <vscale x 4 x i32> %res
	}

	define <vscale x 2 x i64> @gld1h_d_imm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1h_d_imm:
	; CHECK: ld1h { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,
	i64 16)
	%res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
	ret <vscale x 2 x i64> %res
	}

	; LD1W
	define <vscale x 4 x i32> @gld1w_s_imm(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: gld1w_s_imm:
	; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16]
	; CHECK-NEXT: ret
	%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.imm.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
	<vscale x 4 x i32> %base,
	i64 16)
	ret <vscale x 4 x i32> %load
	}

	define <vscale x 2 x i64> @gld1w_d_imm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1w_d_imm:
	; CHECK: ld1w { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,
	i64 16)
	%res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
	ret <vscale x 2 x i64> %res
	}

	define <vscale x 4 x float> @gld1w_s_imm_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: gld1w_s_imm_float:
	; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16]
	; CHECK-NEXT: ret
	%load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.imm.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
	<vscale x 4 x i32> %base,
	i64 16)
	ret <vscale x 4 x float> %load
	}

	; LD1D
	define <vscale x 2 x i64> @gld1d_d_imm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1d_d_imm:
	; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.imm.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,
	i64 16)
	ret <vscale x 2 x i64> %load
	}

	define <vscale x 2 x double> @gld1d_d_imm_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1d_d_imm_double:
	; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	%load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.imm.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,
	i64 16)
	ret <vscale x 2 x double> %load
	}

	; LD1SB, LD1SW, LD1SH: vector + immediate (index)
	; e.g. ld1sh { z0.s }, p0/z, [z0.s, #16]
	;

	; LD1SB
	define <vscale x 4 x i32> @gld1sb_s_imm(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: gld1sb_s_imm:
	; CHECK: ld1sb { z0.s }, p0/z, [z0.s, #16]
	; CHECK-NEXT: ret
	%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
	<vscale x 4 x i32> %base,
	i64 16)
	%res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
	ret <vscale x 4 x i32> %res
	}

	define <vscale x 2 x i64> @gld1sb_d_imm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1sb_d_imm:
	; CHECK: ld1sb { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,
	i64 16)
	%res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
	ret <vscale x 2 x i64> %res
	}

	; LD1SH
	define <vscale x 4 x i32> @gld1sh_s_imm(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: gld1sh_s_imm:
	; CHECK: ld1sh { z0.s }, p0/z, [z0.s, #16]
	; CHECK-NEXT: ret
	%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
	<vscale x 4 x i32> %base,
	i64 16)
	%res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
	ret <vscale x 4 x i32> %res
	}

	define <vscale x 2 x i64> @gld1sh_d_imm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1sh_d_imm:
	; CHECK: ld1sh { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,
	i64 16)
	%res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
	ret <vscale x 2 x i64> %res
	}

	; LD1SW
	define <vscale x 2 x i64> @gld1sw_d_imm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1sw_d_imm:
	; CHECK: ld1sw { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,
	i64 16)
	%res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
	ret <vscale x 2 x i64> %res
	}

	; LD1B/LD1SB
	declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
	declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	; LD1H/LD1SH
	declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
	declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	; LD1W/LD1SW
	declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.imm.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
	declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.imm.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

	; LD1D
	declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.imm.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.imm.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

				;
				; ST1B, ST1W, ST1H, ST1D: vector base + immediate offset
				; e.g. st1h { z0.s }, p0, [z1.s, #16]
				;

				; ST1B
				; Truncating scatter: %data is narrowed to i8 and stored to 32-bit vector
				; bases with the constant offset 16; the constant is expected to fold into
				; the vector-plus-immediate form of st1b.
				define void @sst1b_s_imm_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1b_s_imm_offset:
				; CHECK: st1b { z0.s }, p0, [z1.s, #16]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> %data_trunc,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				ret void
				sdesmalenUnsubmitted Not Done Reply Inline Actions Maybe worth adding one test-case with a negative value (that should be covered by the use of `getZExtValue()`) ? sdesmalen: Maybe worth adding one test-case with a negative value (that should be covered by the use of…
				}

				; Truncating scatter: %data is narrowed to i8 and stored to 64-bit vector
				; bases with the constant offset 16; the constant is expected to fold into
				; the vector-plus-immediate form of st1b.
				define void @sst1b_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1b_d_imm_offset:
				; CHECK: st1b { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> %data_trunc,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				ret void
				}

				; ST1H
				; Truncating scatter: %data is narrowed to i16 and stored to 32-bit vector
				; bases with the constant offset 16; the constant is expected to fold into
				; the vector-plus-immediate form of st1h.
				define void @sst1h_s_imm_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1h_s_imm_offset:
				; CHECK: st1h { z0.s }, p0, [z1.s, #16]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> %data_trunc,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				ret void
				}

				; Truncating scatter: %data is narrowed to i16 and stored to 64-bit vector
				; bases with the constant offset 16; the constant is expected to fold into
				; the vector-plus-immediate form of st1h.
				define void @sst1h_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1h_d_imm_offset:
				; CHECK: st1h { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> %data_trunc,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				ret void
				}

				; ST1W
				; Full-width i32 scatter to 32-bit vector bases with the constant offset 16;
				; the constant is expected to fold into the vector-plus-immediate form.
				define void @sst1w_s_imm_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1w_s_imm_offset:
				; CHECK: st1w { z0.s }, p0, [z1.s, #16]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				ret void
				}

				; Truncating scatter: %data is narrowed to i32 and stored to 64-bit vector
				; bases with the constant offset 16; the constant is expected to fold into
				; the vector-plus-immediate form of st1w.
				define void @sst1w_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1w_d_imm_offset:
				; CHECK: st1w { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> %data_trunc,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				ret void
				}

				; Floating-point variant: float elements scattered to 32-bit vector bases
				; with the constant offset 16; the same st1w immediate form is expected.
				define void @sst1w_s_imm_offset_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1w_s_imm_offset_float:
				; CHECK: st1w { z0.s }, p0, [z1.s, #16]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> %data,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				ret void
				}

				; ST1D
				; Full-width i64 scatter to 64-bit vector bases with the constant offset 16;
				; the constant is expected to fold into the vector-plus-immediate form.
				define void @sst1d_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1d_d_imm_offset:
				; CHECK: st1d { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				ret void
				}

				; Floating-point variant: double elements scattered to 64-bit vector bases
				; with the constant offset 16; the same st1d immediate form is expected.
				define void @sst1d_d_imm_offset_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1d_d_imm_offset_double:
				; CHECK: st1d { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				ret void
				}

				;
				; ST1B, ST1W, ST1H, ST1D: vector base + out of range immediate offset
				; e.g. st1b { z0.s }, p0, [x8, z1.s, uxtw]
				;

				; ST1B
				; Truncating i8 scatter with the out-of-range constant offset 32: the constant
				; is expected to be moved into w8 and used as the scalar base of st1b, with
				; the 32-bit base vector becoming the uxtw-extended vector offset.
				define void @sst1b_s_imm_offset_out_of_range(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1b_s_imm_offset_out_of_range:
				; CHECK: mov w8, #32
				; CHECK-NEXT: st1b { z0.s }, p0, [x8, z1.s, uxtw]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> %data_trunc,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 32)
				ret void
				}

				; Byte store of truncated 64-bit elements with the constant offset 32.
				; The expected output materialises the offset in w8 and uses the
				; scalar-base + 64-bit vector form instead of an immediate.
				define void @sst1b_d_imm_offset_out_of_range(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1b_d_imm_offset_out_of_range:
				; CHECK: mov w8, #32
				; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
				; CHECK-NEXT: ret
				  %narrow = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> %narrow, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 32)
				  ret void
				}

				; ST1H
				; Halfword store of truncated 32-bit elements with the constant offset 63.
				; The expected output materialises the offset in w8 and uses the
				; scalar-base + unsigned-extended vector form instead of an immediate.
				define void @sst1h_s_imm_offset_out_of_range(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1h_s_imm_offset_out_of_range:
				; CHECK: mov w8, #63
				; CHECK-NEXT: st1h { z0.s }, p0, [x8, z1.s, uxtw]
				; CHECK-NEXT: ret
				  %narrow = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> %narrow, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 63)
				  ret void
				}

				; Halfword store of truncated 64-bit elements with the constant offset 63.
				; The expected output materialises the offset in w8 and uses the
				; scalar-base + 64-bit vector form instead of an immediate.
				define void @sst1h_d_imm_offset_out_of_range(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1h_d_imm_offset_out_of_range:
				; CHECK: mov w8, #63
				; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
				; CHECK-NEXT: ret
				  %narrow = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> %narrow, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 63)
				  ret void
				}

				; ST1W
				; Word store of 32-bit elements with the constant offset 125.
				; The expected output materialises the offset in w8 and uses the
				; scalar-base + unsigned-extended vector form instead of an immediate.
				define void @sst1w_s_imm_offset_out_of_range(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1w_s_imm_offset_out_of_range:
				; CHECK: mov w8, #125
				; CHECK-NEXT: st1w { z0.s }, p0, [x8, z1.s, uxtw]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 125)
				  ret void
				}

				; Word store of truncated 64-bit elements with the constant offset 125.
				; The expected output materialises the offset in w8 and uses the
				; scalar-base + 64-bit vector form instead of an immediate.
				define void @sst1w_d_imm_offset_out_of_range(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1w_d_imm_offset_out_of_range:
				; CHECK: mov w8, #125
				; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
				; CHECK-NEXT: ret
				  %narrow = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> %narrow, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 125)
				  ret void
				}

				; Word store of float elements with the constant offset 125.
				; The expected output materialises the offset in w8 and uses the
				; scalar-base + unsigned-extended vector form instead of an immediate.
				define void @sst1w_s_imm_offset_float_out_of_range(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1w_s_imm_offset_float_out_of_range:
				; CHECK: mov w8, #125
				; CHECK-NEXT: st1w { z0.s }, p0, [x8, z1.s, uxtw]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 125)
				  ret void
				}

				; ST1D
				; Doubleword store of 64-bit integer elements with the constant offset 249.
				; The expected output materialises the offset in w8 and uses the
				; scalar-base + 64-bit vector form instead of an immediate.
				define void @sst1d_d_imm_offset_out_of_range(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1d_d_imm_offset_out_of_range:
				; CHECK: mov w8, #249
				; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 249)
				  ret void
				}

				; Doubleword store of double elements with the constant offset 249.
				; The expected output materialises the offset in w8 and uses the
				; scalar-base + 64-bit vector form instead of an immediate.
				define void @sst1d_d_imm_offset_double_out_of_range(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1d_d_imm_offset_double_out_of_range:
				; CHECK: mov w8, #249
				; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 249)
				  ret void
				}

				; Declarations of the scatter-store intrinsic overloads called by the tests above.
				; The callers pass, in order: the data vector, the <n x i1> predicate (%pg),
				; the vector of bases (%base) and an i64 offset. The two overload suffixes
				; name the data vector type and the base vector type.

				; ST1B
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1H
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1W
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1W, float data
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)

				; ST1D
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1D, double data
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

				;
				; ST1B, ST1H, ST1W, ST1D: vector base + scalar offset
				; e.g. st1h { z0.d }, p0, [x0, z1.d]
				;

				; ST1B
				; Byte store of truncated 32-bit elements with a run-time i64 offset.
				; The expected output uses the offset register (x0) as the scalar base and
				; the 32-bit vector bases as unsigned-extended offsets.
				define void @sst1b_s_scalar_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: sst1b_s_scalar_offset:
				; CHECK: st1b { z0.s }, p0, [x0, z1.s, uxtw]
				; CHECK-NEXT: ret
				  %narrow = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> %narrow, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
				  ret void
				}

				; Byte store of truncated 64-bit elements with a run-time i64 offset.
				; The expected output uses the offset register (x0) as the scalar base and
				; the 64-bit vector bases as offsets.
				define void @sst1b_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: sst1b_d_scalar_offset:
				; CHECK: st1b { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				  %narrow = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> %narrow, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  ret void
				}

				; ST1H
				; Halfword store of truncated 32-bit elements with a run-time i64 offset.
				; The expected output uses the offset register (x0) as the scalar base and
				; the 32-bit vector bases as unsigned-extended offsets.
				define void @sst1h_s_scalar_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: sst1h_s_scalar_offset:
				; CHECK: st1h { z0.s }, p0, [x0, z1.s, uxtw]
				; CHECK-NEXT: ret
				  %narrow = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> %narrow, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
				  ret void
				}

				; Halfword store of truncated 64-bit elements with a run-time i64 offset.
				; The expected output uses the offset register (x0) as the scalar base and
				; the 64-bit vector bases as offsets.
				define void @sst1h_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: sst1h_d_scalar_offset:
				; CHECK: st1h { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				  %narrow = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> %narrow, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  ret void
				}

				; ST1W
				; Word store of 32-bit elements with a run-time i64 offset.
				; The expected output uses the offset register (x0) as the scalar base and
				; the 32-bit vector bases as unsigned-extended offsets.
				define void @sst1w_s_scalar_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: sst1w_s_scalar_offset:
				; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
				  ret void
				}

				; Word store of truncated 64-bit elements with a run-time i64 offset.
				; The expected output uses the offset register (x0) as the scalar base and
				; the 64-bit vector bases as offsets.
				define void @sst1w_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: sst1w_d_scalar_offset:
				; CHECK: st1w { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				  %narrow = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> %narrow, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  ret void
				}

				; Word store of float elements with a run-time i64 offset.
				; The expected output uses the offset register (x0) as the scalar base and
				; the 32-bit vector bases as unsigned-extended offsets.
				define void @sst1w_s_scalar_offset_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: sst1w_s_scalar_offset_float:
				; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
				  ret void
				}

				; ST1D
				; Doubleword store of 64-bit integer elements with a run-time i64 offset.
				; The expected output uses the offset register (x0) as the scalar base and
				; the 64-bit vector bases as offsets.
				define void @sst1d_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: sst1d_d_scalar_offset:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  ret void
				}

				; Doubleword store of double elements with a run-time i64 offset.
				; The expected output uses the offset register (x0) as the scalar base and
				; the 64-bit vector bases as offsets.
				define void @sst1d_d_scalar_offset_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: sst1d_d_scalar_offset_double:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  ret void
				}

				; Declarations of the scatter-store intrinsic overloads called by the tests above.
				; The callers pass, in order: the data vector, the <n x i1> predicate (%pg),
				; the vector of bases (%base) and the i64 %offset. The two overload suffixes
				; name the data vector type and the base vector type.

				; ST1B
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1H
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1W
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1W, float data
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)

				; ST1D
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1D, double data
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll

This file was deleted.

	; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

	;
	; ST1B, ST1W, ST1H, ST1D: vector + immediate (index)
	; e.g. st1h { z0.s }, p0, [z1.s, #16]
	;

	; ST1B
	; Byte store of truncated 32-bit elements through 32-bit vector bases with
	; the constant 16; the expected output uses the vector-plus-immediate form.
	define void @sst1b_s_imm(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: sst1b_s_imm:
	; CHECK: st1b { z0.s }, p0, [z1.s, #16]
	; CHECK-NEXT: ret
	  %narrow = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
	  call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i8.nxv4i32(<vscale x 4 x i8> %narrow, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
	  ret void
	}

	; Byte store of truncated 64-bit elements through 64-bit vector bases with
	; the constant 16; the expected output uses the vector-plus-immediate form.
	define void @sst1b_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: sst1b_d_imm:
	; CHECK: st1b { z0.d }, p0, [z1.d, #16]
	; CHECK-NEXT: ret
	  %narrow = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
	  call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i8.nxv2i64(<vscale x 2 x i8> %narrow, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
	  ret void
	}

	; ST1H
	; Halfword store of truncated 32-bit elements through 32-bit vector bases with
	; the constant 16; the expected output uses the vector-plus-immediate form.
	define void @sst1h_s_imm(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: sst1h_s_imm:
	; CHECK: st1h { z0.s }, p0, [z1.s, #16]
	; CHECK-NEXT: ret
	  %narrow = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
	  call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i16.nxv4i32(<vscale x 4 x i16> %narrow, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
	  ret void
	}

	; Halfword store of truncated 64-bit elements through 64-bit vector bases with
	; the constant 16; the expected output uses the vector-plus-immediate form.
	define void @sst1h_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: sst1h_d_imm:
	; CHECK: st1h { z0.d }, p0, [z1.d, #16]
	; CHECK-NEXT: ret
	  %narrow = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
	  call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i16.nxv2i64(<vscale x 2 x i16> %narrow, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
	  ret void
	}

	; ST1W
	; Word store of 32-bit elements through 32-bit vector bases with
	; the constant 16; the expected output uses the vector-plus-immediate form.
	define void @sst1w_s_imm(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: sst1w_s_imm:
	; CHECK: st1w { z0.s }, p0, [z1.s, #16]
	; CHECK-NEXT: ret
	  call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i32.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
	  ret void
	}

	; Word store of truncated 64-bit elements through 64-bit vector bases with
	; the constant 16; the expected output uses the vector-plus-immediate form.
	define void @sst1w_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: sst1w_d_imm:
	; CHECK: st1w { z0.d }, p0, [z1.d, #16]
	; CHECK-NEXT: ret
	  %narrow = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
	  call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i32.nxv2i64(<vscale x 2 x i32> %narrow, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
	  ret void
	}

	; Word store of float elements through 32-bit vector bases with
	; the constant 16; the expected output uses the vector-plus-immediate form.
	define void @sst1w_s_imm_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: sst1w_s_imm_float:
	; CHECK: st1w { z0.s }, p0, [z1.s, #16]
	; CHECK-NEXT: ret
	  call void @llvm.aarch64.sve.st1.scatter.imm.nxv4f32.nxv4i32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
	  ret void
	}

	; ST1D
	; Doubleword store of 64-bit integer elements through 64-bit vector bases with
	; the constant 16; the expected output uses the vector-plus-immediate form.
	define void @sst1d_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: sst1d_d_imm:
	; CHECK: st1d { z0.d }, p0, [z1.d, #16]
	; CHECK-NEXT: ret
	  call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
	  ret void
	}

	; Doubleword store of double elements through 64-bit vector bases with
	; the constant 16; the expected output uses the vector-plus-immediate form.
	define void @sst1d_d_imm_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: sst1d_d_imm_double:
	; CHECK: st1d { z0.d }, p0, [z1.d, #16]
	; CHECK-NEXT: ret
	  call void @llvm.aarch64.sve.st1.scatter.imm.nxv2f64.nxv2i64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
	  ret void
	}

	; Declarations of the scatter-store intrinsic overloads called by the tests above.
	; The callers pass, in order: the data vector, the <n x i1> predicate (%pg),
	; the vector of bases (%base) and a constant i64. The two overload suffixes
	; name the data vector type and the base vector type.

	; ST1B
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i8.nxv4i32(<vscale x 4 x i8>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i8.nxv2i64(<vscale x 2 x i8>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	; ST1H
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i16.nxv4i32(<vscale x 4 x i16>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i16.nxv2i64(<vscale x 2 x i16>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	; ST1W
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i32.nxv2i64(<vscale x 2 x i32>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	; ST1W, float data
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4f32.nxv4i32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)

	; ST1D
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	; ST1D, double data
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2f64.nxv2i64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Update the definition of AdvSIMD_GatherLoad_VecTorBase_Intrinsic
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 239071

llvm/include/llvm/IR/IntrinsicsAArch64.td

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Update the definition of AdvSIMD_GatherLoad_VecTorBase_Intrinsic
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 239071

llvm/include/llvm/IR/IntrinsicsAArch64.td

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll

[AArch64][SVE] Update the definition of AdvSIMD_GatherLoad_VecTorBase_Intrinsic
ClosedPublic