Diff 238322

llvm/include/llvm/IR/IntrinsicsAArch64.td

	Show First 20 Lines • Show All 1,076 Lines • ▼ Show 20 Lines
	class AdvSIMD_GatherLoad_32bitOffset_Intrinsic			class AdvSIMD_GatherLoad_32bitOffset_Intrinsic
	: Intrinsic<[llvm_anyvector_ty],			: Intrinsic<[llvm_anyvector_ty],
	[			[
	LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,			LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
	LLVMPointerToElt<0>, llvm_anyvector_ty			LLVMPointerToElt<0>, llvm_anyvector_ty
	],			],
	[IntrReadMem, IntrArgMemOnly]>;			[IntrReadMem, IntrArgMemOnly]>;

	class AdvSIMD_GatherLoad_VecTorBase_Intrinsic			class AdvSIMD_GatherLoad_VectorBase_Intrinsic
	: Intrinsic<[llvm_anyvector_ty],			: Intrinsic<[llvm_anyvector_ty],
	[			[
	LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,			LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
	llvm_anyvector_ty,			llvm_anyvector_ty,
	llvm_i64_ty			llvm_i64_ty
	],			],
	[IntrReadMem, IntrArgMemOnly]>;			[IntrReadMem, IntrArgMemOnly]>;

	Show All 25 Lines

	class AdvSIMD_ScatterStore_VectorBase_Intrinsic			class AdvSIMD_ScatterStore_VectorBase_Intrinsic
	: Intrinsic<[],			: Intrinsic<[],
	[			[
	llvm_anyvector_ty,			llvm_anyvector_ty,
	LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,			LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
	llvm_anyvector_ty, llvm_i64_ty			llvm_anyvector_ty, llvm_i64_ty
	],			],
	[IntrWriteMem, IntrArgMemOnly, ImmArg<3>]>;			[IntrWriteMem, IntrArgMemOnly]>;

	class AdvSIMD_1VectorArg_Imm64_Intrinsic			class AdvSIMD_1VectorArg_Imm64_Intrinsic
	: Intrinsic<[llvm_anyvector_ty],			: Intrinsic<[llvm_anyvector_ty],
	[LLVMMatchType<0>,			[LLVMMatchType<0>,
	llvm_i64_ty],			llvm_i64_ty],
	[IntrNoMem, ImmArg<1>]>;			[IntrNoMem, ImmArg<1>]>;

	//			//
	▲ Show 20 Lines • Show All 323 Lines • ▼ Show 20 Lines
	//			//

	def int_aarch64_sve_pfirst : AdvSIMD_Pred1VectorArg_Intrinsic;			def int_aarch64_sve_pfirst : AdvSIMD_Pred1VectorArg_Intrinsic;
	def int_aarch64_sve_pnext : AdvSIMD_Pred1VectorArg_Intrinsic;			def int_aarch64_sve_pnext : AdvSIMD_Pred1VectorArg_Intrinsic;
	def int_aarch64_sve_punpkhi : AdvSIMD_SVE_PUNPKHI_Intrinsic;			def int_aarch64_sve_punpkhi : AdvSIMD_SVE_PUNPKHI_Intrinsic;
	def int_aarch64_sve_punpklo : AdvSIMD_SVE_PUNPKHI_Intrinsic;			def int_aarch64_sve_punpklo : AdvSIMD_SVE_PUNPKHI_Intrinsic;

	//			//
	// Gather loads:			// Gather loads: scalar base + vector offsets
	//			//

	// scalar + vector, 64 bit unscaled offsets			// 64 bit unscaled offsets
	def int_aarch64_sve_ld1_gather : AdvSIMD_GatherLoad_64bitOffset_Intrinsic;			def int_aarch64_sve_ld1_gather : AdvSIMD_GatherLoad_64bitOffset_Intrinsic;

	// scalar + vector, 64 bit scaled offsets			// 64 bit scaled offsets
	def int_aarch64_sve_ld1_gather_index : AdvSIMD_GatherLoad_64bitOffset_Intrinsic;			def int_aarch64_sve_ld1_gather_index : AdvSIMD_GatherLoad_64bitOffset_Intrinsic;

	// scalar + vector, 32 bit unscaled offsets, sign (sxtw) or zero (zxtw)			// 32 bit unscaled offsets, sign (sxtw) or zero (zxtw) extended to 64 bits
	// extended to 64 bits
	def int_aarch64_sve_ld1_gather_sxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;			def int_aarch64_sve_ld1_gather_sxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;
	def int_aarch64_sve_ld1_gather_uxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;			def int_aarch64_sve_ld1_gather_uxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;

	// scalar + vector, 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended			// 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended to 64 bits
	// to 64 bits
	def int_aarch64_sve_ld1_gather_sxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;			def int_aarch64_sve_ld1_gather_sxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;
	def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;			def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;

	// vector base + immediate index			//
	def int_aarch64_sve_ld1_gather_imm : AdvSIMD_GatherLoad_VecTorBase_Intrinsic;			// Gather loads: vector base + scalar offset
				//

				def int_aarch64_sve_ld1_gather_scalar_offset : AdvSIMD_GatherLoad_VectorBase_Intrinsic;

	//			//
	// Scatter stores:			// Scatter stores: scalar base + vector offsets
	//			//

	// scalar + vector, 64 bit unscaled offsets			// 64 bit unscaled offsets
	def int_aarch64_sve_st1_scatter : AdvSIMD_ScatterStore_64bitOffset_Intrinsic;			def int_aarch64_sve_st1_scatter : AdvSIMD_ScatterStore_64bitOffset_Intrinsic;

	// scalar + vector, 64 bit scaled offsets			// 64 bit scaled offsets
	def int_aarch64_sve_st1_scatter_index			def int_aarch64_sve_st1_scatter_index
	: AdvSIMD_ScatterStore_64bitOffset_Intrinsic;			: AdvSIMD_ScatterStore_64bitOffset_Intrinsic;

	// scalar + vector, 32 bit unscaled offsets, sign (sxtw) or zero (zxtw)			// 32 bit unscaled offsets, sign (sxtw) or zero (zxtw) extended to 64 bits
	// extended to 64 bits
	def int_aarch64_sve_st1_scatter_sxtw			def int_aarch64_sve_st1_scatter_sxtw
	: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;			: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;

	def int_aarch64_sve_st1_scatter_uxtw			def int_aarch64_sve_st1_scatter_uxtw
	: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;			: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;

	// scalar + vector, 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended			// 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended to 64 bits
	// to 64 bits
	def int_aarch64_sve_st1_scatter_sxtw_index			def int_aarch64_sve_st1_scatter_sxtw_index
	: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;			: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;

	def int_aarch64_sve_st1_scatter_uxtw_index			def int_aarch64_sve_st1_scatter_uxtw_index
	: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;			: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;

	// vector base + immediate index			//
	def int_aarch64_sve_st1_scatter_imm : AdvSIMD_ScatterStore_VectorBase_Intrinsic;			// Scatter stores: vector base + scalar offset
				//

				def int_aarch64_sve_st1_scatter_scalar_offset : AdvSIMD_ScatterStore_VectorBase_Intrinsic;

	//			//
	// SVE2 - Non-widening pairwise arithmetic			// SVE2 - Non-widening pairwise arithmetic
	//			//

	def int_aarch64_sve_faddp : AdvSIMD_Pred2VectorArg_Intrinsic;			def int_aarch64_sve_faddp : AdvSIMD_Pred2VectorArg_Intrinsic;
	def int_aarch64_sve_fmaxp : AdvSIMD_Pred2VectorArg_Intrinsic;			def int_aarch64_sve_fmaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
	def int_aarch64_sve_fmaxnmp : AdvSIMD_Pred2VectorArg_Intrinsic;			def int_aarch64_sve_fmaxnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
	▲ Show 20 Lines • Show All 76 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 12,127 Lines • ▼ Show 20 Lines	static SDValue performST1ScatterCombine(SDNode *N, SelectionDAG &DAG,

// For FPs, ACLE only supports _packed_ single and double precision types.		// For FPs, ACLE only supports _packed_ single and double precision types.
if (SrcElVT.isFloatingPoint())		if (SrcElVT.isFloatingPoint())
if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))		if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
return SDValue();		return SDValue();

// Depending on the addressing mode, this is either a pointer or a vector of		// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)		// pointers (that fits into one register)
const SDValue Base = N->getOperand(4);		SDValue Base = N->getOperand(4);
// Depending on the addressing mode, this is either a single offset or a		// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)		// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(5);		SDValue Offset = N->getOperand(5);

		// SST1_IMM requires that the offset is an immediate in the range 0-31. For
		// immediates outside that range and non-immediate scalar offsets use SST1 or
		// SST1_UXTW instead.
		if (Opcode == AArch64ISD::SST1_IMM &&
		(!isa<ConstantSDNode>(Offset.getNode()) ||
		cast<ConstantSDNode>(Offset.getNode())->getZExtValue() > 31)) {
		if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
		Opcode = AArch64ISD::SST1_UXTW;
		else
		sdesmalen (inline comment; unsubmitted, not done): nit: SrcEltVT is fixed-sized, so you can ask for `getFixedSize()` here.
		Opcode = AArch64ISD::SST1;

		std::swap(Base, Offset);
		sdesmalen (inline comment; unsubmitted, not done): nit: `!OffsetConst`
		}

auto &TLI = DAG.getTargetLoweringInfo();		auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))		if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();		return SDValue();

		sdesmalen (inline comment; unsubmitted, not done): nit: perhaps add an assert here that `Base.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i64`?
// Some scatter store variants allow unpacked offsets, but only as nxv2i32		// Some scatter store variants allow unpacked offsets, but only as nxv2i32
// vectors. These are implicitly sign (sxtw) or zero (zxtw) extend to		// vectors. These are implicitly sign (sxtw) or zero (zxtw) extend to
// nxv2i64. Legalize accordingly.		// nxv2i64. Legalize accordingly.
if (!OnlyPackedOffsets &&		if (!OnlyPackedOffsets &&
Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)		Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);		Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);

if (!TLI.isTypeLegal(Offset.getValueType()))		if (!TLI.isTypeLegal(Offset.getValueType()))
Show All 39 Lines	static SDValue performLD1GatherCombine(SDNode *N, SelectionDAG &DAG,

EVT MaxVT = llvm::MVT::getScalableVectorVT(RetElVT, NumElements);		EVT MaxVT = llvm::MVT::getScalableVectorVT(RetElVT, NumElements);
if (RetVT.getSizeInBits().getKnownMinSize() >		if (RetVT.getSizeInBits().getKnownMinSize() >
MaxVT.getSizeInBits().getKnownMinSize())		MaxVT.getSizeInBits().getKnownMinSize())
return SDValue();		return SDValue();

// Depending on the addressing mode, this is either a pointer or a vector of		// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)		// pointers (that fits into one register)
const SDValue Base = N->getOperand(3);		SDValue Base = N->getOperand(3);
// Depending on the addressing mode, this is either a single offset or a		// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)		// vector of offsets (that fits into one register)
const SDValue Offset = N->getOperand(4);		SDValue Offset = N->getOperand(4);

		// GLD1_IMM requires that the offset is an immediate in the range 0-31. For
		efriedma (inline comment; unsubmitted, done): "GLD1_IMM requires that the offset is an immediate in the range 0-31" is not correct, in general; the immediate is multiplied by the element size.
		sdesmalen (inline comment; unsubmitted, not done): This code should not interpret the offset as a byte offset and then scale the offset limit, but rather scale the offset with the element size before swapping Base and Offset. Perhaps we should rename the intrinsic to `llvm.aarch64.sve.ld1.gather.scalar.index` to clarify the distinction between it being a scaled and unscaled offset (this also gives it the same name as in the ACLE and the architecture spec).
		efriedma (inline comment; unsubmitted, not done): Does it actually make sense to expose both `llvm.aarch64.sve.ld1.gather.scalar.index` and `llvm.aarch64.sve.ld1.gather.scalar`? Yes, the ACLE has both "(vector base, scalar index)" and "(vector base, scalar offset in bytes)" intrinsics, but the "(vector base, scalar index)" intrinsics don't map to any single instruction if the index isn't a small constant. You have to emit a separate shift instruction.
		sdesmalen (inline comment; unsubmitted, not done): You're absolutely right; when I checked the ACLE document, I completely overlooked the intrinsics for scalar byte offsets. I agree it makes little sense to support two intrinsics in that case. Sorry for the confusion!
		// immediates outside that range and non-immediate scalar offsets use GLD1 or
		// GLD1_UXTW instead.
		efriedma (inline comment; unsubmitted, done): Do you need to handle the possibility that Offset is an immediate, but can't be encoded into a GLD1_IMM?
		if (Opcode == AArch64ISD::GLD1_IMM &&
		(!isa<ConstantSDNode>(Offset.getNode()) ||
		cast<ConstantSDNode>(Offset.getNode())->getZExtValue() > 31)) {
		if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
		Opcode = AArch64ISD::GLD1_UXTW;
		else
		Opcode = AArch64ISD::GLD1;

		std::swap(Base, Offset);
		}

if (!DAG.getTargetLoweringInfo().isTypeLegal(Base.getValueType()) ||		if (!DAG.getTargetLoweringInfo().isTypeLegal(Base.getValueType()) ||
!DAG.getTargetLoweringInfo().isTypeLegal(Offset.getValueType()))		!DAG.getTargetLoweringInfo().isTypeLegal(Offset.getValueType()))
return SDValue();		return SDValue();

// Return value type that is representable in hardware		// Return value type that is representable in hardware
EVT HwRetVt = getSVEContainerType(RetVT);		EVT HwRetVt = getSVEContainerType(RetVT);

▲ Show 20 Lines • Show All 181 Lines • ▼ Show 20 Lines	case ISD::INTRINSIC_W_CHAIN:
case Intrinsic::aarch64_sve_ld1_gather_sxtw:		case Intrinsic::aarch64_sve_ld1_gather_sxtw:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW);		return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW);
case Intrinsic::aarch64_sve_ld1_gather_uxtw:		case Intrinsic::aarch64_sve_ld1_gather_uxtw:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW);		return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW);
case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:		case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED);		return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED);
case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:		case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED);		return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED);
case Intrinsic::aarch64_sve_ld1_gather_imm:		case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM);		return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM);
case Intrinsic::aarch64_sve_st1_scatter:		case Intrinsic::aarch64_sve_st1_scatter:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1);		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1);
case Intrinsic::aarch64_sve_st1_scatter_index:		case Intrinsic::aarch64_sve_st1_scatter_index:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SCALED);		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SCALED);
case Intrinsic::aarch64_sve_st1_scatter_sxtw:		case Intrinsic::aarch64_sve_st1_scatter_sxtw:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW,		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW,
/*OnlyPackedOffsets=*/false);		/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw:		case Intrinsic::aarch64_sve_st1_scatter_uxtw:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW,		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW,
/*OnlyPackedOffsets=*/false);		/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:		case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW_SCALED,		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW_SCALED,
/*OnlyPackedOffsets=*/false);		/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:		case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW_SCALED,		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW_SCALED,
/*OnlyPackedOffsets=*/false);		/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_imm:		case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_IMM);		return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_IMM);
default:		default:
break;		break;
}		}
break;		break;
case ISD::GlobalAddress:		case ISD::GlobalAddress:
return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());		return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
}		}
▲ Show 20 Lines • Show All 667 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Show First 20 Lines • Show All 622 Lines • ▼ Show 20 Lines	let Predicates = [HasSVE] in {
// Scatters using unpacked, scaled 32-bit offsets, e.g.		// Scatters using unpacked, scaled 32-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d, uxtw #1]		// st1h z0.d, p0, [x0, z0.d, uxtw #1]
defm SST1H_D : sve_mem_64b_sst_sv_32_scaled<0b010, "st1h", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;		defm SST1H_D : sve_mem_64b_sst_sv_32_scaled<0b010, "st1h", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
defm SST1W_D : sve_mem_64b_sst_sv_32_scaled<0b100, "st1w", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;		defm SST1W_D : sve_mem_64b_sst_sv_32_scaled<0b100, "st1w", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
defm SST1D : sve_mem_64b_sst_sv_32_scaled<0b110, "st1d", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;		defm SST1D : sve_mem_64b_sst_sv_32_scaled<0b110, "st1d", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;

// Scatters using 32/64-bit pointers with offset, e.g.		// Scatters using 32/64-bit pointers with offset, e.g.
// st1h z0.s, p0, [z0.s, #16]		// st1h z0.s, p0, [z0.s, #16]
defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", timm0_31, AArch64st1_scatter_imm, nxv4i8>;		defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", imm0_31, AArch64st1_scatter_imm, nxv4i8>;
defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv4i16>;		defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", uimm5s2, AArch64st1_scatter_imm, nxv4i16>;
defm SST1W : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv4i32>;		defm SST1W : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", uimm5s4, AArch64st1_scatter_imm, nxv4i32>;

// Scatters using 32/64-bit pointers with offset, e.g.		// Scatters using 32/64-bit pointers with offset, e.g.
// st1h z0.d, p0, [z0.d, #16]		// st1h z0.d, p0, [z0.d, #16]
defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", timm0_31, AArch64st1_scatter_imm, nxv2i8>;		defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", imm0_31, AArch64st1_scatter_imm, nxv2i8>;
defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv2i16>;		defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", uimm5s2, AArch64st1_scatter_imm, nxv2i16>;
defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv2i32>;		defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", uimm5s4, AArch64st1_scatter_imm, nxv2i32>;
defm SST1D : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", tuimm5s8, AArch64st1_scatter_imm, nxv2i64>;		defm SST1D : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", uimm5s8, AArch64st1_scatter_imm, nxv2i64>;

// Scatters using unscaled 64-bit offsets, e.g.		// Scatters using unscaled 64-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d]		// st1h z0.d, p0, [x0, z0.d]
defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b", AArch64st1_scatter, nxv2i8>;		defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b", AArch64st1_scatter, nxv2i8>;
defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h", AArch64st1_scatter, nxv2i16>;		defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h", AArch64st1_scatter, nxv2i16>;
defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w", AArch64st1_scatter, nxv2i32>;		defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w", AArch64st1_scatter, nxv2i32>;
defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d", AArch64st1_scatter, nxv2i64>;		defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d", AArch64st1_scatter, nxv2i64>;

▲ Show 20 Lines • Show All 972 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll

	; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s			; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

	; Verify that DAG combine rules for LD1 + sext/zext don't apply when the			; Verify that DAG combine rules for LD1 + sext/zext don't apply when the
	; result of LD1 has multiple uses			; result of LD1 has multiple uses

	define <vscale x 2 x i64> @no_dag_combine_zext_sext(<vscale x 2 x i1> %pg,			define <vscale x 2 x i64> @no_dag_combine_zext_sext(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,			<vscale x 2 x i64> %base,
	<vscale x 2 x i8>* %res_out,			<vscale x 2 x i8>* %res_out,
	<vscale x 2 x i1> %pred) {			<vscale x 2 x i1> %pred) {
	; CHECK-LABEL: no_dag_combine_zext_sext			; CHECK-LABEL: no_dag_combine_zext_sext
	; CHECK: ld1b { z1.d }, p0/z, [z0.d, #16]			; CHECK: ld1b { z1.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: mov w8, #255			; CHECK-NEXT: mov w8, #255
	; CHECK-NEXT: mov z0.d, x8			; CHECK-NEXT: mov z0.d, x8
	; CHECK-NEXT: and z0.d, z1.d, z0.d			; CHECK-NEXT: and z0.d, z1.d, z0.d
	; CHECK-NEXT: st1b { z1.d }, p1, [x0]			; CHECK-NEXT: st1b { z1.d }, p1, [x0]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,			%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,			<vscale x 2 x i64> %base,
	i64 16)			i64 16)
	%res1 = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>			%res1 = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
	%res2 = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>			%res2 = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
	call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %load,			call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %load,
	<vscale x 2 x i8> *%res_out,			<vscale x 2 x i8> *%res_out,
	i32 8,			i32 8,
	<vscale x 2 x i1> %pred)			<vscale x 2 x i1> %pred)

	ret <vscale x 2 x i64> %res1			ret <vscale x 2 x i64> %res1
	}			}

	define <vscale x 2 x i64> @no_dag_combine_sext(<vscale x 2 x i1> %pg,			define <vscale x 2 x i64> @no_dag_combine_sext(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,			<vscale x 2 x i64> %base,
	<vscale x 2 x i8>* %res_out,			<vscale x 2 x i8>* %res_out,
	<vscale x 2 x i1> %pred) {			<vscale x 2 x i1> %pred) {
	; CHECK-LABEL: no_dag_combine_sext			; CHECK-LABEL: no_dag_combine_sext
	; CHECK: ld1b { z1.d }, p0/z, [z0.d, #16]			; CHECK: ld1b { z1.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ptrue p0.d			; CHECK-NEXT: ptrue p0.d
	; CHECK-NEXT: sxtb z0.d, p0/m, z1.d			; CHECK-NEXT: sxtb z0.d, p0/m, z1.d
	; CHECK-NEXT: st1b { z1.d }, p1, [x0]			; CHECK-NEXT: st1b { z1.d }, p1, [x0]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,			%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,			<vscale x 2 x i64> %base,
	i64 16)			i64 16)
	%res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>			%res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
	call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %load,			call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %load,
	<vscale x 2 x i8> *%res_out,			<vscale x 2 x i8> *%res_out,
	i32 8,			i32 8,
	<vscale x 2 x i1> %pred)			<vscale x 2 x i1> %pred)

	ret <vscale x 2 x i64> %res			ret <vscale x 2 x i64> %res
	}			}

	define <vscale x 2 x i64> @no_dag_combine_zext(<vscale x 2 x i1> %pg,			define <vscale x 2 x i64> @no_dag_combine_zext(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,			<vscale x 2 x i64> %base,
	<vscale x 2 x i8>* %res_out,			<vscale x 2 x i8>* %res_out,
	<vscale x 2 x i1> %pred) {			<vscale x 2 x i1> %pred) {
	; CHECK-LABEL: no_dag_combine_zext			; CHECK-LABEL: no_dag_combine_zext
	; CHECK: ld1b { z1.d }, p0/z, [z0.d, #16]			; CHECK: ld1b { z1.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: mov w8, #255			; CHECK-NEXT: mov w8, #255
	; CHECK-NEXT: mov z0.d, x8			; CHECK-NEXT: mov z0.d, x8
	; CHECK-NEXT: and z0.d, z1.d, z0.d			; CHECK-NEXT: and z0.d, z1.d, z0.d
	; CHECK-NEXT: st1b { z1.d }, p1, [x0]			; CHECK-NEXT: st1b { z1.d }, p1, [x0]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,			%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,			<vscale x 2 x i64> %base,
	i64 16)			i64 16)
	%res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>			%res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
	call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %load,			call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %load,
	<vscale x 2 x i8> *%res_out,			<vscale x 2 x i8> *%res_out,
	i32 8,			i32 8,
	<vscale x 2 x i1> %pred)			<vscale x 2 x i1> %pred)

	ret <vscale x 2 x i64> %res			ret <vscale x 2 x i64> %res
	}			}

	declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)			declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
	declare void @llvm.masked.store.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>*, i32, <vscale x 2 x i1>)			declare void @llvm.masked.store.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>*, i32, <vscale x 2 x i1>)

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

				;
				; LD1B, LD1W, LD1H, LD1D: vector base + immediate offset (index)
				; e.g. ld1h { z0.s }, p0/z, [z0.s, #16]
				;

				; LD1B
				define <vscale x 4 x i32> @gld1b_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1b_s_imm_offset:
				; CHECK: ld1b { z0.s }, p0/z, [z0.s, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				%res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				define <vscale x 2 x i64> @gld1b_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1b_d_imm_offset:
				; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				%res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LD1H
				define <vscale x 4 x i32> @gld1h_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1h_s_imm_offset:
				; CHECK: ld1h { z0.s }, p0/z, [z0.s, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				%res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				define <vscale x 2 x i64> @gld1h_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1h_d_imm_offset:
				; CHECK: ld1h { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				%res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				; LD1W
				define <vscale x 4 x i32> @gld1w_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1w_s_imm_offset:
				; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				ret <vscale x 4 x i32> %load
				}

				define <vscale x 2 x i64> @gld1w_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1w_d_imm_offset:
				; CHECK: ld1w { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				%res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
				ret <vscale x 2 x i64> %res
				}

				define <vscale x 4 x float> @gld1w_s_imm_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1w_s_imm_offset_float:
				; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				ret <vscale x 4 x float> %load
				}

				; LD1D
				define <vscale x 2 x i64> @gld1d_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1d_d_imm_offset:
				; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				ret <vscale x 2 x i64> %load
				}

				define <vscale x 2 x double> @gld1d_d_imm_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1d_d_imm_offset_double:
				; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 16)
				ret <vscale x 2 x double> %load
				}

				;
				; LD1SB, LD1SW, LD1SH: vector base + immediate offset (index)
				; e.g. ld1sh { z0.s }, p0/z, [z0.s, #16]
				;

				; LD1SB
				define <vscale x 4 x i32> @gld1sb_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1sb_s_imm_offset:
				; CHECK: ld1sb { z0.s }, p0/z, [z0.s, #16]
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				%res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; Byte gather (constant offset 16) sign-extended to 64-bit lanes.
				define <vscale x 2 x i64> @gld1sb_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1sb_d_imm_offset:
				; CHECK: ld1sb { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
				  %widened = sext <vscale x 2 x i8> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				; LD1SH
				; Halfword gather (constant offset 16) sign-extended to 32-bit lanes.
				define <vscale x 4 x i32> @gld1sh_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1sh_s_imm_offset:
				; CHECK: ld1sh { z0.s }, p0/z, [z0.s, #16]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
				  %widened = sext <vscale x 4 x i16> %gathered to <vscale x 4 x i32>
				  ret <vscale x 4 x i32> %widened
				}

				; Halfword gather (constant offset 16) sign-extended to 64-bit lanes.
				define <vscale x 2 x i64> @gld1sh_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1sh_d_imm_offset:
				; CHECK: ld1sh { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
				  %widened = sext <vscale x 2 x i16> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				; LD1SW
				; Word gather (constant offset 16) sign-extended to 64-bit lanes.
				define <vscale x 2 x i64> @gld1sw_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1sw_d_imm_offset:
				; CHECK: ld1sw { z0.d }, p0/z, [z0.d, #16]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
				  %widened = sext <vscale x 2 x i32> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				;
				; LD1B, LD1W, LD1H, LD1D: vector base + out of range immediate offset
				; e.g. ld1b { z0.d }, p0/z, [x0, z0.d]
				;

				; LD1B
				define <vscale x 4 x i32> @gld1b_s_imm_offset_oor(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1b_s_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1b { z0.s }, p0/z, [x8, z0.s, uxtw]
				[Review comment, done] sdesmalen: This needs to be a `mov w8, #128` (scaled by the element size, 4) instead.
				[Review comment, done] sdesmalen: Sorry, I pasted this with the wrong example. My comment related to `llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32`.
				[Review comment, done] andwar (author): The offset in `llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32` represents bytes, so it doesn't require scaling.
				; CHECK-NEXT: ret
				%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 32)
				%res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
				ret <vscale x 4 x i32> %res
				}

				; Byte gather zero-extended to 64-bit lanes; constant offset 32 is expected in a scalar register.
				define <vscale x 2 x i64> @gld1b_d_imm_offset_oor(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1b_d_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1b { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 32)
				  %widened = zext <vscale x 2 x i8> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				; LD1H
				; Halfword gather zero-extended to 32-bit lanes; constant offset 32 is expected in a scalar register.
				define <vscale x 4 x i32> @gld1h_s_imm_offset_oor(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1h_s_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1h { z0.s }, p0/z, [x8, z0.s, uxtw]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 32)
				  %widened = zext <vscale x 4 x i16> %gathered to <vscale x 4 x i32>
				  ret <vscale x 4 x i32> %widened
				}

				; Halfword gather zero-extended to 64-bit lanes; constant offset 32 is expected in a scalar register.
				define <vscale x 2 x i64> @gld1h_d_imm_offset_oor(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1h_d_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1h { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 32)
				  %widened = zext <vscale x 2 x i16> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				; LD1W
				; Word gather into 32-bit lanes; constant offset 32 is expected in a scalar register.
				define <vscale x 4 x i32> @gld1w_s_imm_offset_oor(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1w_s_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, z0.s, uxtw]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 32)
				  ret <vscale x 4 x i32> %gathered
				}

				; Word gather zero-extended to 64-bit lanes; constant offset 32 is expected in a scalar register.
				define <vscale x 2 x i64> @gld1w_d_imm_offset_oor(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1w_d_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1w { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 32)
				  %widened = zext <vscale x 2 x i32> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				; Float word gather; constant offset 32 is expected in a scalar register.
				define <vscale x 4 x float> @gld1w_s_imm_offset_oor_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1w_s_imm_offset_oor_float:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, z0.s, uxtw]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 32)
				  ret <vscale x 4 x float> %gathered
				}

				; LD1D
				; Doubleword gather; constant offset 32 is expected in a scalar register.
				define <vscale x 2 x i64> @gld1d_d_imm_offset_oor(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1d_d_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 32)
				  ret <vscale x 2 x i64> %gathered
				}

				; Double-precision gather; constant offset 32 is expected in a scalar register.
				define <vscale x 2 x double> @gld1d_d_imm_offset_oor_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1d_d_imm_offset_oor_double:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 32)
				  ret <vscale x 2 x double> %gathered
				}

				;
				; LD1SB, LD1SW, LD1SH: vector base + out of range immediate offset
				; e.g. ld1sb { z0.s }, p0/z, [x8, z0.s, uxtw]
				;

				; LD1SB
				; Byte gather sign-extended to 32-bit lanes; constant offset 32 is expected in a scalar register.
				define <vscale x 4 x i32> @gld1sb_s_imm_offset_oor(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1sb_s_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x8, z0.s, uxtw]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 32)
				  %widened = sext <vscale x 4 x i8> %gathered to <vscale x 4 x i32>
				  ret <vscale x 4 x i32> %widened
				}

				; Byte gather sign-extended to 64-bit lanes; constant offset 32 is expected in a scalar register.
				define <vscale x 2 x i64> @gld1sb_d_imm_offset_oor(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1sb_d_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 32)
				  %widened = sext <vscale x 2 x i8> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				; LD1SH
				; Halfword gather sign-extended to 32-bit lanes; constant offset 32 is expected in a scalar register.
				define <vscale x 4 x i32> @gld1sh_s_imm_offset_oor(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: gld1sh_s_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x8, z0.s, uxtw]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 32)
				  %widened = sext <vscale x 4 x i16> %gathered to <vscale x 4 x i32>
				  ret <vscale x 4 x i32> %widened
				}

				; Halfword gather sign-extended to 64-bit lanes; constant offset 32 is expected in a scalar register.
				define <vscale x 2 x i64> @gld1sh_d_imm_offset_oor(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1sh_d_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 32)
				  %widened = sext <vscale x 2 x i16> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				; LD1SW
				; Word gather sign-extended to 64-bit lanes; constant offset 32 is expected in a scalar register.
				define <vscale x 2 x i64> @gld1sw_d_imm_offset_oor(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: gld1sw_d_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x8, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 32)
				  %widened = sext <vscale x 2 x i32> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				; LD1B/LD1SB
				declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; LD1H/LD1SH
				declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; LD1W/LD1SW
				declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

				; LD1D
				declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

				;
				; LD1B, LD1W, LD1H, LD1D: vector base + scalar offset (index)
				; e.g. ld1b { z0.d }, p0/z, [x0, z0.d]
				;

				; LD1B
				; Byte gather with a run-time scalar offset, zero-extended to 32-bit lanes.
				define <vscale x 4 x i32> @gld1b_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gld1b_s_scalar_offset:
				; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
				  %widened = zext <vscale x 4 x i8> %gathered to <vscale x 4 x i32>
				  ret <vscale x 4 x i32> %widened
				}

				; Byte gather with a run-time scalar offset, zero-extended to 64-bit lanes.
				define <vscale x 2 x i64> @gld1b_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1b_d_scalar_offset:
				; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  %widened = zext <vscale x 2 x i8> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				; LD1H
				; Halfword gather with a run-time scalar offset, zero-extended to 32-bit lanes.
				define <vscale x 4 x i32> @gld1h_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gld1h_s_scalar_offset:
				; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
				  %widened = zext <vscale x 4 x i16> %gathered to <vscale x 4 x i32>
				  ret <vscale x 4 x i32> %widened
				}

				; Halfword gather with a run-time scalar offset, zero-extended to 64-bit lanes.
				define <vscale x 2 x i64> @gld1h_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1h_d_scalar_offset:
				; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  %widened = zext <vscale x 2 x i16> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				; LD1W
				; Word gather with a run-time scalar offset into 32-bit lanes.
				define <vscale x 4 x i32> @gld1w_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gld1w_s_scalar_offset:
				; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
				  ret <vscale x 4 x i32> %gathered
				}

				; Word gather with a run-time scalar offset, zero-extended to 64-bit lanes.
				define <vscale x 2 x i64> @gld1w_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1w_d_scalar_offset:
				; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  %widened = zext <vscale x 2 x i32> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				; Float word gather with a run-time scalar offset.
				define <vscale x 4 x float> @gld1w_s_scalar_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gld1w_s_scalar_offset_float:
				; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
				  ret <vscale x 4 x float> %gathered
				}

				; LD1D
				; Doubleword gather with a run-time scalar offset.
				define <vscale x 2 x i64> @gld1d_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1d_d_scalar_offset:
				; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  ret <vscale x 2 x i64> %gathered
				}

				; Double-precision gather with a run-time scalar offset.
				define <vscale x 2 x double> @gld1d_d_scalar_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1d_d_scalar_offset_double:
				; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  ret <vscale x 2 x double> %gathered
				}

				;
				; LD1SB, LD1SW, LD1SH: vector base + scalar offset (index)
				; e.g. ld1sb { z0.d }, p0/z, [x0, z0.d]
				;

				; LD1SB
				; Byte gather with a run-time scalar offset, sign-extended to 32-bit lanes.
				define <vscale x 4 x i32> @gld1sb_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gld1sb_s_scalar_offset:
				; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
				  %widened = sext <vscale x 4 x i8> %gathered to <vscale x 4 x i32>
				  ret <vscale x 4 x i32> %widened
				}

				; Byte gather with a run-time scalar offset, sign-extended to 64-bit lanes.
				define <vscale x 2 x i64> @gld1sb_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1sb_d_scalar_offset:
				; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  %widened = sext <vscale x 2 x i8> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				; LD1SH
				; Halfword gather with a run-time scalar offset, sign-extended to 32-bit lanes.
				define <vscale x 4 x i32> @gld1sh_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: gld1sh_s_scalar_offset:
				; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
				  %widened = sext <vscale x 4 x i16> %gathered to <vscale x 4 x i32>
				  ret <vscale x 4 x i32> %widened
				}

				; Halfword gather with a run-time scalar offset, sign-extended to 64-bit lanes.
				define <vscale x 2 x i64> @gld1sh_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1sh_d_scalar_offset:
				; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  %widened = sext <vscale x 2 x i16> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				; LD1SW
				; Word gather with a run-time scalar offset, sign-extended to 64-bit lanes.
				define <vscale x 2 x i64> @gld1sw_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: gld1sw_d_scalar_offset:
				; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d]
				; CHECK-NEXT: ret
				  %gathered = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
				  %widened = sext <vscale x 2 x i32> %gathered to <vscale x 2 x i64>
				  ret <vscale x 2 x i64> %widened
				}

				; LD1B/LD1SB
				declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; LD1H/LD1SH
				declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; LD1W/LD1SW
				declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

				; LD1D
				declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll

This file was deleted.

	; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

	;
	; LD1B, LD1W, LD1H, LD1D: vector + immediate (index)
	; e.g. ld1h { z0.s }, p0/z, [z0.s, #16]
	;

	; LD1B
	; Byte gather (immediate 16) zero-extended to 32-bit lanes.
	define <vscale x 4 x i32> @gld1b_s_imm(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: gld1b_s_imm:
	; CHECK: ld1b { z0.s }, p0/z, [z0.s, #16]
	; CHECK-NEXT: ret
	  %gathered = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
	  %widened = zext <vscale x 4 x i8> %gathered to <vscale x 4 x i32>
	  ret <vscale x 4 x i32> %widened
	}

	; Byte gather (immediate 16) zero-extended to 64-bit lanes.
	define <vscale x 2 x i64> @gld1b_d_imm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1b_d_imm:
	; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	  %gathered = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
	  %widened = zext <vscale x 2 x i8> %gathered to <vscale x 2 x i64>
	  ret <vscale x 2 x i64> %widened
	}

	; LD1H
	; Halfword gather (immediate 16) zero-extended to 32-bit lanes.
	define <vscale x 4 x i32> @gld1h_s_imm(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: gld1h_s_imm:
	; CHECK: ld1h { z0.s }, p0/z, [z0.s, #16]
	; CHECK-NEXT: ret
	  %gathered = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
	  %widened = zext <vscale x 4 x i16> %gathered to <vscale x 4 x i32>
	  ret <vscale x 4 x i32> %widened
	}

	; Halfword gather (immediate 16) zero-extended to 64-bit lanes.
	define <vscale x 2 x i64> @gld1h_d_imm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1h_d_imm:
	; CHECK: ld1h { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	  %gathered = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
	  %widened = zext <vscale x 2 x i16> %gathered to <vscale x 2 x i64>
	  ret <vscale x 2 x i64> %widened
	}

	; LD1W
	; Word gather (immediate 16) into 32-bit lanes.
	define <vscale x 4 x i32> @gld1w_s_imm(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: gld1w_s_imm:
	; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16]
	; CHECK-NEXT: ret
	  %gathered = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.imm.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
	  ret <vscale x 4 x i32> %gathered
	}

	; Word gather (immediate 16) zero-extended to 64-bit lanes.
	define <vscale x 2 x i64> @gld1w_d_imm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1w_d_imm:
	; CHECK: ld1w { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	  %gathered = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
	  %widened = zext <vscale x 2 x i32> %gathered to <vscale x 2 x i64>
	  ret <vscale x 2 x i64> %widened
	}

	; Float word gather (immediate 16).
	define <vscale x 4 x float> @gld1w_s_imm_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: gld1w_s_imm_float:
	; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16]
	; CHECK-NEXT: ret
	  %gathered = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.imm.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
	  ret <vscale x 4 x float> %gathered
	}

	; LD1D
	; Doubleword gather (immediate 16).
	define <vscale x 2 x i64> @gld1d_d_imm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1d_d_imm:
	; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	  %gathered = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.imm.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
	  ret <vscale x 2 x i64> %gathered
	}

	; Double-precision gather (immediate 16).
	define <vscale x 2 x double> @gld1d_d_imm_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1d_d_imm_double:
	; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	  %gathered = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.imm.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
	  ret <vscale x 2 x double> %gathered
	}

	;
	; LD1SB, LD1SW, LD1SH: vector + immediate (index)
	; e.g. ld1sh { z0.s }, p0/z, [z0.s, #16]
	;

	; LD1SB
	; Byte gather (immediate 16) sign-extended to 32-bit lanes.
	define <vscale x 4 x i32> @gld1sb_s_imm(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: gld1sb_s_imm:
	; CHECK: ld1sb { z0.s }, p0/z, [z0.s, #16]
	; CHECK-NEXT: ret
	  %gathered = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
	  %widened = sext <vscale x 4 x i8> %gathered to <vscale x 4 x i32>
	  ret <vscale x 4 x i32> %widened
	}

	; Byte gather (immediate 16) sign-extended to 64-bit lanes.
	define <vscale x 2 x i64> @gld1sb_d_imm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1sb_d_imm:
	; CHECK: ld1sb { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	  %gathered = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
	  %widened = sext <vscale x 2 x i8> %gathered to <vscale x 2 x i64>
	  ret <vscale x 2 x i64> %widened
	}

	; LD1SH
	; Halfword gather (immediate 16) sign-extended to 32-bit lanes.
	define <vscale x 4 x i32> @gld1sh_s_imm(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: gld1sh_s_imm:
	; CHECK: ld1sh { z0.s }, p0/z, [z0.s, #16]
	; CHECK-NEXT: ret
	  %gathered = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
	  %widened = sext <vscale x 4 x i16> %gathered to <vscale x 4 x i32>
	  ret <vscale x 4 x i32> %widened
	}

	; Halfword gather (immediate 16) sign-extended to 64-bit lanes.
	define <vscale x 2 x i64> @gld1sh_d_imm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1sh_d_imm:
	; CHECK: ld1sh { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	  %gathered = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
	  %widened = sext <vscale x 2 x i16> %gathered to <vscale x 2 x i64>
	  ret <vscale x 2 x i64> %widened
	}

	; LD1SW
	; Word gather (immediate 16) sign-extended to 64-bit lanes.
	define <vscale x 2 x i64> @gld1sw_d_imm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: gld1sw_d_imm:
	; CHECK: ld1sw { z0.d }, p0/z, [z0.d, #16]
	; CHECK-NEXT: ret
	  %gathered = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
	  %widened = sext <vscale x 2 x i32> %gathered to <vscale x 2 x i64>
	  ret <vscale x 2 x i64> %widened
	}

	; LD1B/LD1SB
	declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
	declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	; LD1H/LD1SH
	declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
	declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	; LD1W/LD1SW
	declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.imm.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
	declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.imm.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

	; LD1D
	declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.imm.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.imm.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

				;
				; ST1B, ST1W, ST1H, ST1D: vector base + immediate offset
				; e.g. st1h { z0.s }, p0, [z1.s, #16]
				;

				; ST1B
				define void @sst1b_s_imm_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1b_s_imm_offset:
				; CHECK: st1b { z0.s }, p0, [z1.s, #16]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> %data_trunc,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 16)
				ret void
				[Review comment, not done] sdesmalen: Maybe worth adding one test-case with a negative value (that should be covered by the use of `getZExtValue()`)?
				}

				; Truncates 64-bit lanes to bytes and scatters them: vector base plus constant offset 16.
				define void @sst1b_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1b_d_imm_offset:
				; CHECK: st1b { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				  %narrowed = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> %narrowed, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
				  ret void
				}

				; ST1H
				; Truncates 32-bit lanes to halfwords and scatters them: vector base plus constant offset 16.
				define void @sst1h_s_imm_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1h_s_imm_offset:
				; CHECK: st1h { z0.s }, p0, [z1.s, #16]
				; CHECK-NEXT: ret
				  %narrowed = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> %narrowed, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
				  ret void
				}

				; Truncates 64-bit lanes to halfwords and scatters them: vector base plus constant offset 16.
				define void @sst1h_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1h_d_imm_offset:
				; CHECK: st1h { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				  %narrowed = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> %narrowed, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
				  ret void
				}

				; ST1W
				; Scatters 32-bit lanes unmodified: vector base plus constant offset 16.
				define void @sst1w_s_imm_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1w_s_imm_offset:
				; CHECK: st1w { z0.s }, p0, [z1.s, #16]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
				  ret void
				}

				; Truncates 64-bit lanes to words and scatters them: vector base plus constant offset 16.
				define void @sst1w_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1w_d_imm_offset:
				; CHECK: st1w { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				  %narrowed = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> %narrowed, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
				  ret void
				}

				; Scatters float lanes unmodified: vector base plus constant offset 16.
				define void @sst1w_s_imm_offset_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1w_s_imm_offset_float:
				; CHECK: st1w { z0.s }, p0, [z1.s, #16]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
				  ret void
				}

				; ST1D
				; Scatters 64-bit lanes unmodified: vector base plus constant offset 16.
				define void @sst1d_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1d_d_imm_offset:
				; CHECK: st1d { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
				  ret void
				}

				; Scatters double-precision lanes unmodified: vector base plus constant offset 16.
				define void @sst1d_d_imm_offset_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1d_d_imm_offset_double:
				; CHECK: st1d { z0.d }, p0, [z1.d, #16]
				; CHECK-NEXT: ret
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
				  ret void
				}

				;
				; ST1B, ST1W, ST1H, ST1D: vector base + out of range immediate offset
				; e.g. st1h { z0.s }, p0, [x8, z1.s, uxtw]
				;

				; ST1B
				; Byte scatter from 32-bit lanes; constant offset 32 is expected in a scalar register.
				define void @sst1b_s_imm_offset_oor(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1b_s_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: st1b { z0.s }, p0, [x8, z1.s, uxtw]
				; CHECK-NEXT: ret
				  %narrowed = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
				  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> %narrowed, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 32)
				  ret void
				}

				; Truncates the i64 lanes to i8 and scatter-stores them through a 64-bit
				; vector base with a constant offset of 32. The expected output moves 32
				; into w8 and uses x8 as the scalar base with %base as the vector offset
				; (no extension, since the offsets are already 64-bit).
				; NOTE(review): presumably 32 is just past the ST1B immediate range
				; (0..31) - confirm against the Arm SVE reference.
				define void @sst1b_d_imm_offset_oor(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1b_d_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> %data_trunc,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 32)
				ret void
				}

				; ST1H
				; Truncates the i32 lanes to i16 and scatter-stores them through a 32-bit
				; vector base with a constant offset of 32. The expected output moves 32
				; into w8 and uses x8 as the scalar base with %base as the uxtw-extended
				; vector offset.
				; NOTE(review): the ST1H vector-plus-immediate form appears to accept
				; multiples of 2 up to 62, so 32 looks encodable; this test still expects
				; the register fallback - confirm whether that is intentional.
				define void @sst1h_s_imm_offset_oor(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1h_s_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: st1h { z0.s }, p0, [x8, z1.s, uxtw]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> %data_trunc,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 32)
				ret void
				}

				; Truncates the i64 lanes to i16 and scatter-stores them through a 64-bit
				; vector base with a constant offset of 32. The expected output moves 32
				; into w8 and uses x8 as the scalar base with %base as the vector offset.
				; NOTE(review): the ST1H vector-plus-immediate form appears to accept
				; multiples of 2 up to 62, so 32 looks encodable; this test still expects
				; the register fallback - confirm whether that is intentional.
				define void @sst1h_d_imm_offset_oor(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1h_d_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> %data_trunc,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 32)
				ret void
				}

				; ST1W
				; Scatter-stores <vscale x 4 x i32> data through a 32-bit vector base
				; with a constant offset of 32. The expected output moves 32 into w8 and
				; uses x8 as the scalar base with %base as the uxtw-extended vector
				; offset.
				; NOTE(review): the ST1W vector-plus-immediate form appears to accept
				; multiples of 4 up to 124, so 32 looks encodable; this test still
				; expects the register fallback - confirm whether that is intentional.
				define void @sst1w_s_imm_offset_oor(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1w_s_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: st1w { z0.s }, p0, [x8, z1.s, uxtw]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 32)
				ret void
				}

				; Truncates the i64 lanes to i32 and scatter-stores them through a 64-bit
				; vector base with a constant offset of 32. The expected output moves 32
				; into w8 and uses x8 as the scalar base with %base as the vector offset.
				; NOTE(review): the ST1W vector-plus-immediate form appears to accept
				; multiples of 4 up to 124, so 32 looks encodable; this test still
				; expects the register fallback - confirm whether that is intentional.
				define void @sst1w_d_imm_offset_oor(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1w_d_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> %data_trunc,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 32)
				ret void
				}

				; Floating-point variant: scatter-stores <vscale x 4 x float> data
				; through a 32-bit vector base with a constant offset of 32. The expected
				; output moves 32 into w8 and uses x8 as the scalar base with %base as
				; the uxtw-extended vector offset.
				; NOTE(review): the ST1W vector-plus-immediate form appears to accept
				; multiples of 4 up to 124, so 32 looks encodable; this test still
				; expects the register fallback - confirm whether that is intentional.
				define void @sst1w_s_imm_offset_float_oor(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
				; CHECK-LABEL: sst1w_s_imm_offset_float_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: st1w { z0.s }, p0, [x8, z1.s, uxtw]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> %data,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 32)
				ret void
				}

				; ST1D
				; Scatter-stores <vscale x 2 x i64> data through a 64-bit vector base
				; with a constant offset of 32. The expected output moves 32 into w8 and
				; uses x8 as the scalar base with %base as the vector offset.
				; NOTE(review): the ST1D vector-plus-immediate form appears to accept
				; multiples of 8 up to 248, so 32 looks encodable; this test still
				; expects the register fallback - confirm whether that is intentional.
				define void @sst1d_d_imm_offset_oor(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1d_d_imm_offset_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 32)
				ret void
				}

				; Floating-point variant: scatter-stores <vscale x 2 x double> data
				; through a 64-bit vector base with a constant offset of 32. The expected
				; output moves 32 into w8 and uses x8 as the scalar base with %base as
				; the vector offset.
				; NOTE(review): the ST1D vector-plus-immediate form appears to accept
				; multiples of 8 up to 248, so 32 looks encodable; this test still
				; expects the register fallback - confirm whether that is intentional.
				define void @sst1d_d_imm_offset_double_oor(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
				; CHECK-LABEL: sst1d_d_imm_offset_double_oor:
				; CHECK: mov w8, #32
				; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 32)
				ret void
				}

				; ST1B
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1H
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1W
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)

				; ST1D
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll

This file was added.

				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

				;
				; ST1B, ST1W, ST1H, ST1D: vector base + scalar offset
				; e.g. st1h { z0.d }, p0, [x0, z1.d]
				;

				; ST1B
				; Truncates the i32 lanes to i8 and scatter-stores them through a 32-bit
				; vector base plus the run-time scalar %offset. The expected output puts
				; the scalar in the base-register slot (x0) and %base in the vector-offset
				; slot with uxtw extension.
				define void @sst1b_s_scalar_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: sst1b_s_scalar_offset:
				; CHECK: st1b { z0.s }, p0, [x0, z1.s, uxtw]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> %data_trunc,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				ret void
				}

				; Truncates the i64 lanes to i8 and scatter-stores them through a 64-bit
				; vector base plus the run-time scalar %offset. The expected output puts
				; the scalar in the base-register slot (x0) and %base in the vector-offset
				; slot without extension.
				define void @sst1b_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: sst1b_d_scalar_offset:
				; CHECK: st1b { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> %data_trunc,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				ret void
				}

				; ST1H
				; Truncates the i32 lanes to i16 and scatter-stores them through a 32-bit
				; vector base plus the run-time scalar %offset. The expected output puts
				; the scalar in the base-register slot (x0) and %base in the vector-offset
				; slot with uxtw extension.
				define void @sst1h_s_scalar_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: sst1h_s_scalar_offset:
				; CHECK: st1h { z0.s }, p0, [x0, z1.s, uxtw]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> %data_trunc,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				ret void
				}

				; Truncates the i64 lanes to i16 and scatter-stores them through a 64-bit
				; vector base plus the run-time scalar %offset. The expected output puts
				; the scalar in the base-register slot (x0) and %base in the vector-offset
				; slot without extension.
				define void @sst1h_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: sst1h_d_scalar_offset:
				; CHECK: st1h { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> %data_trunc,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				ret void
				}

				; ST1W
				; Scatter-stores <vscale x 4 x i32> data through a 32-bit vector base
				; plus the run-time scalar %offset. The expected output puts the scalar
				; in the base-register slot (x0) and %base in the vector-offset slot with
				; uxtw extension.
				define void @sst1w_s_scalar_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: sst1w_s_scalar_offset:
				; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				ret void
				}

				; Truncates the i64 lanes to i32 and scatter-stores them through a 64-bit
				; vector base plus the run-time scalar %offset. The expected output puts
				; the scalar in the base-register slot (x0) and %base in the vector-offset
				; slot without extension.
				define void @sst1w_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: sst1w_d_scalar_offset:
				; CHECK: st1w { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> %data_trunc,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				ret void
				}

				; Floating-point variant: scatter-stores <vscale x 4 x float> data
				; through a 32-bit vector base plus the run-time scalar %offset. The
				; expected output is the same st1w form as the integer .s test.
				define void @sst1w_s_scalar_offset_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
				; CHECK-LABEL: sst1w_s_scalar_offset_float:
				; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> %data,
				<vscale x 4 x i1> %pg,
				<vscale x 4 x i32> %base,
				i64 %offset)
				ret void
				}

				; ST1D
				; Scatter-stores <vscale x 2 x i64> data through a 64-bit vector base
				; plus the run-time scalar %offset. The expected output puts the scalar
				; in the base-register slot (x0) and %base in the vector-offset slot
				; without extension.
				define void @sst1d_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: sst1d_d_scalar_offset:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				ret void
				}

				; Floating-point variant: scatter-stores <vscale x 2 x double> data
				; through a 64-bit vector base plus the run-time scalar %offset. The
				; expected output is the same st1d form as the integer .d test.
				define void @sst1d_d_scalar_offset_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
				; CHECK-LABEL: sst1d_d_scalar_offset_double:
				; CHECK: st1d { z0.d }, p0, [x0, z1.d]
				; CHECK-NEXT: ret
				call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data,
				<vscale x 2 x i1> %pg,
				<vscale x 2 x i64> %base,
				i64 %offset)
				ret void
				}

				; ST1B
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1H
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				; ST1W
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)

				; ST1D
				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

				declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll

This file was deleted.

	; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

	;
	; ST1B, ST1W, ST1H, ST1D: vector + immediate (index)
	; e.g. st1h { z0.s }, p0, [z1.s, #16]
	;

	; ST1B
	; Truncates the i32 lanes to i8 and scatter-stores them via the
	; st1.scatter.imm intrinsic with a 32-bit vector base and immediate 16.
	; The expected output is the vector-plus-immediate st1b form on .s elements.
	define void @sst1b_s_imm(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: sst1b_s_imm:
	; CHECK: st1b { z0.s }, p0, [z1.s, #16]
	; CHECK-NEXT: ret
	%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
	call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i8.nxv4i32(<vscale x 4 x i8> %data_trunc,
	<vscale x 4 x i1> %pg,
	<vscale x 4 x i32> %base,
	i64 16)
	ret void
	}

	; Truncates the i64 lanes to i8 and scatter-stores them via the
	; st1.scatter.imm intrinsic with a 64-bit vector base and immediate 16.
	; The expected output is the vector-plus-immediate st1b form on .d elements.
	define void @sst1b_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: sst1b_d_imm:
	; CHECK: st1b { z0.d }, p0, [z1.d, #16]
	; CHECK-NEXT: ret
	%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
	call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i8.nxv2i64(<vscale x 2 x i8> %data_trunc,
	<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,
	i64 16)
	ret void
	}

	; ST1H
	; Truncates the i32 lanes to i16 and scatter-stores them via the
	; st1.scatter.imm intrinsic with a 32-bit vector base and immediate 16.
	; The expected output is the vector-plus-immediate st1h form on .s elements.
	define void @sst1h_s_imm(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: sst1h_s_imm:
	; CHECK: st1h { z0.s }, p0, [z1.s, #16]
	; CHECK-NEXT: ret
	%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
	call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i16.nxv4i32(<vscale x 4 x i16> %data_trunc,
	<vscale x 4 x i1> %pg,
	<vscale x 4 x i32> %base,
	i64 16)
	ret void
	}

	; Truncates the i64 lanes to i16 and scatter-stores them via the
	; st1.scatter.imm intrinsic with a 64-bit vector base and immediate 16.
	; The expected output is the vector-plus-immediate st1h form on .d elements.
	define void @sst1h_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: sst1h_d_imm:
	; CHECK: st1h { z0.d }, p0, [z1.d, #16]
	; CHECK-NEXT: ret
	%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
	call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i16.nxv2i64(<vscale x 2 x i16> %data_trunc,
	<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,
	i64 16)
	ret void
	}

	; ST1W
	; Scatter-stores <vscale x 4 x i32> data via the st1.scatter.imm intrinsic
	; with a 32-bit vector base and immediate 16. The expected output is the
	; vector-plus-immediate st1w form on .s elements.
	define void @sst1w_s_imm(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: sst1w_s_imm:
	; CHECK: st1w { z0.s }, p0, [z1.s, #16]
	; CHECK-NEXT: ret
	call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i32.nxv4i32(<vscale x 4 x i32> %data,
	<vscale x 4 x i1> %pg,
	<vscale x 4 x i32> %base,
	i64 16)
	ret void
	}

	; Truncates the i64 lanes to i32 and scatter-stores them via the
	; st1.scatter.imm intrinsic with a 64-bit vector base and immediate 16.
	; The expected output is the vector-plus-immediate st1w form on .d elements.
	define void @sst1w_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: sst1w_d_imm:
	; CHECK: st1w { z0.d }, p0, [z1.d, #16]
	; CHECK-NEXT: ret
	%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
	call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i32.nxv2i64(<vscale x 2 x i32> %data_trunc,
	<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,
	i64 16)
	ret void
	}

	; Floating-point variant: scatter-stores <vscale x 4 x float> data via the
	; st1.scatter.imm intrinsic with a 32-bit vector base and immediate 16.
	; The expected output is the same st1w form as the integer .s test.
	define void @sst1w_s_imm_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
	; CHECK-LABEL: sst1w_s_imm_float:
	; CHECK: st1w { z0.s }, p0, [z1.s, #16]
	; CHECK-NEXT: ret
	call void @llvm.aarch64.sve.st1.scatter.imm.nxv4f32.nxv4i32(<vscale x 4 x float> %data,
	<vscale x 4 x i1> %pg,
	<vscale x 4 x i32> %base,
	i64 16)
	ret void
	}

	; ST1D
	; Scatter-stores <vscale x 2 x i64> data via the st1.scatter.imm intrinsic
	; with a 64-bit vector base and immediate 16. The expected output is the
	; vector-plus-immediate st1d form on .d elements.
	define void @sst1d_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: sst1d_d_imm:
	; CHECK: st1d { z0.d }, p0, [z1.d, #16]
	; CHECK-NEXT: ret
	call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i64.nxv2i64(<vscale x 2 x i64> %data,
	<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,
	i64 16)
	ret void
	}

	; Floating-point variant: scatter-stores <vscale x 2 x double> data via the
	; st1.scatter.imm intrinsic with a 64-bit vector base and immediate 16.
	; The expected output is the same st1d form as the integer .d test.
	define void @sst1d_d_imm_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
	; CHECK-LABEL: sst1d_d_imm_double:
	; CHECK: st1d { z0.d }, p0, [z1.d, #16]
	; CHECK-NEXT: ret
	call void @llvm.aarch64.sve.st1.scatter.imm.nxv2f64.nxv2i64(<vscale x 2 x double> %data,
	<vscale x 2 x i1> %pg,
	<vscale x 2 x i64> %base,
	i64 16)
	ret void
	}

	; ST1B
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i8.nxv4i32(<vscale x 4 x i8>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i8.nxv2i64(<vscale x 2 x i8>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	; ST1H
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i16.nxv4i32(<vscale x 4 x i16>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i16.nxv2i64(<vscale x 2 x i16>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	; ST1W
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i32.nxv2i64(<vscale x 2 x i32>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4f32.nxv4i32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)

	; ST1D
	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

	declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2f64.nxv2i64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Update the definition of AdvSIMD_GatherLoad_VecTorBase_Intrinsic
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 238322

llvm/include/llvm/IR/IntrinsicsAArch64.td

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Update the definition of AdvSIMD_GatherLoad_VecTorBase_Intrinsic
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 238322

llvm/include/llvm/IR/IntrinsicsAArch64.td

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll

[AArch64][SVE] Update the definition of AdvSIMD_GatherLoad_VecTorBase_Intrinsic
ClosedPublic