Diff 228018

clang/include/clang/Basic/arm_mve.td

	Show First 20 Lines • Show All 66 Lines • ▼ Show 20 Lines
	def vcvt#half#q_m_f16: Intrinsic<			def vcvt#half#q_m_f16: Intrinsic<
	VecOf<f16>, (args VecOf<f16>:$inactive, Vector:$a, PredOf<f32>:$pred),			VecOf<f16>, (args VecOf<f16>:$inactive, Vector:$a, PredOf<f32>:$pred),
	(IRInt<"vcvt_narrow_predicated"> $inactive, $a, halfconst, $pred)>;			(IRInt<"vcvt_narrow_predicated"> $inactive, $a, halfconst, $pred)>;

	} // params = [f32], pnt = PNT_None			} // params = [f32], pnt = PNT_None

	} // loop over half = "b", "t"			} // loop over half = "b", "t"

	let params = T.All32, pnt = PNT_None in			multiclass gather_base<list<Type> types, int size> {
	def vldrwq_gather_base_wb: Intrinsic<			let params = types, pnt = PNT_None in {
	Vector, (args Ptr<VecOf<Unsigned<Scalar>>>:$addr, imm_mem7bit<4>:$offset),			def _gather_base: Intrinsic<
	(seq (IRInt<"vldr_gather_base_wb", [Vector, VecOf<Unsigned<Scalar>>]>			Vector, (args UVector:$addr, imm_mem7bit<size>:$offset),
				(IRInt<"vldr_gather_base", [Vector, UVector]> $addr, $offset)>;

				def _gather_base_z: Intrinsic<
				Vector, (args UVector:$addr, imm_mem7bit<size>:$offset, Predicate:$pred),
				(IRInt<"vldr_gather_base_predicated", [Vector, UVector, Predicate]>
				$addr, $offset, $pred)>;

				def _gather_base_wb: Intrinsic<
				Vector, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset),
				(seq (IRInt<"vldr_gather_base_wb", [Vector, UVector]>
	(load $addr), $offset):$pair,			(load $addr), $offset):$pair,
	(store (xval $pair, 1), $addr),			(store (xval $pair, 1), $addr),
	(xval $pair, 0))>;			(xval $pair, 0))>;

	let params = T.All64, pnt = PNT_None in			def _gather_base_wb_z: Intrinsic<
				dmgreenUnsubmitted Not Done Reply Inline Actions Is it worth giving a name to VecOf<Unsigned<Scalar>>? dmgreen: Is it worth giving a name to VecOf<Unsigned<Scalar>>?
	def vldrdq_gather_base_wb_z: Intrinsic<			Vector, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset,
	Vector, (args Ptr<VecOf<Unsigned<Scalar>>>:$addr, imm_mem7bit<8>:$offset,
	Predicate:$pred),			Predicate:$pred),
	(seq (IRInt<"vldr_gather_base_wb_predicated", [Vector, VecOf<Unsigned<Scalar>>, Predicate]>			(seq (IRInt<"vldr_gather_base_wb_predicated",
				[Vector, UVector, Predicate]>
	(load $addr), $offset, $pred):$pair,			(load $addr), $offset, $pred):$pair,
	(store (xval $pair, 1), $addr),			(store (xval $pair, 1), $addr),
	(xval $pair, 0))>;			(xval $pair, 0))>;
				}
				}

				defm vldrwq: gather_base<T.All32, 4>;
				defm vldrdq: gather_base<T.All64, 8>;

				// Scatter-store intrinsics that take a vector of addresses ($addr) plus an
				// immediate $offset. 'types' becomes the params list, and 'size' is
				// forwarded to imm_mem7bit<> for the $offset immediate (4 and 8 in the
				// instantiations below). Each instantiation defines four intrinsics:
				// plain, predicated (_p), writeback (_wb) and predicated writeback (_wb_p).
				multiclass scatter_base<list<Type> types, int size> {
				let params = types in {
				def _scatter_base: Intrinsic<
				Void, (args UVector:$addr, imm_mem7bit<size>:$offset, Vector:$data),
				(IRInt<"vstr_scatter_base", [UVector, Vector]> $addr, $offset, $data)>;

				def _scatter_base_p: Intrinsic<
				Void, (args UVector:$addr, imm_mem7bit<size>:$offset, Vector:$data,
				Predicate:$pred),
				(IRInt<"vstr_scatter_base_predicated", [UVector, Vector, Predicate]>
				$addr, $offset, $data, $pred)>;

				// Writeback form: $addr is a pointer to the address vector. The vector
				// is loaded, passed to the IR intrinsic, and the intrinsic's result
				// ($wbaddr) is stored back through the same pointer.
				def _scatter_base_wb: Intrinsic<
				Void, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset, Vector:$data),
				(seq (IRInt<"vstr_scatter_base_wb", [UVector, Vector]>
				(load $addr), $offset, $data):$wbaddr,
				(store $wbaddr, $addr))>;

				// Predicated writeback form: same load / call / store-back sequence,
				// with $pred appended to the IR intrinsic's operands.
				def _scatter_base_wb_p: Intrinsic<
				Void, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset,
				Vector:$data, Predicate:$pred),
				(seq (IRInt<"vstr_scatter_base_wb_predicated",
				[UVector, Vector, Predicate]>
				(load $addr), $offset, $data, $pred):$wbaddr,
				(store $wbaddr, $addr))>;
				}
				}

				// Instantiate for the 32-bit types (size 4) and the 64-bit types (size 8).
				defm vstrwq: scatter_base<T.All32, 4>;
				defm vstrdq: scatter_base<T.All64, 8>;

				// Gather-load intrinsics taking a scalar base pointer and a vector of
				// offsets, with no shift applied: the shift operand passed to the IR
				// intrinsic is the literal 0. The pointee type is
				// CopyKind<memtype, Scalar>, i.e. memtype's size with Scalar's kind.
				// The IR intrinsic additionally receives memtype.size and
				// (unsignedflag Scalar). Defines a plain and a predicated (_z) form.
				multiclass gather_offset_unshifted<list<Type> types, PrimitiveType memtype> {
				let params = types in {
				def _gather_offset: Intrinsic<
				Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets),
				(IRInt<"vldr_gather_offset",
				[Vector, CPtr<CopyKind<memtype, Scalar>>, UVector]>
				$base, $offsets, memtype.size, 0, (unsignedflag Scalar))>;
				def _gather_offset_z: Intrinsic<
				Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
				Predicate:$pred),
				(IRInt<"vldr_gather_offset_predicated",
				[Vector, CPtr<CopyKind<memtype, Scalar>>, UVector, Predicate]>
				$base, $offsets, memtype.size, 0, (unsignedflag Scalar), $pred)>;
				}
				}

				// Shifted-offset counterpart of gather_offset_unshifted: it uses the same
				// IR intrinsics and operand layout, but passes the 'shift' template
				// argument where the unshifted form passes 0. Defines
				// _gather_shifted_offset and its predicated (_z) form.
				multiclass gather_offset_shifted<list<Type> types, PrimitiveType memtype,
				int shift> {
				let params = types in {
				def _gather_shifted_offset: Intrinsic<
				Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets),
				(IRInt<"vldr_gather_offset",
				[Vector, CPtr<CopyKind<memtype, Scalar>>, UVector]>
				$base, $offsets, memtype.size, shift, (unsignedflag Scalar))>;
				def _gather_shifted_offset_z: Intrinsic<
				Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
				Predicate:$pred),
				(IRInt<"vldr_gather_offset_predicated",
				[Vector, CPtr<CopyKind<memtype, Scalar>>, UVector, Predicate]>
				$base, $offsets, memtype.size, shift, (unsignedflag Scalar), $pred)>;
				}
				}

				// Convenience wrapper that instantiates both the unshifted and the
				// shifted gather-offset multiclasses under the same name prefix.
				multiclass gather_offset_both<list<Type> types, PrimitiveType memtype,
				int shift> {
				defm "": gather_offset_unshifted<types, memtype>;
				defm "": gather_offset_shifted<types, memtype, shift>;
				}

				// vldrbq gets only the unshifted form. vldrhq, vldrwq and vldrdq get both
				// forms, with shifts of 1, 2 and 3 respectively.
				defm vldrbq: gather_offset_unshifted<!listconcat(T.All8, T.Int16, T.Int32), u8>;
				defm vldrhq: gather_offset_both<!listconcat(T.All16, T.Int32), u16, 1>;
				defm vldrwq: gather_offset_both<T.All32, u32, 2>;
				defm vldrdq: gather_offset_both<T.Int64, u64, 3>;

				// Scatter-store intrinsics taking a scalar base pointer, a vector of
				// offsets and a data vector, with no shift applied: the final shift
				// operand passed to the IR intrinsic is the literal 0. The pointee type
				// is CopyKind<memtype, Scalar>, and memtype.size is passed to the IR
				// intrinsic. Unlike the gather forms, no unsignedflag operand is passed.
				// Defines a plain and a predicated (_p) form.
				multiclass scatter_offset_unshifted<list<Type> types, PrimitiveType memtype> {
				let params = types in {
				def _scatter_offset: Intrinsic<
				Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
				Vector:$data),
				(IRInt<"vstr_scatter_offset",
				[Ptr<CopyKind<memtype, Scalar>>, UVector, Vector]>
				$base, $offsets, $data, memtype.size, 0)>;
				def _scatter_offset_p: Intrinsic<
				Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
				Vector:$data, Predicate:$pred),
				(IRInt<"vstr_scatter_offset_predicated",
				[Ptr<CopyKind<memtype, Scalar>>, UVector, Vector, Predicate]>
				$base, $offsets, $data, memtype.size, 0, $pred)>;
				}
				}

				// Shifted-offset counterpart of scatter_offset_unshifted: it uses the same
				// IR intrinsics and operand layout, but passes the 'shift' template
				// argument where the unshifted form passes 0. Defines
				// _scatter_shifted_offset and its predicated (_p) form.
				multiclass scatter_offset_shifted<list<Type> types, PrimitiveType memtype,
				int shift> {
				let params = types in {
				def _scatter_shifted_offset: Intrinsic<
				Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
				Vector:$data),
				(IRInt<"vstr_scatter_offset",
				[Ptr<CopyKind<memtype, Scalar>>, UVector, Vector]>
				$base, $offsets, $data, memtype.size, shift)>;
				def _scatter_shifted_offset_p: Intrinsic<
				Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
				Vector:$data, Predicate:$pred),
				(IRInt<"vstr_scatter_offset_predicated",
				[Ptr<CopyKind<memtype, Scalar>>, UVector, Vector, Predicate]>
				$base, $offsets, $data, memtype.size, shift, $pred)>;
				}
				}

				// Convenience wrapper that instantiates both the unshifted and the
				// shifted scatter-offset multiclasses under the same name prefix.
				multiclass scatter_offset_both<list<Type> types, PrimitiveType memtype,
				int shift> {
				defm "": scatter_offset_unshifted<types, memtype>;
				defm "": scatter_offset_shifted<types, memtype, shift>;
				}

				// vstrbq gets only the unshifted form. vstrhq, vstrwq and vstrdq get both
				// forms, with shifts of 1, 2 and 3 respectively.
				defm vstrbq: scatter_offset_unshifted<!listconcat(T.All8,T.Int16,T.Int32), u8>;
				defm vstrhq: scatter_offset_both<!listconcat(T.All16, T.Int32), u16, 1>;
				defm vstrwq: scatter_offset_both<T.All32, u32, 2>;
				defm vstrdq: scatter_offset_both<T.Int64, u64, 3>;

	let params = [Void], pnt = PNT_None in			let params = [Void], pnt = PNT_None in
	def urshrl: Intrinsic<u64, (args u64:$value, imm_1to32:$shift),			def urshrl: Intrinsic<u64, (args u64:$value, imm_1to32:$shift),
	(seq (u32 (lshr $value, (u64 32))):$hi,			(seq (u32 (lshr $value, (u64 32))):$hi,
	(u32 $value):$lo,			(u32 $value):$lo,
	(IRInt<"urshrl"> $lo, $hi, $shift):$pair,			(IRInt<"urshrl"> $lo, $hi, $shift):$pair,
	(or (shl (u64 (xval $pair, 1)), (u64 32)),			(or (shl (u64 (xval $pair, 1)), (u64 32)),
	(u64 (xval $pair, 0))))>;			(u64 (xval $pair, 0))))>;
	Show All 23 Lines

clang/include/clang/Basic/arm_mve_defs.td

	Show First 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	// (add $a, $b))			// (add $a, $b))
	//			//
	// defines the name $a to refer to the return value of the 'foo' operation;			// defines the name $a to refer to the return value of the 'foo' operation;
	// then the 'bar' operation uses $a as one of its arguments, and the return			// then the 'bar' operation uses $a as one of its arguments, and the return
	// value of that is assigned the name $b; finally, $a and $b are added to give			// value of that is assigned the name $b; finally, $a and $b are added to give
	// the return value of the seq construction as a whole.			// the return value of the seq construction as a whole.
	def seq;			def seq;

				// Another magic operation is 'unsignedflag', which you give a scalar
				// _type_ as an argument, and it expands into 1 for an unsigned type
				// and 0 for a signed (or floating) one. In arm_mve.td it is written as
				// (unsignedflag Scalar) and supplies an operand of the
				// vldr_gather_offset IR intrinsics.
				def unsignedflag;

	// If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it			// If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it
	// indicates that the IR generation for that intrinsic is done by handwritten			// indicates that the IR generation for that intrinsic is done by handwritten
	// C++ and not autogenerated at all. The effect in the MVE builtin codegen			// C++ and not autogenerated at all. The effect in the MVE builtin codegen
	// function is to break out of the main switch and fall through to the			// function is to break out of the main switch and fall through to the
	// manual-codegen cases below it, having set the CustomCodeGenType enumerated			// manual-codegen cases below it, having set the CustomCodeGenType enumerated
	// variable to the value given by the 'type' string here.			// variable to the value given by the 'type' string here.
	class CustomCodegen<string type_> { string type = type_; }			class CustomCodegen<string type_> { string type = type_; }

	Show All 11 Lines
	// only used in the definitions below. Actual intrinsic definitions in			// only used in the definitions below. Actual intrinsic definitions in
	// arm_mve.td will use the defs defined below here.			// arm_mve.td will use the defs defined below here.
	class ComplexTypeOp;			class ComplexTypeOp;
	def CTO_Parameter: ComplexTypeOp;			def CTO_Parameter: ComplexTypeOp;
	def CTO_Vec: ComplexTypeOp;			def CTO_Vec: ComplexTypeOp;
	def CTO_Pred: ComplexTypeOp;			def CTO_Pred: ComplexTypeOp;
	class CTO_Tuple<int n_>: ComplexTypeOp { int n = n_; }			class CTO_Tuple<int n_>: ComplexTypeOp { int n = n_; }
	class CTO_Pointer<bit const_>: ComplexTypeOp { bit const = const_; }			class CTO_Pointer<bit const_>: ComplexTypeOp { bit const = const_; }
	class CTO_Sign<bit signed_>: ComplexTypeOp { bit signed = signed_; }			def CTO_CopyKind: ComplexTypeOp;

	// -----------------------------------------------------------------------------			// -----------------------------------------------------------------------------
	// Instances of Type intended to be used directly in the specification of an			// Instances of Type intended to be used directly in the specification of an
	// intrinsic in arm_mve.td.			// intrinsic in arm_mve.td.

	// The type Void can be used for the return type of an intrinsic, and as the			// The type Void can be used for the return type of an intrinsic, and as the
	// parameter type for intrinsics that aren't actually parameterised by any kind			// parameter type for intrinsics that aren't actually parameterised by any kind
	// of _s32 / _f16 / _u8 suffix.			// of _s32 / _f16 / _u8 suffix.
	▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines
	// used by vld2q and friends, which don't need that generality.)			// used by vld2q and friends, which don't need that generality.)
	class MultiVector<int n>: ComplexType<(CTO_Tuple<n> Vector)>;			class MultiVector<int n>: ComplexType<(CTO_Tuple<n> Vector)>;

	// Ptr<t> and CPtr<t> expand to a pointer to t, or a pointer to const t,			// Ptr<t> and CPtr<t> expand to a pointer to t, or a pointer to const t,
	// respectively.			// respectively.
	class Ptr<Type t>: ComplexType<(CTO_Pointer<0> t)>;			class Ptr<Type t>: ComplexType<(CTO_Pointer<0> t)>;
	class CPtr<Type t>: ComplexType<(CTO_Pointer<1> t)>;			class CPtr<Type t>: ComplexType<(CTO_Pointer<1> t)>;

	// Unsigned<t> expects t to be a scalar, and expands to the unsigned integer			// CopyKind<s,k> expects s and k to be scalar types. It returns a scalar type
				dmgreenUnsubmitted Not Done Reply Inline Actions Should the t and u be s and k? dmgreen: Should the t and u be s and k?
				simon_tathamAuthorUnsubmitted Done Reply Inline Actions Note to self: never change your mind about the variable names half way through writing a comment... simon_tatham: Note to self: never change your mind about the variable names half way through writing a…
	// scalar of the same size. So it returns u16 if you give it s16 or f16 (or			// whose kind (signed, unsigned or float) matches that of k, and whose size
	// u16 itself).			// matches that of s.
	class Unsigned<Type t>: ComplexType<(CTO_Sign<0> t)>;			class CopyKind<Type s, Type k>: ComplexType<(CTO_CopyKind s, k)>;

				// Unsigned<t> expects t to be a scalar type, and expands to the unsigned
				// integer scalar of the same size. So it returns u16 if you give it s16 or
				// f16 (or u16 itself).
				// It is expressed through CTO_CopyKind: t supplies the size, and u32 is
				// used only as the source of the 'unsigned integer' kind.
				class Unsigned<Type t>: ComplexType<(CTO_CopyKind t, u32)>;

				// UScalar and UVector expand to the unsigned-integer versions of
				// Scalar and Vector.
				def UScalar: Unsigned<Scalar>;
				def UVector: VecOf<UScalar>;

	// -----------------------------------------------------------------------------			// -----------------------------------------------------------------------------
	// Internal definitions for specifying immediate arguments for an intrinsic.			// Internal definitions for specifying immediate arguments for an intrinsic.

	class ImmediateBounds;			class ImmediateBounds;
	class Immediate<Type type_, ImmediateBounds bounds_>: Type {			class Immediate<Type type_, ImmediateBounds bounds_>: Type {
	Type type = type_;			Type type = type_;
	ImmediateBounds bounds = bounds_;			ImmediateBounds bounds = bounds_;
	▲ Show 20 Lines • Show All 152 Lines • Show Last 20 Lines

clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c

This file was added.

				// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
				// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
				// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
				dmgreenUnsubmitted Not Done Reply Inline Actions -DPOLYMORPHIC line dmgreen: -DPOLYMORPHIC line
				simon_tathamAuthorUnsubmitted Done Reply Inline Actions D'oh, that's what I get for copy-pasting the header from the one test file that doesn't have that extra line. simon_tatham: D'oh, that's what I get for copy-pasting the header from the one test file that //doesn't//…

				#include <arm_mve.h>

				// Unpredicated byte gather from int8_t memory into int16x8_t. The trailing
				// i32 operands in the expected IR are memory size 8, shift 0, unsigned flag 0.
				// CHECK-LABEL: @test_vldrbq_gather_offset_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 8, i32 0, i32 0)
				// CHECK-NEXT: ret <8 x i16> [[TMP0]]
				//
				int16x8_t test_vldrbq_gather_offset_s16(const int8_t *base, uint16x8_t offset)
				{
				#ifdef POLYMORPHIC
				return vldrbq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrbq_gather_offset_s16(base, offset);
				#endif /* POLYMORPHIC */
				}

				// Unpredicated byte gather from int8_t memory into int32x4_t. The trailing
				// i32 operands in the expected IR are memory size 8, shift 0, unsigned flag 0.
				// CHECK-LABEL: @test_vldrbq_gather_offset_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 8, i32 0, i32 0)
				// CHECK-NEXT: ret <4 x i32> [[TMP0]]
				//
				int32x4_t test_vldrbq_gather_offset_s32(const int8_t *base, uint32x4_t offset)
				{
				#ifdef POLYMORPHIC
				return vldrbq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrbq_gather_offset_s32(base, offset);
				#endif /* POLYMORPHIC */
				}

				// Unpredicated byte gather from int8_t memory into int8x16_t. The trailing
				// i32 operands in the expected IR are memory size 8, shift 0, unsigned flag 0.
				// CHECK-LABEL: @test_vldrbq_gather_offset_s8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], i32 8, i32 0, i32 0)
				// CHECK-NEXT: ret <16 x i8> [[TMP0]]
				//
				int8x16_t test_vldrbq_gather_offset_s8(const int8_t *base, uint8x16_t offset)
				{
				#ifdef POLYMORPHIC
				return vldrbq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrbq_gather_offset_s8(base, offset);
				#endif /* POLYMORPHIC */
				}

				// Unpredicated byte gather from uint8_t memory into uint16x8_t. The trailing
				// i32 operands in the expected IR are memory size 8, shift 0, unsigned flag 1.
				// CHECK-LABEL: @test_vldrbq_gather_offset_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 8, i32 0, i32 1)
				// CHECK-NEXT: ret <8 x i16> [[TMP0]]
				//
				uint16x8_t test_vldrbq_gather_offset_u16(const uint8_t *base, uint16x8_t offset)
				{
				#ifdef POLYMORPHIC
				return vldrbq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrbq_gather_offset_u16(base, offset);
				#endif /* POLYMORPHIC */
				}

				// Unpredicated byte gather from uint8_t memory into uint32x4_t. The trailing
				// i32 operands in the expected IR are memory size 8, shift 0, unsigned flag 1.
				// CHECK-LABEL: @test_vldrbq_gather_offset_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 8, i32 0, i32 1)
				// CHECK-NEXT: ret <4 x i32> [[TMP0]]
				//
				uint32x4_t test_vldrbq_gather_offset_u32(const uint8_t *base, uint32x4_t offset)
				{
				#ifdef POLYMORPHIC
				return vldrbq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrbq_gather_offset_u32(base, offset);
				#endif /* POLYMORPHIC */
				}

				// Unpredicated byte gather from uint8_t memory into uint8x16_t. The trailing
				// i32 operands in the expected IR are memory size 8, shift 0, unsigned flag 1.
				// CHECK-LABEL: @test_vldrbq_gather_offset_u8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], i32 8, i32 0, i32 1)
				// CHECK-NEXT: ret <16 x i8> [[TMP0]]
				//
				uint8x16_t test_vldrbq_gather_offset_u8(const uint8_t *base, uint8x16_t offset)
				{
				#ifdef POLYMORPHIC
				return vldrbq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrbq_gather_offset_u8(base, offset);
				#endif /* POLYMORPHIC */
				}

				// Predicated byte gather from int8_t memory into int16x8_t. The expected IR
				// widens p to i32, converts it to <8 x i1>, and passes it as the last operand.
				// CHECK-LABEL: @test_vldrbq_gather_offset_z_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 8, i32 0, i32 0, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret <8 x i16> [[TMP2]]
				//
				int16x8_t test_vldrbq_gather_offset_z_s16(const int8_t *base, uint16x8_t offset, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				return vldrbq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrbq_gather_offset_z_s16(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// Predicated byte gather from int8_t memory into int32x4_t. The expected IR
				// widens p to i32, converts it to <4 x i1>, and passes it as the last operand.
				// CHECK-LABEL: @test_vldrbq_gather_offset_z_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 8, i32 0, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x i32> [[TMP2]]
				//
				int32x4_t test_vldrbq_gather_offset_z_s32(const int8_t *base, uint32x4_t offset, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				return vldrbq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrbq_gather_offset_z_s32(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// Predicated byte gather from int8_t memory into int8x16_t. The expected IR
				// widens p to i32, converts it to <16 x i1>, and passes it as the last operand.
				// CHECK-LABEL: @test_vldrbq_gather_offset_z_s8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], i32 8, i32 0, i32 0, <16 x i1> [[TMP1]])
				// CHECK-NEXT: ret <16 x i8> [[TMP2]]
				//
				int8x16_t test_vldrbq_gather_offset_z_s8(const int8_t *base, uint8x16_t offset, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				return vldrbq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrbq_gather_offset_z_s8(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// Predicated byte gather from uint8_t memory into uint16x8_t. Same shape as
				// the signed case, but the unsigned-flag operand in the expected IR is 1.
				// CHECK-LABEL: @test_vldrbq_gather_offset_z_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 8, i32 0, i32 1, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret <8 x i16> [[TMP2]]
				//
				uint16x8_t test_vldrbq_gather_offset_z_u16(const uint8_t *base, uint16x8_t offset, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				return vldrbq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrbq_gather_offset_z_u16(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// Predicated byte gather from uint8_t memory into uint32x4_t. Same shape as
				// the signed case, but the unsigned-flag operand in the expected IR is 1.
				// CHECK-LABEL: @test_vldrbq_gather_offset_z_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 8, i32 0, i32 1, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x i32> [[TMP2]]
				//
				uint32x4_t test_vldrbq_gather_offset_z_u32(const uint8_t *base, uint32x4_t offset, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				return vldrbq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrbq_gather_offset_z_u32(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// Predicated byte gather from uint8_t memory into uint8x16_t. Same shape as
				// the signed case, but the unsigned-flag operand in the expected IR is 1.
				// CHECK-LABEL: @test_vldrbq_gather_offset_z_u8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], i32 8, i32 0, i32 1, <16 x i1> [[TMP1]])
				// CHECK-NEXT: ret <16 x i8> [[TMP2]]
				//
				uint8x16_t test_vldrbq_gather_offset_z_u8(const uint8_t *base, uint8x16_t offset, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				return vldrbq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrbq_gather_offset_z_u8(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// 64-bit gather from a vector of base addresses with an immediate offset.
				// The immediate 0x268 appears in the expected IR as i32 616. Only the
				// explicitly typed name is exercised here (no POLYMORPHIC branch).
				// CHECK-LABEL: @test_vldrdq_gather_base_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 616)
				// CHECK-NEXT: ret <2 x i64> [[TMP0]]
				//
				int64x2_t test_vldrdq_gather_base_s64(uint64x2_t addr)
				{
				return vldrdq_gather_base_s64(addr, 0x268);
				}

				// Unsigned variant of the 64-bit base gather. The immediate 0x150 appears
				// in the expected IR as i32 336.
				// CHECK-LABEL: @test_vldrdq_gather_base_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 336)
				// CHECK-NEXT: ret <2 x i64> [[TMP0]]
				//
				uint64x2_t test_vldrdq_gather_base_u64(uint64x2_t addr)
				{
				return vldrdq_gather_base_u64(addr, 0x150);
				}

				// Writeback base gather: the address vector is loaded through addr, the
				// intrinsic returns a {data, updated address} pair, element 1 is stored back
				// through addr and element 0 is returned. Immediate 0x240 == 576.
				// CHECK-LABEL: @test_vldrdq_gather_base_wb_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 576)
				// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP1]], 1
				// CHECK-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[ADDR]], align 8
				// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP1]], 0
				// CHECK-NEXT: ret <2 x i64> [[TMP3]]
				//
				int64x2_t test_vldrdq_gather_base_wb_s64(uint64x2_t *addr)
				{
				return vldrdq_gather_base_wb_s64(addr, 0x240);
				}

				// Unsigned variant of the writeback base gather: same load / call /
				// store-back sequence. Immediate 0x148 == 328.
				// CHECK-LABEL: @test_vldrdq_gather_base_wb_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 328)
				// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP1]], 1
				// CHECK-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[ADDR]], align 8
				// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP1]], 0
				// CHECK-NEXT: ret <2 x i64> [[TMP3]]
				//
				uint64x2_t test_vldrdq_gather_base_wb_u64(uint64x2_t *addr)
				{
				return vldrdq_gather_base_wb_u64(addr, 0x148);
				}

				// Predicated writeback base gather: as the unpredicated writeback form, with
				// p widened to i32 and converted to <4 x i1> before the call.
				// Immediate 0x298 == 664.
				// CHECK-LABEL: @test_vldrdq_gather_base_wb_z_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> [[TMP0]], i32 664, <4 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 1
				// CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[ADDR]], align 8
				// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 0
				// CHECK-NEXT: ret <2 x i64> [[TMP5]]
				//
				int64x2_t test_vldrdq_gather_base_wb_z_s64(uint64x2_t *addr, mve_pred16_t p)
				{
				return vldrdq_gather_base_wb_z_s64(addr, 0x298, p);
				}

				// Unsigned variant of the predicated writeback base gather.
				// Immediate 0x290 == 656.
				// CHECK-LABEL: @test_vldrdq_gather_base_wb_z_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> [[TMP0]], i32 656, <4 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 1
				// CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[ADDR]], align 8
				// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 0
				// CHECK-NEXT: ret <2 x i64> [[TMP5]]
				//
				uint64x2_t test_vldrdq_gather_base_wb_z_u64(uint64x2_t *addr, mve_pred16_t p)
				{
				return vldrdq_gather_base_wb_z_u64(addr, 0x290, p);
				}

				// Predicated base gather without writeback: p is widened to i32, converted
				// to <4 x i1>, and passed as the last operand. Immediate 0x378 == 888.
				// CHECK-LABEL: @test_vldrdq_gather_base_z_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> [[ADDR:%.*]], i32 888, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <2 x i64> [[TMP2]]
				//
				int64x2_t test_vldrdq_gather_base_z_s64(uint64x2_t addr, mve_pred16_t p)
				{
				return vldrdq_gather_base_z_s64(addr, 0x378, p);
				}

				// Unsigned variant of the predicated base gather. Immediate 0x3e8 == 1000.
				// CHECK-LABEL: @test_vldrdq_gather_base_z_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> [[ADDR:%.*]], i32 1000, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <2 x i64> [[TMP2]]
				//
				uint64x2_t test_vldrdq_gather_base_z_u64(uint64x2_t addr, mve_pred16_t p)
				{
				return vldrdq_gather_base_z_u64(addr, 0x3e8, p);
				}

				// Unpredicated 64-bit gather from int64_t memory with unshifted offsets. The
				// trailing i32 operands in the expected IR are memory size 64, shift 0,
				// unsigned flag 0.
				// CHECK-LABEL: @test_vldrdq_gather_offset_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 0)
				// CHECK-NEXT: ret <2 x i64> [[TMP0]]
				//
				int64x2_t test_vldrdq_gather_offset_s64(const int64_t *base, uint64x2_t offset)
				{
				#ifdef POLYMORPHIC
				return vldrdq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrdq_gather_offset_s64(base, offset);
				#endif /* POLYMORPHIC */
				}

				// Unpredicated 64-bit gather from uint64_t memory with unshifted offsets. The
				// trailing i32 operands in the expected IR are memory size 64, shift 0,
				// unsigned flag 1.
				// CHECK-LABEL: @test_vldrdq_gather_offset_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 1)
				// CHECK-NEXT: ret <2 x i64> [[TMP0]]
				//
				uint64x2_t test_vldrdq_gather_offset_u64(const uint64_t *base, uint64x2_t offset)
				{
				#ifdef POLYMORPHIC
				return vldrdq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrdq_gather_offset_u64(base, offset);
				#endif /* POLYMORPHIC */
				}

				// Predicated 64-bit gather from int64_t memory with unshifted offsets. p is
				// widened to i32, converted to <4 x i1>, and passed as the last operand.
				// CHECK-LABEL: @test_vldrdq_gather_offset_z_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <2 x i64> [[TMP2]]
				//
				int64x2_t test_vldrdq_gather_offset_z_s64(const int64_t *base, uint64x2_t offset, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				return vldrdq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrdq_gather_offset_z_s64(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// Predicated 64-bit gather from uint64_t memory with unshifted offsets. Same
				// shape as the signed case, but the unsigned-flag operand is 1.
				// CHECK-LABEL: @test_vldrdq_gather_offset_z_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 1, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <2 x i64> [[TMP2]]
				//
				uint64x2_t test_vldrdq_gather_offset_z_u64(const uint64_t *base, uint64x2_t offset, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				return vldrdq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrdq_gather_offset_z_u64(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// Unpredicated 64-bit gather from int64_t memory with shifted offsets. The
				// trailing i32 operands in the expected IR are memory size 64, shift 3,
				// unsigned flag 0.
				// CHECK-LABEL: @test_vldrdq_gather_shifted_offset_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 3, i32 0)
				// CHECK-NEXT: ret <2 x i64> [[TMP0]]
				//
				int64x2_t test_vldrdq_gather_shifted_offset_s64(const int64_t *base, uint64x2_t offset)
				{
				#ifdef POLYMORPHIC
				return vldrdq_gather_shifted_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrdq_gather_shifted_offset_s64(base, offset);
				#endif /* POLYMORPHIC */
				}

				// Unpredicated 64-bit gather from uint64_t memory with shifted offsets. The
				// trailing i32 operands in the expected IR are memory size 64, shift 3,
				// unsigned flag 1.
				// CHECK-LABEL: @test_vldrdq_gather_shifted_offset_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 3, i32 1)
				// CHECK-NEXT: ret <2 x i64> [[TMP0]]
				//
				uint64x2_t test_vldrdq_gather_shifted_offset_u64(const uint64_t *base, uint64x2_t offset)
				{
				#ifdef POLYMORPHIC
				return vldrdq_gather_shifted_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrdq_gather_shifted_offset_u64(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrdq_gather_shifted_offset_z_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 3, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <2 x i64> [[TMP2]]
				//
				int64x2_t test_vldrdq_gather_shifted_offset_z_s64(const int64_t *base, uint64x2_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _s64 name otherwise; expected immediates are i32 64, i32 3, i32 0. */
				#ifdef POLYMORPHIC
				return vldrdq_gather_shifted_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrdq_gather_shifted_offset_z_s64(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrdq_gather_shifted_offset_z_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 3, i32 1, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <2 x i64> [[TMP2]]
				//
				uint64x2_t test_vldrdq_gather_shifted_offset_z_u64(const uint64_t *base, uint64x2_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _u64 name otherwise; expected immediates are i32 64, i32 3, i32 1. */
				#ifdef POLYMORPHIC
				return vldrdq_gather_shifted_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrdq_gather_shifted_offset_z_u64(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_offset_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 0)
				// CHECK-NEXT: ret <8 x half> [[TMP0]]
				//
				float16x8_t test_vldrhq_gather_offset_f16(const float16_t *base, uint16x8_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _f16 name otherwise; expected immediates are i32 16, i32 0, i32 0. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrhq_gather_offset_f16(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_offset_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 0)
				// CHECK-NEXT: ret <8 x i16> [[TMP0]]
				//
				int16x8_t test_vldrhq_gather_offset_s16(const int16_t *base, uint16x8_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _s16 name otherwise; expected immediates are i32 16, i32 0, i32 0. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrhq_gather_offset_s16(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_offset_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 0, i32 0)
				// CHECK-NEXT: ret <4 x i32> [[TMP0]]
				//
				int32x4_t test_vldrhq_gather_offset_s32(const int16_t *base, uint32x4_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _s32 name otherwise; the base pointer is int16_t while the result is a 4 x i32 vector. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrhq_gather_offset_s32(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_offset_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 1)
				// CHECK-NEXT: ret <8 x i16> [[TMP0]]
				//
				uint16x8_t test_vldrhq_gather_offset_u16(const uint16_t *base, uint16x8_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _u16 name otherwise; expected immediates are i32 16, i32 0, i32 1. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrhq_gather_offset_u16(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_offset_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 0, i32 1)
				// CHECK-NEXT: ret <4 x i32> [[TMP0]]
				//
				uint32x4_t test_vldrhq_gather_offset_u32(const uint16_t *base, uint32x4_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _u32 name otherwise; the base pointer is uint16_t while the result is a 4 x i32 vector. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrhq_gather_offset_u32(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_offset_z_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 0, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret <8 x half> [[TMP2]]
				//
				float16x8_t test_vldrhq_gather_offset_z_f16(const float16_t *base, uint16x8_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _f16 name otherwise; p is expected to be widened and converted to <8 x i1>. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrhq_gather_offset_z_f16(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_offset_z_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 0, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret <8 x i16> [[TMP2]]
				//
				int16x8_t test_vldrhq_gather_offset_z_s16(const int16_t *base, uint16x8_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _s16 name otherwise; p is expected to be widened and converted to <8 x i1>. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrhq_gather_offset_z_s16(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_offset_z_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 0, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x i32> [[TMP2]]
				//
				int32x4_t test_vldrhq_gather_offset_z_s32(const int16_t *base, uint32x4_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _s32 name otherwise; p is expected to be widened and converted to <4 x i1>. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrhq_gather_offset_z_s32(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_offset_z_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 0, i32 1, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret <8 x i16> [[TMP2]]
				//
				uint16x8_t test_vldrhq_gather_offset_z_u16(const uint16_t *base, uint16x8_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _u16 name otherwise; p is expected to be widened and converted to <8 x i1>. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrhq_gather_offset_z_u16(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_offset_z_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 0, i32 1, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x i32> [[TMP2]]
				//
				uint32x4_t test_vldrhq_gather_offset_z_u32(const uint16_t *base, uint32x4_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _u32 name otherwise; p is expected to be widened and converted to <4 x i1>. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrhq_gather_offset_z_u32(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 0)
				// CHECK-NEXT: ret <8 x half> [[TMP0]]
				//
				float16x8_t test_vldrhq_gather_shifted_offset_f16(const float16_t *base, uint16x8_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _f16 name otherwise; expected immediates are i32 16, i32 1, i32 0. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_shifted_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrhq_gather_shifted_offset_f16(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 0)
				// CHECK-NEXT: ret <8 x i16> [[TMP0]]
				//
				int16x8_t test_vldrhq_gather_shifted_offset_s16(const int16_t *base, uint16x8_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _s16 name otherwise; expected immediates are i32 16, i32 1, i32 0. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_shifted_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrhq_gather_shifted_offset_s16(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 1, i32 0)
				// CHECK-NEXT: ret <4 x i32> [[TMP0]]
				//
				int32x4_t test_vldrhq_gather_shifted_offset_s32(const int16_t *base, uint32x4_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _s32 name otherwise; expected immediates are i32 16, i32 1, i32 0. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_shifted_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrhq_gather_shifted_offset_s32(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 1)
				// CHECK-NEXT: ret <8 x i16> [[TMP0]]
				//
				uint16x8_t test_vldrhq_gather_shifted_offset_u16(const uint16_t *base, uint16x8_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _u16 name otherwise; expected immediates are i32 16, i32 1, i32 1. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_shifted_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrhq_gather_shifted_offset_u16(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 1, i32 1)
				// CHECK-NEXT: ret <4 x i32> [[TMP0]]
				//
				uint32x4_t test_vldrhq_gather_shifted_offset_u32(const uint16_t *base, uint32x4_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _u32 name otherwise; expected immediates are i32 16, i32 1, i32 1. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_shifted_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrhq_gather_shifted_offset_u32(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 0, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret <8 x half> [[TMP2]]
				//
				float16x8_t test_vldrhq_gather_shifted_offset_z_f16(const float16_t *base, uint16x8_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _f16 name otherwise; expected immediates are i32 16, i32 1, i32 0. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_shifted_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrhq_gather_shifted_offset_z_f16(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 0, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret <8 x i16> [[TMP2]]
				//
				int16x8_t test_vldrhq_gather_shifted_offset_z_s16(const int16_t *base, uint16x8_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _s16 name otherwise; expected immediates are i32 16, i32 1, i32 0. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_shifted_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrhq_gather_shifted_offset_z_s16(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 1, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x i32> [[TMP2]]
				//
				int32x4_t test_vldrhq_gather_shifted_offset_z_s32(const int16_t *base, uint32x4_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _s32 name otherwise; expected immediates are i32 16, i32 1, i32 0. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_shifted_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrhq_gather_shifted_offset_z_s32(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], i32 16, i32 1, i32 1, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret <8 x i16> [[TMP2]]
				//
				uint16x8_t test_vldrhq_gather_shifted_offset_z_u16(const uint16_t *base, uint16x8_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _u16 name otherwise; expected immediates are i32 16, i32 1, i32 1. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_shifted_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrhq_gather_shifted_offset_z_u16(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrhq_gather_shifted_offset_z_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 16, i32 1, i32 1, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x i32> [[TMP2]]
				//
				uint32x4_t test_vldrhq_gather_shifted_offset_z_u32(const uint16_t *base, uint32x4_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _u32 name otherwise; expected immediates are i32 16, i32 1, i32 1. */
				#ifdef POLYMORPHIC
				return vldrhq_gather_shifted_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrhq_gather_shifted_offset_z_u32(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrwq_gather_base_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> [[ADDR:%.*]], i32 12)
				// CHECK-NEXT: ret <4 x float> [[TMP0]]
				//
				float32x4_t test_vldrwq_gather_base_f32(uint32x4_t addr)
				{
				/* Only the explicitly typed name is exercised here; the immediate 0xc is expected as i32 12 in the call. */
				return vldrwq_gather_base_f32(addr, 0xc);
				}

				// CHECK-LABEL: @test_vldrwq_gather_base_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> [[ADDR:%.*]], i32 400)
				// CHECK-NEXT: ret <4 x i32> [[TMP0]]
				//
				int32x4_t test_vldrwq_gather_base_s32(uint32x4_t addr)
				{
				/* Only the explicitly typed name is exercised here; the immediate 0x190 is expected as i32 400 in the call. */
				return vldrwq_gather_base_s32(addr, 0x190);
				}

				// CHECK-LABEL: @test_vldrwq_gather_base_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> [[ADDR:%.*]], i32 284)
				// CHECK-NEXT: ret <4 x i32> [[TMP0]]
				//
				uint32x4_t test_vldrwq_gather_base_u32(uint32x4_t addr)
				{
				/* Only the explicitly typed name is exercised here; the immediate 0x11c is expected as i32 284 in the call. */
				return vldrwq_gather_base_u32(addr, 0x11c);
				}

				// CHECK-LABEL: @test_vldrwq_gather_base_wb_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> [[TMP0]], i32 64)
				// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP1]], 1
				// CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[ADDR]], align 8
				// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP1]], 0
				// CHECK-NEXT: ret <4 x float> [[TMP3]]
				//
				float32x4_t test_vldrwq_gather_base_wb_f32(uint32x4_t *addr)
				{
				/* addr is passed by pointer: the expected IR loads *addr, calls the wb intrinsic with i32 64 (0x40), stores result element 1 back to *addr and returns element 0. */
				return vldrwq_gather_base_wb_f32(addr, 0x40);
				}

				// CHECK-LABEL: @test_vldrwq_gather_base_wb_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> [[TMP0]], i32 80)
				// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 1
				// CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[ADDR]], align 8
				// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 0
				// CHECK-NEXT: ret <4 x i32> [[TMP3]]
				//
				int32x4_t test_vldrwq_gather_base_wb_s32(uint32x4_t *addr)
				{
				/* addr is passed by pointer: the expected IR loads *addr, calls the wb intrinsic with i32 80 (0x50), stores result element 1 back to *addr and returns element 0. */
				return vldrwq_gather_base_wb_s32(addr, 0x50);
				}

				// CHECK-LABEL: @test_vldrwq_gather_base_wb_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> [[TMP0]], i32 480)
				// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 1
				// CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[ADDR]], align 8
				// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 0
				// CHECK-NEXT: ret <4 x i32> [[TMP3]]
				//
				uint32x4_t test_vldrwq_gather_base_wb_u32(uint32x4_t *addr)
				{
				/* addr is passed by pointer: the expected IR loads *addr, calls the wb intrinsic with i32 480 (0x1e0), stores result element 1 back to *addr and returns element 0. */
				return vldrwq_gather_base_wb_u32(addr, 0x1e0);
				}

				// CHECK-LABEL: @test_vldrwq_gather_base_wb_z_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 352, <4 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP3]], 1
				// CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[ADDR]], align 8
				// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP3]], 0
				// CHECK-NEXT: ret <4 x float> [[TMP5]]
				//
				float32x4_t test_vldrwq_gather_base_wb_z_f32(uint32x4_t *addr, mve_pred16_t p)
				{
				/* Predicated wb form: the expected IR loads *addr, converts p to <4 x i1>, calls the intrinsic with i32 352 (0x160), stores element 1 back to *addr and returns element 0. */
				return vldrwq_gather_base_wb_z_f32(addr, 0x160, p);
				}

				// CHECK-LABEL: @test_vldrwq_gather_base_wb_z_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 276, <4 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP3]], 1
				// CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[ADDR]], align 8
				// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP3]], 0
				// CHECK-NEXT: ret <4 x i32> [[TMP5]]
				//
				int32x4_t test_vldrwq_gather_base_wb_z_s32(uint32x4_t *addr, mve_pred16_t p)
				{
				/* Predicated wb form: the expected IR loads *addr, converts p to <4 x i1>, calls the intrinsic with i32 276 (0x114), stores element 1 back to *addr and returns element 0. */
				return vldrwq_gather_base_wb_z_s32(addr, 0x114, p);
				}

				// CHECK-LABEL: @test_vldrwq_gather_base_wb_z_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 88, <4 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP3]], 1
				// CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[ADDR]], align 8
				// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP3]], 0
				// CHECK-NEXT: ret <4 x i32> [[TMP5]]
				//
				uint32x4_t test_vldrwq_gather_base_wb_z_u32(uint32x4_t *addr, mve_pred16_t p)
				{
				/* Predicated wb form: the expected IR loads *addr, converts p to <4 x i1>, calls the intrinsic with i32 88 (0x58), stores element 1 back to *addr and returns element 0. */
				return vldrwq_gather_base_wb_z_u32(addr, 0x58, p);
				}

				// CHECK-LABEL: @test_vldrwq_gather_base_z_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 300, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x float> [[TMP2]]
				//
				float32x4_t test_vldrwq_gather_base_z_f32(uint32x4_t addr, mve_pred16_t p)
				{
				/* Only the explicitly typed name is exercised here; the immediate 0x12c is expected as i32 300 alongside the <4 x i1> predicate. */
				return vldrwq_gather_base_z_f32(addr, 0x12c, p);
				}

				// CHECK-LABEL: @test_vldrwq_gather_base_z_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 440, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x i32> [[TMP2]]
				//
				int32x4_t test_vldrwq_gather_base_z_s32(uint32x4_t addr, mve_pred16_t p)
				{
				/* Only the explicitly typed name is exercised here; the immediate 0x1b8 is expected as i32 440 alongside the <4 x i1> predicate. */
				return vldrwq_gather_base_z_s32(addr, 0x1b8, p);
				}

				// CHECK-LABEL: @test_vldrwq_gather_base_z_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 300, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x i32> [[TMP2]]
				//
				uint32x4_t test_vldrwq_gather_base_z_u32(uint32x4_t addr, mve_pred16_t p)
				{
				/* Only the explicitly typed name is exercised here; the immediate 0x12c is expected as i32 300 alongside the <4 x i1> predicate. */
				return vldrwq_gather_base_z_u32(addr, 0x12c, p);
				}

				// CHECK-LABEL: @test_vldrwq_gather_offset_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 0)
				// CHECK-NEXT: ret <4 x float> [[TMP0]]
				//
				float32x4_t test_vldrwq_gather_offset_f32(const float32_t *base, uint32x4_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _f32 name otherwise; expected immediates are i32 32, i32 0, i32 0. */
				#ifdef POLYMORPHIC
				return vldrwq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrwq_gather_offset_f32(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrwq_gather_offset_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 0)
				// CHECK-NEXT: ret <4 x i32> [[TMP0]]
				//
				int32x4_t test_vldrwq_gather_offset_s32(const int32_t *base, uint32x4_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _s32 name otherwise; expected immediates are i32 32, i32 0, i32 0. */
				#ifdef POLYMORPHIC
				return vldrwq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrwq_gather_offset_s32(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrwq_gather_offset_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 1)
				// CHECK-NEXT: ret <4 x i32> [[TMP0]]
				//
				uint32x4_t test_vldrwq_gather_offset_u32(const uint32_t *base, uint32x4_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _u32 name otherwise; expected immediates are i32 32, i32 0, i32 1. */
				#ifdef POLYMORPHIC
				return vldrwq_gather_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrwq_gather_offset_u32(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrwq_gather_offset_z_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x float> [[TMP2]]
				//
				float32x4_t test_vldrwq_gather_offset_z_f32(const float32_t *base, uint32x4_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _f32 name otherwise; p is expected to be widened and converted to <4 x i1>. */
				#ifdef POLYMORPHIC
				return vldrwq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrwq_gather_offset_z_f32(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrwq_gather_offset_z_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x i32> [[TMP2]]
				//
				int32x4_t test_vldrwq_gather_offset_z_s32(const int32_t *base, uint32x4_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _s32 name otherwise; p is expected to be widened and converted to <4 x i1>. */
				#ifdef POLYMORPHIC
				return vldrwq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrwq_gather_offset_z_s32(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrwq_gather_offset_z_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 0, i32 1, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x i32> [[TMP2]]
				//
				uint32x4_t test_vldrwq_gather_offset_z_u32(const uint32_t *base, uint32x4_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _u32 name otherwise; p is expected to be widened and converted to <4 x i1>. */
				#ifdef POLYMORPHIC
				return vldrwq_gather_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrwq_gather_offset_z_u32(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 0)
				// CHECK-NEXT: ret <4 x float> [[TMP0]]
				//
				float32x4_t test_vldrwq_gather_shifted_offset_f32(const float32_t *base, uint32x4_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _f32 name otherwise; expected immediates are i32 32, i32 2, i32 0. */
				#ifdef POLYMORPHIC
				return vldrwq_gather_shifted_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrwq_gather_shifted_offset_f32(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 0)
				// CHECK-NEXT: ret <4 x i32> [[TMP0]]
				//
				int32x4_t test_vldrwq_gather_shifted_offset_s32(const int32_t *base, uint32x4_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _s32 name otherwise; expected immediates are i32 32, i32 2, i32 0. */
				#ifdef POLYMORPHIC
				return vldrwq_gather_shifted_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrwq_gather_shifted_offset_s32(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 1)
				// CHECK-NEXT: ret <4 x i32> [[TMP0]]
				//
				uint32x4_t test_vldrwq_gather_shifted_offset_u32(const uint32_t *base, uint32x4_t offset)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _u32 name otherwise; expected immediates are i32 32, i32 2, i32 1. */
				#ifdef POLYMORPHIC
				return vldrwq_gather_shifted_offset(base, offset);
				#else /* POLYMORPHIC */
				return vldrwq_gather_shifted_offset_u32(base, offset);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_z_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x float> [[TMP2]]
				//
				float32x4_t test_vldrwq_gather_shifted_offset_z_f32(const float32_t *base, uint32x4_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _f32 name otherwise; expected immediates are i32 32, i32 2, i32 0. */
				#ifdef POLYMORPHIC
				return vldrwq_gather_shifted_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrwq_gather_shifted_offset_z_f32(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_z_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x i32> [[TMP2]]
				//
				int32x4_t test_vldrwq_gather_shifted_offset_z_s32(const int32_t *base, uint32x4_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _s32 name otherwise; expected immediates are i32 32, i32 2, i32 0. */
				#ifdef POLYMORPHIC
				return vldrwq_gather_shifted_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrwq_gather_shifted_offset_z_s32(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vldrwq_gather_shifted_offset_z_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], i32 32, i32 2, i32 1, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret <4 x i32> [[TMP2]]
				//
				uint32x4_t test_vldrwq_gather_shifted_offset_z_u32(const uint32_t *base, uint32x4_t offset, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _u32 name otherwise; expected immediates are i32 32, i32 2, i32 1. */
				#ifdef POLYMORPHIC
				return vldrwq_gather_shifted_offset_z(base, offset, p);
				#else /* POLYMORPHIC */
				return vldrwq_gather_shifted_offset_z_u32(base, offset, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrbq_scatter_offset_p_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 8, i32 0, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrbq_scatter_offset_p_s16(int8_t *base, uint16x8_t offset, int16x8_t value, mve_pred16_t p)
				{
				/* Overloaded name when POLYMORPHIC is defined, explicit _s16 name otherwise; the expected IR is a void predicated scatter call with immediates i32 8, i32 0. */
				#ifdef POLYMORPHIC
				vstrbq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrbq_scatter_offset_p_s16(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrbq_scatter_offset_p_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 8, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrbq_scatter_offset_p_s32(int8_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _s32 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrbq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrbq_scatter_offset_p_s32(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrbq_scatter_offset_p_s8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], <16 x i8> [[VALUE:%.*]], i32 8, i32 0, <16 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrbq_scatter_offset_p_s8(int8_t *base, uint8x16_t offset, int8x16_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _s8 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrbq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrbq_scatter_offset_p_s8(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrbq_scatter_offset_p_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 8, i32 0, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrbq_scatter_offset_p_u16(uint8_t *base, uint16x8_t offset, uint16x8_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _u16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrbq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrbq_scatter_offset_p_u16(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrbq_scatter_offset_p_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 8, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrbq_scatter_offset_p_u32(uint8_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _u32 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrbq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrbq_scatter_offset_p_u32(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrbq_scatter_offset_p_u8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], <16 x i8> [[VALUE:%.*]], i32 8, i32 0, <16 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrbq_scatter_offset_p_u8(uint8_t *base, uint8x16_t offset, uint8x16_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _u8 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrbq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrbq_scatter_offset_p_u8(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrbq_scatter_offset_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 8, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrbq_scatter_offset_s16(int8_t *base, uint16x8_t offset, int16x8_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _s16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrbq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrbq_scatter_offset_s16(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrbq_scatter_offset_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 8, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrbq_scatter_offset_s32(int8_t *base, uint32x4_t offset, int32x4_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _s32 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrbq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrbq_scatter_offset_s32(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrbq_scatter_offset_s8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], <16 x i8> [[VALUE:%.*]], i32 8, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrbq_scatter_offset_s8(int8_t *base, uint8x16_t offset, int8x16_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _s8 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrbq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrbq_scatter_offset_s8(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrbq_scatter_offset_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 8, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrbq_scatter_offset_u16(uint8_t *base, uint16x8_t offset, uint16x8_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _u16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrbq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrbq_scatter_offset_u16(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrbq_scatter_offset_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 8, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrbq_scatter_offset_u32(uint8_t *base, uint32x4_t offset, uint32x4_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _u32 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrbq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrbq_scatter_offset_u32(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrbq_scatter_offset_u8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* [[BASE:%.*]], <16 x i8> [[OFFSET:%.*]], <16 x i8> [[VALUE:%.*]], i32 8, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrbq_scatter_offset_u8(uint8_t *base, uint8x16_t offset, uint8x16_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _u8 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrbq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrbq_scatter_offset_u8(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_base_p_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> [[ADDR:%.*]], i32 888, <2 x i64> [[VALUE:%.*]], <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_base_p_s64(uint64x2_t addr, int64x2_t value, mve_pred16_t p)
				{
				// The immediate 0x378 is the i32 888 operand expected above; POLYMORPHIC only selects the overloaded vs. _s64 name.
				#ifdef POLYMORPHIC
				vstrdq_scatter_base_p(addr, 0x378, value, p);
				#else /* POLYMORPHIC */
				vstrdq_scatter_base_p_s64(addr, 0x378, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_base_p_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> [[ADDR:%.*]], i32 264, <2 x i64> [[VALUE:%.*]], <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_base_p_u64(uint64x2_t addr, uint64x2_t value, mve_pred16_t p)
				{
				// The immediate 0x108 is the i32 264 operand expected above; POLYMORPHIC only selects the overloaded vs. _u64 name.
				#ifdef POLYMORPHIC
				vstrdq_scatter_base_p(addr, 0x108, value, p);
				#else /* POLYMORPHIC */
				vstrdq_scatter_base_p_u64(addr, 0x108, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_base_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 408, <2 x i64> [[VALUE:%.*]])
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_base_s64(uint64x2_t addr, int64x2_t value)
				{
				// The immediate 0x198 is the i32 408 operand expected above; POLYMORPHIC only selects the overloaded vs. _s64 name.
				#ifdef POLYMORPHIC
				vstrdq_scatter_base(addr, 0x198, value);
				#else /* POLYMORPHIC */
				vstrdq_scatter_base_s64(addr, 0x198, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_base_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 472, <2 x i64> [[VALUE:%.*]])
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_base_u64(uint64x2_t addr, uint64x2_t value)
				{
				// The immediate 0x1d8 is the i32 472 operand expected above; POLYMORPHIC only selects the overloaded vs. _u64 name.
				#ifdef POLYMORPHIC
				vstrdq_scatter_base(addr, 0x1d8, value);
				#else /* POLYMORPHIC */
				vstrdq_scatter_base_u64(addr, 0x1d8, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_base_wb_p_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> [[TMP0]], i32 248, <2 x i64> [[VALUE:%.*]], <4 x i1> [[TMP2]])
				// CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[ADDR]], align 8
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_base_wb_p_s64(uint64x2_t *addr, int64x2_t value, mve_pred16_t p)
				{
				// addr is a pointer here: the expected IR above loads it, calls the intrinsic with 0xf8 (i32 248), and stores the result back.
				#ifdef POLYMORPHIC
				vstrdq_scatter_base_wb_p(addr, 0xf8, value, p);
				#else /* POLYMORPHIC */
				vstrdq_scatter_base_wb_p_s64(addr, 0xf8, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_base_wb_p_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> [[TMP0]], i32 136, <2 x i64> [[VALUE:%.*]], <4 x i1> [[TMP2]])
				// CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[ADDR]], align 8
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_base_wb_p_u64(uint64x2_t *addr, uint64x2_t value, mve_pred16_t p)
				{
				// addr is a pointer here: the expected IR above loads it, calls the intrinsic with 0x88 (i32 136), and stores the result back.
				#ifdef POLYMORPHIC
				vstrdq_scatter_base_wb_p(addr, 0x88, value, p);
				#else /* POLYMORPHIC */
				vstrdq_scatter_base_wb_p_u64(addr, 0x88, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_base_wb_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 208, <2 x i64> [[VALUE:%.*]])
				// CHECK-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[ADDR]], align 8
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_base_wb_s64(uint64x2_t *addr, int64x2_t value)
				{
				// addr is a pointer here: the expected IR above loads it, calls the intrinsic with 0xd0 (i32 208), and stores the result back.
				#ifdef POLYMORPHIC
				vstrdq_scatter_base_wb(addr, 0xd0, value);
				#else /* POLYMORPHIC */
				vstrdq_scatter_base_wb_s64(addr, 0xd0, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_base_wb_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 168, <2 x i64> [[VALUE:%.*]])
				// CHECK-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[ADDR]], align 8
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_base_wb_u64(uint64x2_t *addr, uint64x2_t value)
				{
				// addr is a pointer here: the expected IR above loads it, calls the intrinsic with 0xa8 (i32 168), and stores the result back.
				#ifdef POLYMORPHIC
				vstrdq_scatter_base_wb(addr, 0xa8, value);
				#else /* POLYMORPHIC */
				vstrdq_scatter_base_wb_u64(addr, 0xa8, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_offset_p_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_offset_p_s64(int64_t *base, uint64x2_t offset, int64x2_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _s64 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrdq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrdq_scatter_offset_p_s64(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_offset_p_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_offset_p_u64(uint64_t *base, uint64x2_t offset, uint64x2_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _u64 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrdq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrdq_scatter_offset_p_u64(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_offset_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_offset_s64(int64_t *base, uint64x2_t offset, int64x2_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _s64 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrdq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrdq_scatter_offset_s64(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_offset_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_offset_u64(uint64_t *base, uint64x2_t offset, uint64x2_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _u64 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrdq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrdq_scatter_offset_u64(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_shifted_offset_p_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 3, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_shifted_offset_p_s64(int64_t *base, uint64x2_t offset, int64x2_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _s64 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrdq_scatter_shifted_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrdq_scatter_shifted_offset_p_s64(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_shifted_offset_p_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 3, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_shifted_offset_p_u64(uint64_t *base, uint64x2_t offset, uint64x2_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _u64 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrdq_scatter_shifted_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrdq_scatter_shifted_offset_p_u64(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_shifted_offset_s64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 3)
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_shifted_offset_s64(int64_t *base, uint64x2_t offset, int64x2_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _s64 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrdq_scatter_shifted_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrdq_scatter_shifted_offset_s64(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrdq_scatter_shifted_offset_u64(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 3)
				// CHECK-NEXT: ret void
				//
				void test_vstrdq_scatter_shifted_offset_u64(uint64_t *base, uint64x2_t offset, uint64x2_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _u64 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrdq_scatter_shifted_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrdq_scatter_shifted_offset_u64(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_offset_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x half> [[VALUE:%.*]], i32 16, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_offset_f16(float16_t *base, uint16x8_t offset, float16x8_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _f16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrhq_scatter_offset_f16(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_offset_p_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x half> [[VALUE:%.*]], i32 16, i32 0, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_offset_p_f16(float16_t *base, uint16x8_t offset, float16x8_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _f16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrhq_scatter_offset_p_f16(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_offset_p_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 0, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_offset_p_s16(int16_t *base, uint16x8_t offset, int16x8_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _s16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrhq_scatter_offset_p_s16(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_offset_p_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_offset_p_s32(int16_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _s32 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrhq_scatter_offset_p_s32(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_offset_p_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 0, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_offset_p_u16(uint16_t *base, uint16x8_t offset, uint16x8_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _u16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrhq_scatter_offset_p_u16(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_offset_p_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_offset_p_u32(uint16_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _u32 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrhq_scatter_offset_p_u32(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_offset_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_offset_s16(int16_t *base, uint16x8_t offset, int16x8_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _s16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrhq_scatter_offset_s16(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_offset_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_offset_s32(int16_t *base, uint32x4_t offset, int32x4_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _s32 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrhq_scatter_offset_s32(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_offset_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_offset_u16(uint16_t *base, uint16x8_t offset, uint16x8_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _u16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrhq_scatter_offset_u16(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_offset_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_offset_u32(uint16_t *base, uint32x4_t offset, uint32x4_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _u32 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrhq_scatter_offset_u32(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x half> [[VALUE:%.*]], i32 16, i32 1)
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_shifted_offset_f16(float16_t *base, uint16x8_t offset, float16x8_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _f16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_shifted_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrhq_scatter_shifted_offset_f16(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x half> [[VALUE:%.*]], i32 16, i32 1, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_shifted_offset_p_f16(float16_t *base, uint16x8_t offset, float16x8_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _f16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_shifted_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrhq_scatter_shifted_offset_p_f16(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 1, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_shifted_offset_p_s16(int16_t *base, uint16x8_t offset, int16x8_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _s16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_shifted_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrhq_scatter_shifted_offset_p_s16(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 1, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_shifted_offset_p_s32(int16_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _s32 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_shifted_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrhq_scatter_shifted_offset_p_s32(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 1, <8 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_shifted_offset_p_u16(uint16_t *base, uint16x8_t offset, uint16x8_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _u16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_shifted_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrhq_scatter_shifted_offset_p_u16(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_p_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 1, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_shifted_offset_p_u32(uint16_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p)
				{
				// Overloaded name under POLYMORPHIC, explicit _u32 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_shifted_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrhq_scatter_shifted_offset_p_u32(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 1)
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_shifted_offset_s16(int16_t *base, uint16x8_t offset, int16x8_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _s16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_shifted_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrhq_scatter_shifted_offset_s16(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 1)
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_shifted_offset_s32(int16_t *base, uint32x4_t offset, int32x4_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _s32 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_shifted_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrhq_scatter_shifted_offset_s32(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* [[BASE:%.*]], <8 x i16> [[OFFSET:%.*]], <8 x i16> [[VALUE:%.*]], i32 16, i32 1)
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_shifted_offset_u16(uint16_t *base, uint16x8_t offset, uint16x8_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _u16 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_shifted_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrhq_scatter_shifted_offset_u16(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrhq_scatter_shifted_offset_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 16, i32 1)
				// CHECK-NEXT: ret void
				//
				void test_vstrhq_scatter_shifted_offset_u32(uint16_t *base, uint32x4_t offset, uint32x4_t value)
				{
				// Overloaded name under POLYMORPHIC, explicit _u32 name otherwise; the expected IR above covers whichever is compiled.
				#ifdef POLYMORPHIC
				vstrhq_scatter_shifted_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrhq_scatter_shifted_offset_u32(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_base_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> [[ADDR:%.*]], i32 380, <4 x float> [[VALUE:%.*]])
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_base_f32(uint32x4_t addr, float32x4_t value)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_base(addr, 0x17c, value);
				#else /* POLYMORPHIC */
				vstrwq_scatter_base_f32(addr, 0x17c, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_base_p_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32> [[ADDR:%.*]], i32 400, <4 x float> [[VALUE:%.*]], <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_base_p_f32(uint32x4_t addr, float32x4_t value, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_base_p(addr, 0x190, value, p);
				#else /* POLYMORPHIC */
				vstrwq_scatter_base_p_f32(addr, 0x190, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_base_p_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 48, <4 x i32> [[VALUE:%.*]], <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_base_p_s32(uint32x4_t addr, int32x4_t value, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_base_p(addr, 0x30, value, p);
				#else /* POLYMORPHIC */
				vstrwq_scatter_base_p_s32(addr, 0x30, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_base_p_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> [[ADDR:%.*]], i32 376, <4 x i32> [[VALUE:%.*]], <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_base_p_u32(uint32x4_t addr, uint32x4_t value, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_base_p(addr, 0x178, value, p);
				#else /* POLYMORPHIC */
				vstrwq_scatter_base_p_u32(addr, 0x178, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_base_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> [[ADDR:%.*]], i32 156, <4 x i32> [[VALUE:%.*]])
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_base_s32(uint32x4_t addr, int32x4_t value)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_base(addr, 0x9c, value);
				#else /* POLYMORPHIC */
				vstrwq_scatter_base_s32(addr, 0x9c, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_base_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> [[ADDR:%.*]], i32 212, <4 x i32> [[VALUE:%.*]])
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_base_u32(uint32x4_t addr, uint32x4_t value)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_base(addr, 0xd4, value);
				#else /* POLYMORPHIC */
				vstrwq_scatter_base_u32(addr, 0xd4, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_base_wb_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32> [[TMP0]], i32 412, <4 x float> [[VALUE:%.*]])
				// CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[ADDR]], align 8
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_base_wb_f32(uint32x4_t *addr, float32x4_t value)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_base_wb(addr, 0x19c, value);
				#else /* POLYMORPHIC */
				vstrwq_scatter_base_wb_f32(addr, 0x19c, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_base_wb_p_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32> [[TMP0]], i32 236, <4 x float> [[VALUE:%.*]], <4 x i1> [[TMP2]])
				// CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[ADDR]], align 8
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_base_wb_p_f32(uint32x4_t *addr, float32x4_t value, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_base_wb_p(addr, 0xec, value, p);
				#else /* POLYMORPHIC */
				vstrwq_scatter_base_wb_p_f32(addr, 0xec, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_base_wb_p_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 328, <4 x i32> [[VALUE:%.*]], <4 x i1> [[TMP2]])
				// CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[ADDR]], align 8
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_base_wb_p_s32(uint32x4_t *addr, int32x4_t value, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_base_wb_p(addr, 0x148, value, p);
				#else /* POLYMORPHIC */
				vstrwq_scatter_base_wb_p_s32(addr, 0x148, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_base_wb_p_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> [[TMP0]], i32 412, <4 x i32> [[VALUE:%.*]], <4 x i1> [[TMP2]])
				// CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[ADDR]], align 8
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_base_wb_p_u32(uint32x4_t *addr, uint32x4_t value, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_base_wb_p(addr, 0x19c, value, p);
				#else /* POLYMORPHIC */
				vstrwq_scatter_base_wb_p_u32(addr, 0x19c, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_base_wb_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> [[TMP0]], i32 152, <4 x i32> [[VALUE:%.*]])
				// CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[ADDR]], align 8
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_base_wb_s32(uint32x4_t *addr, int32x4_t value)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_base_wb(addr, 0x98, value);
				#else /* POLYMORPHIC */
				vstrwq_scatter_base_wb_s32(addr, 0x98, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_base_wb_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> [[TMP0]], i32 64, <4 x i32> [[VALUE:%.*]])
				// CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[ADDR]], align 8
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_base_wb_u32(uint32x4_t *addr, uint32x4_t value)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_base_wb(addr, 0x40, value);
				#else /* POLYMORPHIC */
				vstrwq_scatter_base_wb_u32(addr, 0x40, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_offset_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x float> [[VALUE:%.*]], i32 32, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_offset_f32(float32_t *base, uint32x4_t offset, float32x4_t value)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrwq_scatter_offset_f32(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_offset_p_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x float> [[VALUE:%.*]], i32 32, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_offset_p_f32(float32_t *base, uint32x4_t offset, float32x4_t value, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrwq_scatter_offset_p_f32(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_offset_p_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_offset_p_s32(int32_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrwq_scatter_offset_p_s32(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_offset_p_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_offset_p_u32(uint32_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrwq_scatter_offset_p_u32(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_offset_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_offset_s32(int32_t *base, uint32x4_t offset, int32x4_t value)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrwq_scatter_offset_s32(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_offset_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 0)
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_offset_u32(uint32_t *base, uint32x4_t offset, uint32x4_t value)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrwq_scatter_offset_u32(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x float> [[VALUE:%.*]], i32 32, i32 2)
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_shifted_offset_f32(float32_t *base, uint32x4_t offset, float32x4_t value)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_shifted_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrwq_scatter_shifted_offset_f32(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_p_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x float> [[VALUE:%.*]], i32 32, i32 2, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_shifted_offset_p_f32(float32_t *base, uint32x4_t offset, float32x4_t value, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_shifted_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrwq_scatter_shifted_offset_p_f32(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_p_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 2, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_shifted_offset_p_s32(int32_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_shifted_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrwq_scatter_shifted_offset_p_s32(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_p_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 2, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_shifted_offset_p_u32(uint32_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_shifted_offset_p(base, offset, value, p);
				#else /* POLYMORPHIC */
				vstrwq_scatter_shifted_offset_p_u32(base, offset, value, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 2)
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_shifted_offset_s32(int32_t *base, uint32x4_t offset, int32x4_t value)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_shifted_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrwq_scatter_shifted_offset_s32(base, offset, value);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vstrwq_scatter_shifted_offset_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* [[BASE:%.*]], <4 x i32> [[OFFSET:%.*]], <4 x i32> [[VALUE:%.*]], i32 32, i32 2)
				// CHECK-NEXT: ret void
				//
				void test_vstrwq_scatter_shifted_offset_u32(uint32_t *base, uint32x4_t offset, uint32x4_t value)
				{
				#ifdef POLYMORPHIC
				vstrwq_scatter_shifted_offset(base, offset, value);
				#else /* POLYMORPHIC */
				vstrwq_scatter_shifted_offset_u32(base, offset, value);
				#endif /* POLYMORPHIC */
				}

clang/test/Sema/arm-mve-immediates.c

This file was added.

				// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -verify -fsyntax-only %s

				#include <arm_mve.h>

				// Exercises the compile-time checks on the immediate offset argument of
				// the gather-base loads and scatter-base stores. Each group first lists
				// offsets that must be accepted (multiples of the access size, from 0 up
				// to 127 times that size), then one negative, one too-large and two
				// misaligned offsets, each annotated with the diagnostic it must produce.
				void test_load_offsets(uint32x4_t addr32, uint64x2_t addr64)
				{
				// Offsets that should be a multiple of 8 times 0,1,...,127
				vldrdq_gather_base_s64(addr64, 0);
				vldrdq_gather_base_s64(addr64, 8);
				vldrdq_gather_base_s64(addr64, 2*8);
				vldrdq_gather_base_s64(addr64, 125*8);
				vldrdq_gather_base_s64(addr64, 126*8);
				vldrdq_gather_base_s64(addr64, 127*8);
				vldrdq_gather_base_s64(addr64, -8); // expected-error {{argument value -8 is outside the valid range [0, 1016]}}
				vldrdq_gather_base_s64(addr64, 128*8); // expected-error {{argument value 1024 is outside the valid range [0, 1016]}}
				vldrdq_gather_base_s64(addr64, 4); // expected-error {{argument should be a multiple of 8}}
				vldrdq_gather_base_s64(addr64, 1); // expected-error {{argument should be a multiple of 8}}

				// Offsets that should be a multiple of 4 times 0,1,...,127
				vldrwq_gather_base_s32(addr32, 0);
				vldrwq_gather_base_s32(addr32, 4);
				vldrwq_gather_base_s32(addr32, 2*4);
				vldrwq_gather_base_s32(addr32, 125*4);
				vldrwq_gather_base_s32(addr32, 126*4);
				vldrwq_gather_base_s32(addr32, 127*4);
				vldrwq_gather_base_s32(addr32, -4); // expected-error {{argument value -4 is outside the valid range [0, 508]}}
				vldrwq_gather_base_s32(addr32, 128*4); // expected-error {{argument value 512 is outside the valid range [0, 508]}}
				vldrwq_gather_base_s32(addr32, 2); // expected-error {{argument should be a multiple of 4}}
				vldrwq_gather_base_s32(addr32, 1); // expected-error {{argument should be a multiple of 4}}

				// Show that the polymorphic store intrinsics get the right set of
				// error checks after overload resolution. These ones expand to the
				// 8-byte granular versions...
				vstrdq_scatter_base(addr64, 0, addr64);
				vstrdq_scatter_base(addr64, 8, addr64);
				vstrdq_scatter_base(addr64, 2*8, addr64);
				vstrdq_scatter_base(addr64, 125*8, addr64);
				vstrdq_scatter_base(addr64, 126*8, addr64);
				vstrdq_scatter_base(addr64, 127*8, addr64);
				vstrdq_scatter_base(addr64, -8, addr64); // expected-error {{argument value -8 is outside the valid range [0, 1016]}}
				vstrdq_scatter_base(addr64, 128*8, addr64); // expected-error {{argument value 1024 is outside the valid range [0, 1016]}}
				vstrdq_scatter_base(addr64, 4, addr64); // expected-error {{argument should be a multiple of 8}}
				vstrdq_scatter_base(addr64, 1, addr64); // expected-error {{argument should be a multiple of 8}}

				// ... and these ones to the 4-byte granular versions.
				vstrwq_scatter_base(addr32, 0, addr32);
				vstrwq_scatter_base(addr32, 4, addr32);
				vstrwq_scatter_base(addr32, 2*4, addr32);
				vstrwq_scatter_base(addr32, 125*4, addr32);
				vstrwq_scatter_base(addr32, 126*4, addr32);
				vstrwq_scatter_base(addr32, 127*4, addr32);
				vstrwq_scatter_base(addr32, -4, addr32); // expected-error {{argument value -4 is outside the valid range [0, 508]}}
				vstrwq_scatter_base(addr32, 128*4, addr32); // expected-error {{argument value 512 is outside the valid range [0, 508]}}
				vstrwq_scatter_base(addr32, 2, addr32); // expected-error {{argument should be a multiple of 4}}
				vstrwq_scatter_base(addr32, 1, addr32); // expected-error {{argument should be a multiple of 4}}
				}

clang/utils/TableGen/MveEmitter.cpp

Show First 20 Lines • Show All 198 Lines • ▼ Show 20 Lines	std::string cName() const override {
// itself a pointer. The MVE intrinsics don't contain any double		// itself a pointer. The MVE intrinsics don't contain any double
// pointers, so we don't need to worry about that wrinkle.		// pointers, so we don't need to worry about that wrinkle.
assert(!isa<PointerType>(Pointee) && "Pointer to pointer not supported");		assert(!isa<PointerType>(Pointee) && "Pointer to pointer not supported");

if (Const)		if (Const)
Name = "const " + Name;		Name = "const " + Name;
return Name + " *";		return Name + " *";
}		}
		// Returns the text of a C++ expression that builds the unqualified LLVM
		// pointer type whose pointee is this pointer's pointee type.
		std::string llvmName() const override {
		  std::string PointeeExpr = Pointee->llvmName();
		  return "llvm::PointerType::getUnqual(" + PointeeExpr + ")";
		}

static bool classof(const Type *T) {		static bool classof(const Type *T) {
return T->typeKind() == TypeKind::Pointer;		return T->typeKind() == TypeKind::Pointer;
}		}
};		};

// Base class for all the types that have a name of the form		// Base class for all the types that have a name of the form
// [prefix][numbers]_t, like int32_t, uint16x8_t, float32x4x2_t.		// [prefix][numbers]_t, like int32_t, uint16x8_t, float32x4x2_t.
▲ Show 20 Lines • Show All 292 Lines • ▼ Show 20 Lines	public:
// that it can be left out of the final generated code.		// that it can be left out of the final generated code.
std::string varname() {		std::string varname() {
VarNameUsed = true;		VarNameUsed = true;
return VarName;		return VarName;
}		}
void setVarname(const StringRef s) { VarName = s; }		void setVarname(const StringRef s) { VarName = s; }
bool varnameUsed() const { return VarNameUsed; }		bool varnameUsed() const { return VarNameUsed; }

		// Returns the text of a C++ expression that yields this result as a
		// Value *. The default is simply the result's variable name; obtaining it
		// through varname() also records that the name has been used.
		virtual std::string asValue() {
		  std::string Expr = varname();
		  return Expr;
		}

// Code generation happens in multiple passes. This method tracks whether a		// Code generation happens in multiple passes. This method tracks whether a
// Result has yet been visited in a given pass, without the need for a		// Result has yet been visited in a given pass, without the need for a
// tedious loop in between passes that goes through and resets a 'visited'		// tedious loop in between passes that goes through and resets a 'visited'
// flag back to false: you just set Pass=1 the first time round, and Pass=2		// flag back to false: you just set Pass=1 the first time round, and Pass=2
// the second time.		// the second time.
bool needsVisiting(unsigned Pass) {		bool needsVisiting(unsigned Pass) {
bool ToRet = Visited < Pass;		bool ToRet = Visited < Pass;
Visited = Pass;		Visited = Pass;
Show All 19 Lines	BuiltinArgResult(unsigned ArgNum, bool AddressType)
: ArgNum(ArgNum), AddressType(AddressType) {}		: ArgNum(ArgNum), AddressType(AddressType) {}
void genCode(raw_ostream &OS, CodeGenParamAllocator &) const override {		void genCode(raw_ostream &OS, CodeGenParamAllocator &) const override {
OS << (AddressType ? "EmitPointerWithAlignment" : "EmitScalarExpr")		OS << (AddressType ? "EmitPointerWithAlignment" : "EmitScalarExpr")
<< "(E->getArg(" << ArgNum << "))";		<< "(E->getArg(" << ArgNum << "))";
}		}
std::string typeName() const override {		std::string typeName() const override {
return AddressType ? "Address" : Result::typeName();		return AddressType ? "Address" : Result::typeName();
}		}
		// Returns the text of a C++ expression that yields this argument as a
		// Value *. An argument held as an Address is unwrapped with getPointer();
		// any other argument falls back to the default from Result.
		std::string asValue() override {
		  return AddressType ? "(" + varname() + ".getPointer())"
		                     : Result::asValue();
		}
};		};

// Result subclass for an integer literal appearing in Tablegen. This may need		// Result subclass for an integer literal appearing in Tablegen. This may need
// to be turned into an llvm::Result by means of llvm::ConstantInt::get(), or		// to be turned into an llvm::Result by means of llvm::ConstantInt::get(), or
// it may be used directly as an integer, depending on which IRBuilder method		// it may be used directly as an integer, depending on which IRBuilder method
// it's being passed to.		// it's being passed to.
class IntLiteralResult : public Result {		class IntLiteralResult : public Result {
public:		public:
▲ Show 20 Lines • Show All 102 Lines • ▼ Show 20 Lines	if (!ParamTypes.empty()) {
OS << Sep << ParamAlloc.allocParam("llvm::Type *", T->llvmName());		OS << Sep << ParamAlloc.allocParam("llvm::Type *", T->llvmName());
Sep = ", ";		Sep = ", ";
}		}
OS << "}";		OS << "}";
}		}
OS << "), llvm::SmallVector<Value *, " << Args.size() << "> {";		OS << "), llvm::SmallVector<Value *, " << Args.size() << "> {";
const char *Sep = "";		const char *Sep = "";
for (auto Arg : Args) {		for (auto Arg : Args) {
OS << Sep << Arg->varname();		OS << Sep << Arg->asValue();
Sep = ", ";		Sep = ", ";
}		}
OS << "})";		OS << "})";
}		}
void morePrerequisites(std::vector<Ptr> &output) const override {		void morePrerequisites(std::vector<Ptr> &output) const override {
output.insert(output.end(), Args.begin(), Args.end());		output.insert(output.end(), Args.begin(), Args.end());
}		}
};		};
▲ Show 20 Lines • Show All 292 Lines • ▼ Show 20 Lines	if (Op->isSubClassOf("CTO_Tuple")) {
return getMultiVectorType(Registers, cast<VectorType>(Element));		return getMultiVectorType(Registers, cast<VectorType>(Element));
}		}

if (Op->isSubClassOf("CTO_Pointer")) {		if (Op->isSubClassOf("CTO_Pointer")) {
const Type *Pointee = getType(D->getArg(0), Param);		const Type *Pointee = getType(D->getArg(0), Param);
return getPointerType(Pointee, Op->getValueAsBit("const"));		return getPointerType(Pointee, Op->getValueAsBit("const"));
}		}

if (Op->isSubClassOf("CTO_Sign")) {		if (Op->getName() == "CTO_CopyKind") {
const ScalarType *ST = cast<ScalarType>(getType(D->getArg(0), Param));		const ScalarType *STSize = cast<ScalarType>(getType(D->getArg(0), Param));
ScalarTypeKind NewKind = Op->getValueAsBit("signed")		const ScalarType *STKind = cast<ScalarType>(getType(D->getArg(1), Param));
? ScalarTypeKind::SignedInt
: ScalarTypeKind::UnsignedInt;
for (const auto &kv : ScalarTypes) {		for (const auto &kv : ScalarTypes) {
const ScalarType *RT = kv.second.get();		const ScalarType *RT = kv.second.get();
if (RT->kind() == NewKind && RT->sizeInBits() == ST->sizeInBits())		if (RT->kind() == STKind->kind() && RT->sizeInBits() == STSize->sizeInBits())
return RT;		return RT;
}		}
PrintFatalError("Cannot change sign of this type");		PrintFatalError("Cannot find a type to satisfy CopyKind");
}		}

PrintFatalError("Bad operator in type dag expression");		PrintFatalError("Bad operator in type dag expression");
}		}

Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope,		Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope,
const Type *Param) {		const Type *Param) {
Record *Op = cast<DefInit>(D->getOperator())->getDef();		Record *Op = cast<DefInit>(D->getOperator())->getDef();
Show All 24 Lines	if (const auto *ST = dyn_cast<ScalarType>(CastType)) {
if (Arg->hasIntegerConstantValue())		if (Arg->hasIntegerConstantValue())
return std::make_shared<IntLiteralResult>(		return std::make_shared<IntLiteralResult>(
ST, Arg->integerConstantValue());		ST, Arg->integerConstantValue());
else		else
return std::make_shared<IntCastResult>(ST, Arg);		return std::make_shared<IntCastResult>(ST, Arg);
}		}
}		}
PrintFatalError("Unsupported type cast");		PrintFatalError("Unsupported type cast");
		} else if (Op->getName() == "unsignedflag") {
		if (D->getNumArgs() != 1)
		PrintFatalError("unsignedflag should have exactly one argument");
		Record *TypeRec = cast<DefInit>(D->getArg(0))->getDef();
		if (!TypeRec->isSubClassOf("Type"))
		PrintFatalError("unsignedflag's argument should be a type");
		if (const auto *ST = dyn_cast<ScalarType>(getType(TypeRec, Param))) {
		return std::make_shared<IntLiteralResult>(
		getScalarType("u32"), ST->kind() == ScalarTypeKind::UnsignedInt);
		} else {
		PrintFatalError("unsignedflag's argument should be a scalar type");
		}
} else {		} else {
std::vector<Result::Ptr> Args;		std::vector<Result::Ptr> Args;
for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i)		for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i)
Args.push_back(getCodeForDagArg(D, i, Scope, Param));		Args.push_back(getCodeForDagArg(D, i, Scope, Param));
if (Op->isSubClassOf("IRBuilder")) {		if (Op->isSubClassOf("IRBuilder")) {
std::set<unsigned> AddressArgs;		std::set<unsigned> AddressArgs;
for (unsigned i : Op->getValueAsListOfInts("address_params"))		for (unsigned i : Op->getValueAsListOfInts("address_params"))
AddressArgs.insert(i);		AddressArgs.insert(i);
▲ Show 20 Lines • Show All 664 Lines • Show Last 20 Lines

llvm/include/llvm/IR/IntrinsicsARM.td

Show First 20 Lines • Show All 806 Lines • ▼ Show 20 Lines	def int_arm_mve_sub_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],		[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;		[IntrNoMem]>;

defm int_arm_mve_minv: IntrinsicSignSuffix<[llvm_i32_ty],		defm int_arm_mve_minv: IntrinsicSignSuffix<[llvm_i32_ty],
[llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;		[llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
defm int_arm_mve_maxv: IntrinsicSignSuffix<[llvm_i32_ty],		defm int_arm_mve_maxv: IntrinsicSignSuffix<[llvm_i32_ty],
[llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;		[llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;

def int_arm_mve_vcvt_narrow: Intrinsic<[llvm_v8f16_ty],		multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params,
[llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;		LLVMType pred, list<IntrinsicProperty> props = []> {
def int_arm_mve_vcvt_narrow_predicated: Intrinsic<[llvm_v8f16_ty],		def "": Intrinsic<rets, params, props>;
[llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4i1_ty], [IntrNoMem]>;		def _predicated: Intrinsic<rets, params # [pred], props>;
		}
		dmgreenUnsubmitted Not Done Reply Inline Actions This looks useful. dmgreen: This looks useful.
		simon_tathamAuthorUnsubmitted Done Reply Inline Actions Eventually we'll probably need another one alongside it, for the many intrinsics that add an `inactive` parameter as well as a predicate mask. But for the moment I'm just adding what I need to use right now. simon_tatham: Eventually we'll probably need another one alongside it, for the many intrinsics that add an…

def int_arm_mve_vldr_gather_base_wb: Intrinsic<		defm int_arm_mve_vcvt_narrow: MVEPredicated<[llvm_v8f16_ty],
[llvm_anyvector_ty, llvm_anyvector_ty],		[llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], llvm_v4i1_ty, [IntrNoMem]>;
[LLVMMatchType<1>, llvm_i32_ty], [IntrReadMem]>;
def int_arm_mve_vldr_gather_base_wb_predicated: Intrinsic<		defm int_arm_mve_vldr_gather_base: MVEPredicated<
		[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty],
		llvm_anyvector_ty, [IntrReadMem]>;
		defm int_arm_mve_vldr_gather_base_wb: MVEPredicated<
[llvm_anyvector_ty, llvm_anyvector_ty],		[llvm_anyvector_ty, llvm_anyvector_ty],
[LLVMMatchType<1>, llvm_i32_ty, llvm_anyvector_ty], [IntrReadMem]>;		[LLVMMatchType<1>, llvm_i32_ty], llvm_anyvector_ty, [IntrReadMem]>;
		// Scatter stores through a vector of addresses. Operands are the address
		// vector, an i32 offset, and the data vector; MVEPredicated also emits a
		// _predicated variant with the predicate vector appended as a final operand.
		defm int_arm_mve_vstr_scatter_base: MVEPredicated<
		[], [llvm_anyvector_ty, llvm_i32_ty, llvm_anyvector_ty],
		llvm_anyvector_ty, [IntrWriteMem]>;
		// Writeback form: additionally returns a vector whose type matches the
		// address operand (the first parameter is LLVMMatchType<0>).
		defm int_arm_mve_vstr_scatter_base_wb: MVEPredicated<
		[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty],
		llvm_anyvector_ty, [IntrWriteMem]>;

		// gather_offset takes three i32 parameters. The first is the size of
		// memory element loaded, in bits. The second is a left bit shift to
		// apply to each offset in the vector parameter (must be either 0, or
		// correspond to the element size of the destination vector type). The
		// last is 1 to indicate zero extension (if the load is widening), or
		// 0 for sign extension.
		//
		// scatter_offset has the first two of those parameters, but since it
		// narrows rather than widening, it doesn't have the last one.
		// Gather: returns a vector; operands are a base pointer, a vector of
		// offsets, and the three i32 controls (memory bits, shift, unsigned flag).
		// The _predicated variant appends a predicate vector.
		defm int_arm_mve_vldr_gather_offset: MVEPredicated<
		[llvm_anyvector_ty], [llvm_anyptr_ty, llvm_anyvector_ty,
		llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrReadMem]>;
		// Scatter: no result; operands are a base pointer, two vectors (used as
		// offsets then data by the ISel patterns), and the two i32 controls
		// (memory bits, shift). The _predicated variant appends a predicate vector.
		defm int_arm_mve_vstr_scatter_offset: MVEPredicated<
		[], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty,
		llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrWriteMem]>;

def int_arm_mve_urshrl: Intrinsic<		def int_arm_mve_urshrl: Intrinsic<
[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],		[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;		[IntrNoMem]>;

def int_arm_mve_vadc: Intrinsic<		def int_arm_mve_vadc: Intrinsic<
[llvm_anyvector_ty, llvm_i32_ty],		[llvm_anyvector_ty, llvm_i32_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;		[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
Show All 16 Lines

llvm/lib/Target/ARM/ARMInstrMVE.td

Show First 20 Lines • Show All 272 Lines • ▼ Show 20 Lines	let ParserMatchClass =
!cast<AsmOperandClass>("MemRegQS"#shift#"OffsetAsmOperand");		!cast<AsmOperandClass>("MemRegQS"#shift#"OffsetAsmOperand");
let DecoderMethod = "DecodeMveAddrModeQ<"#shift#">";		let DecoderMethod = "DecodeMveAddrModeQ<"#shift#">";
let MIOperandInfo = (ops MQPR:$base, i32imm:$imm);		let MIOperandInfo = (ops MQPR:$base, i32imm:$imm);
}		}

// A family of classes wrapping up information about the vector types		// A family of classes wrapping up information about the vector types
// used by MVE.		// used by MVE.
class MVEVectorVTInfo<ValueType vec, ValueType pred, bits<2> size,		class MVEVectorVTInfo<ValueType vec, ValueType pred, bits<2> size,
string suffix, bit unsigned> {		string suffixletter, bit unsigned> {
// The LLVM ValueType representing the vector, so we can use it in		// The LLVM ValueType representing the vector, so we can use it in
// ISel patterns.		// ISel patterns.
ValueType Vec = vec;		ValueType Vec = vec;

// An LLVM ValueType representing a corresponding vector of		// An LLVM ValueType representing a corresponding vector of
// predicate bits, for use in ISel patterns that handle an IR		// predicate bits, for use in ISel patterns that handle an IR
// intrinsic describing the predicated form of the instruction.		// intrinsic describing the predicated form of the instruction.
//		//
Show All 9 Lines	class MVEVectorVTInfo<ValueType vec, ValueType pred, bits<2> size,
// instruction encodings: a 2-bit value V representing an (8<<V)-bit		// instruction encodings: a 2-bit value V representing an (8<<V)-bit
// vector element.		// vector element.
bits<2> Size = size;		bits<2> Size = size;

// For vectors explicitly mentioning a signedness of integers: 0 for		// For vectors explicitly mentioning a signedness of integers: 0 for
// signed and 1 for unsigned. For anything else, undefined.		// signed and 1 for unsigned. For anything else, undefined.
bit Unsigned = unsigned;		bit Unsigned = unsigned;

// The suffix used on the instruction in assembly language.		// The number of bits in a vector element, in integer form.
string Suffix = suffix;		int LaneBits = !shl(8, Size);
		dmgreen: Very nice. Like it.

		// The suffix used in assembly language on an instruction operating
		// on this lane if it only cares about number of bits.
		string BitsSuffix = !cast<string>(LaneBits);

		// The suffix used on an instruction that mentions the whole type.
		string Suffix = suffixletter ## BitsSuffix;
}		}

// Integer vector types that don't treat signed and unsigned differently.		// Integer vector types that don't treat signed and unsigned differently.
def MVE_v16i8 : MVEVectorVTInfo<v16i8, v16i1, 0b00, "i8", ?>;		def MVE_v16i8 : MVEVectorVTInfo<v16i8, v16i1, 0b00, "i", ?>;
def MVE_v8i16 : MVEVectorVTInfo<v8i16, v8i1, 0b01, "i16", ?>;		def MVE_v8i16 : MVEVectorVTInfo<v8i16, v8i1, 0b01, "i", ?>;
def MVE_v4i32 : MVEVectorVTInfo<v4i32, v4i1, 0b10, "i32", ?>;		def MVE_v4i32 : MVEVectorVTInfo<v4i32, v4i1, 0b10, "i", ?>;
def MVE_v2i64 : MVEVectorVTInfo<v2i64, v4i1, 0b11, "i64", ?>;		def MVE_v2i64 : MVEVectorVTInfo<v2i64, v4i1, 0b11, "i", ?>;

// Explicitly signed and unsigned integer vectors. They map to the		// Explicitly signed and unsigned integer vectors. They map to the
// same set of LLVM ValueTypes as above, but are represented		// same set of LLVM ValueTypes as above, but are represented
// differently in assembly and instruction encodings.		// differently in assembly and instruction encodings.
def MVE_v16s8 : MVEVectorVTInfo<v16i8, v16i1, 0b00, "s8", 0b0>;		def MVE_v16s8 : MVEVectorVTInfo<v16i8, v16i1, 0b00, "s", 0b0>;
def MVE_v8s16 : MVEVectorVTInfo<v8i16, v8i1, 0b01, "s16", 0b0>;		def MVE_v8s16 : MVEVectorVTInfo<v8i16, v8i1, 0b01, "s", 0b0>;
def MVE_v4s32 : MVEVectorVTInfo<v4i32, v4i1, 0b10, "s32", 0b0>;		def MVE_v4s32 : MVEVectorVTInfo<v4i32, v4i1, 0b10, "s", 0b0>;
def MVE_v2s64 : MVEVectorVTInfo<v2i64, v4i1, 0b11, "s64", 0b0>;		def MVE_v2s64 : MVEVectorVTInfo<v2i64, v4i1, 0b11, "s", 0b0>;
def MVE_v16u8 : MVEVectorVTInfo<v16i8, v16i1, 0b00, "u8", 0b1>;		def MVE_v16u8 : MVEVectorVTInfo<v16i8, v16i1, 0b00, "u", 0b1>;
def MVE_v8u16 : MVEVectorVTInfo<v8i16, v8i1, 0b01, "u16", 0b1>;		def MVE_v8u16 : MVEVectorVTInfo<v8i16, v8i1, 0b01, "u", 0b1>;
def MVE_v4u32 : MVEVectorVTInfo<v4i32, v4i1, 0b10, "u32", 0b1>;		def MVE_v4u32 : MVEVectorVTInfo<v4i32, v4i1, 0b10, "u", 0b1>;
def MVE_v2u64 : MVEVectorVTInfo<v2i64, v4i1, 0b11, "u64", 0b1>;		def MVE_v2u64 : MVEVectorVTInfo<v2i64, v4i1, 0b11, "u", 0b1>;

// FP vector types.		// FP vector types.
def MVE_v8f16 : MVEVectorVTInfo<v8f16, v8i1, 0b01, "f16", ?>;		def MVE_v8f16 : MVEVectorVTInfo<v8f16, v8i1, 0b01, "f", ?>;
def MVE_v4f32 : MVEVectorVTInfo<v4f32, v4i1, 0b10, "f32", ?>;		def MVE_v4f32 : MVEVectorVTInfo<v4f32, v4i1, 0b10, "f", ?>;
def MVE_v2f64 : MVEVectorVTInfo<v2f64, v4i1, 0b11, "f64", ?>;		def MVE_v2f64 : MVEVectorVTInfo<v2f64, v4i1, 0b11, "f", ?>;

// --------- Start of base classes for the instructions themselves		// --------- Start of base classes for the instructions themselves

class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,		class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
string ops, string cstr, list<dag> pattern>		string ops, string cstr, list<dag> pattern>
: Thumb2XI<oops, iops, AddrModeNone, 4, itin, !strconcat(asm, "\t", ops), cstr,		: Thumb2XI<oops, iops, AddrModeNone, 4, itin, !strconcat(asm, "\t", ops), cstr,
pattern>,		pattern>,
Requires<[HasMVEInt]> {		Requires<[HasMVEInt]> {
▲ Show 20 Lines • Show All 4,268 Lines • ▼ Show 20 Lines
// Subclass of MVE_VLDRSTR_rq with the same API as that multiclass,		// Subclass of MVE_VLDRSTR_rq with the same API as that multiclass,
// for use when the memory size is one byte, so there's no 'scaled'		// for use when the memory size is one byte, so there's no 'scaled'
// version of the instruction at all. (This is encoded as if it were		// version of the instruction at all. (This is encoded as if it were
// unscaled, but named in the default way with no _u suffix.)		// unscaled, but named in the default way with no _u suffix.)
class MVE_VLDRSTR_rq_b<MVE_ldst_direction dir, MVE_memsz memsz,		class MVE_VLDRSTR_rq_b<MVE_ldst_direction dir, MVE_memsz memsz,
string asm, string suffix, bit U, bits<2> size>		string asm, string suffix, bit U, bits<2> size>
: MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>;		: MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>;

		// Multiclasses wrapping that to add ISel patterns for intrinsics.
		// Gather loads with a scalar base plus vector of offsets, for memory
		// elements that have both a scaled and an unscaled ("_u") instruction.
		//   memsz - memory element size descriptor.
		//   VTIs  - vector types to add patterns for; VTIs[0] supplies the suffix,
		//           signedness and size of the instruction itself, and also the
		//           type used for the offsets operand in every pattern.
		multiclass MVE_VLDR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
		defm "": MVE_VLDRSTR_rq_w<MVE_ld, memsz, "vldr" # memsz.MnemonicLetter,
		VTIs[0].Suffix, VTIs[0].Unsigned, VTIs[0].Size>;
		// When the lane size equals the memory size, accept either value of the
		// intrinsic's final (extension) operand; otherwise only accept the value
		// matching this vector type's signedness.
		foreach VTI = VTIs in
		foreach UnsignedFlag = !if(!eq(VTI.Size, memsz.encoding),
		[0,1], [VTI.Unsigned]) in {
		// Shift operand 0 selects the "_u" instruction; shift operand equal to
		// memsz.shift selects the plain-named one.
		def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag)),
		(VTI.Vec (!cast<Instruction>(NAME#"_u") GPR:$base, MQPR:$offsets))>;
		def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag)),
		(VTI.Vec (!cast<Instruction>(NAME) GPR:$base, MQPR:$offsets))>;
		// Predicated forms pass two extra instruction operands: the literal 1
		// (presumably the "then" predication code -- confirm) and the predicate.
		def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag, (VTI.Pred VCCR:$pred))),
		(VTI.Vec (!cast<Instruction>(NAME#"_u") GPR:$base, MQPR:$offsets, 1, VCCR:$pred))>;
		def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag, (VTI.Pred VCCR:$pred))),
		(VTI.Vec (!cast<Instruction>(NAME) GPR:$base, MQPR:$offsets, 1, VCCR:$pred))>;
		}
		}
		// Byte-sized gather loads (scalar base plus vector of offsets). There is a
		// single instruction with no "_u" variant, so the patterns only match a
		// memory size of 8 and a shift of 0. VTIs[0] supplies the instruction's
		// suffix, signedness and size; each VTI contributes an unpredicated and a
		// predicated pattern keyed on that type's own Unsigned value.
		multiclass MVE_VLDR_rq_b<list<MVEVectorVTInfo> VTIs> {
		def "": MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb",
		VTIs[0].Suffix, VTIs[0].Unsigned, VTIs[0].Size>;
		foreach VTI = VTIs in {
		def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned)),
		(VTI.Vec (!cast<Instruction>(NAME) GPR:$base, MQPR:$offsets))>;
		// Predicated form: literal 1 plus the predicate register are appended to
		// the instruction operands.
		def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned, (VTI.Pred VCCR:$pred))),
		(VTI.Vec (!cast<Instruction>(NAME) GPR:$base, MQPR:$offsets, 1, VCCR:$pred))>;
		}
		}
		// Scatter stores with a scalar base plus vector of offsets, for memory
		// elements that have both a scaled and an unscaled ("_u") instruction.
		// The instruction suffix is just the lane width (BitsSuffix) and its
		// signedness argument is fixed at 0. The patterns carry no UnsignedFlag
		// because scatter_offset has no extension operand.
		multiclass MVE_VSTR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
		defm "": MVE_VLDRSTR_rq_w<MVE_st, memsz, "vstr" # memsz.MnemonicLetter,
		VTIs[0].BitsSuffix, 0, VTIs[0].Size>;
		foreach VTI = VTIs in {
		// Shift operand 0 selects the "_u" instruction; shift operand equal to
		// memsz.shift selects the plain-named one. The instruction takes the data
		// register first, then base and offsets.
		def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0),
		dmgreenUnsubmitted Not Done Reply Inline Actions Is UnsignedFlag used here, in the scatters? dmgreen: Is UnsignedFlag used here, in the scatters?
		(!cast<Instruction>(NAME#"_u") MQPR:$data, GPR:$base, MQPR:$offsets)>;
		def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift),
		(!cast<Instruction>(NAME) MQPR:$data, GPR:$base, MQPR:$offsets)>;
		// Predicated forms append the literal 1 and the predicate register.
		def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0, (VTI.Pred VCCR:$pred)),
		(!cast<Instruction>(NAME#"_u") MQPR:$data, GPR:$base, MQPR:$offsets, 1, VCCR:$pred)>;
		def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift, (VTI.Pred VCCR:$pred)),
		(!cast<Instruction>(NAME) MQPR:$data, GPR:$base, MQPR:$offsets, 1, VCCR:$pred)>;
		}
		}
		// Byte-sized scatter stores (scalar base plus vector of offsets). A single
		// instruction with no "_u" variant, so the patterns only match a memory
		// size of 8 and a shift of 0. VTIs[0] supplies the lane-width suffix and
		// size, and the type of the offsets operand.
		multiclass MVE_VSTR_rq_b<list<MVEVectorVTInfo> VTIs> {
		def "": MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb",
		VTIs[0].BitsSuffix, 0, VTIs[0].Size>;
		foreach VTI = VTIs in {
		// The instruction takes the data register first, then base and offsets.
		def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0),
		(!cast<Instruction>(NAME) MQPR:$data, GPR:$base, MQPR:$offsets)>;
		// Predicated form appends the literal 1 and the predicate register.
		def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0, (VTI.Pred VCCR:$pred)),
		(!cast<Instruction>(NAME) MQPR:$data, GPR:$base, MQPR:$offsets, 1, VCCR:$pred)>;
		}
		}

// Actually define all the loads and stores in this family.		// Actually define all the loads and stores in this family.

def MVE_VLDRBU8_rq : MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u8", 1,0b00>;		defm MVE_VLDRBU8_rq : MVE_VLDR_rq_b<[MVE_v16u8,MVE_v16s8]>;
def MVE_VLDRBU16_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u16", 1,0b01>;		defm MVE_VLDRBU16_rq: MVE_VLDR_rq_b<[MVE_v8u16]>;
def MVE_VLDRBS16_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","s16", 0,0b01>;		defm MVE_VLDRBS16_rq: MVE_VLDR_rq_b<[MVE_v8s16]>;
def MVE_VLDRBU32_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u32", 1,0b10>;		defm MVE_VLDRBU32_rq: MVE_VLDR_rq_b<[MVE_v4u32]>;
def MVE_VLDRBS32_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","s32", 0,0b10>;		defm MVE_VLDRBS32_rq: MVE_VLDR_rq_b<[MVE_v4s32]>;

defm MVE_VLDRHU16_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","u16", 1,0b01>;		defm MVE_VLDRHU16_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v8u16,MVE_v8s16,MVE_v8f16]>;
defm MVE_VLDRHU32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","u32", 1,0b10>;		defm MVE_VLDRHU32_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v4u32]>;
defm MVE_VLDRHS32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","s32", 0,0b10>;		defm MVE_VLDRHS32_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v4s32]>;
defm MVE_VLDRWU32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memW, "vldrw","u32", 1,0b10>;		defm MVE_VLDRWU32_rq: MVE_VLDR_rq_w<MVE_memW, [MVE_v4u32,MVE_v4s32,MVE_v4f32]>;
defm MVE_VLDRDU64_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memD, "vldrd","u64", 1,0b11>;		defm MVE_VLDRDU64_rq: MVE_VLDR_rq_w<MVE_memD, [MVE_v2u64,MVE_v2s64]>;

def MVE_VSTRB8_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","8", 0,0b00>;		defm MVE_VSTRB8_rq : MVE_VSTR_rq_b<[MVE_v16i8]>;
def MVE_VSTRB16_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","16", 0,0b01>;		defm MVE_VSTRB16_rq : MVE_VSTR_rq_b<[MVE_v8i16]>;
def MVE_VSTRB32_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","32", 0,0b10>;		defm MVE_VSTRB32_rq : MVE_VSTR_rq_b<[MVE_v4i32]>;

defm MVE_VSTRH16_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memH, "vstrh","16", 0,0b01>;		defm MVE_VSTRH16_rq : MVE_VSTR_rq_w<MVE_memH, [MVE_v8i16,MVE_v8f16]>;
defm MVE_VSTRH32_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memH, "vstrh","32", 0,0b10>;		defm MVE_VSTRH32_rq : MVE_VSTR_rq_w<MVE_memH, [MVE_v4i32]>;
defm MVE_VSTRW32_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memW, "vstrw","32", 0,0b10>;		defm MVE_VSTRW32_rq : MVE_VSTR_rq_w<MVE_memW, [MVE_v4i32,MVE_v4f32]>;
defm MVE_VSTRD64_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memD, "vstrd","64", 0,0b11>;		defm MVE_VSTRD64_rq : MVE_VSTR_rq_w<MVE_memD, [MVE_v2i64]>;

// Gather loads / scatter stores whose address operand is of the form		// Gather loads / scatter stores whose address operand is of the form
// [Qm,#imm], i.e. a vector containing a full base address for each		// [Qm,#imm], i.e. a vector containing a full base address for each
// loaded item, plus an immediate offset applied consistently to all		// loaded item, plus an immediate offset applied consistently to all
// of them. ('Load/store the same field from this vector of pointers		// of them. ('Load/store the same field from this vector of pointers
// to a structure type.')		// to a structure type.')
//		//
// This family requires the vector lane size to be at least 32 bits		// This family requires the vector lane size to be at least 32 bits
Show All 22 Lines	multiclass MVE_VLDRSTR_qi_m<MVE_ldst_direction dir, MVE_memsz memsz,
string asm, string suffix> {		string asm, string suffix> {
def "" : MVE_VLDRSTR_qi<dir, memsz, 0, (outs), asm, "", suffix>;		def "" : MVE_VLDRSTR_qi<dir, memsz, 0, (outs), asm, "", suffix>;
def _pre : MVE_VLDRSTR_qi<dir, memsz, 1, (outs MQPR:$wb), asm, "!", suffix,		def _pre : MVE_VLDRSTR_qi<dir, memsz, 1, (outs MQPR:$wb), asm, "!", suffix,
"$addr.base = $wb"> {		"$addr.base = $wb"> {
let DecoderMethod="DecodeMVE_MEM_3_pre<"#memsz.shift#">";		let DecoderMethod="DecodeMVE_MEM_3_pre<"#memsz.shift#">";
}		}
}		}

		// Multiclasses wrapping that one, adding selection patterns for the
		// non-writeback loads and all the stores. (The writeback loads must
		// deliver multiple output values, so they have to be selected by C++
		// code.)
		// Gather loads from a vector of addresses plus an immediate offset.
		//   memsz - memory element size descriptor; the assembly suffix is always
		//           "u" followed by memsz.TypeBits.
		//   AVTI  - vector type of the address operand (also supplies the
		//           predicate type used in the predicated pattern).
		//   DVTIs - data vector types for which selection patterns are emitted.
		// Only the non-writeback instruction (NAME) gets patterns here.
		multiclass MVE_VLDR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
		list<MVEVectorVTInfo> DVTIs> {
		defm "" : MVE_VLDRSTR_qi_m<MVE_ld, memsz, "vldr" # memsz.MnemonicLetter,
		"u" # memsz.TypeBits>;

		foreach DVTI = DVTIs in {
		def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base
		(AVTI.Vec MQPR:$addr), (i32 imm:$offset))),
		(DVTI.Vec (!cast<Instruction>(NAME)
		(AVTI.Vec MQPR:$addr), (i32 imm:$offset)))>;
		// Predicated form appends the literal 1 and the predicate register to the
		// instruction operands.
		def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base_predicated
		(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (AVTI.Pred VCCR:$pred))),
		(DVTI.Vec (!cast<Instruction>(NAME)
		(AVTI.Vec MQPR:$addr), (i32 imm:$offset), 1, VCCR:$pred))>;
		}
		}
		// Scatter stores through a vector of addresses plus an immediate offset.
		//   memsz - memory element size descriptor; the assembly suffix is just
		//           memsz.TypeBits rendered as a string.
		//   AVTI  - vector type of the address operand (and of the writeback
		//           result, and source of the predicate type).
		//   DVTIs - data vector types for which selection patterns are emitted.
		// Four patterns per data type: plain and predicated, each with and without
		// writeback. The writeback intrinsics select the "_pre" instruction and
		// yield a value of the address vector type.
		multiclass MVE_VSTR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
		list<MVEVectorVTInfo> DVTIs> {
		defm "" : MVE_VLDRSTR_qi_m<MVE_st, memsz, "vstr" # memsz.MnemonicLetter,
		!cast<string>(memsz.TypeBits)>;

		foreach DVTI = DVTIs in {
		// Non-writeback: the instruction takes data first, then address and offset.
		def : Pat<(int_arm_mve_vstr_scatter_base
		(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data)),
		(!cast<Instruction>(NAME)
		(DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), (i32 imm:$offset))>;
		def : Pat<(int_arm_mve_vstr_scatter_base_predicated
		(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred)),
		(!cast<Instruction>(NAME)
		(DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), (i32 imm:$offset), 1, VCCR:$pred)>;
		// Writeback: same operands, selected to NAME # "_pre".
		def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb
		(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data))),
		(AVTI.Vec (!cast<Instruction>(NAME # "_pre")
		(DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), (i32 imm:$offset)))>;
		def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb_predicated
		(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred))),
		(AVTI.Vec (!cast<Instruction>(NAME # "_pre")
		(DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), (i32 imm:$offset), 1, VCCR:$pred))>;
		}
		}

// Actual instruction definitions.		// Actual instruction definitions.
defm MVE_VLDRWU32_qi: MVE_VLDRSTR_qi_m<MVE_ld, MVE_memW, "vldrw", "u32">;		defm MVE_VLDRWU32_qi: MVE_VLDR_qi<MVE_memW, MVE_v4i32, [MVE_v4i32,MVE_v4f32]>;
defm MVE_VLDRDU64_qi: MVE_VLDRSTR_qi_m<MVE_ld, MVE_memD, "vldrd", "u64">;		defm MVE_VLDRDU64_qi: MVE_VLDR_qi<MVE_memD, MVE_v2i64, [MVE_v2i64,MVE_v2f64]>;
defm MVE_VSTRW32_qi: MVE_VLDRSTR_qi_m<MVE_st, MVE_memW, "vstrw", "32">;		defm MVE_VSTRW32_qi: MVE_VSTR_qi<MVE_memW, MVE_v4i32, [MVE_v4i32,MVE_v4f32]>;
defm MVE_VSTRD64_qi: MVE_VLDRSTR_qi_m<MVE_st, MVE_memD, "vstrd", "64">;		defm MVE_VSTRD64_qi: MVE_VSTR_qi<MVE_memD, MVE_v2i64, [MVE_v2i64,MVE_v2f64]>;

// Define aliases for all the instructions where memory size and		// Define aliases for all the instructions where memory size and
// vector lane size are the same. These are mnemonic aliases, so they		// vector lane size are the same. These are mnemonic aliases, so they
// apply consistently across all of the above families - contiguous		// apply consistently across all of the above families - contiguous
// loads, and both the rq and qi types of gather/scatter.		// loads, and both the rq and qi types of gather/scatter.
//		//
// Rationale: As long as you're loading (for example) 16-bit memory		// Rationale: As long as you're loading (for example) 16-bit memory
// values into 16-bit vector lanes, you can think of them as signed or		// values into 16-bit vector lanes, you can think of them as signed or
▲ Show 20 Lines • Show All 849 Lines • Show Last 20 Lines

llvm/test/CodeGen/Thumb2/mve-intrinsics/scatter-gather.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

				; Byte gather (memory size 8, shift 0) into 16-bit lanes. The final i32
				; operand is 0, requesting sign extension, so vldrb.s16 is expected.
				define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_s16(i8* %base, <8 x i16> %offset) {
				; CHECK-LABEL: test_vldrbq_gather_offset_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrb.s16 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0)
				ret <8 x i16> %0
				}

				declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8*, <8 x i16>, i32, i32, i32)

				; Byte gather (memory size 8, shift 0) into 32-bit lanes. The final i32
				; operand is 0, requesting sign extension, so vldrb.s32 is expected.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_s32(i8* %base, <4 x i32> %offset) {
				; CHECK-LABEL: test_vldrbq_gather_offset_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrb.s32 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 0)
				ret <4 x i32> %0
				}

				declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8*, <4 x i32>, i32, i32, i32)

				; Byte gather into 8-bit lanes with the final i32 operand set to 0. Lane
				; and memory sizes match, and the expected instruction is vldrb.u8 even
				; though the test name says s8.
				define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_s8(i8* %base, <16 x i8> %offset) {
				; CHECK-LABEL: test_vldrbq_gather_offset_s8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrb.u8 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 0)
				ret <16 x i8> %0
				}

				declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8*, <16 x i8>, i32, i32, i32)

				; Byte gather (memory size 8, shift 0) into 16-bit lanes. The final i32
				; operand is 1, requesting zero extension, so vldrb.u16 is expected.
				define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_u16(i8* %base, <8 x i16> %offset) {
				; CHECK-LABEL: test_vldrbq_gather_offset_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrb.u16 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 1)
				ret <8 x i16> %0
				}

				; Byte gather (memory size 8, shift 0) into 32-bit lanes. The final i32
				; operand is 1, requesting zero extension, so vldrb.u32 is expected.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_u32(i8* %base, <4 x i32> %offset) {
				; CHECK-LABEL: test_vldrbq_gather_offset_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrb.u32 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 1)
				ret <4 x i32> %0
				}

				; Byte gather into 8-bit lanes with the final i32 operand set to 1;
				; vldrb.u8 is expected.
				define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_u8(i8* %base, <16 x i8> %offset) {
				; CHECK-LABEL: test_vldrbq_gather_offset_u8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrb.u8 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 1)
				ret <16 x i8> %0
				}

				; Predicated byte gather into 16-bit lanes, sign-extending (final i32
				; operand 0). The i16 mask %p is zero-extended and converted to an
				; <8 x i1> predicate via pred.i2v; a VPST block with vldrbt.s16 is expected.
				define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_s16(i8* %base, <8 x i16> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrbq_gather_offset_z_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrbt.s16 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				%2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0, <8 x i1> %1)
				ret <8 x i16> %2
				}

				declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)

				declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8*, <8 x i16>, i32, i32, i32, <8 x i1>)

				; Predicated byte gather into 32-bit lanes, sign-extending (final i32
				; operand 0). The i16 mask %p is zero-extended and converted to a
				; <4 x i1> predicate via pred.i2v; a VPST block with vldrbt.s32 is expected.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_s32(i8* %base, <4 x i32> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrbq_gather_offset_z_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrbt.s32 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 0, <4 x i1> %1)
				ret <4 x i32> %2
				}

				declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

				declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8*, <4 x i32>, i32, i32, i32, <4 x i1>)

				; Predicated byte gather into 8-bit lanes with the final i32 operand set
				; to 0. The predicate is a <16 x i1> built from %p via pred.i2v. The
				; expected instruction is vldrbt.u8 even though the test name says s8.
				define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_s8(i8* %base, <16 x i8> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrbq_gather_offset_z_s8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrbt.u8 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
				%2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 0, <16 x i1> %1)
				ret <16 x i8> %2
				}

				declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)

				declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8*, <16 x i8>, i32, i32, i32, <16 x i1>)

				; Predicated byte gather into 16-bit lanes, zero-extending (final i32
				; operand 1). The predicate is an <8 x i1> built from %p via pred.i2v;
				; a VPST block with vldrbt.u16 is expected.
				define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_u16(i8* %base, <8 x i16> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrbq_gather_offset_z_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrbt.u16 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				%2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 1, <8 x i1> %1)
				ret <8 x i16> %2
				}

				; Predicated byte gather into 32-bit lanes, zero-extending (final i32
				; operand 1). The predicate is a <4 x i1> built from %p via pred.i2v;
				; a VPST block with vldrbt.u32 is expected.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_u32(i8* %base, <4 x i32> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrbq_gather_offset_z_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrbt.u32 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 1, <4 x i1> %1)
				ret <4 x i32> %2
				}

				; Predicated byte gather into 8-bit lanes with the final i32 operand set
				; to 1. The predicate is a <16 x i1> built from %p via pred.i2v; a VPST
				; block with vldrbt.u8 is expected.
				define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_u8(i8* %base, <16 x i8> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrbq_gather_offset_z_u8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrbt.u8 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
				%2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 1, <16 x i1> %1)
				ret <16 x i8> %2
				}

				; 64-bit gather from a vector of addresses plus immediate offset 616.
				; The intrinsic carries no signedness operand; vldrd.u64 is expected.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_s64(<2 x i64> %addr) {
				; CHECK-LABEL: test_vldrdq_gather_base_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrd.u64 q1, [q0, #616]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 616)
				ret <2 x i64> %0
				}

				declare <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64>, i32)

				; 64-bit gather from a vector of addresses plus immediate offset 336;
				; vldrd.u64 is expected.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_u64(<2 x i64> %addr) {
				; CHECK-LABEL: test_vldrdq_gather_base_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrd.u64 q1, [q0, #336]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 336)
				ret <2 x i64> %0
				}

				; Writeback gather with immediate offset 576. The address vector is loaded
				; from %addr; element 1 of the returned pair is stored back to %addr and
				; element 0 is returned.
				; NOTE(review): the CHECK lines store q1 (the vldrd destination) to [r0]
				; and leave q0 (the written-back base register) as the return value,
				; which looks swapped relative to this IR -- confirm the result order.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_s64(<2 x i64>* %addr) {
				; CHECK-LABEL: test_vldrdq_gather_base_wb_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q0, [r0]
				; CHECK-NEXT: vldrd.u64 q1, [q0, #576]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <2 x i64>, <2 x i64>* %addr, align 8
				%1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 576)
				%2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
				store <2 x i64> %2, <2 x i64>* %addr, align 8
				%3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
				ret <2 x i64> %3
				}

				declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64>, i32)

				; Writeback gather with immediate offset 328. The address vector is loaded
				; from %addr; element 1 of the returned pair is stored back to %addr and
				; element 0 is returned.
				; NOTE(review): the CHECK lines store q1 (the vldrd destination) to [r0]
				; and leave q0 (the written-back base register) as the return value,
				; which looks swapped relative to this IR -- confirm the result order.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_u64(<2 x i64>* %addr) {
				; CHECK-LABEL: test_vldrdq_gather_base_wb_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q0, [r0]
				; CHECK-NEXT: vldrd.u64 q1, [q0, #328]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <2 x i64>, <2 x i64>* %addr, align 8
				%1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 328)
				%2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
				store <2 x i64> %2, <2 x i64>* %addr, align 8
				%3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
				ret <2 x i64> %3
				}

				; Predicated writeback gather with immediate offset 664. The predicate is
				; a <4 x i1> built from %p via pred.i2v. Element 1 of the returned pair is
				; stored back to %addr and element 0 is returned.
				; NOTE(review): the CHECK lines store q1 (the vldrdt destination) to [r0]
				; and leave q0 (the written-back base register) as the return value,
				; which looks swapped relative to this IR -- confirm the result order.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(<2 x i64>* %addr, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrdq_gather_base_wb_z_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vldrw.u32 q0, [r0]
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrdt.u64 q1, [q0, #664]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <2 x i64>, <2 x i64>* %addr, align 8
				%1 = zext i16 %p to i32
				%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
				%3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 664, <4 x i1> %2)
				%4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
				store <2 x i64> %4, <2 x i64>* %addr, align 8
				%5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
				ret <2 x i64> %5
				}

				declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)

				; Predicated writeback gather with immediate offset 656. The predicate is
				; a <4 x i1> built from %p via pred.i2v. Element 1 of the returned pair is
				; stored back to %addr and element 0 is returned.
				; NOTE(review): the CHECK lines store q1 (the vldrdt destination) to [r0]
				; and leave q0 (the written-back base register) as the return value,
				; which looks swapped relative to this IR -- confirm the result order.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_u64(<2 x i64>* %addr, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrdq_gather_base_wb_z_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vldrw.u32 q0, [r0]
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrdt.u64 q1, [q0, #656]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <2 x i64>, <2 x i64>* %addr, align 8
				%1 = zext i16 %p to i32
				%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
				%3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 656, <4 x i1> %2)
				%4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
				store <2 x i64> %4, <2 x i64>* %addr, align 8
				%5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
				ret <2 x i64> %5
				}

				; Predicated 64-bit gather from a vector of addresses plus immediate
				; offset 888. The predicate is a <4 x i1> built from %p via pred.i2v;
				; a VPST block with vldrdt.u64 is expected.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrdq_gather_base_z_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrdt.u64 q1, [q0, #888]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <4 x i1> %1)
				ret <2 x i64> %2
				}

				declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)

				; Predicated gather load of 64-bit lanes from a vector of base addresses plus
				; immediate 1000 (u64 variant, no writeback). Uses the same intrinsic as the
				; s64 test; only the immediate differs.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_u64(<2 x i64> %addr, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrdq_gather_base_z_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrdt.u64 q1, [q0, #1000]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 1000, <4 x i1> %1)
				ret <2 x i64> %2
				}

				; Gather load of 64-bit lanes from scalar %base plus a vector of offsets (s64).
				; Trailing i32 operands 64, 0, 0 look like memory width in bits, offset shift
				; and unsigned flag (the u64 variant passes 1 last) - confirm against the
				; intrinsic definition.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_s64(i64* %base, <2 x i64> %offset) {
				; CHECK-LABEL: test_vldrdq_gather_offset_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrd.u64 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0)
				ret <2 x i64> %0
				}

				declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64*, <2 x i64>, i32, i32, i32)

				; Gather load of 64-bit lanes from scalar %base plus a vector of offsets (u64).
				; Differs from the s64 test only in the last i32 operand (1 instead of 0); the
				; expected instruction is the same vldrd.u64.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_u64(i64* %base, <2 x i64> %offset) {
				; CHECK-LABEL: test_vldrdq_gather_offset_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrd.u64 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 1)
				ret <2 x i64> %0
				}

				; Predicated form of the 64-bit scalar-base gather load (s64). %p is
				; zero-extended and converted to a <4 x i1> predicate; the load is expected
				; inside a VPST block.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrdq_gather_offset_z_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrdt.u64 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <4 x i1> %1)
				ret <2 x i64> %2
				}

				declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64*, <2 x i64>, i32, i32, i32, <4 x i1>)

				; Predicated form of the 64-bit scalar-base gather load (u64). Same as the
				; s64 test except that the i32 operand before the predicate is 1.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_u64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrdq_gather_offset_z_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrdt.u64 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 1, <4 x i1> %1)
				ret <2 x i64> %2
				}

				; Gather load of 64-bit lanes with shifted offsets (s64). Reuses the
				; gather.offset intrinsic with the second i32 operand set to 3, which appears
				; as `uxtw #3` in the expected addressing mode.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_s64(i64* %base, <2 x i64> %offset) {
				; CHECK-LABEL: test_vldrdq_gather_shifted_offset_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrd.u64 q1, [r0, q0, uxtw #3]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 0)
				ret <2 x i64> %0
				}

				; Gather load of 64-bit lanes with shifted offsets (u64). Same as the s64 test
				; except that the last i32 operand is 1; the shift operand 3 appears as
				; `uxtw #3`.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_u64(i64* %base, <2 x i64> %offset) {
				; CHECK-LABEL: test_vldrdq_gather_shifted_offset_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrd.u64 q1, [r0, q0, uxtw #3]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 1)
				ret <2 x i64> %0
				}

				; Predicated gather load of 64-bit lanes with shifted offsets (s64). Shift
				; operand 3 appears as `uxtw #3`; the load is expected inside a VPST block
				; driven by the predicate built from %p.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrdt.u64 q1, [r0, q0, uxtw #3]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 0, <4 x i1> %1)
				ret <2 x i64> %2
				}

				; Predicated gather load of 64-bit lanes with shifted offsets (u64). Same as
				; the s64 test except that the i32 operand before the predicate is 1.
				define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_u64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrdt.u64 q1, [r0, q0, uxtw #3]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 1, <4 x i1> %1)
				ret <2 x i64> %2
				}

				; Gather load of eight half-precision lanes from %base plus a vector of 16-bit
				; offsets. Trailing i32 operands 16, 0, 0 look like memory width in bits, offset
				; shift and unsigned flag - confirm against the intrinsic definition.
				define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_f16(half* %base, <8 x i16> %offset) {
				; CHECK-LABEL: test_vldrhq_gather_offset_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrh.u16 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
				ret <8 x half> %0
				}

				declare <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half*, <8 x i16>, i32, i32, i32)

				; Gather load of eight 16-bit lanes from %base plus a vector of 16-bit offsets
				; (s16). No widening is involved, and the expected mnemonic is vldrh.u16 even
				; though the last i32 operand is 0.
				define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_s16(i16* %base, <8 x i16> %offset) {
				; CHECK-LABEL: test_vldrhq_gather_offset_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrh.u16 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
				ret <8 x i16> %0
				}

				declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16*, <8 x i16>, i32, i32, i32)

				; Widening gather load: 16-bit memory elements into four 32-bit lanes (s32).
				; The last i32 operand is 0 and the expected mnemonic is the sign-extending
				; vldrh.s32.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_s32(i16* %base, <4 x i32> %offset) {
				; CHECK-LABEL: test_vldrhq_gather_offset_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrh.s32 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 0)
				ret <4 x i32> %0
				}

				declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16*, <4 x i32>, i32, i32, i32)

				; Gather load of eight 16-bit lanes from %base plus a vector of 16-bit offsets
				; (u16). Same as the s16 test except that the last i32 operand is 1; the
				; expected instruction is unchanged.
				define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_u16(i16* %base, <8 x i16> %offset) {
				; CHECK-LABEL: test_vldrhq_gather_offset_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrh.u16 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 1)
				ret <8 x i16> %0
				}

				; Widening gather load: 16-bit memory elements into four 32-bit lanes (u32).
				; The last i32 operand is 1 and the expected mnemonic is the zero-extending
				; vldrh.u32.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_u32(i16* %base, <4 x i32> %offset) {
				; CHECK-LABEL: test_vldrhq_gather_offset_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrh.u32 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 1)
				ret <4 x i32> %0
				}

				; Predicated gather load of eight half-precision lanes. %p is zero-extended and
				; converted to an <8 x i1> predicate; the load is expected inside a VPST block.
				define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_z_f16(half* %base, <8 x i16> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrhq_gather_offset_z_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrht.u16 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				%2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
				ret <8 x half> %2
				}

				declare <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half*, <8 x i16>, i32, i32, i32, <8 x i1>)

				; Predicated gather load of eight 16-bit lanes (s16) with an <8 x i1> predicate
				; built from %p; the load is expected inside a VPST block.
				define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_s16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrhq_gather_offset_z_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrht.u16 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				%2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
				ret <8 x i16> %2
				}

				declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16*, <8 x i16>, i32, i32, i32, <8 x i1>)

				; Predicated widening gather load: 16-bit memory elements into four 32-bit
				; lanes (s32), with a <4 x i1> predicate built from %p. The expected mnemonic
				; is the sign-extending vldrht.s32.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_s32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrhq_gather_offset_z_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrht.s32 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 0, <4 x i1> %1)
				ret <4 x i32> %2
				}

				declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16*, <4 x i32>, i32, i32, i32, <4 x i1>)

				; Predicated gather load of eight 16-bit lanes (u16). Same as the s16 test
				; except that the i32 operand before the predicate is 1.
				define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_u16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrhq_gather_offset_z_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrht.u16 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				%2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 1, <8 x i1> %1)
				ret <8 x i16> %2
				}

				; Predicated widening gather load: 16-bit memory elements into four 32-bit
				; lanes (u32). The i32 operand before the predicate is 1 and the expected
				; mnemonic is the zero-extending vldrht.u32.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_u32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrhq_gather_offset_z_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrht.u32 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 1, <4 x i1> %1)
				ret <4 x i32> %2
				}

				; Gather load of eight half-precision lanes with shifted offsets. The second
				; i32 operand is 1, which appears as `uxtw #1` in the expected addressing mode.
				define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_f16(half* %base, <8 x i16> %offset) {
				; CHECK-LABEL: test_vldrhq_gather_shifted_offset_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
				ret <8 x half> %0
				}

				; Gather load of eight 16-bit lanes with shifted offsets (s16). Shift operand 1
				; appears as `uxtw #1` in the expected addressing mode.
				define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_s16(i16* %base, <8 x i16> %offset) {
				; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
				ret <8 x i16> %0
				}

				; Widening gather load with shifted offsets: 16-bit memory elements into four
				; 32-bit lanes (s32). Expected output is the sign-extending vldrh.s32 with
				; `uxtw #1`.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_s32(i16* %base, <4 x i32> %offset) {
				; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrh.s32 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 0)
				ret <4 x i32> %0
				}

				; Gather load of eight 16-bit lanes with shifted offsets (u16). Same as the
				; s16 test except that the last i32 operand is 1.
				define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_u16(i16* %base, <8 x i16> %offset) {
				; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 1)
				ret <8 x i16> %0
				}

				; Widening gather load with shifted offsets: 16-bit memory elements into four
				; 32-bit lanes (u32). Expected output is the zero-extending vldrh.u32 with
				; `uxtw #1`.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_u32(i16* %base, <4 x i32> %offset) {
				; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrh.u32 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 1)
				ret <4 x i32> %0
				}

				; Predicated gather load of eight half-precision lanes with shifted offsets.
				; Shift operand 1 appears as `uxtw #1`; the <8 x i1> predicate built from %p
				; puts the load inside a VPST block.
				define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_z_f16(half* %base, <8 x i16> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				%2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
				ret <8 x half> %2
				}

				; Predicated gather load of eight 16-bit lanes with shifted offsets (s16).
				; Shift operand 1 appears as `uxtw #1`; the load is expected inside a VPST
				; block.
				define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_s16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				%2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
				ret <8 x i16> %2
				}

				; Predicated widening gather load with shifted offsets: 16-bit memory elements
				; into four 32-bit lanes (s32). Expected output is vldrht.s32 with `uxtw #1`
				; inside a VPST block.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_s32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrht.s32 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 0, <4 x i1> %1)
				ret <4 x i32> %2
				}

				; Predicated gather load of eight 16-bit lanes with shifted offsets (u16).
				; Same as the s16 test except that the i32 operand before the predicate is 1.
				define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_u16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				%2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 1, <8 x i1> %1)
				ret <8 x i16> %2
				}

				; Predicated widening gather load with shifted offsets: 16-bit memory elements
				; into four 32-bit lanes (u32). Expected output is vldrht.u32 with `uxtw #1`
				; inside a VPST block.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_u32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrht.u32 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 1, <4 x i1> %1)
				ret <4 x i32> %2
				}

				; Gather load of four float lanes from a vector of base addresses plus
				; immediate 12 (no writeback, unpredicated). The result is produced in q1 and
				; moved to q0 for return.
				define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_f32(<4 x i32> %addr) {
				; CHECK-LABEL: test_vldrwq_gather_base_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [q0, #12]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> %addr, i32 12)
				ret <4 x float> %0
				}

				declare <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32>, i32)

				; Gather load of four 32-bit lanes from a vector of base addresses plus
				; immediate 400 (s32 variant, no writeback, unpredicated).
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_s32(<4 x i32> %addr) {
				; CHECK-LABEL: test_vldrwq_gather_base_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [q0, #400]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 400)
				ret <4 x i32> %0
				}

				declare <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32>, i32)

				; Gather load of four 32-bit lanes from a vector of base addresses plus
				; immediate 284 (u32 variant). Uses the same intrinsic as the s32 test; only
				; the immediate differs.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_u32(<4 x i32> %addr) {
				; CHECK-LABEL: test_vldrwq_gather_base_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [q0, #284]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 284)
				ret <4 x i32> %0
				}

				; Gather load of four float lanes with base-vector writeback (immediate 64).
				; The IR returns result 0 of the intrinsic (the <4 x float> data) and stores
				; result 1 (the written-back <4 x i32> base) through %addr, so the register
				; written back by the load must be the one stored to [r0], and the load's
				; destination must be the return register q0.
				; NOTE(review): the expected assembly was corrected by hand; the previous output
				; stored the loaded data and returned the written-back base. Regenerate with
				; update_llc_test_checks.py once codegen agrees.
				define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_f32(<4 x i32>* %addr) {
				; CHECK-LABEL: test_vldrwq_gather_base_wb_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vldrw.u32 q0, [q1, #64]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <4 x i32>, <4 x i32>* %addr, align 8
				%1 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> %0, i32 64)
				%2 = extractvalue { <4 x float>, <4 x i32> } %1, 1
				store <4 x i32> %2, <4 x i32>* %addr, align 8
				%3 = extractvalue { <4 x float>, <4 x i32> } %1, 0
				ret <4 x float> %3
				}

				declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32>, i32)

				; Gather load of four 32-bit lanes with base-vector writeback (s32 variant,
				; immediate 80). The IR returns result 0 of the intrinsic (the loaded data) and
				; stores result 1 (the written-back base vector) through %addr, so the register
				; written back by the load must be the one stored to [r0], and the load's
				; destination must be the return register q0.
				; NOTE(review): the expected assembly was corrected by hand; the previous output
				; stored the loaded data and returned the written-back base. Regenerate with
				; update_llc_test_checks.py once codegen agrees.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_s32(<4 x i32>* %addr) {
				; CHECK-LABEL: test_vldrwq_gather_base_wb_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vldrw.u32 q0, [q1, #80]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <4 x i32>, <4 x i32>* %addr, align 8
				%1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 80)
				%2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
				store <4 x i32> %2, <4 x i32>* %addr, align 8
				%3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
				ret <4 x i32> %3
				}

				declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32>, i32)

				; Gather load of four 32-bit lanes with base-vector writeback (u32 variant,
				; immediate 480). The IR returns result 0 of the intrinsic (the loaded data) and
				; stores result 1 (the written-back base vector) through %addr, so the register
				; written back by the load must be the one stored to [r0], and the load's
				; destination must be the return register q0.
				; NOTE(review): the expected assembly was corrected by hand; the previous output
				; stored the loaded data and returned the written-back base. Regenerate with
				; update_llc_test_checks.py once codegen agrees.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_u32(<4 x i32>* %addr) {
				; CHECK-LABEL: test_vldrwq_gather_base_wb_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vldrw.u32 q0, [q1, #480]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <4 x i32>, <4 x i32>* %addr, align 8
				%1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 480)
				%2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
				store <4 x i32> %2, <4 x i32>* %addr, align 8
				%3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
				ret <4 x i32> %3
				}

				; Predicated gather load of four float lanes with base-vector writeback
				; (immediate 352). The IR returns result 0 of the intrinsic (the <4 x float>
				; data) and stores result 1 (the written-back <4 x i32> base) through %addr, so
				; the register written back by the load must be the one stored to [r0], and the
				; load's destination must be the return register q0.
				; NOTE(review): the expected assembly was corrected by hand; the previous output
				; stored the loaded data and returned the written-back base. Regenerate with
				; update_llc_test_checks.py once codegen agrees.
				define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_z_f32(<4 x i32>* %addr, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrwq_gather_base_wb_z_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrwt.u32 q0, [q1, #352]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <4 x i32>, <4 x i32>* %addr, align 8
				%1 = zext i16 %p to i32
				%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
				%3 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %0, i32 352, <4 x i1> %2)
				%4 = extractvalue { <4 x float>, <4 x i32> } %3, 1
				store <4 x i32> %4, <4 x i32>* %addr, align 8
				%5 = extractvalue { <4 x float>, <4 x i32> } %3, 0
				ret <4 x float> %5
				}

				declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

				; Predicated gather load of four 32-bit lanes with base-vector writeback (s32
				; variant, immediate 276). The IR returns result 0 of the intrinsic (the loaded
				; data) and stores result 1 (the written-back base vector) through %addr, so the
				; register written back by the load must be the one stored to [r0], and the
				; load's destination must be the return register q0.
				; NOTE(review): the expected assembly was corrected by hand; the previous output
				; stored the loaded data and returned the written-back base. Regenerate with
				; update_llc_test_checks.py once codegen agrees.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_s32(<4 x i32>* %addr, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrwq_gather_base_wb_z_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrwt.u32 q0, [q1, #276]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <4 x i32>, <4 x i32>* %addr, align 8
				%1 = zext i16 %p to i32
				%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
				%3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 276, <4 x i1> %2)
				%4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1
				store <4 x i32> %4, <4 x i32>* %addr, align 8
				%5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0
				ret <4 x i32> %5
				}

				declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

				; Predicated gather load of four 32-bit lanes with base-vector writeback (u32
				; variant, immediate 88). The IR returns result 0 of the intrinsic (the loaded
				; data) and stores result 1 (the written-back base vector) through %addr, so the
				; register written back by the load must be the one stored to [r0], and the
				; load's destination must be the return register q0.
				; NOTE(review): the expected assembly was corrected by hand; the previous output
				; stored the loaded data and returned the written-back base. Regenerate with
				; update_llc_test_checks.py once codegen agrees.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_u32(<4 x i32>* %addr, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrwq_gather_base_wb_z_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrwt.u32 q0, [q1, #88]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <4 x i32>, <4 x i32>* %addr, align 8
				%1 = zext i16 %p to i32
				%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
				%3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 88, <4 x i1> %2)
				%4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1
				store <4 x i32> %4, <4 x i32>* %addr, align 8
				%5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0
				ret <4 x i32> %5
				}

				; Predicated gather load of four float lanes from a vector of base addresses
				; plus immediate 300 (no writeback). %p is zero-extended and converted to a
				; <4 x i1> predicate; the load is expected inside a VPST block.
				define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_z_f32(<4 x i32> %addr, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrwq_gather_base_z_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrwt.u32 q1, [q0, #300]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> %addr, i32 300, <4 x i1> %1)
				ret <4 x float> %2
				}

				declare <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

				; Predicated gather load of four 32-bit lanes from a vector of base addresses
				; plus immediate 440 (s32 variant, no writeback); the load is expected inside
				; a VPST block.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_s32(<4 x i32> %addr, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrwq_gather_base_z_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrwt.u32 q1, [q0, #440]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 440, <4 x i1> %1)
				ret <4 x i32> %2
				}

				declare <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

				; Predicated gather load of four 32-bit lanes from a vector of base addresses
				; plus immediate 300 (u32 variant, no writeback). Uses the same intrinsic as
				; the s32 test; only the immediate differs.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_u32(<4 x i32> %addr, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrwq_gather_base_z_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrwt.u32 q1, [q0, #300]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 300, <4 x i1> %1)
				ret <4 x i32> %2
				}

				; Gather load of four float lanes from scalar %base plus a vector of offsets.
				; Trailing i32 operands 32, 0, 0 look like memory width in bits, offset shift
				; and unsigned flag - confirm against the intrinsic definition.
				define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_f32(float* %base, <4 x i32> %offset) {
				; CHECK-LABEL: test_vldrwq_gather_offset_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* %base, <4 x i32> %offset, i32 32, i32 0, i32 0)
				ret <4 x float> %0
				}

				declare <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float*, <4 x i32>, i32, i32, i32)

				; Gather load of four 32-bit lanes from scalar %base plus a vector of offsets
				; (s32). Expected output is vldrw.u32 with a register-offset addressing mode.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_s32(i32* %base, <4 x i32> %offset) {
				; CHECK-LABEL: test_vldrwq_gather_offset_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 0)
				ret <4 x i32> %0
				}

				declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32*, <4 x i32>, i32, i32, i32)

				; Gather load of four 32-bit lanes from scalar %base plus a vector of offsets
				; (u32). Same as the s32 test except that the last i32 operand is 1; the
				; expected instruction is unchanged.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_u32(i32* %base, <4 x i32> %offset) {
				; CHECK-LABEL: test_vldrwq_gather_offset_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 1)
				ret <4 x i32> %0
				}

				; Predicated gather load of four float lanes from scalar %base plus a vector of
				; offsets. %p is zero-extended and converted to a <4 x i1> predicate; the load
				; is expected inside a VPST block.
				define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_z_f32(float* %base, <4 x i32> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrwq_gather_offset_z_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrwt.u32 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1)
				ret <4 x float> %2
				}

				declare <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float*, <4 x i32>, i32, i32, i32, <4 x i1>)

				; Predicated gather load of four 32-bit lanes from scalar %base plus a vector
				; of offsets (s32); the load is expected inside a VPST block.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_s32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrwq_gather_offset_z_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrwt.u32 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1)
				ret <4 x i32> %2
				}

				declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32*, <4 x i32>, i32, i32, i32, <4 x i1>)

				; Predicated gather load of four 32-bit lanes from scalar %base plus a vector
				; of offsets (u32). Same as the s32 test except that the i32 operand before the
				; predicate is 1.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_u32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrwq_gather_offset_z_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrwt.u32 q1, [r0, q0]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 1, <4 x i1> %1)
				ret <4 x i32> %2
				}

				; Gather load of four float lanes with shifted offsets. The second i32 operand
				; is 2, which appears as `uxtw #2` in the expected addressing mode.
				define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_f32(float* %base, <4 x i32> %offset) {
				; CHECK-LABEL: test_vldrwq_gather_shifted_offset_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* %base, <4 x i32> %offset, i32 32, i32 2, i32 0)
				ret <4 x float> %0
				}

				; Gather load of four 32-bit lanes with shifted offsets (s32). Shift operand 2
				; appears as `uxtw #2` in the expected addressing mode.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_s32(i32* %base, <4 x i32> %offset) {
				; CHECK-LABEL: test_vldrwq_gather_shifted_offset_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 0)
				ret <4 x i32> %0
				}

				; Gather load of four 32-bit lanes with shifted offsets (u32). Same as the s32
				; test except that the last i32 operand is 1.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_u32(i32* %base, <4 x i32> %offset) {
				; CHECK-LABEL: test_vldrwq_gather_shifted_offset_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 1)
				ret <4 x i32> %0
				}

				; Predicated gather load of four float lanes with shifted offsets. Shift
				; operand 2 appears as `uxtw #2`; the <4 x i1> predicate built from %p puts the
				; load inside a VPST block.
				define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_z_f32(float* %base, <4 x i32> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1)
				ret <4 x float> %2
				}

				; Predicated gather load of four 32-bit lanes with shifted offsets (s32).
				; Shift operand 2 appears as `uxtw #2`; the load is expected inside a VPST
				; block.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_s32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1)
				ret <4 x i32> %2
				}

				; Predicated (_z) word gather with shifted offsets, u32 variant.
				; Same call shape as the signed case but with trailing immediates 32, 2, 1.
				; Expected assembly is unchanged by that last immediate:
				; vmsr p0, r1 / vpst / `vldrwt.u32 q1, [r0, q0, uxtw #2]`, then vmov q0, q1.
				define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_u32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
				; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2]
				; CHECK-NEXT: vmov q0, q1
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 1, <4 x i1> %1)
				ret <4 x i32> %2
				}

				; Predicated (_p) byte scatter store with <8 x i16> offsets and data (s16).
				; %p is zero-extended and converted to an <8 x i1> mask with pred.i2v; the
				; predicated scatter intrinsic is called with immediates 8 and 0.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrbt.16 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrbq_scatter_offset_p_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrbt.16 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8*, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>)

				; Predicated (_p) byte scatter store with <4 x i32> offsets and data (s32).
				; %p becomes a <4 x i1> mask via zext + pred.i2v; the predicated scatter
				; intrinsic is called with immediates 8 and 0.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrbt.32 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrbq_scatter_offset_p_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrbt.32 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)

				; Predicated (_p) byte scatter store with <16 x i8> offsets and data (s8).
				; %p becomes a <16 x i1> mask via zext + pred.i2v; the predicated scatter
				; intrinsic is called with immediates 8 and 0.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrbt.8 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrbq_scatter_offset_p_s8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrbt.8 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8*, <16 x i8>, <16 x i8>, i32, i32, <16 x i1>)

				; Predicated (_p) byte scatter store with <8 x i16> offsets and data (u16).
				; The IR types carry no signedness, so the body matches the s16 test apart
				; from the function name: zext + pred.i2v mask, immediates 8 and 0.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrbt.16 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrbq_scatter_offset_p_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrbt.16 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1)
				ret void
				}

				; Predicated (_p) byte scatter store with <4 x i32> offsets and data (u32).
				; %p becomes a <4 x i1> mask via zext + pred.i2v; the predicated scatter
				; intrinsic is called with immediates 8 and 0.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrbt.32 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrbq_scatter_offset_p_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrbt.32 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1)
				ret void
				}

				; Predicated (_p) byte scatter store with <16 x i8> offsets and data (u8).
				; %p becomes a <16 x i1> mask via zext + pred.i2v; the predicated scatter
				; intrinsic is called with immediates 8 and 0.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrbt.8 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrbq_scatter_offset_p_u8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrbt.8 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1)
				ret void
				}

				; Unpredicated byte scatter store with <8 x i16> offsets and data (s16).
				; llvm.arm.mve.vstr.scatter.offset is called with immediates 8 and 0 and must
				; select exactly one `vstrb.16 q1, [r0, q0]` before the return.
				define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s16(i8* %base, <8 x i16> %offset, <8 x i16> %value) {
				; CHECK-LABEL: test_vstrbq_scatter_offset_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrb.16 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32)

				; Unpredicated byte scatter store with <4 x i32> offsets and data (s32).
				; llvm.arm.mve.vstr.scatter.offset is called with immediates 8 and 0 and must
				; select exactly one `vstrb.32 q1, [r0, q0]` before the return.
				define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value) {
				; CHECK-LABEL: test_vstrbq_scatter_offset_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrb.32 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)

				; Unpredicated byte scatter store with <16 x i8> offsets and data (s8).
				; llvm.arm.mve.vstr.scatter.offset is called with immediates 8 and 0 and must
				; select exactly one `vstrb.8 q1, [r0, q0]` before the return.
				define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value) {
				; CHECK-LABEL: test_vstrbq_scatter_offset_s8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrb.8 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32)

				; Unpredicated byte scatter store with <8 x i16> offsets and data (u16).
				; The intrinsic call (immediates 8 and 0) is the same one the s16 test uses,
				; and the expected selection is again a single `vstrb.16 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u16(i8* %base, <8 x i16> %offset, <8 x i16> %value) {
				; CHECK-LABEL: test_vstrbq_scatter_offset_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrb.16 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0)
				ret void
				}

				; Unpredicated byte scatter store with <4 x i32> offsets and data (u32).
				; llvm.arm.mve.vstr.scatter.offset is called with immediates 8 and 0 and must
				; select exactly one `vstrb.32 q1, [r0, q0]` before the return.
				define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u32(i8* %base, <4 x i32> %offset, <4 x i32> %value) {
				; CHECK-LABEL: test_vstrbq_scatter_offset_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrb.32 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0)
				ret void
				}

				; Unpredicated byte scatter store with <16 x i8> offsets and data (u8).
				; llvm.arm.mve.vstr.scatter.offset is called with immediates 8 and 0 and must
				; select exactly one `vstrb.8 q1, [r0, q0]` before the return.
				define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u8(i8* %base, <16 x i8> %offset, <16 x i8> %value) {
				; CHECK-LABEL: test_vstrbq_scatter_offset_u8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrb.8 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0)
				ret void
				}

				; Predicated (_p) doubleword scatter store using a vector base (s64).
				; %p is zero-extended and converted to a <4 x i1> mask; the predicated
				; scatter.base intrinsic receives (%addr, immediate 888, %value, mask).
				; Expected assembly: vmsr p0, r0 / vpst / `vstrdt.64 q1, [q0, #888]`.
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrdq_scatter_base_p_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrdt.64 q1, [q0, #888]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <4 x i1> %1)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)

				; Predicated (_p) doubleword scatter store using a vector base (u64).
				; %p is zero-extended and converted to a <4 x i1> mask; the predicated
				; scatter.base intrinsic receives (%addr, immediate 264, %value, mask).
				; Expected assembly: vmsr p0, r0 / vpst / `vstrdt.64 q1, [q0, #264]`.
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_u64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrdq_scatter_base_p_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrdt.64 q1, [q0, #264]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 264, <2 x i64> %value, <4 x i1> %1)
				ret void
				}

				; Unpredicated doubleword scatter store using a vector base (s64).
				; llvm.arm.mve.vstr.scatter.base is called with (%addr, immediate 408, %value)
				; and must select exactly one `vstrd.64 q1, [q0, #408]`.
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_s64(<2 x i64> %addr, <2 x i64> %value) {
				; CHECK-LABEL: test_vstrdq_scatter_base_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrd.64 q1, [q0, #408]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 408, <2 x i64> %value)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)

				; Unpredicated doubleword scatter store using a vector base (u64).
				; llvm.arm.mve.vstr.scatter.base is called with (%addr, immediate 472, %value)
				; and must select exactly one `vstrd.64 q1, [q0, #472]`.
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_u64(<2 x i64> %addr, <2 x i64> %value) {
				; CHECK-LABEL: test_vstrdq_scatter_base_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrd.64 q1, [q0, #472]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 472, <2 x i64> %value)
				ret void
				}

				; Predicated (_p) doubleword scatter store with base writeback (s64).
				; The base vector is loaded from %addr, the predicated scatter.base.wb
				; intrinsic (immediate 248, <4 x i1> mask built from %p) returns an updated
				; <2 x i64>, and that value is stored back to %addr.
				; Expected assembly: load of q1 from [r0], vmsr p0, r1 / vpst /
				; `vstrdt.64 q0, [q1, #248]!`, then q1 stored back to [r0].
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrdt.64 q0, [q1, #248]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <2 x i64>, <2 x i64>* %addr, align 8
				%1 = zext i16 %p to i32
				%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
				%3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 248, <2 x i64> %value, <4 x i1> %2)
				store <2 x i64> %3, <2 x i64>* %addr, align 8
				ret void
				}

				declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)

				; Predicated (_p) doubleword scatter store with base writeback (u64).
				; The base vector is loaded from %addr, the predicated scatter.base.wb
				; intrinsic (immediate 136, <4 x i1> mask built from %p) returns an updated
				; <2 x i64>, and that value is stored back to %addr.
				; Expected assembly: load of q1 from [r0], vmsr p0, r1 / vpst /
				; `vstrdt.64 q0, [q1, #136]!`, then q1 stored back to [r0].
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_u64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrdt.64 q0, [q1, #136]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <2 x i64>, <2 x i64>* %addr, align 8
				%1 = zext i16 %p to i32
				%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
				%3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 136, <2 x i64> %value, <4 x i1> %2)
				store <2 x i64> %3, <2 x i64>* %addr, align 8
				ret void
				}

				; Unpredicated doubleword scatter store with base writeback (s64).
				; The base vector is loaded from %addr, scatter.base.wb (immediate 208)
				; returns an updated <2 x i64>, and that value is stored back to %addr.
				; Expected assembly: load of q1 from [r0], `vstrd.64 q0, [q1, #208]!`,
				; then q1 stored back to [r0].
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_s64(<2 x i64>* %addr, <2 x i64> %value) {
				; CHECK-LABEL: test_vstrdq_scatter_base_wb_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vstrd.64 q0, [q1, #208]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <2 x i64>, <2 x i64>* %addr, align 8
				%1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 208, <2 x i64> %value)
				store <2 x i64> %1, <2 x i64>* %addr, align 8
				ret void
				}

				declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)

				; Unpredicated doubleword scatter store with base writeback (u64).
				; The base vector is loaded from %addr, scatter.base.wb (immediate 168)
				; returns an updated <2 x i64>, and that value is stored back to %addr.
				; Expected assembly: load of q1 from [r0], `vstrd.64 q0, [q1, #168]!`,
				; then q1 stored back to [r0].
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_u64(<2 x i64>* %addr, <2 x i64> %value) {
				; CHECK-LABEL: test_vstrdq_scatter_base_wb_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vstrd.64 q0, [q1, #168]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <2 x i64>, <2 x i64>* %addr, align 8
				%1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 168, <2 x i64> %value)
				store <2 x i64> %1, <2 x i64>* %addr, align 8
				ret void
				}

				; Predicated (_p) doubleword scatter store with unshifted offsets (s64).
				; %p becomes a <4 x i1> mask via zext + pred.i2v; the predicated scatter
				; intrinsic is called with immediates 64 and 0.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrdt.64 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrdq_scatter_offset_p_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrdt.64 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64*, <2 x i64>, <2 x i64>, i32, i32, <4 x i1>)

				; Predicated (_p) doubleword scatter store with unshifted offsets (u64).
				; %p becomes a <4 x i1> mask via zext + pred.i2v; the predicated scatter
				; intrinsic is called with immediates 64 and 0.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrdt.64 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrdq_scatter_offset_p_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrdt.64 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1)
				ret void
				}

				; Unpredicated doubleword scatter store with unshifted offsets (s64).
				; llvm.arm.mve.vstr.scatter.offset is called with immediates 64 and 0 and
				; must select exactly one `vstrd.64 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
				; CHECK-LABEL: test_vstrdq_scatter_offset_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrd.64 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64*, <2 x i64>, <2 x i64>, i32, i32)

				; Unpredicated doubleword scatter store with unshifted offsets (u64).
				; llvm.arm.mve.vstr.scatter.offset is called with immediates 64 and 0 and
				; must select exactly one `vstrd.64 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
				; CHECK-LABEL: test_vstrdq_scatter_offset_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrd.64 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0)
				ret void
				}

				; Predicated (_p) doubleword scatter store with shifted offsets (s64).
				; Uses the same predicated scatter.offset intrinsic as the unshifted tests
				; but with immediates 64 and 3; the 3 shows up as `uxtw #3` in the output.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrdt.64 q1, [r0, q0, uxtw #3]`.
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrdt.64 q1, [r0, q0, uxtw #3]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <4 x i1> %1)
				ret void
				}

				; Predicated (_p) doubleword scatter store with shifted offsets (u64).
				; %p becomes a <4 x i1> mask via zext + pred.i2v; the predicated scatter
				; intrinsic is called with immediates 64 and 3.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrdt.64 q1, [r0, q0, uxtw #3]`.
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrdt.64 q1, [r0, q0, uxtw #3]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <4 x i1> %1)
				ret void
				}

				; Unpredicated doubleword scatter store with shifted offsets (s64).
				; llvm.arm.mve.vstr.scatter.offset is called with immediates 64 and 3 and
				; must select exactly one `vstrd.64 q1, [r0, q0, uxtw #3]`.
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
				; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_s64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrd.64 q1, [r0, q0, uxtw #3]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3)
				ret void
				}

				; Unpredicated doubleword scatter store with shifted offsets (u64).
				; llvm.arm.mve.vstr.scatter.offset is called with immediates 64 and 3 and
				; must select exactly one `vstrd.64 q1, [r0, q0, uxtw #3]`.
				define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
				; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_u64:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrd.64 q1, [r0, q0, uxtw #3]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3)
				ret void
				}

				; Unpredicated halfword scatter store of <8 x half> data with <8 x i16>
				; unshifted offsets (f16). The scatter.offset intrinsic is called with
				; immediates 16 and 0 and must select exactly one `vstrh.16 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_f16(half* %base, <8 x i16> %offset, <8 x half> %value) {
				; CHECK-LABEL: test_vstrhq_scatter_offset_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrh.16 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half*, <8 x i16>, <8 x half>, i32, i32)

				; Predicated (_p) halfword scatter store of <8 x half> data with unshifted
				; <8 x i16> offsets (f16). %p becomes an <8 x i1> mask via zext + pred.i2v;
				; the predicated scatter intrinsic is called with immediates 16 and 0.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrht.16 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_f16(half* %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrhq_scatter_offset_p_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrht.16 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0, <8 x i1> %1)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half*, <8 x i16>, <8 x half>, i32, i32, <8 x i1>)

				; Predicated (_p) halfword scatter store with unshifted <8 x i16> offsets
				; and data (s16). %p becomes an <8 x i1> mask via zext + pred.i2v; the
				; predicated scatter intrinsic is called with immediates 16 and 0.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrht.16 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrhq_scatter_offset_p_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrht.16 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16*, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>)

				; Predicated (_p) halfword scatter store with unshifted <4 x i32> offsets
				; and data (s32). %p becomes a <4 x i1> mask via zext + pred.i2v; the
				; predicated scatter intrinsic is called with immediates 16 and 0.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrht.32 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrhq_scatter_offset_p_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrht.32 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)

				; Predicated (_p) halfword scatter store with unshifted <8 x i16> offsets
				; and data (u16). %p becomes an <8 x i1> mask via zext + pred.i2v; the
				; predicated scatter intrinsic is called with immediates 16 and 0.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrht.16 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrhq_scatter_offset_p_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrht.16 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
				ret void
				}

				; Predicated (_p) halfword scatter store with unshifted <4 x i32> offsets
				; and data (u32). %p becomes a <4 x i1> mask via zext + pred.i2v; the
				; predicated scatter intrinsic is called with immediates 16 and 0.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrht.32 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrhq_scatter_offset_p_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrht.32 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
				ret void
				}

				; Unpredicated halfword scatter store with unshifted <8 x i16> offsets and
				; data (s16). The scatter.offset intrinsic is called with immediates 16 and 0
				; and must select exactly one `vstrh.16 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
				; CHECK-LABEL: test_vstrhq_scatter_offset_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrh.16 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16*, <8 x i16>, <8 x i16>, i32, i32)

				; Unpredicated halfword scatter store with unshifted <4 x i32> offsets and
				; data (s32). The scatter.offset intrinsic is called with immediates 16 and 0
				; and must select exactly one `vstrh.32 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
				; CHECK-LABEL: test_vstrhq_scatter_offset_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrh.32 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16*, <4 x i32>, <4 x i32>, i32, i32)

				; Unpredicated halfword scatter store with unshifted <8 x i16> offsets and
				; data (u16). The scatter.offset intrinsic is called with immediates 16 and 0
				; and must select exactly one `vstrh.16 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
				; CHECK-LABEL: test_vstrhq_scatter_offset_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrh.16 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
				ret void
				}

				; Unpredicated halfword scatter store with unshifted <4 x i32> offsets and
				; data (u32). The scatter.offset intrinsic is called with immediates 16 and 0
				; and must select exactly one `vstrh.32 q1, [r0, q0]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
				; CHECK-LABEL: test_vstrhq_scatter_offset_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrh.32 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
				ret void
				}

				; Unpredicated halfword scatter store of <8 x half> data with shifted
				; <8 x i16> offsets (f16). The scatter.offset intrinsic is called with
				; immediates 16 and 1; the 1 shows up as `uxtw #1` in the expected
				; `vstrh.16 q1, [r0, q0, uxtw #1]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_f16(half* %base, <8 x i16> %offset, <8 x half> %value) {
				; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1)
				ret void
				}

				; Predicated (_p) halfword scatter store of <8 x half> data with shifted
				; <8 x i16> offsets (f16). %p becomes an <8 x i1> mask via zext + pred.i2v;
				; the predicated scatter intrinsic is called with immediates 16 and 1.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrht.16 q1, [r0, q0, uxtw #1]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_f16(half* %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1, <8 x i1> %1)
				ret void
				}

				; Predicated (_p) halfword scatter store with shifted <8 x i16> offsets and
				; data (s16). %p becomes an <8 x i1> mask via zext + pred.i2v; the
				; predicated scatter intrinsic is called with immediates 16 and 1.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrht.16 q1, [r0, q0, uxtw #1]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1)
				ret void
				}

				; Predicated (_p) halfword scatter store with shifted <4 x i32> offsets and
				; data (s32). %p becomes a <4 x i1> mask via zext + pred.i2v; the
				; predicated scatter intrinsic is called with immediates 16 and 1.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrht.32 q1, [r0, q0, uxtw #1]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1)
				ret void
				}

				; Predicated (_p) halfword scatter store with shifted <8 x i16> offsets and
				; data (u16). %p becomes an <8 x i1> mask via zext + pred.i2v; the
				; predicated scatter intrinsic is called with immediates 16 and 1.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrht.16 q1, [r0, q0, uxtw #1]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1)
				ret void
				}

				; Predicated (_p) halfword scatter store with shifted <4 x i32> offsets and
				; data (u32). %p becomes a <4 x i1> mask via zext + pred.i2v; the
				; predicated scatter intrinsic is called with immediates 16 and 1.
				; Expected assembly: vmsr p0, r1 / vpst / `vstrht.32 q1, [r0, q0, uxtw #1]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1)
				ret void
				}

				; Unpredicated halfword scatter store with shifted <8 x i16> offsets and
				; data (s16). The scatter.offset intrinsic is called with immediates 16 and 1
				; and must select exactly one `vstrh.16 q1, [r0, q0, uxtw #1]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
				; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1)
				ret void
				}

				; Unpredicated halfword scatter store with shifted <4 x i32> offsets and
				; data (s32). The scatter.offset intrinsic is called with immediates 16 and 1
				; and must select exactly one `vstrh.32 q1, [r0, q0, uxtw #1]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
				; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1)
				ret void
				}

				; Unpredicated halfword scatter store with shifted <8 x i16> offsets and
				; data (u16). The scatter.offset intrinsic is called with immediates 16 and 1
				; and must select exactly one `vstrh.16 q1, [r0, q0, uxtw #1]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
				; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1)
				ret void
				}

				; Unpredicated halfword scatter store with shifted <4 x i32> offsets and
				; data (u32). The scatter.offset intrinsic is called with immediates 16 and 1
				; and must select exactly one `vstrh.32 q1, [r0, q0, uxtw #1]`.
				define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
				; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1)
				ret void
				}

				; Unpredicated word scatter store of <4 x float> data using a <4 x i32>
				; vector base (f32). llvm.arm.mve.vstr.scatter.base is called with
				; (%addr, immediate 380, %value) and must select `vstrw.32 q1, [q0, #380]`.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_f32(<4 x i32> %addr, <4 x float> %value) {
				; CHECK-LABEL: test_vstrwq_scatter_base_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrw.32 q1, [q0, #380]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> %addr, i32 380, <4 x float> %value)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32>, i32, <4 x float>)

				; Predicated (_p) word scatter store of <4 x float> data using a vector
				; base (f32). %p becomes a <4 x i1> mask via zext + pred.i2v; the predicated
				; scatter.base intrinsic receives (%addr, immediate 400, %value, mask).
				; Expected assembly: vmsr p0, r0 / vpst / `vstrwt.32 q1, [q0, #400]`.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_f32(<4 x i32> %addr, <4 x float> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrwq_scatter_base_p_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrwt.32 q1, [q0, #400]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32> %addr, i32 400, <4 x float> %value, <4 x i1> %1)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>)

				; Predicated (_p) word scatter store using a <4 x i32> vector base (s32).
				; %p becomes a <4 x i1> mask via zext + pred.i2v; the predicated
				; scatter.base intrinsic receives (%addr, immediate 48, %value, mask).
				; Expected assembly: vmsr p0, r0 / vpst / `vstrwt.32 q1, [q0, #48]`.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_s32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrwq_scatter_base_p_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrwt.32 q1, [q0, #48]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 48, <4 x i32> %value, <4 x i1> %1)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>)

				; Predicated (_p) word scatter store using a <4 x i32> vector base (u32).
				; %p becomes a <4 x i1> mask via zext + pred.i2v; the predicated
				; scatter.base intrinsic receives (%addr, immediate 376, %value, mask).
				; Expected assembly: vmsr p0, r0 / vpst / `vstrwt.32 q1, [q0, #376]`.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_u32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrwq_scatter_base_p_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrwt.32 q1, [q0, #376]
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 376, <4 x i32> %value, <4 x i1> %1)
				ret void
				}

				; Unpredicated word scatter store using a <4 x i32> vector base (s32).
				; llvm.arm.mve.vstr.scatter.base is called with (%addr, immediate 156, %value)
				; and must select exactly one `vstrw.32 q1, [q0, #156]`.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_s32(<4 x i32> %addr, <4 x i32> %value) {
				; CHECK-LABEL: test_vstrwq_scatter_base_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrw.32 q1, [q0, #156]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 156, <4 x i32> %value)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32>, i32, <4 x i32>)

				; Unpredicated word scatter store using a <4 x i32> vector base (u32).
				; llvm.arm.mve.vstr.scatter.base is called with (%addr, immediate 212, %value)
				; and must select exactly one `vstrw.32 q1, [q0, #212]`.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_u32(<4 x i32> %addr, <4 x i32> %value) {
				; CHECK-LABEL: test_vstrwq_scatter_base_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrw.32 q1, [q0, #212]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 212, <4 x i32> %value)
				ret void
				}

				; Unpredicated word scatter store of <4 x float> data with base writeback
				; (f32). The <4 x i32> base vector is loaded from %addr, scatter.base.wb
				; (immediate 412) returns an updated base vector, and that value is stored
				; back to %addr. Expected assembly: load of q1 from [r0],
				; `vstrw.32 q0, [q1, #412]!`, then q1 stored back to [r0].
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_f32(<4 x i32>* %addr, <4 x float> %value) {
				; CHECK-LABEL: test_vstrwq_scatter_base_wb_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vstrw.32 q0, [q1, #412]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%0 = load <4 x i32>, <4 x i32>* %addr, align 8
				%1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32> %0, i32 412, <4 x float> %value)
				store <4 x i32> %1, <4 x i32>* %addr, align 8
				ret void
				}

				declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32>, i32, <4 x float>)

				; Predicated scatter store with base write-back (f32 data).
				; Loads the base-address vector through %addr, builds a <4 x i1> predicate
				; from the 16-bit mask %p, calls the predicated "wb" intrinsic with
				; immediate offset 236, and stores the returned base vector back through
				; %addr. The expected assembly wraps vstrwt.32 ... [q1, #236]! in a VPST
				; block and then writes q1 back to [r0].
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_f32(<4 x i32>* %addr, <4 x float> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrwt.32 q0, [q1, #236]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%bases = load <4 x i32>, <4 x i32>* %addr, align 8
				%p.wide = zext i16 %p to i32
				%pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
				%bases.next = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32> %bases, i32 236, <4 x float> %value, <4 x i1> %pred)
				store <4 x i32> %bases.next, <4 x i32>* %addr, align 8
				ret void
				}

				declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>)

				; Predicated scatter store with base write-back (s32 data).
				; Same shape as the f32 write-back test but with <4 x i32> data and
				; immediate offset 328: load bases, derive the predicate from %p, call the
				; predicated "wb" intrinsic, store the updated bases back through %addr.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_s32(<4 x i32>* %addr, <4 x i32> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrwt.32 q0, [q1, #328]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%bases = load <4 x i32>, <4 x i32>* %addr, align 8
				%p.wide = zext i16 %p to i32
				%pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
				%bases.next = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %bases, i32 328, <4 x i32> %value, <4 x i1> %pred)
				store <4 x i32> %bases.next, <4 x i32>* %addr, align 8
				ret void
				}

				declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>)

				; Predicated scatter store with base write-back (u32 data).
				; Uses the same v4i32/v4i32/v4i1 intrinsic overload as the s32 test, here
				; with immediate offset 412. The updated base vector returned by the
				; intrinsic is stored back through %addr.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_u32(<4 x i32>* %addr, <4 x i32> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrwt.32 q0, [q1, #412]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%bases = load <4 x i32>, <4 x i32>* %addr, align 8
				%p.wide = zext i16 %p to i32
				%pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
				%bases.next = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %bases, i32 412, <4 x i32> %value, <4 x i1> %pred)
				store <4 x i32> %bases.next, <4 x i32>* %addr, align 8
				ret void
				}

				; Unpredicated scatter store with base write-back (s32 data).
				; Loads the base vector through %addr, calls the "wb" intrinsic with
				; immediate offset 152, and stores the returned bases back through %addr.
				; The expected assembly uses the pre-indexed form [q1, #152]!.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_s32(<4 x i32>* %addr, <4 x i32> %value) {
				; CHECK-LABEL: test_vstrwq_scatter_base_wb_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vstrw.32 q0, [q1, #152]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%bases = load <4 x i32>, <4 x i32>* %addr, align 8
				%bases.next = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %bases, i32 152, <4 x i32> %value)
				store <4 x i32> %bases.next, <4 x i32>* %addr, align 8
				ret void
				}

				declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32>, i32, <4 x i32>)

				; Unpredicated scatter store with base write-back (u32 data).
				; Uses the same v4i32/v4i32 "wb" intrinsic overload as the s32 test, here
				; with immediate offset 64; the returned base vector is stored back
				; through %addr.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_u32(<4 x i32>* %addr, <4 x i32> %value) {
				; CHECK-LABEL: test_vstrwq_scatter_base_wb_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vldrw.u32 q1, [r0]
				; CHECK-NEXT: vstrw.32 q0, [q1, #64]!
				; CHECK-NEXT: vstrw.32 q1, [r0]
				; CHECK-NEXT: bx lr
				entry:
				%bases = load <4 x i32>, <4 x i32>* %addr, align 8
				%bases.next = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %bases, i32 64, <4 x i32> %value)
				store <4 x i32> %bases.next, <4 x i32>* %addr, align 8
				ret void
				}

				; Unpredicated scatter store using a scalar base pointer plus a vector of
				; offsets (f32 data). The final intrinsic operand is 0 and the expected
				; addressing mode is the plain [r0, q0] form with no shift.
				; NOTE(review): the "i32 32" operand is presumably the memory access size
				; in bits -- confirm against the intrinsic definition in IntrinsicsARM.td.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_f32(float* %base, <4 x i32> %offset, <4 x float> %value) {
				; CHECK-LABEL: test_vstrwq_scatter_offset_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrw.32 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float*, <4 x i32>, <4 x float>, i32, i32)

				; Predicated scatter store using a scalar base pointer plus a vector of
				; offsets (f32 data). The 16-bit mask %p is widened and converted to a
				; <4 x i1> predicate that is appended as the last intrinsic operand.
				; The expected assembly is a VPST block around vstrwt.32 q1, [r0, q0].
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_f32(float* %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrwq_scatter_offset_p_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%p.wide = zext i16 %p to i32
				%pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0, <4 x i1> %pred)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float*, <4 x i32>, <4 x float>, i32, i32, <4 x i1>)

				; Predicated scatter store using a scalar base pointer plus a vector of
				; offsets (s32 data). Builds the <4 x i1> predicate from the 16-bit mask
				; %p and calls the predicated offset intrinsic with a final shift operand
				; of 0; expects vstrwt.32 q1, [r0, q0] inside a VPST block.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrwq_scatter_offset_p_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%p.wide = zext i16 %p to i32
				%pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %pred)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)

				; Predicated scatter store using a scalar base pointer plus a vector of
				; offsets (u32 data). Uses the same p0i32/v4i32/v4i32/v4i1 intrinsic
				; overload and the same operands as the s32 test; expects
				; vstrwt.32 q1, [r0, q0] inside a VPST block.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrwq_scatter_offset_p_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				%p.wide = zext i16 %p to i32
				%pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %pred)
				ret void
				}

				; Unpredicated scatter store using a scalar base pointer plus a vector of
				; offsets (s32 data). The final intrinsic operand is 0 and the expected
				; addressing mode is the unshifted [r0, q0] form.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
				; CHECK-LABEL: test_vstrwq_scatter_offset_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrw.32 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0)
				ret void
				}

				declare void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32*, <4 x i32>, <4 x i32>, i32, i32)

				; Unpredicated scatter store using a scalar base pointer plus a vector of
				; offsets (u32 data). Uses the same p0i32/v4i32/v4i32 intrinsic overload
				; and operands as the s32 test; expects vstrw.32 q1, [r0, q0].
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
				; CHECK-LABEL: test_vstrwq_scatter_offset_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrw.32 q1, [r0, q0]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0)
				ret void
				}

				; Unpredicated scatter store with shifted vector offsets (f32 data).
				; Calls the same offset intrinsic as the unshifted f32 test but with a
				; final operand of 2 instead of 0; the expected addressing mode gains the
				; "uxtw #2" modifier: [r0, q0, uxtw #2].
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_f32(float* %base, <4 x i32> %offset, <4 x float> %value) {
				; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2)
				ret void
				}

				; Predicated scatter store with shifted vector offsets (f32 data).
				; Builds the <4 x i1> predicate from the 16-bit mask %p and calls the
				; predicated offset intrinsic with a shift operand of 2. The expected
				; assembly is a VPST block around vstrwt.32 q1, [r0, q0, uxtw #2].
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_f32(float* %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
				; CHECK-NEXT: bx lr
				entry:
				%p.wide = zext i16 %p to i32
				%pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2, <4 x i1> %pred)
				ret void
				}

				; Predicated scatter store with shifted vector offsets (s32 data).
				; Derives the predicate from %p and calls the predicated offset intrinsic
				; with a shift operand of 2; expects vstrwt.32 q1, [r0, q0, uxtw #2]
				; inside a VPST block.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
				; CHECK-NEXT: bx lr
				entry:
				%p.wide = zext i16 %p to i32
				%pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %pred)
				ret void
				}

				; Predicated scatter store with shifted vector offsets (u32 data).
				; Uses the same intrinsic overload and operands as the s32 test (shift
				; operand 2); expects vstrwt.32 q1, [r0, q0, uxtw #2] inside a VPST block.
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
				; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
				; CHECK-NEXT: bx lr
				entry:
				%p.wide = zext i16 %p to i32
				%pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
				call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %pred)
				ret void
				}

				; Unpredicated scatter store with shifted vector offsets (s32 data).
				; The final intrinsic operand is 2 and the expected addressing mode is
				; [r0, q0, uxtw #2].
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
				; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2)
				ret void
				}

				; Unpredicated scatter store with shifted vector offsets (u32 data).
				; Uses the same p0i32/v4i32/v4i32 intrinsic overload and operands as the
				; s32 test (final operand 2); expects vstrw.32 q1, [r0, q0, uxtw #2].
				define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
				; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
				; CHECK-NEXT: bx lr
				entry:
				call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2)
				ret void
				}

This is an archive of the discontinued LLVM Phabricator instance.

[ARM,MVE] Add intrinsics for gather/scatter load/stores.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 228018

clang/include/clang/Basic/arm_mve.td

clang/include/clang/Basic/arm_mve_defs.td

clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c

clang/test/Sema/arm-mve-immediates.c

clang/utils/TableGen/MveEmitter.cpp

llvm/include/llvm/IR/IntrinsicsARM.td

llvm/lib/Target/ARM/ARMInstrMVE.td

llvm/test/CodeGen/Thumb2/mve-intrinsics/scatter-gather.ll

This is an archive of the discontinued LLVM Phabricator instance.

[ARM,MVE] Add intrinsics for gather/scatter load/stores.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 228018

clang/include/clang/Basic/arm_mve.td

clang/include/clang/Basic/arm_mve_defs.td

clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c

clang/test/Sema/arm-mve-immediates.c

clang/utils/TableGen/MveEmitter.cpp

llvm/include/llvm/IR/IntrinsicsARM.td

llvm/lib/Target/ARM/ARMInstrMVE.td

llvm/test/CodeGen/Thumb2/mve-intrinsics/scatter-gather.ll

[ARM,MVE] Add intrinsics for gather/scatter load/stores.
ClosedPublic