This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
clang/
-
include/clang/Basic/
-
clang/
-
Basic/
-
arm_mve.td
-
test/CodeGen/arm-mve-intrinsics/
-
CodeGen/
-
arm-mve-intrinsics/
-
vminvq.c
-
llvm/
-
include/llvm/IR/
-
llvm/
-
IR/
-
IntrinsicsARM.td
-
lib/Target/ARM/
-
Target/
-
ARM/
-
ARMISelLowering.cpp
-
ARMInstrMVE.td
-
test/CodeGen/Thumb2/mve-intrinsics/
-
CodeGen/
-
Thumb2/
-
mve-intrinsics/
-
vminvq.ll

Differential D76490

[ARM,MVE] Add ACLE intrinsics for the vminv/vmaxv family.
ClosedPublic

Authored by simon_tatham on Mar 20 2020, 5:22 AM.

Download Raw Diff

Details

Reviewers

dmgreen
MarkMurrayARM
miyuki
ostannard

Commits

rG45a9945b9ea9: [ARM,MVE] Add ACLE intrinsics for the vminv/vmaxv family.

Summary

I've implemented these as target-specific IR intrinsics, because
they're not quite enough like @llvm.experimental.vector.reduce.min
(which doesn't take the extra scalar parameter). Also this keeps the
predicated and unpredicated versions looking similar, and the
floating-point minnm/maxnm versions fold into the same schema.

We had a couple of min/max reductions already implemented, from the
initial pathfinding exercise in D67158. Those were done by having
separate IR intrinsic names for the signed and unsigned integer
versions; as part of this commit, I've changed them to use a flag
parameter indicating signedness, which is how we ended up deciding
that the rest of the MVE intrinsics family ought to work. So now
hopefully the whole lot is consistent.

In the new llc test, the output code from the v8f16 test functions
looks quite unpleasant, but most of it is PCS lowering (you can't pass
a half directly in or out of a function). In other circumstances,
where you do something else with your half in the same function, it
doesn't look nearly as nasty.

Diff Detail

Repository: rG LLVM Github Monorepo

Unit Tests: Failed

	Time	Test
	410 ms	Clang.CodeGen/arm-mve-intrinsics::Unknown Unit Message ("")

Event Timeline

simon_tatham created this revision. · Mar 20 2020, 5:22 AM

Herald added a project: Restricted Project. · View Herald Transcript · Mar 20 2020, 5:22 AM

Herald added subscribers: cfe-commits, hiraditya, kristof.beyls. · View Herald Transcript

Harbormaster failed remote builds in B49875: Diff 251616! · Mar 20 2020, 5:55 AM

LGTM

This revision is now accepted and ready to land. · Mar 20 2020, 6:42 AM

Update tests to fix CI failure (oops). Also added opt -sroa to make the incidental half/float argument marshalling a bit less verbose.

Harbormaster completed remote builds in B49883: Diff 251626. · Mar 20 2020, 8:05 AM

Closed by commit rG45a9945b9ea9: [ARM,MVE] Add ACLE intrinsics for the vminv/vmaxv family. (authored by simon_tatham). · Explain Why · Mar 20 2020, 9:11 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

clang/

include/

clang/

Basic/

arm_mve.td

39 lines

test/

CodeGen/

arm-mve-intrinsics/

vminvq.c

868 lines

llvm/

include/

llvm/

IR/

IntrinsicsARM.td

26 lines

lib/

Target/

ARM/

ARMISelLowering.cpp

17 lines

ARMInstrMVE.td

99 lines

test/

CodeGen/

Thumb2/

mve-intrinsics/

vminvq.ll

849 lines

Diff 251616

clang/include/clang/Basic/arm_mve.td

Show First 20 Lines • Show All 530 Lines • ▼ Show 20 Lines	let params = T.Float in {
defm vminnmq : VectorVectorArithmetic<"min_predicated", (? (u32 0))>;		defm vminnmq : VectorVectorArithmetic<"min_predicated", (? (u32 0))>;
defm vmaxnmq : VectorVectorArithmetic<"max_predicated", (? (u32 0))>;		defm vmaxnmq : VectorVectorArithmetic<"max_predicated", (? (u32 0))>;
def vminnmaq_m: Intrinsic<Vector, (args Vector:$a, Vector:$b, Predicate:$pred),		def vminnmaq_m: Intrinsic<Vector, (args Vector:$a, Vector:$b, Predicate:$pred),
(IRInt<"vminnma_predicated", [Vector,Predicate]> $a, $b, $pred)>;		(IRInt<"vminnma_predicated", [Vector,Predicate]> $a, $b, $pred)>;
def vmaxnmaq_m: Intrinsic<Vector, (args Vector:$a, Vector:$b, Predicate:$pred),		def vmaxnmaq_m: Intrinsic<Vector, (args Vector:$a, Vector:$b, Predicate:$pred),
(IRInt<"vmaxnma_predicated", [Vector,Predicate]> $a, $b, $pred)>;		(IRInt<"vmaxnma_predicated", [Vector,Predicate]> $a, $b, $pred)>;
}		}

		multiclass Reduction<Type Accumulator, string basename, list<Type> basetypes,
		bit needSign = 0,
		dag postCG = (seq (id $ret)),
		dag accArg = (args Accumulator:$prev),
		dag preCG = (seq)> {
		defvar intArgsBase = (? $prev, $vec);
		defvar intArgsUnpred = !con(intArgsBase,
		!if(needSign, (? (unsignedflag Scalar)), (?)));
		defvar intArgsPred = !con(intArgsUnpred, (? $pred));
		defvar intUnpred = !setop(intArgsUnpred, IRInt<basename, basetypes>);
		defvar intPred = !setop(intArgsPred, IRInt<
		basename#"_predicated", !listconcat(basetypes, [Predicate])>);

		def "": Intrinsic<
		Accumulator, !con(accArg, (args Vector:$vec)),
		!con(preCG, (seq intUnpred:$ret), postCG)>;
		def _p: Intrinsic<
		Accumulator, !con(accArg, (args Vector:$vec, Predicate:$pred)),
		!con(preCG, (seq intPred:$ret), postCG)>;
		}

let params = T.Int in {		let params = T.Int in {
def vminvq: Intrinsic<Scalar, (args Scalar:$prev, Vector:$vec),		defm vminvq: Reduction<Scalar, "minv", [Vector], 1, (seq (Scalar $ret))>;
(Scalar (IRInt<"minv", [Vector], 1> $prev, $vec))>;		defm vmaxvq: Reduction<Scalar, "maxv", [Vector], 1, (seq (Scalar $ret))>;
def vmaxvq: Intrinsic<Scalar, (args Scalar:$prev, Vector:$vec),		}
(Scalar (IRInt<"maxv", [Vector], 1> $prev, $vec))>;
		let params = T.Signed in {
		defm vminavq: Reduction<UScalar, "minav", [Vector], 0, (seq (UScalar $ret))>;
		defm vmaxavq: Reduction<UScalar, "maxav", [Vector], 0, (seq (UScalar $ret))>;
		}

		let params = T.Float in {
		defm vminnmvq: Reduction<Scalar, "minnmv", [Scalar, Vector]>;
		defm vmaxnmvq: Reduction<Scalar, "maxnmv", [Scalar, Vector]>;
		defm vminnmavq: Reduction<Scalar, "minnmav", [Scalar, Vector]>;
		defm vmaxnmavq: Reduction<Scalar, "maxnmav", [Scalar, Vector]>;
}		}

foreach half = [ "b", "t" ] in {		foreach half = [ "b", "t" ] in {
defvar halfconst = !if(!eq(half, "b"), 0, 1);		defvar halfconst = !if(!eq(half, "b"), 0, 1);

let params = [f32], pnt = PNT_None in {		let params = [f32], pnt = PNT_None in {
def vcvt#half#q_f16: Intrinsic<		def vcvt#half#q_f16: Intrinsic<
VecOf<f16>, (args VecOf<f16>:$inactive, Vector:$a),		VecOf<f16>, (args VecOf<f16>:$inactive, Vector:$a),
▲ Show 20 Lines • Show All 1,000 Lines • Show Last 20 Lines

clang/test/CodeGen/arm-mve-intrinsics/vminvq.c

	// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py			// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
	// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s \| opt -S -mem2reg \| FileCheck %s			// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s \| opt -S -mem2reg \| FileCheck %s
	// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s \| opt -S -mem2reg \| FileCheck %s			// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s \| opt -S -mem2reg \| FileCheck %s

	#include <arm_mve.h>			#include <arm_mve.h>

	// CHECK-LABEL: @test_vminvq_s8(			// CHECK-LABEL: @test_vminvq_s8(
	// CHECK-NEXT: entry:			// CHECK-NEXT: entry:
	// CHECK-NEXT: [[TMP0:%.]] = zext i8 [[A:%.]] to i32			// CHECK-NEXT: [[TMP0:%.]] = zext i8 [[A:%.]] to i32
	// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.minv.s.v16i8(i32 [[TMP0]], <16 x i8> [[B:%.]])			// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.minv.v16i8(i32 [[TMP0]], <16 x i8> [[B:%.]], i32 0)
	// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8			// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
	// CHECK-NEXT: ret i8 [[TMP2]]			// CHECK-NEXT: ret i8 [[TMP2]]
	//			//
	int8_t test_vminvq_s8(int8_t a, int8x16_t b)			int8_t test_vminvq_s8(int8_t a, int8x16_t b) {
	{
	#ifdef POLYMORPHIC			#ifdef POLYMORPHIC
	return vminvq(a, b);			return vminvq(a, b);
	#else /* POLYMORPHIC */			#else /* POLYMORPHIC */
	return vminvq_s8(a, b);			return vminvq_s8(a, b);
	#endif /* POLYMORPHIC */			#endif /* POLYMORPHIC */
	}			}

	// CHECK-LABEL: @test_vminvq_s16(			// CHECK-LABEL: @test_vminvq_s16(
	// CHECK-NEXT: entry:			// CHECK-NEXT: entry:
	// CHECK-NEXT: [[TMP0:%.]] = zext i16 [[A:%.]] to i32			// CHECK-NEXT: [[TMP0:%.]] = zext i16 [[A:%.]] to i32
	// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.minv.s.v8i16(i32 [[TMP0]], <8 x i16> [[B:%.]])			// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.minv.v8i16(i32 [[TMP0]], <8 x i16> [[B:%.]], i32 0)
	// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16			// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
	// CHECK-NEXT: ret i16 [[TMP2]]			// CHECK-NEXT: ret i16 [[TMP2]]
	//			//
	int16_t test_vminvq_s16(int16_t a, int16x8_t b)			int16_t test_vminvq_s16(int16_t a, int16x8_t b) {
	{
	#ifdef POLYMORPHIC			#ifdef POLYMORPHIC
	return vminvq(a, b);			return vminvq(a, b);
	#else /* POLYMORPHIC */			#else /* POLYMORPHIC */
	return vminvq_s16(a, b);			return vminvq_s16(a, b);
	#endif /* POLYMORPHIC */			#endif /* POLYMORPHIC */
	}			}

	// CHECK-LABEL: @test_vminvq_s32(			// CHECK-LABEL: @test_vminvq_s32(
	// CHECK-NEXT: entry:			// CHECK-NEXT: entry:
	// CHECK-NEXT: [[TMP0:%.]] = call i32 @llvm.arm.mve.minv.s.v4i32(i32 [[A:%.]], <4 x i32> [[B:%.*]])			// CHECK-NEXT: [[TMP0:%.]] = call i32 @llvm.arm.mve.minv.v4i32(i32 [[A:%.]], <4 x i32> [[B:%.*]], i32 0)
	// CHECK-NEXT: ret i32 [[TMP0]]			// CHECK-NEXT: ret i32 [[TMP0]]
	//			//
	int32_t test_vminvq_s32(int32_t a, int32x4_t b)			int32_t test_vminvq_s32(int32_t a, int32x4_t b) {
	{
	#ifdef POLYMORPHIC			#ifdef POLYMORPHIC
	return vminvq(a, b);			return vminvq(a, b);
	#else /* POLYMORPHIC */			#else /* POLYMORPHIC */
	return vminvq_s32(a, b);			return vminvq_s32(a, b);
	#endif /* POLYMORPHIC */			#endif /* POLYMORPHIC */
	}			}

	// CHECK-LABEL: @test_vminvq_u8(			// CHECK-LABEL: @test_vminvq_u8(
	// CHECK-NEXT: entry:			// CHECK-NEXT: entry:
	// CHECK-NEXT: [[TMP0:%.]] = zext i8 [[A:%.]] to i32			// CHECK-NEXT: [[TMP0:%.]] = zext i8 [[A:%.]] to i32
	// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.minv.u.v16i8(i32 [[TMP0]], <16 x i8> [[B:%.]])			// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.minv.v16i8(i32 [[TMP0]], <16 x i8> [[B:%.]], i32 1)
	// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8			// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
	// CHECK-NEXT: ret i8 [[TMP2]]			// CHECK-NEXT: ret i8 [[TMP2]]
	//			//
	uint8_t test_vminvq_u8(uint8_t a, uint8x16_t b)			uint8_t test_vminvq_u8(uint8_t a, uint8x16_t b) {
	{
	#ifdef POLYMORPHIC			#ifdef POLYMORPHIC
	return vminvq(a, b);			return vminvq(a, b);
	#else /* POLYMORPHIC */			#else /* POLYMORPHIC */
	return vminvq_u8(a, b);			return vminvq_u8(a, b);
	#endif /* POLYMORPHIC */			#endif /* POLYMORPHIC */
	}			}

	// CHECK-LABEL: @test_vminvq_u16(			// CHECK-LABEL: @test_vminvq_u16(
	// CHECK-NEXT: entry:			// CHECK-NEXT: entry:
	// CHECK-NEXT: [[TMP0:%.]] = zext i16 [[A:%.]] to i32			// CHECK-NEXT: [[TMP0:%.]] = zext i16 [[A:%.]] to i32
	// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.minv.u.v8i16(i32 [[TMP0]], <8 x i16> [[B:%.]])			// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.minv.v8i16(i32 [[TMP0]], <8 x i16> [[B:%.]], i32 1)
	// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16			// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
	// CHECK-NEXT: ret i16 [[TMP2]]			// CHECK-NEXT: ret i16 [[TMP2]]
	//			//
	uint16_t test_vminvq_u16(uint16_t a, uint16x8_t b)			uint16_t test_vminvq_u16(uint16_t a, uint16x8_t b) {
	{
	#ifdef POLYMORPHIC			#ifdef POLYMORPHIC
	return vminvq(a, b);			return vminvq(a, b);
	#else /* POLYMORPHIC */			#else /* POLYMORPHIC */
	return vminvq_u16(a, b);			return vminvq_u16(a, b);
	#endif /* POLYMORPHIC */			#endif /* POLYMORPHIC */
	}			}

	// CHECK-LABEL: @test_vminvq_u32(			// CHECK-LABEL: @test_vminvq_u32(
	// CHECK-NEXT: entry:			// CHECK-NEXT: entry:
	// CHECK-NEXT: [[TMP0:%.]] = call i32 @llvm.arm.mve.minv.u.v4i32(i32 [[A:%.]], <4 x i32> [[B:%.*]])			// CHECK-NEXT: [[TMP0:%.]] = call i32 @llvm.arm.mve.minv.v4i32(i32 [[A:%.]], <4 x i32> [[B:%.*]], i32 1)
	// CHECK-NEXT: ret i32 [[TMP0]]			// CHECK-NEXT: ret i32 [[TMP0]]
	//			//
	uint32_t test_vminvq_u32(uint32_t a, uint32x4_t b)			uint32_t test_vminvq_u32(uint32_t a, uint32x4_t b) {
	{
	#ifdef POLYMORPHIC			#ifdef POLYMORPHIC
	return vminvq(a, b);			return vminvq(a, b);
	#else /* POLYMORPHIC */			#else /* POLYMORPHIC */
	return vminvq_u32(a, b);			return vminvq_u32(a, b);
	#endif /* POLYMORPHIC */			#endif /* POLYMORPHIC */
	}			}

				// CHECK-LABEL: @test_vmaxvq_s8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i8 [[A:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.maxv.v16i8(i32 [[TMP0]], <16 x i8> [[B:%.]], i32 0)
				// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
				// CHECK-NEXT: ret i8 [[TMP2]]
				//
				int8_t test_vmaxvq_s8(int8_t a, int8x16_t b) {
				#ifdef POLYMORPHIC
				return vmaxvq(a, b);
				#else /* POLYMORPHIC */
				return vmaxvq_s8(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxvq_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i16 [[A:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.maxv.v8i16(i32 [[TMP0]], <8 x i16> [[B:%.]], i32 0)
				// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
				// CHECK-NEXT: ret i16 [[TMP2]]
				//
				int16_t test_vmaxvq_s16(int16_t a, int16x8_t b) {
				#ifdef POLYMORPHIC
				return vmaxvq(a, b);
				#else /* POLYMORPHIC */
				return vmaxvq_s16(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxvq_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = call i32 @llvm.arm.mve.maxv.v4i32(i32 [[A:%.]], <4 x i32> [[B:%.*]], i32 0)
				// CHECK-NEXT: ret i32 [[TMP0]]
				//
				int32_t test_vmaxvq_s32(int32_t a, int32x4_t b) {
				#ifdef POLYMORPHIC
				return vmaxvq(a, b);
				#else /* POLYMORPHIC */
				return vmaxvq_s32(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxvq_u8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i8 [[A:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.maxv.v16i8(i32 [[TMP0]], <16 x i8> [[B:%.]], i32 1)
				// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
				// CHECK-NEXT: ret i8 [[TMP2]]
				//
				uint8_t test_vmaxvq_u8(uint8_t a, uint8x16_t b) {
				#ifdef POLYMORPHIC
				return vmaxvq(a, b);
				#else /* POLYMORPHIC */
				return vmaxvq_u8(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxvq_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i16 [[A:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.maxv.v8i16(i32 [[TMP0]], <8 x i16> [[B:%.]], i32 1)
				// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
				// CHECK-NEXT: ret i16 [[TMP2]]
				//
				uint16_t test_vmaxvq_u16(uint16_t a, uint16x8_t b) {
				#ifdef POLYMORPHIC
				return vmaxvq(a, b);
				#else /* POLYMORPHIC */
				return vmaxvq_u16(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxvq_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = call i32 @llvm.arm.mve.maxv.v4i32(i32 [[A:%.]], <4 x i32> [[B:%.*]], i32 1)
				// CHECK-NEXT: ret i32 [[TMP0]]
				//
				uint32_t test_vmaxvq_u32(uint32_t a, uint32x4_t b) {
				#ifdef POLYMORPHIC
				return vmaxvq(a, b);
				#else /* POLYMORPHIC */
				return vmaxvq_u32(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminavq_s8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i8 [[A:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.minav.v16i8(i32 [[TMP0]], <16 x i8> [[B:%.]])
				// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
				// CHECK-NEXT: ret i8 [[TMP2]]
				//
				uint8_t test_vminavq_s8(uint8_t a, int8x16_t b) {
				#ifdef POLYMORPHIC
				return vminavq(a, b);
				#else /* POLYMORPHIC */
				return vminavq_s8(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminavq_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i16 [[A:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.minav.v8i16(i32 [[TMP0]], <8 x i16> [[B:%.]])
				// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
				// CHECK-NEXT: ret i16 [[TMP2]]
				//
				uint16_t test_vminavq_s16(uint16_t a, int16x8_t b) {
				#ifdef POLYMORPHIC
				return vminavq(a, b);
				#else /* POLYMORPHIC */
				return vminavq_s16(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminavq_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = call i32 @llvm.arm.mve.minav.v4i32(i32 [[A:%.]], <4 x i32> [[B:%.*]])
				// CHECK-NEXT: ret i32 [[TMP0]]
				//
				uint32_t test_vminavq_s32(uint32_t a, int32x4_t b) {
				#ifdef POLYMORPHIC
				return vminavq(a, b);
				#else /* POLYMORPHIC */
				return vminavq_s32(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxavq_s8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i8 [[A:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.maxav.v16i8(i32 [[TMP0]], <16 x i8> [[B:%.]])
				// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
				// CHECK-NEXT: ret i8 [[TMP2]]
				//
				uint8_t test_vmaxavq_s8(uint8_t a, int8x16_t b) {
				#ifdef POLYMORPHIC
				return vmaxavq(a, b);
				#else /* POLYMORPHIC */
				return vmaxavq_s8(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxavq_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i16 [[A:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.]] = call i32 @llvm.arm.mve.maxav.v8i16(i32 [[TMP0]], <8 x i16> [[B:%.]])
				// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
				// CHECK-NEXT: ret i16 [[TMP2]]
				//
				uint16_t test_vmaxavq_s16(uint16_t a, int16x8_t b) {
				#ifdef POLYMORPHIC
				return vmaxavq(a, b);
				#else /* POLYMORPHIC */
				return vmaxavq_s16(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxavq_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = call i32 @llvm.arm.mve.maxav.v4i32(i32 [[A:%.]], <4 x i32> [[B:%.*]])
				// CHECK-NEXT: ret i32 [[TMP0]]
				//
				uint32_t test_vmaxavq_s32(uint32_t a, int32x4_t b) {
				#ifdef POLYMORPHIC
				return vmaxavq(a, b);
				#else /* POLYMORPHIC */
				return vmaxavq_s32(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminnmvq_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[A:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[TMP:%.*]] = alloca float, align 4
				// CHECK-NEXT: [[TMP2:%.*]] = alloca float, align 4
				// CHECK-NEXT: store float [[A_COERCE:%.*]], float* [[TMP]], align 4
				// CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[TMP]] to i8*
				// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[A]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP1]], i8* align 4 [[TMP0]], i32 2, i1 false)
				// CHECK-NEXT: [[A1:%.*]] = load half, half* [[A]], align 2
				// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.minnmv.f16.v8f16(half [[A1]], <8 x half> [[B:%.*]])
				// CHECK-NEXT: store half [[TMP2]], half* [[RETVAL]], align 2
				// CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to i8*
				// CHECK-NEXT: [[TMP4:%.*]] = bitcast half* [[RETVAL]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 2 [[TMP4]], i32 2, i1 false)
				// CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4
				// CHECK-NEXT: ret float [[TMP5]]
				//
				float16_t test_vminnmvq_f16(float16_t a, float16x8_t b) {
				#ifdef POLYMORPHIC
				return vminnmvq(a, b);
				#else /* POLYMORPHIC */
				return vminnmvq_f16(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminnmvq_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = call float @llvm.arm.mve.minnmv.f32.v4f32(float [[A:%.]], <4 x float> [[B:%.*]])
				// CHECK-NEXT: ret float [[TMP0]]
				//
				float32_t test_vminnmvq_f32(float32_t a, float32x4_t b) {
				#ifdef POLYMORPHIC
				return vminnmvq(a, b);
				#else /* POLYMORPHIC */
				return vminnmvq_f32(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminnmavq_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[A:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[TMP:%.*]] = alloca float, align 4
				// CHECK-NEXT: [[TMP2:%.*]] = alloca float, align 4
				// CHECK-NEXT: store float [[A_COERCE:%.*]], float* [[TMP]], align 4
				// CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[TMP]] to i8*
				// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[A]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP1]], i8* align 4 [[TMP0]], i32 2, i1 false)
				// CHECK-NEXT: [[A1:%.*]] = load half, half* [[A]], align 2
				// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.minnmav.f16.v8f16(half [[A1]], <8 x half> [[B:%.*]])
				// CHECK-NEXT: store half [[TMP2]], half* [[RETVAL]], align 2
				// CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to i8*
				// CHECK-NEXT: [[TMP4:%.*]] = bitcast half* [[RETVAL]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 2 [[TMP4]], i32 2, i1 false)
				// CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4
				// CHECK-NEXT: ret float [[TMP5]]
				//
				float16_t test_vminnmavq_f16(float16_t a, float16x8_t b) {
				#ifdef POLYMORPHIC
				return vminnmavq(a, b);
				#else /* POLYMORPHIC */
				return vminnmavq_f16(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminnmavq_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = call float @llvm.arm.mve.minnmav.f32.v4f32(float [[A:%.]], <4 x float> [[B:%.*]])
				// CHECK-NEXT: ret float [[TMP0]]
				//
				float32_t test_vminnmavq_f32(float32_t a, float32x4_t b) {
				#ifdef POLYMORPHIC
				return vminnmavq(a, b);
				#else /* POLYMORPHIC */
				return vminnmavq_f32(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxnmvq_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[A:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[TMP:%.*]] = alloca float, align 4
				// CHECK-NEXT: [[TMP2:%.*]] = alloca float, align 4
				// CHECK-NEXT: store float [[A_COERCE:%.*]], float* [[TMP]], align 4
				// CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[TMP]] to i8*
				// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[A]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP1]], i8* align 4 [[TMP0]], i32 2, i1 false)
				// CHECK-NEXT: [[A1:%.*]] = load half, half* [[A]], align 2
				// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.maxnmv.f16.v8f16(half [[A1]], <8 x half> [[B:%.*]])
				// CHECK-NEXT: store half [[TMP2]], half* [[RETVAL]], align 2
				// CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to i8*
				// CHECK-NEXT: [[TMP4:%.*]] = bitcast half* [[RETVAL]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 2 [[TMP4]], i32 2, i1 false)
				// CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4
				// CHECK-NEXT: ret float [[TMP5]]
				//
				float16_t test_vmaxnmvq_f16(float16_t a, float16x8_t b) {
				#ifdef POLYMORPHIC
				return vmaxnmvq(a, b);
				#else /* POLYMORPHIC */
				return vmaxnmvq_f16(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxnmvq_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = call float @llvm.arm.mve.maxnmv.f32.v4f32(float [[A:%.]], <4 x float> [[B:%.*]])
				// CHECK-NEXT: ret float [[TMP0]]
				//
				float32_t test_vmaxnmvq_f32(float32_t a, float32x4_t b) {
				#ifdef POLYMORPHIC
				return vmaxnmvq(a, b);
				#else /* POLYMORPHIC */
				return vmaxnmvq_f32(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxnmavq_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[A:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[TMP:%.*]] = alloca float, align 4
				// CHECK-NEXT: [[TMP2:%.*]] = alloca float, align 4
				// CHECK-NEXT: store float [[A_COERCE:%.*]], float* [[TMP]], align 4
				// CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[TMP]] to i8*
				// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[A]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP1]], i8* align 4 [[TMP0]], i32 2, i1 false)
				// CHECK-NEXT: [[A1:%.*]] = load half, half* [[A]], align 2
				// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.arm.mve.maxnmav.f16.v8f16(half [[A1]], <8 x half> [[B:%.*]])
				// CHECK-NEXT: store half [[TMP2]], half* [[RETVAL]], align 2
				// CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to i8*
				// CHECK-NEXT: [[TMP4:%.*]] = bitcast half* [[RETVAL]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 2 [[TMP4]], i32 2, i1 false)
				// CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4
				// CHECK-NEXT: ret float [[TMP5]]
				//
				float16_t test_vmaxnmavq_f16(float16_t a, float16x8_t b) {
				#ifdef POLYMORPHIC
				return vmaxnmavq(a, b);
				#else /* POLYMORPHIC */
				return vmaxnmavq_f16(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxnmavq_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = call float @llvm.arm.mve.maxnmav.f32.v4f32(float [[A:%.]], <4 x float> [[B:%.*]])
				// CHECK-NEXT: ret float [[TMP0]]
				//
				float32_t test_vmaxnmavq_f32(float32_t a, float32x4_t b) {
				#ifdef POLYMORPHIC
				return vmaxnmavq(a, b);
				#else /* POLYMORPHIC */
				return vmaxnmavq_f32(a, b);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminvq_p_s8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i8 [[A:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.]] = zext i16 [[P:%.]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.]] = call i32 @llvm.arm.mve.minv.predicated.v16i8.v16i1(i32 [[TMP0]], <16 x i8> [[B:%.]], i32 0, <16 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8
				// CHECK-NEXT: ret i8 [[TMP4]]
				//
				int8_t test_vminvq_p_s8(int8_t a, int8x16_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vminvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vminvq_p_s8(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminvq_p_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i16 [[A:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.]] = zext i16 [[P:%.]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.]] = call i32 @llvm.arm.mve.minv.predicated.v8i16.v8i1(i32 [[TMP0]], <8 x i16> [[B:%.]], i32 0, <8 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
				// CHECK-NEXT: ret i16 [[TMP4]]
				//
				int16_t test_vminvq_p_s16(int16_t a, int16x8_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vminvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vminvq_p_s16(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminvq_p_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i16 [[P:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.]] = call i32 @llvm.arm.mve.minv.predicated.v4i32.v4i1(i32 [[A:%.]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret i32 [[TMP2]]
				//
				int32_t test_vminvq_p_s32(int32_t a, int32x4_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vminvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vminvq_p_s32(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminvq_p_u8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i8 [[A:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.]] = zext i16 [[P:%.]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.]] = call i32 @llvm.arm.mve.minv.predicated.v16i8.v16i1(i32 [[TMP0]], <16 x i8> [[B:%.]], i32 1, <16 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8
				// CHECK-NEXT: ret i8 [[TMP4]]
				//
				uint8_t test_vminvq_p_u8(uint8_t a, uint8x16_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vminvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vminvq_p_u8(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminvq_p_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i16 [[A:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.]] = zext i16 [[P:%.]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.]] = call i32 @llvm.arm.mve.minv.predicated.v8i16.v8i1(i32 [[TMP0]], <8 x i16> [[B:%.]], i32 1, <8 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
				// CHECK-NEXT: ret i16 [[TMP4]]
				//
				uint16_t test_vminvq_p_u16(uint16_t a, uint16x8_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vminvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vminvq_p_u16(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminvq_p_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i16 [[P:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.]] = call i32 @llvm.arm.mve.minv.predicated.v4i32.v4i1(i32 [[A:%.]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret i32 [[TMP2]]
				//
				uint32_t test_vminvq_p_u32(uint32_t a, uint32x4_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vminvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vminvq_p_u32(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxvq_p_s8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i8 [[A:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.]] = zext i16 [[P:%.]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.]] = call i32 @llvm.arm.mve.maxv.predicated.v16i8.v16i1(i32 [[TMP0]], <16 x i8> [[B:%.]], i32 0, <16 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8
				// CHECK-NEXT: ret i8 [[TMP4]]
				//
				int8_t test_vmaxvq_p_s8(int8_t a, int8x16_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vmaxvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vmaxvq_p_s8(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxvq_p_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.]] = zext i16 [[A:%.]] to i32
				// CHECK-NEXT: [[TMP1:%.]] = zext i16 [[P:%.]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.]] = call i32 @llvm.arm.mve.maxv.predicated.v8i16.v8i1(i32 [[TMP0]], <8 x i16> [[B:%.]], i32 0, <8 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
				// CHECK-NEXT: ret i16 [[TMP4]]
				//
				int16_t test_vmaxvq_p_s16(int16_t a, int16x8_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vmaxvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vmaxvq_p_s16(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxvq_p_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.maxv.predicated.v4i32.v4i1(i32 [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret i32 [[TMP2]]
				//
				int32_t test_vmaxvq_p_s32(int32_t a, int32x4_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vmaxvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vmaxvq_p_s32(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxvq_p_u8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[A:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.maxv.predicated.v16i8.v16i1(i32 [[TMP0]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8
				// CHECK-NEXT: ret i8 [[TMP4]]
				//
				uint8_t test_vmaxvq_p_u8(uint8_t a, uint8x16_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vmaxvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vmaxvq_p_u8(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxvq_p_u16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.maxv.predicated.v8i16.v8i1(i32 [[TMP0]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
				// CHECK-NEXT: ret i16 [[TMP4]]
				//
				uint16_t test_vmaxvq_p_u16(uint16_t a, uint16x8_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vmaxvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vmaxvq_p_u16(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxvq_p_u32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.maxv.predicated.v4i32.v4i1(i32 [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret i32 [[TMP2]]
				//
				uint32_t test_vmaxvq_p_u32(uint32_t a, uint32x4_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vmaxvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vmaxvq_p_u32(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminavq_p_s8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[A:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.minav.predicated.v16i8.v16i1(i32 [[TMP0]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8
				// CHECK-NEXT: ret i8 [[TMP4]]
				//
				uint8_t test_vminavq_p_s8(uint8_t a, int8x16_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vminavq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vminavq_p_s8(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminavq_p_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.minav.predicated.v8i16.v8i1(i32 [[TMP0]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
				// CHECK-NEXT: ret i16 [[TMP4]]
				//
				uint16_t test_vminavq_p_s16(uint16_t a, int16x8_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vminavq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vminavq_p_s16(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminavq_p_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.minav.predicated.v4i32.v4i1(i32 [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret i32 [[TMP2]]
				//
				uint32_t test_vminavq_p_s32(uint32_t a, int32x4_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vminavq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vminavq_p_s32(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxavq_p_s8(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[A:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.maxav.predicated.v16i8.v16i1(i32 [[TMP0]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8
				// CHECK-NEXT: ret i8 [[TMP4]]
				//
				uint8_t test_vmaxavq_p_s8(uint8_t a, int8x16_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vmaxavq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vmaxavq_p_s8(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxavq_p_s16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
				// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.maxav.predicated.v8i16.v8i1(i32 [[TMP0]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
				// CHECK-NEXT: ret i16 [[TMP4]]
				//
				uint16_t test_vmaxavq_p_s16(uint16_t a, int16x8_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vmaxavq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vmaxavq_p_s16(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxavq_p_s32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.maxav.predicated.v4i32.v4i1(i32 [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret i32 [[TMP2]]
				//
				uint32_t test_vmaxavq_p_s32(uint32_t a, int32x4_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vmaxavq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vmaxavq_p_s32(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminnmvq_p_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[A:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[TMP:%.*]] = alloca float, align 4
				// CHECK-NEXT: [[TMP2:%.*]] = alloca float, align 4
				// CHECK-NEXT: store float [[A_COERCE:%.*]], float* [[TMP]], align 4
				// CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[TMP]] to i8*
				// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[A]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP1]], i8* align 4 [[TMP0]], i32 2, i1 false)
				// CHECK-NEXT: [[A1:%.*]] = load half, half* [[A]], align 2
				// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.arm.mve.minnmv.predicated.f16.v8f16.v8i1(half [[A1]], <8 x half> [[B:%.*]], <8 x i1> [[TMP3]])
				// CHECK-NEXT: store half [[TMP4]], half* [[RETVAL]], align 2
				// CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP2]] to i8*
				// CHECK-NEXT: [[TMP6:%.*]] = bitcast half* [[RETVAL]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 2 [[TMP6]], i32 2, i1 false)
				// CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[TMP2]], align 4
				// CHECK-NEXT: ret float [[TMP7]]
				//
				float16_t test_vminnmvq_p_f16(float16_t a, float16x8_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vminnmvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vminnmvq_p_f16(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminnmvq_p_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.arm.mve.minnmv.predicated.f32.v4f32.v4i1(float [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret float [[TMP2]]
				//
				float32_t test_vminnmvq_p_f32(float32_t a, float32x4_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vminnmvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vminnmvq_p_f32(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminnmavq_p_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[A:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[TMP:%.*]] = alloca float, align 4
				// CHECK-NEXT: [[TMP2:%.*]] = alloca float, align 4
				// CHECK-NEXT: store float [[A_COERCE:%.*]], float* [[TMP]], align 4
				// CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[TMP]] to i8*
				// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[A]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP1]], i8* align 4 [[TMP0]], i32 2, i1 false)
				// CHECK-NEXT: [[A1:%.*]] = load half, half* [[A]], align 2
				// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.arm.mve.minnmav.predicated.f16.v8f16.v8i1(half [[A1]], <8 x half> [[B:%.*]], <8 x i1> [[TMP3]])
				// CHECK-NEXT: store half [[TMP4]], half* [[RETVAL]], align 2
				// CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP2]] to i8*
				// CHECK-NEXT: [[TMP6:%.*]] = bitcast half* [[RETVAL]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 2 [[TMP6]], i32 2, i1 false)
				// CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[TMP2]], align 4
				// CHECK-NEXT: ret float [[TMP7]]
				//
				float16_t test_vminnmavq_p_f16(float16_t a, float16x8_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vminnmavq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vminnmavq_p_f16(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vminnmavq_p_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.arm.mve.minnmav.predicated.f32.v4f32.v4i1(float [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret float [[TMP2]]
				//
				float32_t test_vminnmavq_p_f32(float32_t a, float32x4_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vminnmavq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vminnmavq_p_f32(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxnmvq_p_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[A:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[TMP:%.*]] = alloca float, align 4
				// CHECK-NEXT: [[TMP2:%.*]] = alloca float, align 4
				// CHECK-NEXT: store float [[A_COERCE:%.*]], float* [[TMP]], align 4
				// CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[TMP]] to i8*
				// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[A]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP1]], i8* align 4 [[TMP0]], i32 2, i1 false)
				// CHECK-NEXT: [[A1:%.*]] = load half, half* [[A]], align 2
				// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.arm.mve.maxnmv.predicated.f16.v8f16.v8i1(half [[A1]], <8 x half> [[B:%.*]], <8 x i1> [[TMP3]])
				// CHECK-NEXT: store half [[TMP4]], half* [[RETVAL]], align 2
				// CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP2]] to i8*
				// CHECK-NEXT: [[TMP6:%.*]] = bitcast half* [[RETVAL]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 2 [[TMP6]], i32 2, i1 false)
				// CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[TMP2]], align 4
				// CHECK-NEXT: ret float [[TMP7]]
				//
				float16_t test_vmaxnmvq_p_f16(float16_t a, float16x8_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vmaxnmvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vmaxnmvq_p_f16(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxnmvq_p_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.arm.mve.maxnmv.predicated.f32.v4f32.v4i1(float [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret float [[TMP2]]
				//
				float32_t test_vmaxnmvq_p_f32(float32_t a, float32x4_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vmaxnmvq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vmaxnmvq_p_f32(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxnmavq_p_f16(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[A:%.*]] = alloca half, align 2
				// CHECK-NEXT: [[TMP:%.*]] = alloca float, align 4
				// CHECK-NEXT: [[TMP2:%.*]] = alloca float, align 4
				// CHECK-NEXT: store float [[A_COERCE:%.*]], float* [[TMP]], align 4
				// CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[TMP]] to i8*
				// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[A]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP1]], i8* align 4 [[TMP0]], i32 2, i1 false)
				// CHECK-NEXT: [[A1:%.*]] = load half, half* [[A]], align 2
				// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
				// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.arm.mve.maxnmav.predicated.f16.v8f16.v8i1(half [[A1]], <8 x half> [[B:%.*]], <8 x i1> [[TMP3]])
				// CHECK-NEXT: store half [[TMP4]], half* [[RETVAL]], align 2
				// CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP2]] to i8*
				// CHECK-NEXT: [[TMP6:%.*]] = bitcast half* [[RETVAL]] to i8*
				// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 2 [[TMP6]], i32 2, i1 false)
				// CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[TMP2]], align 4
				// CHECK-NEXT: ret float [[TMP7]]
				//
				float16_t test_vmaxnmavq_p_f16(float16_t a, float16x8_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vmaxnmavq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vmaxnmavq_p_f16(a, b, p);
				#endif /* POLYMORPHIC */
				}

				// CHECK-LABEL: @test_vmaxnmavq_p_f32(
				// CHECK-NEXT: entry:
				// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
				// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
				// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.arm.mve.maxnmav.predicated.f32.v4f32.v4i1(float [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
				// CHECK-NEXT: ret float [[TMP2]]
				//
				float32_t test_vmaxnmavq_p_f32(float32_t a, float32x4_t b, mve_pred16_t p) {
				#ifdef POLYMORPHIC
				return vmaxnmavq_p(a, b, p);
				#else /* POLYMORPHIC */
				return vmaxnmavq_p_f32(a, b, p);
				#endif /* POLYMORPHIC */
				}

llvm/include/llvm/IR/IntrinsicsARM.td

Show First 20 Lines • Show All 792 Lines • ▼ Show 20 Lines

def int_arm_mve_pred_i2v : Intrinsic<		def int_arm_mve_pred_i2v : Intrinsic<
[llvm_anyvector_ty], [llvm_i32_ty], [IntrNoMem]>;		[llvm_anyvector_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_arm_mve_pred_v2i : Intrinsic<		def int_arm_mve_pred_v2i : Intrinsic<
[llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem]>;		[llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_arm_mve_vreinterpretq : Intrinsic<		def int_arm_mve_vreinterpretq : Intrinsic<
[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;		[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;

multiclass IntrinsicSignSuffix<list<LLVMType> rets, list<LLVMType> params = [],
list<IntrinsicProperty> props = [],
string name = "",
list<SDNodeProperty> sdprops = []> {
def _s: Intrinsic<rets, params, props, name, sdprops>;
def _u: Intrinsic<rets, params, props, name, sdprops>;
}

def int_arm_mve_min_predicated: Intrinsic<[llvm_anyvector_ty],		def int_arm_mve_min_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,		[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
llvm_anyvector_ty, LLVMMatchType<0>],		llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;		[IntrNoMem]>;
def int_arm_mve_max_predicated: Intrinsic<[llvm_anyvector_ty],		def int_arm_mve_max_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,		[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
llvm_anyvector_ty, LLVMMatchType<0>],		llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;		[IntrNoMem]>;
▲ Show 20 Lines • Show All 69 Lines • ▼ Show 20 Lines	[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
[IntrNoMem]>;		[IntrNoMem]>;
def int_arm_mve_vminnma_predicated: Intrinsic<[llvm_anyvector_ty],		def int_arm_mve_vminnma_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],		[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
[IntrNoMem]>;		[IntrNoMem]>;
def int_arm_mve_vmaxnma_predicated: Intrinsic<[llvm_anyvector_ty],		def int_arm_mve_vmaxnma_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],		[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
[IntrNoMem]>;		[IntrNoMem]>;

defm int_arm_mve_minv: IntrinsicSignSuffix<[llvm_i32_ty],
[llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
defm int_arm_mve_maxv: IntrinsicSignSuffix<[llvm_i32_ty],
[llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;

multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params,		multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params,
LLVMType pred = llvm_anyvector_ty,		LLVMType pred = llvm_anyvector_ty,
list<IntrinsicProperty> props = [IntrNoMem]> {		list<IntrinsicProperty> props = [IntrNoMem]> {
def "": Intrinsic<rets, params, props>;		def "": Intrinsic<rets, params, props>;
def _predicated: Intrinsic<rets, params # [pred], props>;		def _predicated: Intrinsic<rets, params # [pred], props>;
}		}
multiclass MVEPredicatedM<list<LLVMType> rets, list<LLVMType> params,		multiclass MVEPredicatedM<list<LLVMType> rets, list<LLVMType> params,
LLVMType pred = llvm_anyvector_ty,		LLVMType pred = llvm_anyvector_ty,
list<IntrinsicProperty> props = [IntrNoMem]> {		list<IntrinsicProperty> props = [IntrNoMem]> {
def "": Intrinsic<rets, params, props>;		def "": Intrinsic<rets, params, props>;
def _predicated: Intrinsic<rets, params # [pred,		def _predicated: Intrinsic<rets, params # [pred,
!if(!eq(!cast<string>(rets[0]), "llvm_anyvector_ty"),		!if(!eq(!cast<string>(rets[0]), "llvm_anyvector_ty"),
LLVMMatchType<0>, rets[0])], props>;		LLVMMatchType<0>, rets[0])], props>;
}		}

		multiclass MVE_minmaxv {
		defm v: MVEPredicated<[llvm_i32_ty],
		[llvm_i32_ty, llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>;
		defm av: MVEPredicated<[llvm_i32_ty],
		[llvm_i32_ty, llvm_anyvector_ty]>;
		defm nmv: MVEPredicated<[llvm_anyfloat_ty],
		[LLVMMatchType<0>, llvm_anyvector_ty]>;
		defm nmav: MVEPredicated<[llvm_anyfloat_ty],
		[LLVMMatchType<0>, llvm_anyvector_ty]>;
		}
		defm int_arm_mve_min: MVE_minmaxv;
		defm int_arm_mve_max: MVE_minmaxv;

// Intrinsic with a predicated and a non-predicated case. The predicated case		// Intrinsic with a predicated and a non-predicated case. The predicated case
// has two additional parameters: inactive (the value for inactive lanes, can		// has two additional parameters: inactive (the value for inactive lanes, can
// be undef) and predicate.		// be undef) and predicate.
multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags,		multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags,
list<LLVMType> params, LLVMType inactive,		list<LLVMType> params, LLVMType inactive,
LLVMType predicate,		LLVMType predicate,
list<IntrinsicProperty> props = [IntrNoMem]> {		list<IntrinsicProperty> props = [IntrNoMem]> {
def "": Intrinsic<rets, flags # params, props>;		def "": Intrinsic<rets, flags # params, props>;
▲ Show 20 Lines • Show All 362 Lines • Show Last 20 Lines

llvm/lib/Target/ARM/ARMISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 14,338 Lines • ▼ Show 20 Lines	case Intrinsic::arm_mve_vqrdmlash_predicated: {
// any bits of that operand above that point, which allows us to eliminate		// any bits of that operand above that point, which allows us to eliminate
// uxth/sxth.		// uxth/sxth.
unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();		unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);		APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))		if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
return SDValue();		return SDValue();
break;		break;
}		}

		case Intrinsic::arm_mve_minv:
		case Intrinsic::arm_mve_maxv:
		case Intrinsic::arm_mve_minav:
		case Intrinsic::arm_mve_maxav:
		case Intrinsic::arm_mve_minv_predicated:
		case Intrinsic::arm_mve_maxv_predicated:
		case Intrinsic::arm_mve_minav_predicated:
		case Intrinsic::arm_mve_maxav_predicated: {
		// These intrinsics all take an i32 scalar operand which is narrowed to the
		// size of a single lane of the vector type they take as the other input.
		unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
		APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
		if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
		return SDValue();
		break;
		}
}		}

return SDValue();		return SDValue();
}		}

/// PerformShiftCombine - Checks for immediate versions of vector shifts and		/// PerformShiftCombine - Checks for immediate versions of vector shifts and
/// lowers them. As with the vector shift intrinsics, this is done during DAG		/// lowers them. As with the vector shift intrinsics, this is done during DAG
/// combining instead of DAG legalizing because the build_vectors for 64-bit		/// combining instead of DAG legalizing because the build_vectors for 64-bit
▲ Show 20 Lines • Show All 3,620 Lines • Show Last 20 Lines

llvm/lib/Target/ARM/ARMInstrMVE.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 736 Lines • ▼ Show 20 Lines	class MVE_VMINMAXNMV<string iname, string suffix, bit sz,
let Inst{7} = bit_7;		let Inst{7} = bit_7;
let Inst{6-5} = 0b00;		let Inst{6-5} = 0b00;
let Inst{3-1} = Qm{2-0};		let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;		let Inst{0} = 0b0;

let Predicates = [HasMVEFloat];		let Predicates = [HasMVEFloat];
}		}

multiclass MVE_VMINMAXNMV_fty<string iname, bit bit_7, list<dag> pattern=[]> {		multiclass MVE_VMINMAXNMV_p<string iname, bit notAbs, bit isMin,
def f32 : MVE_VMINMAXNMV<iname, "f32", 0b0, 0b1, bit_7, pattern>;		MVEVectorVTInfo VTI, string intrBaseName,
def f16 : MVE_VMINMAXNMV<iname, "f16", 0b1, 0b1, bit_7, pattern>;		ValueType Scalar, RegisterClass ScalarReg> {
}		def "": MVE_VMINMAXNMV<iname, VTI.Suffix, VTI.Size{0}, notAbs, isMin>;
		defvar Inst = !cast<Instruction>(NAME);
defm MVE_VMINNMV : MVE_VMINMAXNMV_fty<"vminnmv", 0b1>;		defvar unpred_intr = !cast<Intrinsic>(intrBaseName);
defm MVE_VMAXNMV : MVE_VMINMAXNMV_fty<"vmaxnmv", 0b0>;		defvar pred_intr = !cast<Intrinsic>(intrBaseName#"_predicated");

multiclass MVE_VMINMAXNMAV_fty<string iname, bit bit_7, list<dag> pattern=[]> {		let Predicates = [HasMVEFloat] in {
def f32 : MVE_VMINMAXNMV<iname, "f32", 0b0, 0b0, bit_7, pattern>;		def : Pat<(Scalar (unpred_intr (Scalar ScalarReg:$prev),
def f16 : MVE_VMINMAXNMV<iname, "f16", 0b1, 0b0, bit_7, pattern>;		(VTI.Vec MQPR:$vec))),
		(COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS ScalarReg:$prev, rGPR),
		(VTI.Vec MQPR:$vec)),
		ScalarReg)>;
		def : Pat<(Scalar (pred_intr (Scalar ScalarReg:$prev),
		(VTI.Vec MQPR:$vec),
		(VTI.Pred VCCR:$pred))),
		(COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS ScalarReg:$prev, rGPR),
		(VTI.Vec MQPR:$vec),
		ARMVCCThen, (VTI.Pred VCCR:$pred)),
		ScalarReg)>;
		}
}		}

defm MVE_VMINNMAV : MVE_VMINMAXNMAV_fty<"vminnmav", 0b1>;		multiclass MVE_VMINMAXNMV_fty<string iname, bit notAbs, bit isMin,
defm MVE_VMAXNMAV : MVE_VMINMAXNMAV_fty<"vmaxnmav", 0b0>;		string intrBase> {
		defm f32 : MVE_VMINMAXNMV_p<iname, notAbs, isMin, MVE_v4f32, intrBase,
		f32, SPR>;
		defm f16 : MVE_VMINMAXNMV_p<iname, notAbs, isMin, MVE_v8f16, intrBase,
		f16, HPR>;
		}

		defm MVE_VMINNMV : MVE_VMINMAXNMV_fty<"vminnmv", 1, 1, "int_arm_mve_minnmv">;
		defm MVE_VMAXNMV : MVE_VMINMAXNMV_fty<"vmaxnmv", 1, 0, "int_arm_mve_maxnmv">;
		defm MVE_VMINNMAV: MVE_VMINMAXNMV_fty<"vminnmav", 0, 1, "int_arm_mve_minnmav">;
		defm MVE_VMAXNMAV: MVE_VMINMAXNMV_fty<"vmaxnmav", 0, 0, "int_arm_mve_maxnmav">;

class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,		class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
bit bit_17, bit bit_7, list<dag> pattern=[]>		bit bit_17, bit bit_7, list<dag> pattern=[]>
: MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm), NoItinerary,		: MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm), NoItinerary,
iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", pattern> {		iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", pattern> {
bits<3> Qm;		bits<3> Qm;
bits<4> RdaDest;		bits<4> RdaDest;

let Inst{28} = U;		let Inst{28} = U;
let Inst{22-20} = 0b110;		let Inst{22-20} = 0b110;
let Inst{19-18} = size{1-0};		let Inst{19-18} = size{1-0};
let Inst{17} = bit_17;		let Inst{17} = bit_17;
let Inst{16} = 0b0;		let Inst{16} = 0b0;
let Inst{15-12} = RdaDest{3-0};		let Inst{15-12} = RdaDest{3-0};
let Inst{8} = 0b1;		let Inst{8} = 0b1;
let Inst{7} = bit_7;		let Inst{7} = bit_7;
let Inst{6-5} = 0b00;		let Inst{6-5} = 0b00;
let Inst{3-1} = Qm{2-0};		let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;		let Inst{0} = 0b0;
}		}

multiclass MVE_VMINMAXV_p<string iname, bit bit_17, bit bit_7,		multiclass MVE_VMINMAXV_p<string iname, bit notAbs, bit isMin,
MVEVectorVTInfo VTI, Intrinsic intr> {		MVEVectorVTInfo VTI, string intrBaseName> {
def "": MVE_VMINMAXV<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,		def "": MVE_VMINMAXV<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
bit_17, bit_7>;		notAbs, isMin>;
defvar Inst = !cast<Instruction>(NAME);		defvar Inst = !cast<Instruction>(NAME);
		defvar unpred_intr = !cast<Intrinsic>(intrBaseName);
		defvar pred_intr = !cast<Intrinsic>(intrBaseName#"_predicated");
		defvar base_args = (? (i32 rGPR:$prev), (VTI.Vec MQPR:$vec));
		defvar args = !if(notAbs, !con(base_args, (? (i32 VTI.Unsigned))),
		base_args);

let Predicates = [HasMVEInt] in		let Predicates = [HasMVEInt] in {
def _pat : Pat<(i32 (intr (i32 rGPR:$prev), (VTI.Vec MQPR:$vec))),		def : Pat<(i32 !con(args, (unpred_intr))),
(i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)))>;		(i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)))>;
		def : Pat<(i32 !con(args, (pred_intr (VTI.Pred VCCR:$pred)))),
		(i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec),
		ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
		}
}		}

multiclass MVE_VMINMAXV_ty<string iname, bit bit_7,		multiclass MVE_VMINMAXV_ty<string iname, bit isMin, string intrBaseName> {
Intrinsic intr_s, Intrinsic intr_u> {		defm s8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16s8, intrBaseName>;
defm s8 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16s8, intr_s>;		defm s16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8s16, intrBaseName>;
defm s16: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8s16, intr_s>;		defm s32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4s32, intrBaseName>;
defm s32: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4s32, intr_s>;		defm u8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16u8, intrBaseName>;
defm u8 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16u8, intr_u>;		defm u16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8u16, intrBaseName>;
defm u16: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8u16, intr_u>;		defm u32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4u32, intrBaseName>;
defm u32: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4u32, intr_u>;
}		}

defm MVE_VMINV : MVE_VMINMAXV_ty<		defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">;
"vminv", 0b1, int_arm_mve_minv_s, int_arm_mve_minv_u>;		defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0, "int_arm_mve_maxv">;
defm MVE_VMAXV : MVE_VMINMAXV_ty<
"vmaxv", 0b0, int_arm_mve_maxv_s, int_arm_mve_maxv_u>;

let Predicates = [HasMVEInt] in {		let Predicates = [HasMVEInt] in {
def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),		def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),
(i32 (MVE_VMAXVs8 (t2MVNi (i32 127)), $src))>;		(i32 (MVE_VMAXVs8 (t2MVNi (i32 127)), $src))>;
def : Pat<(i32 (vecreduce_smax (v8i16 MQPR:$src))),		def : Pat<(i32 (vecreduce_smax (v8i16 MQPR:$src))),
(i32 (MVE_VMAXVs16 (t2MOVi32imm (i32 -32768)), $src))>;		(i32 (MVE_VMAXVs16 (t2MOVi32imm (i32 -32768)), $src))>;
def : Pat<(i32 (vecreduce_smax (v4i32 MQPR:$src))),		def : Pat<(i32 (vecreduce_smax (v4i32 MQPR:$src))),
(i32 (MVE_VMAXVs32 (t2MOVi (i32 -2147483648)), $src))>;		(i32 (MVE_VMAXVs32 (t2MOVi (i32 -2147483648)), $src))>;
Show All 14 Lines	def : Pat<(i32 (vecreduce_umin (v16i8 MQPR:$src))),
(i32 (MVE_VMINVu8 (t2MOVi (i32 255)), $src))>;		(i32 (MVE_VMINVu8 (t2MOVi (i32 255)), $src))>;
def : Pat<(i32 (vecreduce_umin (v8i16 MQPR:$src))),		def : Pat<(i32 (vecreduce_umin (v8i16 MQPR:$src))),
(i32 (MVE_VMINVu16 (t2MOVi16 (i32 65535)), $src))>;		(i32 (MVE_VMINVu16 (t2MOVi16 (i32 65535)), $src))>;
def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))),		def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))),
(i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>;		(i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>;

}		}

multiclass MVE_VMINMAXAV_ty<string iname, bit bit_7, list<dag> pattern=[]> {		multiclass MVE_VMINMAXAV_ty<string iname, bit isMin, string intrBaseName> {
def s8 : MVE_VMINMAXV<iname, "s8", 0b0, 0b00, 0b0, bit_7>;		defm s8 : MVE_VMINMAXV_p<iname, 0, isMin, MVE_v16s8, intrBaseName>;
def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b0, bit_7>;		defm s16: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v8s16, intrBaseName>;
def s32 : MVE_VMINMAXV<iname, "s32", 0b0, 0b10, 0b0, bit_7>;		defm s32: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v4s32, intrBaseName>;
}		}

defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 0b1>;		defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 1, "int_arm_mve_minav">;
defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0b0>;		defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0, "int_arm_mve_maxav">;

class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,		class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0>		bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0>
: MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix,		: MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix,
"$RdaDest, $Qn, $Qm", cstr, []> {		"$RdaDest, $Qn, $Qm", cstr, []> {
bits<4> RdaDest;		bits<4> RdaDest;
bits<3> Qm;		bits<3> Qm;
bits<3> Qn;		bits<3> Qn;
▲ Show 20 Lines • Show All 6,128 Lines • Show Last 20 Lines

llvm/test/CodeGen/Thumb2/mve-intrinsics/vminvq.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s			; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

				define arm_aapcs_vfpcc signext i8 @test_vminvq_s8(i8 signext %a, <16 x i8> %b) {
				; CHECK-LABEL: test_vminvq_s8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vminv.s8 r0, q0
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i8 %a to i32
				%1 = tail call i32 @llvm.arm.mve.minv.v16i8(i32 %0, <16 x i8> %b, i32 0)
				%2 = trunc i32 %1 to i8
				ret i8 %2
				}

				define arm_aapcs_vfpcc signext i16 @test_vminvq_s16(i16 signext %a, <8 x i16> %b) {
				; CHECK-LABEL: test_vminvq_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vminv.s16 r0, q0
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %a to i32
				%1 = tail call i32 @llvm.arm.mve.minv.v8i16(i32 %0, <8 x i16> %b, i32 0)
				%2 = trunc i32 %1 to i16
				ret i16 %2
				}

				define arm_aapcs_vfpcc i32 @test_vminvq_s32(i32 %a, <4 x i32> %b) {
				; CHECK-LABEL: test_vminvq_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vminv.s32 r0, q0
				; CHECK-NEXT: bx lr
				entry:
				%0 = tail call i32 @llvm.arm.mve.minv.v4i32(i32 %a, <4 x i32> %b, i32 0)
				ret i32 %0
				}

				define arm_aapcs_vfpcc zeroext i8 @test_vminvq_u8(i8 zeroext %a, <16 x i8> %b) {
				; CHECK-LABEL: test_vminvq_u8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vminv.u8 r0, q0
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i8 %a to i32
				%1 = tail call i32 @llvm.arm.mve.minv.v16i8(i32 %0, <16 x i8> %b, i32 1)
				%2 = trunc i32 %1 to i8
				ret i8 %2
				}

				define arm_aapcs_vfpcc zeroext i16 @test_vminvq_u16(i16 zeroext %a, <8 x i16> %b) {
				; CHECK-LABEL: test_vminvq_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vminv.u16 r0, q0
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %a to i32
				%1 = tail call i32 @llvm.arm.mve.minv.v8i16(i32 %0, <8 x i16> %b, i32 1)
				%2 = trunc i32 %1 to i16
				ret i16 %2
				}

	define arm_aapcs_vfpcc i32 @test_vminvq_u32(i32 %a, <4 x i32> %b) {			define arm_aapcs_vfpcc i32 @test_vminvq_u32(i32 %a, <4 x i32> %b) {
	; CHECK-LABEL: test_vminvq_u32:			; CHECK-LABEL: test_vminvq_u32:
	; CHECK: @ %bb.0: @ %entry			; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vminv.u32 r0, q0			; CHECK-NEXT: vminv.u32 r0, q0
	; CHECK-NEXT: bx lr			; CHECK-NEXT: bx lr
	entry:			entry:
	%0 = tail call i32 @llvm.arm.mve.minv.u.v4i32(i32 %a, <4 x i32> %b)			%0 = tail call i32 @llvm.arm.mve.minv.v4i32(i32 %a, <4 x i32> %b, i32 1)
				ret i32 %0
				}

				define arm_aapcs_vfpcc signext i8 @test_vmaxvq_s8(i8 signext %a, <16 x i8> %b) {
				; CHECK-LABEL: test_vmaxvq_s8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmaxv.s8 r0, q0
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i8 %a to i32
				%1 = tail call i32 @llvm.arm.mve.maxv.v16i8(i32 %0, <16 x i8> %b, i32 0)
				%2 = trunc i32 %1 to i8
				ret i8 %2
				}

				define arm_aapcs_vfpcc signext i16 @test_vmaxvq_s16(i16 signext %a, <8 x i16> %b) {
				; CHECK-LABEL: test_vmaxvq_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmaxv.s16 r0, q0
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %a to i32
				%1 = tail call i32 @llvm.arm.mve.maxv.v8i16(i32 %0, <8 x i16> %b, i32 0)
				%2 = trunc i32 %1 to i16
				ret i16 %2
				}

				define arm_aapcs_vfpcc i32 @test_vmaxvq_s32(i32 %a, <4 x i32> %b) {
				; CHECK-LABEL: test_vmaxvq_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmaxv.s32 r0, q0
				; CHECK-NEXT: bx lr
				entry:
				%0 = tail call i32 @llvm.arm.mve.maxv.v4i32(i32 %a, <4 x i32> %b, i32 0)
	ret i32 %0			ret i32 %0
	}			}

	define arm_aapcs_vfpcc i32 @test_vmaxvq_u8(i32 %a, <16 x i8> %b) {			define arm_aapcs_vfpcc zeroext i8 @test_vmaxvq_u8(i8 zeroext %a, <16 x i8> %b) {
	; CHECK-LABEL: test_vmaxvq_u8:			; CHECK-LABEL: test_vmaxvq_u8:
	; CHECK: @ %bb.0: @ %entry			; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vmaxv.u8 r0, q0			; CHECK-NEXT: vmaxv.u8 r0, q0
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i8 %a to i32
				%1 = tail call i32 @llvm.arm.mve.maxv.v16i8(i32 %0, <16 x i8> %b, i32 1)
				%2 = trunc i32 %1 to i8
				ret i8 %2
				}

				define arm_aapcs_vfpcc zeroext i16 @test_vmaxvq_u16(i16 zeroext %a, <8 x i16> %b) {
				; CHECK-LABEL: test_vmaxvq_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmaxv.u16 r0, q0
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %a to i32
				%1 = tail call i32 @llvm.arm.mve.maxv.v8i16(i32 %0, <8 x i16> %b, i32 1)
				%2 = trunc i32 %1 to i16
				ret i16 %2
				}

				define arm_aapcs_vfpcc i32 @test_vmaxvq_u32(i32 %a, <4 x i32> %b) {
				; CHECK-LABEL: test_vmaxvq_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmaxv.u32 r0, q0
	; CHECK-NEXT: bx lr			; CHECK-NEXT: bx lr
	entry:			entry:
	%0 = tail call i32 @llvm.arm.mve.maxv.u.v16i8(i32 %a, <16 x i8> %b)			%0 = tail call i32 @llvm.arm.mve.maxv.v4i32(i32 %a, <4 x i32> %b, i32 1)
	ret i32 %0			ret i32 %0
	}			}

	define arm_aapcs_vfpcc i32 @test_vminvq_s16(i32 %a, <8 x i16> %b) {			define arm_aapcs_vfpcc zeroext i8 @test_vminavq_s8(i8 zeroext %a, <16 x i8> %b) {
	; CHECK-LABEL: test_vminvq_s16:			; CHECK-LABEL: test_vminavq_s8:
	; CHECK: @ %bb.0: @ %entry			; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vminv.s16 r0, q0			; CHECK-NEXT: vminav.s8 r0, q0
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i8 %a to i32
				%1 = tail call i32 @llvm.arm.mve.minav.v16i8(i32 %0, <16 x i8> %b)
				%2 = trunc i32 %1 to i8
				ret i8 %2
				}

				define arm_aapcs_vfpcc zeroext i16 @test_vminavq_s16(i16 zeroext %a, <8 x i16> %b) {
				; CHECK-LABEL: test_vminavq_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vminav.s16 r0, q0
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %a to i32
				%1 = tail call i32 @llvm.arm.mve.minav.v8i16(i32 %0, <8 x i16> %b)
				%2 = trunc i32 %1 to i16
				ret i16 %2
				}

				define arm_aapcs_vfpcc i32 @test_vminavq_s32(i32 %a, <4 x i32> %b) {
				; CHECK-LABEL: test_vminavq_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vminav.s32 r0, q0
				; CHECK-NEXT: bx lr
				entry:
				%0 = tail call i32 @llvm.arm.mve.minav.v4i32(i32 %a, <4 x i32> %b)
				ret i32 %0
				}

				define arm_aapcs_vfpcc zeroext i8 @test_vmaxavq_s8(i8 zeroext %a, <16 x i8> %b) {
				; CHECK-LABEL: test_vmaxavq_s8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmaxav.s8 r0, q0
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i8 %a to i32
				%1 = tail call i32 @llvm.arm.mve.maxav.v16i8(i32 %0, <16 x i8> %b)
				%2 = trunc i32 %1 to i8
				ret i8 %2
				}

				define arm_aapcs_vfpcc zeroext i16 @test_vmaxavq_s16(i16 zeroext %a, <8 x i16> %b) {
				; CHECK-LABEL: test_vmaxavq_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmaxav.s16 r0, q0
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %a to i32
				%1 = tail call i32 @llvm.arm.mve.maxav.v8i16(i32 %0, <8 x i16> %b)
				%2 = trunc i32 %1 to i16
				ret i16 %2
				}

				define arm_aapcs_vfpcc i32 @test_vmaxavq_s32(i32 %a, <4 x i32> %b) {
				; CHECK-LABEL: test_vmaxavq_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmaxav.s32 r0, q0
	; CHECK-NEXT: bx lr			; CHECK-NEXT: bx lr
	entry:			entry:
	%0 = tail call i32 @llvm.arm.mve.minv.s.v8i16(i32 %a, <8 x i16> %b)			%0 = tail call i32 @llvm.arm.mve.maxav.v4i32(i32 %a, <4 x i32> %b)
	ret i32 %0			ret i32 %0
	}			}

	declare i32 @llvm.arm.mve.minv.u.v4i32(i32, <4 x i32>)			define arm_aapcs_vfpcc float @test_vminnmvq_f16(float %a.coerce, <8 x half> %b) {
	declare i32 @llvm.arm.mve.maxv.u.v16i8(i32, <16 x i8>)			; CHECK-LABEL: test_vminnmvq_f16:
	declare i32 @llvm.arm.mve.minv.s.v8i16(i32, <8 x i16>)			; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: sub sp, #4
				; CHECK-NEXT: vmov r0, s0
				; CHECK-NEXT: vminnmv.f16 r0, q1
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: vstr.16 s0, [sp, #2]
				; CHECK-NEXT: ldrh.w r0, [sp, #2]
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: add sp, #4
				; CHECK-NEXT: bx lr
				entry:
				%0 = bitcast float %a.coerce to i32
				%tmp.0.extract.trunc = trunc i32 %0 to i16
				%1 = bitcast i16 %tmp.0.extract.trunc to half
				%2 = tail call half @llvm.arm.mve.minnmv.f16.v8f16(half %1, <8 x half> %b)
				%3 = bitcast half %2 to i16
				%tmp2.0.insert.ext = zext i16 %3 to i32
				%4 = bitcast i32 %tmp2.0.insert.ext to float
				ret float %4
				}

				define arm_aapcs_vfpcc float @test_vminnmvq_f32(float %a, <4 x float> %b) {
				; CHECK-LABEL: test_vminnmvq_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmov r0, s0
				; CHECK-NEXT: vminnmv.f32 r0, q1
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = tail call float @llvm.arm.mve.minnmv.f32.v4f32(float %a, <4 x float> %b)
				ret float %0
				}

				define arm_aapcs_vfpcc float @test_vminnmavq_f16(float %a.coerce, <8 x half> %b) {
				; CHECK-LABEL: test_vminnmavq_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: sub sp, #4
				; CHECK-NEXT: vmov r0, s0
				; CHECK-NEXT: vminnmav.f16 r0, q1
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: vstr.16 s0, [sp, #2]
				; CHECK-NEXT: ldrh.w r0, [sp, #2]
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: add sp, #4
				; CHECK-NEXT: bx lr
				entry:
				%0 = bitcast float %a.coerce to i32
				%tmp.0.extract.trunc = trunc i32 %0 to i16
				%1 = bitcast i16 %tmp.0.extract.trunc to half
				%2 = tail call half @llvm.arm.mve.minnmav.f16.v8f16(half %1, <8 x half> %b)
				%3 = bitcast half %2 to i16
				%tmp2.0.insert.ext = zext i16 %3 to i32
				%4 = bitcast i32 %tmp2.0.insert.ext to float
				ret float %4
				}

				define arm_aapcs_vfpcc float @test_vminnmavq_f32(float %a, <4 x float> %b) {
				; CHECK-LABEL: test_vminnmavq_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmov r0, s0
				; CHECK-NEXT: vminnmav.f32 r0, q1
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = tail call float @llvm.arm.mve.minnmav.f32.v4f32(float %a, <4 x float> %b)
				ret float %0
				}

				define arm_aapcs_vfpcc float @test_vmaxnmvq_f16(float %a.coerce, <8 x half> %b) {
				; CHECK-LABEL: test_vmaxnmvq_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: sub sp, #4
				; CHECK-NEXT: vmov r0, s0
				; CHECK-NEXT: vmaxnmv.f16 r0, q1
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: vstr.16 s0, [sp, #2]
				; CHECK-NEXT: ldrh.w r0, [sp, #2]
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: add sp, #4
				; CHECK-NEXT: bx lr
				entry:
				%0 = bitcast float %a.coerce to i32
				%tmp.0.extract.trunc = trunc i32 %0 to i16
				%1 = bitcast i16 %tmp.0.extract.trunc to half
				%2 = tail call half @llvm.arm.mve.maxnmv.f16.v8f16(half %1, <8 x half> %b)
				%3 = bitcast half %2 to i16
				%tmp2.0.insert.ext = zext i16 %3 to i32
				%4 = bitcast i32 %tmp2.0.insert.ext to float
				ret float %4
				}

				define arm_aapcs_vfpcc float @test_vmaxnmvq_f32(float %a, <4 x float> %b) {
				; CHECK-LABEL: test_vmaxnmvq_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmov r0, s0
				; CHECK-NEXT: vmaxnmv.f32 r0, q1
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = tail call float @llvm.arm.mve.maxnmv.f32.v4f32(float %a, <4 x float> %b)
				ret float %0
				}

				define arm_aapcs_vfpcc float @test_vmaxnmavq_f16(float %a.coerce, <8 x half> %b) {
				; CHECK-LABEL: test_vmaxnmavq_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: sub sp, #4
				; CHECK-NEXT: vmov r0, s0
				; CHECK-NEXT: vmaxnmav.f16 r0, q1
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: vstr.16 s0, [sp, #2]
				; CHECK-NEXT: ldrh.w r0, [sp, #2]
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: add sp, #4
				; CHECK-NEXT: bx lr
				entry:
				%0 = bitcast float %a.coerce to i32
				%tmp.0.extract.trunc = trunc i32 %0 to i16
				%1 = bitcast i16 %tmp.0.extract.trunc to half
				%2 = tail call half @llvm.arm.mve.maxnmav.f16.v8f16(half %1, <8 x half> %b)
				%3 = bitcast half %2 to i16
				%tmp2.0.insert.ext = zext i16 %3 to i32
				%4 = bitcast i32 %tmp2.0.insert.ext to float
				ret float %4
				}

				define arm_aapcs_vfpcc float @test_vmaxnmavq_f32(float %a, <4 x float> %b) {
				; CHECK-LABEL: test_vmaxnmavq_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmov r0, s0
				; CHECK-NEXT: vmaxnmav.f32 r0, q1
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = tail call float @llvm.arm.mve.maxnmav.f32.v4f32(float %a, <4 x float> %b)
				ret float %0
				}

				define arm_aapcs_vfpcc signext i8 @test_vminvq_p_s8(i8 signext %a, <16 x i8> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vminvq_p_s8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vminvt.s8 r0, q0
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i8 %a to i32
				%1 = zext i16 %p to i32
				%2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
				%3 = tail call i32 @llvm.arm.mve.minv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 0, <16 x i1> %2)
				%4 = trunc i32 %3 to i8
				ret i8 %4
				}

				define arm_aapcs_vfpcc signext i16 @test_vminvq_p_s16(i16 signext %a, <8 x i16> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vminvq_p_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vminvt.s16 r0, q0
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %a to i32
				%1 = zext i16 %p to i32
				%2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
				%3 = tail call i32 @llvm.arm.mve.minv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 0, <8 x i1> %2)
				%4 = trunc i32 %3 to i16
				ret i16 %4
				}

				define arm_aapcs_vfpcc i32 @test_vminvq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vminvq_p_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vminvt.s32 r0, q0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = tail call i32 @llvm.arm.mve.minv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 0, <4 x i1> %1)
				ret i32 %2
				}

				define arm_aapcs_vfpcc zeroext i8 @test_vminvq_p_u8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vminvq_p_u8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vminvt.u8 r0, q0
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i8 %a to i32
				%1 = zext i16 %p to i32
				%2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
				%3 = tail call i32 @llvm.arm.mve.minv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 1, <16 x i1> %2)
				%4 = trunc i32 %3 to i8
				ret i8 %4
				}

				define arm_aapcs_vfpcc zeroext i16 @test_vminvq_p_u16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vminvq_p_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vminvt.u16 r0, q0
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %a to i32
				%1 = zext i16 %p to i32
				%2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
				%3 = tail call i32 @llvm.arm.mve.minv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 1, <8 x i1> %2)
				%4 = trunc i32 %3 to i16
				ret i16 %4
				}

				define arm_aapcs_vfpcc i32 @test_vminvq_p_u32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vminvq_p_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vminvt.u32 r0, q0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = tail call i32 @llvm.arm.mve.minv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 1, <4 x i1> %1)
				ret i32 %2
				}

				define arm_aapcs_vfpcc signext i8 @test_vmaxvq_p_s8(i8 signext %a, <16 x i8> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vmaxvq_p_s8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vmaxvt.s8 r0, q0
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i8 %a to i32
				%1 = zext i16 %p to i32
				%2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
				%3 = tail call i32 @llvm.arm.mve.maxv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 0, <16 x i1> %2)
				%4 = trunc i32 %3 to i8
				ret i8 %4
				}

				define arm_aapcs_vfpcc signext i16 @test_vmaxvq_p_s16(i16 signext %a, <8 x i16> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vmaxvq_p_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vmaxvt.s16 r0, q0
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %a to i32
				%1 = zext i16 %p to i32
				%2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
				%3 = tail call i32 @llvm.arm.mve.maxv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 0, <8 x i1> %2)
				%4 = trunc i32 %3 to i16
				ret i16 %4
				}

				define arm_aapcs_vfpcc i32 @test_vmaxvq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vmaxvq_p_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vmaxvt.s32 r0, q0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = tail call i32 @llvm.arm.mve.maxv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 0, <4 x i1> %1)
				ret i32 %2
				}

				define arm_aapcs_vfpcc zeroext i8 @test_vmaxvq_p_u8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vmaxvq_p_u8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vmaxvt.u8 r0, q0
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i8 %a to i32
				%1 = zext i16 %p to i32
				%2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
				%3 = tail call i32 @llvm.arm.mve.maxv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 1, <16 x i1> %2)
				%4 = trunc i32 %3 to i8
				ret i8 %4
				}

				define arm_aapcs_vfpcc zeroext i16 @test_vmaxvq_p_u16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vmaxvq_p_u16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vmaxvt.u16 r0, q0
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %a to i32
				%1 = zext i16 %p to i32
				%2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
				%3 = tail call i32 @llvm.arm.mve.maxv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 1, <8 x i1> %2)
				%4 = trunc i32 %3 to i16
				ret i16 %4
				}

				define arm_aapcs_vfpcc i32 @test_vmaxvq_p_u32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vmaxvq_p_u32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vmaxvt.u32 r0, q0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = tail call i32 @llvm.arm.mve.maxv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 1, <4 x i1> %1)
				ret i32 %2
				}

				define arm_aapcs_vfpcc zeroext i8 @test_vminavq_p_s8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vminavq_p_s8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vminavt.s8 r0, q0
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i8 %a to i32
				%1 = zext i16 %p to i32
				%2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
				%3 = tail call i32 @llvm.arm.mve.minav.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, <16 x i1> %2)
				%4 = trunc i32 %3 to i8
				ret i8 %4
				}

				define arm_aapcs_vfpcc zeroext i16 @test_vminavq_p_s16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vminavq_p_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vminavt.s16 r0, q0
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %a to i32
				%1 = zext i16 %p to i32
				%2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
				%3 = tail call i32 @llvm.arm.mve.minav.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, <8 x i1> %2)
				%4 = trunc i32 %3 to i16
				ret i16 %4
				}

				define arm_aapcs_vfpcc i32 @test_vminavq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vminavq_p_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vminavt.s32 r0, q0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = tail call i32 @llvm.arm.mve.minav.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, <4 x i1> %1)
				ret i32 %2
				}

				define arm_aapcs_vfpcc zeroext i8 @test_vmaxavq_p_s8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vmaxavq_p_s8:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vmaxavt.s8 r0, q0
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i8 %a to i32
				%1 = zext i16 %p to i32
				%2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
				%3 = tail call i32 @llvm.arm.mve.maxav.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, <16 x i1> %2)
				%4 = trunc i32 %3 to i8
				ret i8 %4
				}

				define arm_aapcs_vfpcc zeroext i16 @test_vmaxavq_p_s16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vmaxavq_p_s16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vmaxavt.s16 r0, q0
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %a to i32
				%1 = zext i16 %p to i32
				%2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
				%3 = tail call i32 @llvm.arm.mve.maxav.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, <8 x i1> %2)
				%4 = trunc i32 %3 to i16
				ret i16 %4
				}

				define arm_aapcs_vfpcc i32 @test_vmaxavq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vmaxavq_p_s32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r1
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vmaxavt.s32 r0, q0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = tail call i32 @llvm.arm.mve.maxav.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, <4 x i1> %1)
				ret i32 %2
				}

				define arm_aapcs_vfpcc float @test_vminnmvq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vminnmvq_p_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: sub sp, #4
				; CHECK-NEXT: vmov r1, s0
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vminnmvt.f16 r1, q1
				; CHECK-NEXT: vmov s0, r1
				; CHECK-NEXT: vstr.16 s0, [sp, #2]
				; CHECK-NEXT: ldrh.w r0, [sp, #2]
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: add sp, #4
				; CHECK-NEXT: bx lr
				entry:
				%0 = bitcast float %a.coerce to i32
				%tmp.0.extract.trunc = trunc i32 %0 to i16
				%1 = bitcast i16 %tmp.0.extract.trunc to half
				%2 = zext i16 %p to i32
				%3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
				%4 = tail call half @llvm.arm.mve.minnmv.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
				%5 = bitcast half %4 to i16
				%tmp2.0.insert.ext = zext i16 %5 to i32
				%6 = bitcast i32 %tmp2.0.insert.ext to float
				ret float %6
				}

				define arm_aapcs_vfpcc float @test_vminnmvq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vminnmvq_p_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vmov r0, s0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vminnmvt.f32 r0, q1
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = tail call float @llvm.arm.mve.minnmv.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
				ret float %2
				}

				define arm_aapcs_vfpcc float @test_vminnmavq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vminnmavq_p_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: sub sp, #4
				; CHECK-NEXT: vmov r1, s0
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vminnmavt.f16 r1, q1
				; CHECK-NEXT: vmov s0, r1
				; CHECK-NEXT: vstr.16 s0, [sp, #2]
				; CHECK-NEXT: ldrh.w r0, [sp, #2]
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: add sp, #4
				; CHECK-NEXT: bx lr
				entry:
				%0 = bitcast float %a.coerce to i32
				%tmp.0.extract.trunc = trunc i32 %0 to i16
				%1 = bitcast i16 %tmp.0.extract.trunc to half
				%2 = zext i16 %p to i32
				%3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
				%4 = tail call half @llvm.arm.mve.minnmav.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
				%5 = bitcast half %4 to i16
				%tmp2.0.insert.ext = zext i16 %5 to i32
				%6 = bitcast i32 %tmp2.0.insert.ext to float
				ret float %6
				}

				define arm_aapcs_vfpcc float @test_vminnmavq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vminnmavq_p_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vmov r0, s0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vminnmavt.f32 r0, q1
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = tail call float @llvm.arm.mve.minnmav.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
				ret float %2
				}

				define arm_aapcs_vfpcc float @test_vmaxnmvq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vmaxnmvq_p_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: sub sp, #4
				; CHECK-NEXT: vmov r1, s0
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vmaxnmvt.f16 r1, q1
				; CHECK-NEXT: vmov s0, r1
				; CHECK-NEXT: vstr.16 s0, [sp, #2]
				; CHECK-NEXT: ldrh.w r0, [sp, #2]
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: add sp, #4
				; CHECK-NEXT: bx lr
				entry:
				%0 = bitcast float %a.coerce to i32
				%tmp.0.extract.trunc = trunc i32 %0 to i16
				%1 = bitcast i16 %tmp.0.extract.trunc to half
				%2 = zext i16 %p to i32
				%3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
				%4 = tail call half @llvm.arm.mve.maxnmv.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
				%5 = bitcast half %4 to i16
				%tmp2.0.insert.ext = zext i16 %5 to i32
				%6 = bitcast i32 %tmp2.0.insert.ext to float
				ret float %6
				}

				define arm_aapcs_vfpcc float @test_vmaxnmvq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vmaxnmvq_p_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vmov r0, s0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vmaxnmvt.f32 r0, q1
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = tail call float @llvm.arm.mve.maxnmv.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
				ret float %2
				}

				define arm_aapcs_vfpcc float @test_vmaxnmavq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vmaxnmavq_p_f16:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: sub sp, #4
				; CHECK-NEXT: vmov r1, s0
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vmaxnmavt.f16 r1, q1
				; CHECK-NEXT: vmov s0, r1
				; CHECK-NEXT: vstr.16 s0, [sp, #2]
				; CHECK-NEXT: ldrh.w r0, [sp, #2]
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: add sp, #4
				; CHECK-NEXT: bx lr
				entry:
				%0 = bitcast float %a.coerce to i32
				%tmp.0.extract.trunc = trunc i32 %0 to i16
				%1 = bitcast i16 %tmp.0.extract.trunc to half
				%2 = zext i16 %p to i32
				%3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
				%4 = tail call half @llvm.arm.mve.maxnmav.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
				%5 = bitcast half %4 to i16
				%tmp2.0.insert.ext = zext i16 %5 to i32
				%6 = bitcast i32 %tmp2.0.insert.ext to float
				ret float %6
				}

				define arm_aapcs_vfpcc float @test_vmaxnmavq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
				; CHECK-LABEL: test_vmaxnmavq_p_f32:
				; CHECK: @ %bb.0: @ %entry
				; CHECK-NEXT: vmsr p0, r0
				; CHECK-NEXT: vmov r0, s0
				; CHECK-NEXT: vpst
				; CHECK-NEXT: vmaxnmavt.f32 r0, q1
				; CHECK-NEXT: vmov s0, r0
				; CHECK-NEXT: bx lr
				entry:
				%0 = zext i16 %p to i32
				%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
				%2 = tail call float @llvm.arm.mve.maxnmav.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
				ret float %2
				}

				declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
				declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
				declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

				declare i32 @llvm.arm.mve.minv.v16i8(i32, <16 x i8>, i32)
				declare i32 @llvm.arm.mve.minv.v8i16(i32, <8 x i16>, i32)
				declare i32 @llvm.arm.mve.minv.v4i32(i32, <4 x i32>, i32)
				declare i32 @llvm.arm.mve.maxv.v16i8(i32, <16 x i8>, i32)
				declare i32 @llvm.arm.mve.maxv.v8i16(i32, <8 x i16>, i32)
				declare i32 @llvm.arm.mve.maxv.v4i32(i32, <4 x i32>, i32)
				declare i32 @llvm.arm.mve.minav.v16i8(i32, <16 x i8>)
				declare i32 @llvm.arm.mve.minav.v8i16(i32, <8 x i16>)
				declare i32 @llvm.arm.mve.minav.v4i32(i32, <4 x i32>)
				declare i32 @llvm.arm.mve.maxav.v16i8(i32, <16 x i8>)
				declare i32 @llvm.arm.mve.maxav.v8i16(i32, <8 x i16>)
				declare i32 @llvm.arm.mve.maxav.v4i32(i32, <4 x i32>)
				declare i32 @llvm.arm.mve.minv.predicated.v16i8.v16i1(i32, <16 x i8>, i32, <16 x i1>)
				declare i32 @llvm.arm.mve.minv.predicated.v8i16.v8i1(i32, <8 x i16>, i32, <8 x i1>)
				declare i32 @llvm.arm.mve.minv.predicated.v4i32.v4i1(i32, <4 x i32>, i32, <4 x i1>)
				declare i32 @llvm.arm.mve.maxv.predicated.v16i8.v16i1(i32, <16 x i8>, i32, <16 x i1>)
				declare i32 @llvm.arm.mve.maxv.predicated.v8i16.v8i1(i32, <8 x i16>, i32, <8 x i1>)
				declare i32 @llvm.arm.mve.maxv.predicated.v4i32.v4i1(i32, <4 x i32>, i32, <4 x i1>)
				declare i32 @llvm.arm.mve.minav.predicated.v16i8.v16i1(i32, <16 x i8>, <16 x i1>)
				declare i32 @llvm.arm.mve.minav.predicated.v8i16.v8i1(i32, <8 x i16>, <8 x i1>)
				declare i32 @llvm.arm.mve.minav.predicated.v4i32.v4i1(i32, <4 x i32>, <4 x i1>)
				declare i32 @llvm.arm.mve.maxav.predicated.v16i8.v16i1(i32, <16 x i8>, <16 x i1>)
				declare i32 @llvm.arm.mve.maxav.predicated.v8i16.v8i1(i32, <8 x i16>, <8 x i1>)
				declare i32 @llvm.arm.mve.maxav.predicated.v4i32.v4i1(i32, <4 x i32>, <4 x i1>)

				declare half @llvm.arm.mve.minnmv.f16.v8f16(half, <8 x half>)
				declare half @llvm.arm.mve.minnmav.f16.v8f16(half, <8 x half>)
				declare half @llvm.arm.mve.maxnmv.f16.v8f16(half, <8 x half>)
				declare half @llvm.arm.mve.maxnmav.f16.v8f16(half, <8 x half>)
				declare half @llvm.arm.mve.minnmv.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)
				declare half @llvm.arm.mve.minnmav.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)
				declare half @llvm.arm.mve.maxnmv.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)
				declare half @llvm.arm.mve.maxnmav.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)

				declare float @llvm.arm.mve.minnmv.f32.v4f32(float, <4 x float>)
				declare float @llvm.arm.mve.minnmav.f32.v4f32(float, <4 x float>)
				declare float @llvm.arm.mve.maxnmv.f32.v4f32(float, <4 x float>)
				declare float @llvm.arm.mve.maxnmav.f32.v4f32(float, <4 x float>)
				declare float @llvm.arm.mve.minnmv.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)
				declare float @llvm.arm.mve.minnmav.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)
				declare float @llvm.arm.mve.maxnmv.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)
				declare float @llvm.arm.mve.maxnmav.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)