Diff 274151

clang/include/clang/Basic/arm_sve.td

	Show First 20 Lines • Show All 719 Lines • ▼ Show 20 Lines
	def SVADRW : SInst<"svadrw[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrw">;			def SVADRW : SInst<"svadrw[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrw">;
	def SVADRD : SInst<"svadrd[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrd">;			def SVADRD : SInst<"svadrd[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrd">;

	////////////////////////////////////////////////////////////////////////////////			////////////////////////////////////////////////////////////////////////////////
	// Scalar to vector			// Scalar to vector

	def SVDUPQ_8 : SInst<"svdupq[_n]_{d}", "dssssssssssssssss", "cUc", MergeNone>;			def SVDUPQ_8 : SInst<"svdupq[_n]_{d}", "dssssssssssssssss", "cUc", MergeNone>;
	def SVDUPQ_16 : SInst<"svdupq[_n]_{d}", "dssssssss", "sUsh", MergeNone>;			def SVDUPQ_16 : SInst<"svdupq[_n]_{d}", "dssssssss", "sUsh", MergeNone>;
				let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
				[Inline review comment by c-rhodes (unsubmitted; marked done)] `__ARM_FEATURE_SVE_BF16` will imply `__ARM_FEATURE_BF16_SCALAR_ARITHMETIC`, so guarding only on the former should be sufficient. The same applies below.
				def SVDUPQ_BF16 : SInst<"svdupq[_n]_{d}", "dssssssss", "b", MergeNone>;
				}
	def SVDUPQ_32 : SInst<"svdupq[_n]_{d}", "dssss", "iUif", MergeNone>;			def SVDUPQ_32 : SInst<"svdupq[_n]_{d}", "dssss", "iUif", MergeNone>;
	def SVDUPQ_64 : SInst<"svdupq[_n]_{d}", "dss", "lUld", MergeNone>;			def SVDUPQ_64 : SInst<"svdupq[_n]_{d}", "dss", "lUld", MergeNone>;

	def SVDUP : SInst<"svdup[_n]_{d}", "ds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dup_x">;			multiclass svdup_base<string n, string p, MergeType mt, string i> {
	def SVDUP_M : SInst<"svdup[_n]_{d}", "ddPs", "csilUcUsUiUlhfd", MergeOp1, "aarch64_sve_dup">;			def NAME : SInst<n, p, "csilUcUsUiUlhfd", mt, i>;
	def SVDUP_X : SInst<"svdup[_n]_{d}", "dPs", "csilUcUsUiUlhfd", MergeAnyExp, "aarch64_sve_dup">;			let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
	def SVDUP_Z : SInst<"svdup[_n]_{d}", "dPs", "csilUcUsUiUlhfd", MergeZeroExp, "aarch64_sve_dup">;			def _BF16: SInst<n, p, "b", mt, i>;
				[Inline review comment by c-rhodes (unsubmitted; marked done)] Nit: could you fix the spacing? I don't think it's worth trying to keep the two defs inline; single spaces everywhere would do.
				}
				}

				defm SVDUP : svdup_base<"svdup[_n]_{d}", "ds", MergeNone, "aarch64_sve_dup_x">;
				defm SVDUP_M : svdup_base<"svdup[_n]_{d}", "ddPs", MergeOp1, "aarch64_sve_dup">;
				defm SVDUP_X : svdup_base<"svdup[_n]_{d}", "dPs", MergeAnyExp, "aarch64_sve_dup">;
				defm SVDUP_Z : svdup_base<"svdup[_n]_{d}", "dPs", MergeZeroExp, "aarch64_sve_dup">;

	def SVINDEX : SInst<"svindex_{d}", "dss", "csilUcUsUiUl", MergeNone, "aarch64_sve_index">;			def SVINDEX : SInst<"svindex_{d}", "dss", "csilUcUsUiUl", MergeNone, "aarch64_sve_index">;

	// Integer arithmetic			// Integer arithmetic

	multiclass SInstZPZ<string name, string types, string intrinsic, list<FlagType> flags=[]> {			multiclass SInstZPZ<string name, string types, string intrinsic, list<FlagType> flags=[]> {
	def _M : SInst<name # "[_{d}]", "ddPd", types, MergeOp1, intrinsic, flags>;			def _M : SInst<name # "[_{d}]", "ddPd", types, MergeOp1, intrinsic, flags>;
	def _X : SInst<name # "[_{d}]", "dPd", types, MergeAnyExp, intrinsic, flags>;			def _X : SInst<name # "[_{d}]", "dPd", types, MergeAnyExp, intrinsic, flags>;
	▲ Show 20 Lines • Show All 102 Lines • ▼ Show 20 Lines

	defm SVASR : SInst_SHIFT<"svasr", "aarch64_sve_asr", "csil", "csi">;			defm SVASR : SInst_SHIFT<"svasr", "aarch64_sve_asr", "csil", "csi">;
	defm SVLSL : SInst_SHIFT<"svlsl", "aarch64_sve_lsl", "csilUcUsUiUl", "csiUcUsUi">;			defm SVLSL : SInst_SHIFT<"svlsl", "aarch64_sve_lsl", "csilUcUsUiUl", "csiUcUsUi">;
	defm SVLSR : SInst_SHIFT<"svlsr", "aarch64_sve_lsr", "UcUsUiUl", "UcUsUi">;			defm SVLSR : SInst_SHIFT<"svlsr", "aarch64_sve_lsr", "UcUsUiUl", "UcUsUi">;

	def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;			def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;
	def SVASRD_X : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;			def SVASRD_X : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;
	def SVASRD_Z : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;			def SVASRD_Z : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;
	def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_insr">;

				def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_insr">;
				[Inline review comment by c-rhodes (unsubmitted; marked done)] Nit: remove double spaces.
				let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
				def SVINSR_BF16 : SInst<"svinsr[_n_{d}]", "dds", "b", MergeNone, "aarch64_sve_insr">;
				}

	////////////////////////////////////////////////////////////////////////////////			////////////////////////////////////////////////////////////////////////////////
	// Integer reductions			// Integer reductions

	def SVADDV_S : SInst<"svaddv[_{d}]", "lPd", "csil", MergeNone, "aarch64_sve_saddv">;			def SVADDV_S : SInst<"svaddv[_{d}]", "lPd", "csil", MergeNone, "aarch64_sve_saddv">;
	def SVADDV_U : SInst<"svaddv[_{d}]", "nPd", "UcUsUiUl", MergeNone, "aarch64_sve_uaddv">;			def SVADDV_U : SInst<"svaddv[_{d}]", "nPd", "UcUsUiUl", MergeNone, "aarch64_sve_uaddv">;
	def SVANDV : SInst<"svandv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_andv">;			def SVANDV : SInst<"svandv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_andv">;
	def SVEORV : SInst<"sveorv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorv">;			def SVEORV : SInst<"sveorv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorv">;
	▲ Show 20 Lines • Show All 305 Lines • ▼ Show 20 Lines
	def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone]>;			def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone]>;
	// SVCVTXNT_X_F32 : Implemented as macro by SveEmitter.cpp			// SVCVTXNT_X_F32 : Implemented as macro by SveEmitter.cpp

	}			}

	////////////////////////////////////////////////////////////////////////////////			////////////////////////////////////////////////////////////////////////////////
	// Permutations and selection			// Permutations and selection

	def SVCLASTA : SInst<"svclasta[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clasta">;			multiclass SVEPerm<string name, string proto, string i> {
	def SVCLASTA_N : SInst<"svclasta[_n_{d}]", "sPsd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clasta_n">;			def : SInst<name, proto, "csilUcUsUiUlhfd", MergeNone, i>;
	def SVCLASTB : SInst<"svclastb[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clastb">;			let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
	def SVCLASTB_N : SInst<"svclastb[_n_{d}]", "sPsd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clastb_n">;			def: SInst<name, proto, "b", MergeNone, i>;
				}
				}

				defm SVCLASTA : SVEPerm<"svclasta[_{d}]", "dPdd", "aarch64_sve_clasta">;
				defm SVCLASTA_N : SVEPerm<"svclasta[_n_{d}]", "sPsd", "aarch64_sve_clasta_n">;
				defm SVCLASTB : SVEPerm<"svclastb[_{d}]", "dPdd", "aarch64_sve_clastb">;
				defm SVCLASTB_N : SVEPerm<"svclastb[_n_{d}]", "sPsd", "aarch64_sve_clastb_n">;

	def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact">;			def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact">;
	// Note: svdup_lane is implemented using the intrinsic for TBL to represent a			// Note: svdup_lane is implemented using the intrinsic for TBL to represent a
	// splat of any possible lane. It is upto LLVM to pick a more efficient			// splat of any possible lane. It is upto LLVM to pick a more efficient
	// instruction such as DUP (indexed) if the lane index fits the range of the			// instruction such as DUP (indexed) if the lane index fits the range of the
	// instruction's immediate.			// instruction's immediate.
	def SVDUP_LANE : SInst<"svdup_lane[_{d}]", "ddL", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">;			def SVDUP_LANE : SInst<"svdup_lane[_{d}]", "ddL", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">;
	def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dupq_lane">;			def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dupq_lane">;
				let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
				[Inline review comment by c-rhodes (unsubmitted; marked done)] `__ARM_FEATURE_BF16_SCALAR_ARITHMETIC` can be removed.
				def SVDUPQ_LANE_BF16 : SInst<"svdupq_lane[_{d}]", "ddn", "b", MergeNone, "aarch64_sve_dupq_lane">;
				}
	def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>;			def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>;
	def SVLASTA : SInst<"svlasta[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lasta">;			defm SVLASTA : SVEPerm<"svlasta[_{d}]", "sPd", "aarch64_sve_lasta">;
	def SVLASTB : SInst<"svlastb[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lastb">;			defm SVLASTB : SVEPerm<"svlastb[_{d}]", "sPd", "aarch64_sve_lastb">;
	def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_rev">;			def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_rev">;
	def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel">;			def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel">;
	def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice">;			def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice">;
	def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">;			def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">;

	let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {			let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
	def SVTBL_BF16 : SInst<"svtbl[_{d}]", "ddu", "b", MergeNone, "aarch64_sve_tbl">;			def SVTBL_BF16 : SInst<"svtbl[_{d}]", "ddu", "b", MergeNone, "aarch64_sve_tbl">;
	}			}
	▲ Show 20 Lines • Show All 837 Lines • Show Last 20 Lines

clang/lib/CodeGen/CGBuiltin.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 7,738 Lines • ▼ Show 20 Lines	case SVETypeFlags::EltTyBool8:
return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);		return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
case SVETypeFlags::EltTyBool16:		case SVETypeFlags::EltTyBool16:
return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);		return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
case SVETypeFlags::EltTyBool32:		case SVETypeFlags::EltTyBool32:
return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);		return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
case SVETypeFlags::EltTyBool64:		case SVETypeFlags::EltTyBool64:
return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);		return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
}		}
}		}

		[Inline review comment by c-rhodes (unsubmitted; marked done)] Already added in D82399; you should see it when rebasing.
// Return the llvm vector type corresponding to the specified element TypeFlags.		// Return the llvm vector type corresponding to the specified element TypeFlags.
llvm::ScalableVectorType *		llvm::ScalableVectorType *
CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {		CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
switch (TypeFlags.getEltType()) {		switch (TypeFlags.getEltType()) {
default:		default:
llvm_unreachable("Invalid SVETypeFlag!");		llvm_unreachable("Invalid SVETypeFlag!");

case SVETypeFlags::EltTyInt8:		case SVETypeFlags::EltTyInt8:
▲ Show 20 Lines • Show All 624 Lines • ▼ Show 20 Lines	Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
case SVE::BI__builtin_sve_svdupq_n_b64:		case SVE::BI__builtin_sve_svdupq_n_b64:
case SVE::BI__builtin_sve_svdupq_n_u8:		case SVE::BI__builtin_sve_svdupq_n_u8:
case SVE::BI__builtin_sve_svdupq_n_s8:		case SVE::BI__builtin_sve_svdupq_n_s8:
case SVE::BI__builtin_sve_svdupq_n_u64:		case SVE::BI__builtin_sve_svdupq_n_u64:
case SVE::BI__builtin_sve_svdupq_n_f64:		case SVE::BI__builtin_sve_svdupq_n_f64:
case SVE::BI__builtin_sve_svdupq_n_s64:		case SVE::BI__builtin_sve_svdupq_n_s64:
case SVE::BI__builtin_sve_svdupq_n_u16:		case SVE::BI__builtin_sve_svdupq_n_u16:
case SVE::BI__builtin_sve_svdupq_n_f16:		case SVE::BI__builtin_sve_svdupq_n_f16:
		case SVE::BI__builtin_sve_svdupq_n_bf16:
case SVE::BI__builtin_sve_svdupq_n_s16:		case SVE::BI__builtin_sve_svdupq_n_s16:
case SVE::BI__builtin_sve_svdupq_n_u32:		case SVE::BI__builtin_sve_svdupq_n_u32:
case SVE::BI__builtin_sve_svdupq_n_f32:		case SVE::BI__builtin_sve_svdupq_n_f32:
case SVE::BI__builtin_sve_svdupq_n_s32: {		case SVE::BI__builtin_sve_svdupq_n_s32: {
// These builtins are implemented by storing each element to an array and using		// These builtins are implemented by storing each element to an array and using
// ld1rq to materialize a vector.		// ld1rq to materialize a vector.
unsigned NumOpnds = Ops.size();		unsigned NumOpnds = Ops.size();

▲ Show 20 Lines • Show All 8,447 Lines • Show Last 20 Lines

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c

This file was added.

				// REQUIRES: aarch64-registered-target
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s

				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
				// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s

				// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
				// ASM-NOT: warning
				#include <arm_sve.h>

				#ifdef SVE_OVERLOADED_FORMS
				// A simple used,unused... macro, long enough to represent any SVE builtin.
				#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
				#else
				#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
				#endif

				// Exercises the bf16 vector form of svclasta (svclasta_bf16, or the overloaded
				// svclasta when SVE_OVERLOADED_FORMS is defined). The IR patterns require the
				// svbool_t predicate to be converted to <vscale x 8 x i1> and passed, with both
				// vector operands, to @llvm.aarch64.sve.clasta.nxv8bf16. The -verify run, which
				// omits __ARM_FEATURE_SVE_BF16, must diagnose the call as an implicit declaration.
				svbfloat16_t test_svclasta_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) {
				// CHECK-LABEL: test_svclasta_bf16
				// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
				// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %fallback, <vscale x 8 x bfloat> %data)
				// CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
				// expected-warning@+1 {{implicit declaration of function 'svclasta_bf16'}}
				return SVE_ACLE_FUNC(svclasta, _bf16, , )(pg, fallback, data);
				}

				// Exercises the scalar-fallback form of svclasta (svclasta_n_bf16, or the
				// overloaded svclasta when SVE_OVERLOADED_FORMS is defined). The IR patterns
				// require a call to @llvm.aarch64.sve.clasta.n.nxv8bf16 that takes the converted
				// predicate, the bfloat fallback and the vector, and returns a scalar bfloat.
				// The -verify run (no __ARM_FEATURE_SVE_BF16) must diagnose an implicit declaration.
				bfloat16_t test_svclasta_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) {
				// CHECK-LABEL: test_svclasta_n_bf16
				// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
				// CHECK: %[[INTRINSIC:.*]] = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %[[PG]], bfloat %fallback, <vscale x 8 x bfloat> %data)
				// CHECK: ret bfloat %[[INTRINSIC]]
				// expected-warning@+1 {{implicit declaration of function 'svclasta_n_bf16'}}
				return SVE_ACLE_FUNC(svclasta, _n_bf16, , )(pg, fallback, data);
				}

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c

This file was added.

				// REQUIRES: aarch64-registered-target
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s

				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
				// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s

				// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
				// ASM-NOT: warning
				#include <arm_sve.h>

				#ifdef SVE_OVERLOADED_FORMS
				// A simple used,unused... macro, long enough to represent any SVE builtin.
				#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
				#else
				#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
				#endif

				// Exercises the bf16 vector form of svclastb (svclastb_bf16, or the overloaded
				// svclastb when SVE_OVERLOADED_FORMS is defined). The IR patterns require the
				// svbool_t predicate to be converted to <vscale x 8 x i1> and passed, with both
				// vector operands, to @llvm.aarch64.sve.clastb.nxv8bf16. The -verify run, which
				// omits __ARM_FEATURE_SVE_BF16, must diagnose the call as an implicit declaration.
				svbfloat16_t test_svclastb_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) {
				// CHECK-LABEL: test_svclastb_bf16
				// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
				// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %fallback, <vscale x 8 x bfloat> %data)
				// CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
				// expected-warning@+1 {{implicit declaration of function 'svclastb_bf16'}}
				return SVE_ACLE_FUNC(svclastb, _bf16, , )(pg, fallback, data);
				}

				// Exercises the scalar-fallback form of svclastb (svclastb_n_bf16, or the
				// overloaded svclastb when SVE_OVERLOADED_FORMS is defined). The IR patterns
				// require a call to @llvm.aarch64.sve.clastb.n.nxv8bf16 that takes the converted
				// predicate, the bfloat fallback and the vector, and returns a scalar bfloat.
				// The -verify run (no __ARM_FEATURE_SVE_BF16) must diagnose an implicit declaration.
				bfloat16_t test_svclastb_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) {
				// CHECK-LABEL: test_svclastb_n_bf16
				// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
				// CHECK: %[[INTRINSIC:.*]] = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %[[PG]], bfloat %fallback, <vscale x 8 x bfloat> %data)
				// CHECK: ret bfloat %[[INTRINSIC]]
				// expected-warning@+1 {{implicit declaration of function 'svclastb_n_bf16'}}
				return SVE_ACLE_FUNC(svclastb, _n_bf16, , )(pg, fallback, data);
				}

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c

This file was added.

				// REQUIRES: aarch64-registered-target
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s

				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
				// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s

				// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
				// ASM-NOT: warning
				#include <arm_sve.h>

				#ifdef SVE_OVERLOADED_FORMS
				// A simple used,unused... macro, long enough to represent any SVE builtin.
				#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
				#else
				#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
				#endif

				// Exercises the unpredicated bf16 splat (svdup_n_bf16, or svdup_bf16 when
				// SVE_OVERLOADED_FORMS is defined). The IR patterns require a single call to
				// @llvm.aarch64.sve.dup.x.nxv8bf16 on the scalar operand, whose result is returned.
				// The -verify run (no __ARM_FEATURE_SVE_BF16) must diagnose an implicit declaration.
				svbfloat16_t test_svdup_n_bf16(bfloat16_t op) {
				// CHECK-LABEL: test_svdup_n_bf16
				// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %op)
				// CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
				// expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16'}}
				return SVE_ACLE_FUNC(svdup, _n, _bf16, )(op);
				}

				// Exercises the zeroing-predicated bf16 splat (svdup_n_bf16_z, or svdup_bf16_z
				// when SVE_OVERLOADED_FORMS is defined). The IR patterns require a call to
				// @llvm.aarch64.sve.dup.nxv8bf16 with a zeroinitializer first operand, the
				// converted <vscale x 8 x i1> predicate and the scalar.
				// The -verify run (no __ARM_FEATURE_SVE_BF16) must diagnose an implicit declaration.
				svbfloat16_t test_svdup_n_bf16_z(svbool_t pg, bfloat16_t op) {
				// CHECK-LABEL: test_svdup_n_bf16_z
				// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
				// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> %[[PG]], bfloat %op)
				// CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
				// expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_z'}}
				return SVE_ACLE_FUNC(svdup, _n, _bf16_z, )(pg, op);
				}

				// Exercises the merging-predicated bf16 splat (svdup_n_bf16_m, or svdup_bf16_m
				// when SVE_OVERLOADED_FORMS is defined). The IR patterns require a call to
				// @llvm.aarch64.sve.dup.nxv8bf16 with %inactive as the first operand, followed by
				// the converted <vscale x 8 x i1> predicate and the scalar.
				// The -verify run (no __ARM_FEATURE_SVE_BF16) must diagnose an implicit declaration.
				svbfloat16_t test_svdup_n_bf16_m(svbfloat16_t inactive, svbool_t pg, bfloat16_t op) {
				// CHECK-LABEL: test_svdup_n_bf16_m
				// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
				// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %[[PG]], bfloat %op)
				// CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
				// expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_m'}}
				return SVE_ACLE_FUNC(svdup, _n, _bf16_m, )(inactive, pg, op);
				}

				// Exercises the "don't care" predicated bf16 splat (svdup_n_bf16_x, or
				// svdup_bf16_x when SVE_OVERLOADED_FORMS is defined). The IR patterns require a
				// call to @llvm.aarch64.sve.dup.nxv8bf16 with an undef first operand, the
				// converted <vscale x 8 x i1> predicate and the scalar.
				// The -verify run (no __ARM_FEATURE_SVE_BF16) must diagnose an implicit declaration.
				svbfloat16_t test_svdup_n_bf16_x(svbool_t pg, bfloat16_t op) {
				// CHECK-LABEL: test_svdup_n_bf16_x
				// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
				// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> %[[PG]], bfloat %op)
				// CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
				// expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_x'}}
				return SVE_ACLE_FUNC(svdup, _n, _bf16_x, )(pg, op);
				}

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c

This file was added.

				// REQUIRES: aarch64-registered-target
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s

				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
				// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s

				// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
				// ASM-NOT: warning
				#include <arm_sve.h>

				#ifdef SVE_OVERLOADED_FORMS
				// A simple used,unused... macro, long enough to represent any SVE builtin.
				#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
				#else
				#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
				#endif

				// Exercises svdupq_lane for bf16 vectors (svdupq_lane_bf16, or the overloaded
				// svdupq_lane when SVE_OVERLOADED_FORMS is defined). The IR patterns require a
				// call to @llvm.aarch64.sve.dupq.lane.nxv8bf16 taking the vector and the i64 index.
				// The -verify run (no __ARM_FEATURE_SVE_BF16) must diagnose an implicit declaration.
				svbfloat16_t test_svdupq_lane_bf16(svbfloat16_t data, uint64_t index) {
				// CHECK-LABEL: test_svdupq_lane_bf16
				// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %data, i64 %index)
				// CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
				// expected-warning@+1 {{implicit declaration of function 'svdupq_lane_bf16'}}
				return SVE_ACLE_FUNC(svdupq_lane, _bf16, , )(data, index);
				}
				// Exercises svdupq built from eight bf16 scalars (svdupq_n_bf16, or svdupq_bf16
				// when SVE_OVERLOADED_FORMS is defined). The IR patterns require the scalars to
				// be stored into an [8 x bfloat] stack array, followed by an all-true predicate
				// (ptrue) and an ld1rq load from the array base whose result is returned.
				// The -verify run (no __ARM_FEATURE_SVE_BF16) must diagnose an implicit declaration.
				//
				// The variable-capturing patterns use `.*` so that IR value names of any
				// length match, and the getelementptr source operand is spelled with its
				// pointer type (`[8 x bfloat]*`), matching the `bfloat*` stores below.
				svbfloat16_t test_svdupq_n_bf16(bfloat16_t x0, bfloat16_t x1, bfloat16_t x2, bfloat16_t x3,
				bfloat16_t x4, bfloat16_t x5, bfloat16_t x6, bfloat16_t x7) {
				// CHECK-LABEL: test_svdupq_n_bf16
				// CHECK: %[[ALLOCA:.*]] = alloca [8 x bfloat], align 16
				// CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [8 x bfloat], [8 x bfloat]* %[[ALLOCA]], i64 0, i64 0
				// CHECK-DAG: store bfloat %x0, bfloat* %[[BASE]], align 16
				// <assume other stores>
				// CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [8 x bfloat], [8 x bfloat]* %[[ALLOCA]], i64 0, i64 7
				// CHECK: store bfloat %x7, bfloat* %[[GEP]], align 2
				// CHECK-NOT: store
				// CHECK: call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
				// CHECK: %[[LOAD:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %{{.*}}, bfloat* nonnull %[[BASE]])
				// CHECK: ret <vscale x 8 x bfloat> %[[LOAD]]
				// expected-warning@+1 {{implicit declaration of function 'svdupq_n_bf16'}}
				return SVE_ACLE_FUNC(svdupq, _n, _bf16, )(x0, x1, x2, x3, x4, x5, x6, x7);
				}

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c

This file was added.

				// REQUIRES: aarch64-registered-target
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s

				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
				// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s

				// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
				// ASM-NOT: warning
				#include <arm_sve.h>

				#ifdef SVE_OVERLOADED_FORMS
				// A simple used,unused... macro, long enough to represent any SVE builtin.
				#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
				#else
				#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
				#endif

				// Exercises svinsr with a bf16 scalar (svinsr_n_bf16, or the overloaded svinsr
				// when SVE_OVERLOADED_FORMS is defined). The IR patterns require a call to
				// @llvm.aarch64.sve.insr.nxv8bf16 taking the vector and the scalar.
				// The -verify run (no __ARM_FEATURE_SVE_BF16) must diagnose an implicit declaration.
				svbfloat16_t test_svinsr_n_bf16(svbfloat16_t op1, bfloat16_t op2) {
				// CHECK-LABEL: test_svinsr_n_bf16
				// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat> %op1, bfloat %op2)
				// CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
				// expected-warning@+1 {{implicit declaration of function 'svinsr_n_bf16'}}
				return SVE_ACLE_FUNC(svinsr, _n_bf16, , )(op1, op2);
				}

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta-bfloat.c

This file was added.

				// REQUIRES: aarch64-registered-target
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s

				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
				// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s

				// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
				// ASM-NOT: warning
				#include <arm_sve.h>

				#ifdef SVE_OVERLOADED_FORMS
				// A simple used,unused... macro, long enough to represent any SVE builtin.
				#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
				#else
				#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
				#endif

				// Exercises svlasta on a bf16 vector (svlasta_bf16, or the overloaded svlasta
				// when SVE_OVERLOADED_FORMS is defined). The IR patterns require the predicate
				// to be converted to <vscale x 8 x i1> and passed with the vector to
				// @llvm.aarch64.sve.lasta.nxv8bf16, whose scalar bfloat result is returned.
				// The -verify run (no __ARM_FEATURE_SVE_BF16) must diagnose an implicit declaration.
				bfloat16_t test_svlasta_bf16(svbool_t pg, svbfloat16_t op) {
				// CHECK-LABEL: test_svlasta_bf16
				// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
				// CHECK: %[[INTRINSIC:.*]] = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %op)
				// CHECK: ret bfloat %[[INTRINSIC]]
				// expected-warning@+1 {{implicit declaration of function 'svlasta_bf16'}}
				return SVE_ACLE_FUNC(svlasta, _bf16, , )(pg, op);
				}

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb-bfloat.c

This file was added.

				// REQUIRES: aarch64-registered-target
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s

				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
				// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
				// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s

				// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
				// ASM-NOT: warning
				#include <arm_sve.h>

				#ifdef SVE_OVERLOADED_FORMS
				// A simple used,unused... macro, long enough to represent any SVE builtin.
				#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
				#else
				#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
				#endif

				// Exercises svlastb on a bf16 vector (svlastb_bf16, or the overloaded svlastb
				// when SVE_OVERLOADED_FORMS is defined). The IR patterns require the predicate
				// to be converted to <vscale x 8 x i1> and passed with the vector to
				// @llvm.aarch64.sve.lastb.nxv8bf16, whose scalar bfloat result is returned.
				// The -verify run (no __ARM_FEATURE_SVE_BF16) must diagnose an implicit declaration.
				bfloat16_t test_svlastb_bf16(svbool_t pg, svbfloat16_t op) {
				// CHECK-LABEL: test_svlastb_bf16
				// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
				// CHECK: %[[INTRINSIC:.*]] = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %op)
				// CHECK: ret bfloat %[[INTRINSIC]]
				// expected-warning@+1 {{implicit declaration of function 'svlastb_bf16'}}
				return SVE_ACLE_FUNC(svlastb, _bf16, , )(pg, op);
				}

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Show First 20 Lines • Show All 417 Lines • ▼ Show 20 Lines	let Predicates = [HasSVE, HasBF16] in {
// Splat scalar register (unpredicated, GPR or vector + element index)		// Splat scalar register (unpredicated, GPR or vector + element index)
defm DUP_ZR : sve_int_perm_dup_r<"dup", AArch64dup>;		defm DUP_ZR : sve_int_perm_dup_r<"dup", AArch64dup>;
defm DUP_ZZI : sve_int_perm_dup_i<"dup">;		defm DUP_ZZI : sve_int_perm_dup_i<"dup">;

// Splat scalar register (predicated)		// Splat scalar register (predicated)
defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy", AArch64dup_mt>;		defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy", AArch64dup_mt>;
defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_mt>;		defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_mt>;

		// bfloat16: select the predicated splat node (AArch64dup_mt) producing
		// nxv8bf16 to CPY_ZPmV_H. Operand order changes from (pg, splat, passthru)
		// in the source pattern to (passthru, pg, splat) on the instruction.
		// Guarded by both HasSVE and HasBF16.
		let Predicates = [HasSVE, HasBF16] in {
		def : Pat<(nxv8bf16 (AArch64dup_mt nxv8i1:$pg, bf16:$splat, nxv8bf16:$passthru)),
		(CPY_ZPmV_H $passthru, $pg, $splat)>;
		}

// Duplicate FP scalar into all vector elements		// Duplicate FP scalar into all vector elements
def : Pat<(nxv8f16 (AArch64dup (f16 FPR16:$src))),		def : Pat<(nxv8f16 (AArch64dup (f16 FPR16:$src))),
(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;		(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
def : Pat<(nxv4f16 (AArch64dup (f16 FPR16:$src))),		def : Pat<(nxv4f16 (AArch64dup (f16 FPR16:$src))),
(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;		(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
def : Pat<(nxv2f16 (AArch64dup (f16 FPR16:$src))),		def : Pat<(nxv2f16 (AArch64dup (f16 FPR16:$src))),
(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;		(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
def : Pat<(nxv4f32 (AArch64dup (f32 FPR32:$src))),		def : Pat<(nxv4f32 (AArch64dup (f32 FPR32:$src))),
(DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;		(DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;
def : Pat<(nxv2f32 (AArch64dup (f32 FPR32:$src))),		def : Pat<(nxv2f32 (AArch64dup (f32 FPR32:$src))),
(DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;		(DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;
def : Pat<(nxv2f64 (AArch64dup (f64 FPR64:$src))),		def : Pat<(nxv2f64 (AArch64dup (f64 FPR64:$src))),
(DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>;		(DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>;
		let Predicates = [HasSVE, HasBF16] in {
		def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))),
		(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
		c-rhodesUnsubmitted Done Reply Inline Actions I think we're missing a test for this pattern in `llvm/test/CodeGen/AArch64/sve-vector-splat.ll`? Same applies to dup 0 patterns below. c-rhodes: I think we're missing a test for this pattern in `llvm/test/CodeGen/AArch64/sve-vector-splat.
		fpetrogalliAuthorUnsubmitted Done Reply Inline Actions I have added these patterns to allow adding the regression tests in this patch, so they are somehow guarded by the tests. I tried to add the test cases anyway in sve-vector-splat.ll, but the following one crashes the compiler, so the whole "splatting a bfloat constant" deserves a separate patch. define <vscale x 8 x bfloat> @splat_nxv8bf16_imm() #0 { ; CHECK-LABEL: splat_nxv8bf16_imm: ; CHECK: mov z0.h, #1.0 ; CHECK-NEXT: ret %1 = insertelement <vscale x 8 x bfloat> undef, bfloat 1.0, i32 0 %2 = shufflevector <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> undef, <vscale x 8 x i32> zeroinitializer ret <vscale x 8 x bfloat> %2 } I will create a new revision and make it a parent of this one. fpetrogalli: I have added these patterns to allow adding the regression tests in this patch, so they are…
		fpetrogalliAuthorUnsubmitted Done Reply Inline Actions (facepalm) There is no "dup" instruction for bfloat immediates... that's why this is not working. I guess a separate patch is not needed, this one is enough... fpetrogalli: (facepalm) There is no "dup" instruction for bfloat immediates... that's why this is not…
		}

// Duplicate +0.0 into all vector elements		// Duplicate +0.0 into all vector elements
def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;		def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
def : Pat<(nxv4f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;		def : Pat<(nxv4f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
def : Pat<(nxv2f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;		def : Pat<(nxv2f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
def : Pat<(nxv4f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;		def : Pat<(nxv4f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;		def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>;		def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>;
		// bfloat16: a splat of +0.0 (fpimm0) into nxv8bf16 is selected to
		// DUP_ZI_H 0, 0. Guarded by both HasSVE and HasBF16.
		let Predicates = [HasSVE, HasBF16] in {
		def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
		}

		c-rhodesUnsubmitted Done Reply Inline Actions formatting changes can be reverted c-rhodes: formatting changes can be reverted
// Duplicate Int immediate into all vector elements		// Duplicate Int immediate into all vector elements
def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),		def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
(DUP_ZI_B $a, $b)>;		(DUP_ZI_B $a, $b)>;
def : Pat<(nxv8i16 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),		def : Pat<(nxv8i16 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
(DUP_ZI_H $a, $b)>;		(DUP_ZI_H $a, $b)>;
def : Pat<(nxv4i32 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),		def : Pat<(nxv4i32 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
(DUP_ZI_S $a, $b)>;		(DUP_ZI_S $a, $b)>;
def : Pat<(nxv2i64 (AArch64dup (i64 (SVE8BitLslImm i32:$a, i32:$b)))),		def : Pat<(nxv2i64 (AArch64dup (i64 (SVE8BitLslImm i32:$a, i32:$b)))),
Show All 25 Lines	let Predicates = [HasSVE, HasBF16] in {
def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_splice, nxv8i1, nxv8bf16, nxv8bf16, SPLICE_ZPZ_H>;		def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_splice, nxv8i1, nxv8bf16, nxv8bf16, SPLICE_ZPZ_H>;
}		}

defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>;		defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>;
defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>;		defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>;
defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;		defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;
defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>;		defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>;

		// bfloat16: select AArch64insr taking an nxv8bf16 vector and a bf16 scalar
		// to INSR_ZV_H. Guarded by both HasSVE and HasBF16.
		let Predicates = [HasSVE, HasBF16] in {
		def : SVE_2_Op_Pat<nxv8bf16, AArch64insr, nxv8bf16, bf16, INSR_ZV_H>;
		}

defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", int_aarch64_sve_rbit>;		defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", int_aarch64_sve_rbit>;
defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", int_aarch64_sve_revb, bswap>;		defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", int_aarch64_sve_revb, bswap>;
defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>;		defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>;
defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>;		defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>;

defm REV_PP : sve_int_perm_reverse_p<"rev", AArch64rev>;		defm REV_PP : sve_int_perm_reverse_p<"rev", AArch64rev>;
defm REV_ZZ : sve_int_perm_reverse_z<"rev", AArch64rev>;		defm REV_ZZ : sve_int_perm_reverse_z<"rev", AArch64rev>;

▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines	let Predicates = [HasSVE, HasBF16] in {

defm CLASTA_RPZ : sve_int_perm_clast_rz<0, "clasta", AArch64clasta_n>;		defm CLASTA_RPZ : sve_int_perm_clast_rz<0, "clasta", AArch64clasta_n>;
defm CLASTB_RPZ : sve_int_perm_clast_rz<1, "clastb", AArch64clastb_n>;		defm CLASTB_RPZ : sve_int_perm_clast_rz<1, "clastb", AArch64clastb_n>;
defm CLASTA_VPZ : sve_int_perm_clast_vz<0, "clasta", AArch64clasta_n>;		defm CLASTA_VPZ : sve_int_perm_clast_vz<0, "clasta", AArch64clasta_n>;
defm CLASTB_VPZ : sve_int_perm_clast_vz<1, "clastb", AArch64clastb_n>;		defm CLASTB_VPZ : sve_int_perm_clast_vz<1, "clastb", AArch64clastb_n>;
defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta", int_aarch64_sve_clasta>;		defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta", int_aarch64_sve_clasta>;
defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb", int_aarch64_sve_clastb>;		defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb", int_aarch64_sve_clastb>;

		// bfloat16 selection patterns for clasta/clastb, guarded by HasSVE and HasBF16.
		// The first two map the scalar-result nodes (AArch64clasta_n/AArch64clastb_n,
		// returning bf16) to the *_VPZ_H instructions. The last two map the
		// vector-result intrinsics (returning nxv8bf16) to the *_ZPZ_H instructions.
		let Predicates = [HasSVE, HasBF16] in {
		def : SVE_3_Op_Pat<bf16, AArch64clasta_n, nxv8i1, bf16, nxv8bf16, CLASTA_VPZ_H>;
		def : SVE_3_Op_Pat<bf16, AArch64clastb_n, nxv8i1, bf16, nxv8bf16, CLASTB_VPZ_H>;
		def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_clasta, nxv8i1, nxv8bf16, nxv8bf16, CLASTA_ZPZ_H>;
		def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_clastb, nxv8i1, nxv8bf16, nxv8bf16, CLASTB_ZPZ_H>;
		}

defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta", AArch64lasta>;		defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta", AArch64lasta>;
defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb", AArch64lastb>;		defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb", AArch64lastb>;
defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta", AArch64lasta>;		defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta", AArch64lasta>;
defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb", AArch64lastb>;		defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb", AArch64lastb>;

		// bfloat16: select AArch64lasta/AArch64lastb on an nxv8i1 predicate and an
		// nxv8bf16 vector (bf16 result) to LASTA_VPZ_H/LASTB_VPZ_H.
		// Guarded by both HasSVE and HasBF16.
		let Predicates = [HasSVE, HasBF16] in {
		def : SVE_2_Op_Pat<bf16, AArch64lasta, nxv8i1, nxv8bf16, LASTA_VPZ_H>;
		def : SVE_2_Op_Pat<bf16, AArch64lastb, nxv8i1, nxv8bf16, LASTB_VPZ_H>;
		}

// continuous load with reg+immediate		// continuous load with reg+immediate
defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>;		defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>;
defm LD1B_H_IMM : sve_mem_cld_si<0b0001, "ld1b", Z_h, ZPR16>;		defm LD1B_H_IMM : sve_mem_cld_si<0b0001, "ld1b", Z_h, ZPR16>;
defm LD1B_S_IMM : sve_mem_cld_si<0b0010, "ld1b", Z_s, ZPR32>;		defm LD1B_S_IMM : sve_mem_cld_si<0b0010, "ld1b", Z_s, ZPR32>;
defm LD1B_D_IMM : sve_mem_cld_si<0b0011, "ld1b", Z_d, ZPR64>;		defm LD1B_D_IMM : sve_mem_cld_si<0b0011, "ld1b", Z_d, ZPR64>;
defm LD1SW_D_IMM : sve_mem_cld_si<0b0100, "ld1sw", Z_d, ZPR64>;		defm LD1SW_D_IMM : sve_mem_cld_si<0b0100, "ld1sw", Z_d, ZPR64>;
defm LD1H_IMM : sve_mem_cld_si<0b0101, "ld1h", Z_h, ZPR16>;		defm LD1H_IMM : sve_mem_cld_si<0b0101, "ld1h", Z_h, ZPR16>;
defm LD1H_S_IMM : sve_mem_cld_si<0b0110, "ld1h", Z_s, ZPR32>;		defm LD1H_S_IMM : sve_mem_cld_si<0b0110, "ld1h", Z_s, ZPR32>;
▲ Show 20 Lines • Show All 924 Lines • ▼ Show 20 Lines	let Predicates = [IsLE] in {
def : Pat<(nxv4f32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4f32 ZPR:$src)>;		def : Pat<(nxv4f32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4f32 ZPR:$src)>;

def : Pat<(nxv2f64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2f64 ZPR:$src)>;		def : Pat<(nxv2f64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2f64 ZPR:$src)>;
def : Pat<(nxv2f64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2f64 ZPR:$src)>;		def : Pat<(nxv2f64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
def : Pat<(nxv2f64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2f64 ZPR:$src)>;		def : Pat<(nxv2f64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>;		def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>;
def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>;		def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;		def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;

		}

		let Predicates = [IsLE, HasBF16, HasSVE] in {
		def : Pat<(nxv2i64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
		def : Pat<(nxv8bf16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
		def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
		c-rhodesUnsubmitted Done Reply Inline Actions missing tests in `llvm/test/CodeGen/AArch64/sve-bitcast.ll` c-rhodes: missing tests in `llvm/test/CodeGen/AArch64/sve-bitcast.ll`
		fpetrogalliAuthorUnsubmitted Done Reply Inline Actions The bitconvert patterns went in via D82501. This code is not present anymore in this patch. fpetrogalli: The bitconvert patterns went in via D82501. This code is not present anymore in this patch.
}		}

let Predicates = [IsLE, HasSVE, HasBF16] in {		let Predicates = [IsLE, HasSVE, HasBF16] in {
def : Pat<(nxv8bf16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8bf16 ZPR:$src)>;		def : Pat<(nxv8bf16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;		def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
def : Pat<(nxv8bf16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8bf16 ZPR:$src)>;		def : Pat<(nxv8bf16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
def : Pat<(nxv8bf16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8bf16 ZPR:$src)>;		def : Pat<(nxv8bf16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
def : Pat<(nxv8bf16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;		def : Pat<(nxv8bf16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
▲ Show 20 Lines • Show All 985 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll

Show First 20 Lines • Show All 75 Lines • ▼ Show 20 Lines
define <vscale x 8 x half> @dup_f16(half %b) {		define <vscale x 8 x half> @dup_f16(half %b) {
; CHECK-LABEL: dup_f16:		; CHECK-LABEL: dup_f16:
; CHECK: mov z0.h, h0		; CHECK: mov z0.h, h0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half %b)		%out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half %b)
ret <vscale x 8 x half> %out		ret <vscale x 8 x half> %out
}		}

		; bfloat variant of dup_f16: splats the scalar bfloat argument through
		; llvm.aarch64.sve.dup.x.nxv8bf16 and looks for the same `mov z0.h, h0`.
		; Uses attribute group #0, which this file defines as "+sve,+bf16".
		define <vscale x 8 x bfloat> @dup_bf16(bfloat %b) #0 {
		; CHECK-LABEL: dup_bf16:
		; CHECK: mov z0.h, h0
		; CHECK-NEXT: ret
		%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %b)
		ret <vscale x 8 x bfloat> %out
		}

define <vscale x 8 x half> @dup_imm_f16(half %b) {		define <vscale x 8 x half> @dup_imm_f16(half %b) {
; CHECK-LABEL: dup_imm_f16:		; CHECK-LABEL: dup_imm_f16:
; CHECK: mov z0.h, #16.00000000		; CHECK: mov z0.h, #16.00000000
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 16.)		%out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 16.)
ret <vscale x 8 x half> %out		ret <vscale x 8 x half> %out
}		}

Show All 29 Lines	; CHECK-NEXT: ret
ret <vscale x 2 x double> %out		ret <vscale x 2 x double> %out
}		}

declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8( i8)		declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8( i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)		declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)		declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)		declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)		declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
		declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat)
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)		declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)		declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)

		; +bf16 is required for the bfloat version.
		attributes #0 = { "target-features"="+sve,+bf16" }

llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll

Show First 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
; CHECK: clasta z0.h, p0, z0.h, z1.h		; CHECK: clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1> %pg,		%out = call <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a,		<vscale x 8 x half> %a,
<vscale x 8 x half> %b)		<vscale x 8 x half> %b)
ret <vscale x 8 x half> %out		ret <vscale x 8 x half> %out
}		}

		; bfloat variant of the vector-result clasta test: calls
		; llvm.aarch64.sve.clasta.nxv8bf16 and looks for the same .h-element clasta
		; as the half version.
		; NOTE(review): attribute group #0 is defined outside this view; presumably
		; it enables +bf16 - confirm.
		define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
		; CHECK-LABEL: clasta_bf16:
		; CHECK: clasta z0.h, p0, z0.h, z1.h
		; CHECK-NEXT: ret
		%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %pg,
		<vscale x 8 x bfloat> %a,
		<vscale x 8 x bfloat> %b)
		ret <vscale x 8 x bfloat> %out
		}

define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {		define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_f32:		; CHECK-LABEL: clasta_f32:
; CHECK: clasta z0.s, p0, z0.s, z1.s		; CHECK: clasta z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1> %pg,		%out = call <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a,		<vscale x 4 x float> %a,
<vscale x 4 x float> %b)		<vscale x 4 x float> %b)
ret <vscale x 4 x float> %out		ret <vscale x 4 x float> %out
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines
; CHECK: clasta h0, p0, h0, z1.h		; CHECK: clasta h0, p0, h0, z1.h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%out = call half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1> %pg,		%out = call half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1> %pg,
half %a,		half %a,
<vscale x 8 x half> %b)		<vscale x 8 x half> %b)
ret half %out		ret half %out
}		}

		; bfloat variant of the scalar-result clasta test: calls
		; llvm.aarch64.sve.clasta.n.nxv8bf16 with a scalar bfloat fallback value and
		; looks for the h-register form of clasta, as in the half version.
		; NOTE(review): attribute group #0 is defined outside this view; presumably
		; it enables +bf16 - confirm.
		define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
		; CHECK-LABEL: clasta_n_bf16:
		; CHECK: clasta h0, p0, h0, z1.h
		; CHECK-NEXT: ret
		%out = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg,
		bfloat %a,
		<vscale x 8 x bfloat> %b)
		ret bfloat %out
		}

define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {		define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_n_f32:		; CHECK-LABEL: clasta_n_f32:
; CHECK: clasta s0, p0, s0, z1.s		; CHECK: clasta s0, p0, s0, z1.s
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%out = call float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1> %pg,		%out = call float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1> %pg,
float %a,		float %a,
<vscale x 4 x float> %b)		<vscale x 4 x float> %b)
ret float %out		ret float %out
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines
; CHECK: clastb z0.h, p0, z0.h, z1.h		; CHECK: clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1> %pg,		%out = call <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a,		<vscale x 8 x half> %a,
<vscale x 8 x half> %b)		<vscale x 8 x half> %b)
ret <vscale x 8 x half> %out		ret <vscale x 8 x half> %out
}		}

		; bfloat variant of the vector-result clastb test: calls
		; llvm.aarch64.sve.clastb.nxv8bf16 and looks for the same .h-element clastb
		; as the half version.
		; NOTE(review): attribute group #0 is defined outside this view; presumably
		; it enables +bf16 - confirm.
		define <vscale x 8 x bfloat> @clastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
		; CHECK-LABEL: clastb_bf16:
		; CHECK: clastb z0.h, p0, z0.h, z1.h
		; CHECK-NEXT: ret
		%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %pg,
		<vscale x 8 x bfloat> %a,
		<vscale x 8 x bfloat> %b)
		ret <vscale x 8 x bfloat> %out
		}

define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {		define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_f32:		; CHECK-LABEL: clastb_f32:
; CHECK: clastb z0.s, p0, z0.s, z1.s		; CHECK: clastb z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1> %pg,		%out = call <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a,		<vscale x 4 x float> %a,
<vscale x 4 x float> %b)		<vscale x 4 x float> %b)
ret <vscale x 4 x float> %out		ret <vscale x 4 x float> %out
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines
; CHECK: clastb h0, p0, h0, z1.h		; CHECK: clastb h0, p0, h0, z1.h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%out = call half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1> %pg,		%out = call half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1> %pg,
half %a,		half %a,
<vscale x 8 x half> %b)		<vscale x 8 x half> %b)
ret half %out		ret half %out
}		}

		; bfloat variant of the scalar-result clastb test: calls
		; llvm.aarch64.sve.clastb.n.nxv8bf16 with a scalar bfloat fallback value and
		; looks for the h-register form of clastb, as in the half version.
		; NOTE(review): attribute group #0 is defined outside this view; presumably
		; it enables +bf16 - confirm.
		define bfloat @clastb_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
		; CHECK-LABEL: clastb_n_bf16:
		; CHECK: clastb h0, p0, h0, z1.h
		; CHECK-NEXT: ret
		%out = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %pg,
		bfloat %a,
		<vscale x 8 x bfloat> %b)
		c-rhodesUnsubmitted Done Reply Inline Actions nit: alignment c-rhodes: nit: alignment
		ret bfloat %out
		}

define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {		define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_n_f32:		; CHECK-LABEL: clastb_n_f32:
; CHECK: clastb s0, p0, s0, z1.s		; CHECK: clastb s0, p0, s0, z1.s
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%out = call float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1> %pg,		%out = call float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1> %pg,
float %a,		float %a,
<vscale x 4 x float> %b)		<vscale x 4 x float> %b)
ret float %out		ret float %out
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines
define <vscale x 8 x half> @dupq_f16(<vscale x 8 x half> %a) {		define <vscale x 8 x half> @dupq_f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: dupq_f16:		; CHECK-LABEL: dupq_f16:
; CHECK: mov z0.q, q0		; CHECK: mov z0.q, q0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 0)		%out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 0)
ret <vscale x 8 x half> %out		ret <vscale x 8 x half> %out
}		}

		; bfloat variant of dupq_f16: calls llvm.aarch64.sve.dupq.lane.nxv8bf16 with
		; the constant lane index 0 and looks for `mov z0.q, q0`.
		; NOTE(review): attribute group #0 is defined outside this view; presumably
		; it enables +bf16 - confirm.
		define <vscale x 8 x bfloat> @dupq_bf16(<vscale x 8 x bfloat> %a) #0 {
		; CHECK-LABEL: dupq_bf16:
		; CHECK: mov z0.q, q0
		; CHECK-NEXT: ret
		%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 0)
		ret <vscale x 8 x bfloat> %out
		}

define <vscale x 4 x float> @dupq_f32(<vscale x 4 x float> %a) {		define <vscale x 4 x float> @dupq_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: dupq_f32:		; CHECK-LABEL: dupq_f32:
; CHECK: mov z0.q, z0.q[1]		; CHECK: mov z0.q, z0.q[1]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 1)		%out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 1)
ret <vscale x 4 x float> %out		ret <vscale x 4 x float> %out
}		}

▲ Show 20 Lines • Show All 74 Lines • ▼ Show 20 Lines
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d		; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d		; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 %idx)		%out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 %idx)
ret <vscale x 8 x half> %out		ret <vscale x 8 x half> %out
}		}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).		; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
		; bfloat variant of the run-time lane index test: %idx is a function argument,
		; so the directives look for an index/and/add/mov sequence that builds a .d
		; index vector, followed by a tbl on .d elements.
		; NOTE(review): attribute group #0 is defined outside this view; presumably
		; it enables +bf16 - confirm.
		define <vscale x 8 x bfloat> @dupq_lane_bf16(<vscale x 8 x bfloat> %a, i64 %idx) #0 {
		; CHECK-LABEL: dupq_lane_bf16:
		; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
		; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
		; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
		; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
		; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
		; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
		; CHECK-NEXT: ret
		%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 %idx)
		ret <vscale x 8 x bfloat> %out
		}

		; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {		define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f32:		; CHECK-LABEL: dupq_lane_f32:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1		; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1		; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0		; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]		; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d		; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d		; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
▲ Show 20 Lines • Show All 156 Lines • ▼ Show 20 Lines
; CHECK-LABEL: lasta_f16		; CHECK-LABEL: lasta_f16
; CHECK: lasta h0, p0, z0.h		; CHECK: lasta h0, p0, z0.h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1> %pg,		%res = call half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a)		<vscale x 8 x half> %a)
ret half %res		ret half %res
}		}

		; bfloat variant of lasta_f16: calls llvm.aarch64.sve.lasta.nxv8bf16 and looks
		; for the h-register form `lasta h0, p0, z0.h`.
		; NOTE(review): attribute group #0 is defined outside this view; presumably
		; it enables +bf16 - confirm.
		define bfloat @lasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
		; CHECK-LABEL: lasta_bf16
		; CHECK: lasta h0, p0, z0.h
		; CHECK-NEXT: ret
		%res = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %pg,
		<vscale x 8 x bfloat> %a)
		c-rhodesUnsubmitted Done Reply Inline Actions nit: alignment c-rhodes: nit: alignment
		ret bfloat %res
		}

define float @lasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {		define float @lasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lasta_f32		; CHECK-LABEL: lasta_f32
; CHECK: lasta s0, p0, z0.s		; CHECK: lasta s0, p0, z0.s
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1> %pg,		%res = call float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a)		<vscale x 4 x float> %a)
ret float %res		ret float %res
}		}
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines
; CHECK-LABEL: lastb_f16		; CHECK-LABEL: lastb_f16
; CHECK: lastb h0, p0, z0.h		; CHECK: lastb h0, p0, z0.h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1> %pg,		%res = call half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a)		<vscale x 8 x half> %a)
ret half %res		ret half %res
}		}

		; bfloat variant of lastb_f16: calls llvm.aarch64.sve.lastb.nxv8bf16 and looks
		; for the h-register form `lastb h0, p0, z0.h`.
		; NOTE(review): attribute group #0 is defined outside this view; presumably
		; it enables +bf16 - confirm.
		define bfloat @lastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
		; CHECK-LABEL: lastb_bf16
		; CHECK: lastb h0, p0, z0.h
		; CHECK-NEXT: ret
		%res = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %pg,
		<vscale x 8 x bfloat> %a)
		ret bfloat %res
		}

define float @lastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {		define float @lastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lastb_f32		; CHECK-LABEL: lastb_f32
; CHECK: lastb s0, p0, z0.s		; CHECK: lastb s0, p0, z0.s
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg,		%res = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a)		<vscale x 4 x float> %a)
ret float %res		ret float %res
}		}
▲ Show 20 Lines • Show All 1,154 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
ret <vscale x 2 x double> %out		ret <vscale x 2 x double> %out
}		}

declare <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)		declare <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)		declare <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)		declare <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)		declare <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)		declare <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
		declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)		declare <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)		declare <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)		declare i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)		declare i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)		declare i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)		declare i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)		declare half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
		declare bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)		declare float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)		declare double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)		declare <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)		declare <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)		declare <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)		declare <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)		declare <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
		declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)		declare <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)		declare <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)		declare i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)		declare i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)		declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)		declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)		declare half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
		declare bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)		declare float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)		declare double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)		declare <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)		declare <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)		declare <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)		declare <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8>, i64)		declare <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8>, i64)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16>, i64)		declare <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16>, i64)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)		declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)		declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)		declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)
		declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)		declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)		declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)

declare <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)		declare <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)		declare <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)		declare <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)		declare <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)		declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)		declare <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)		declare <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)		declare <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)

declare i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)		declare i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)		declare i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)		declare i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)		declare i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)		declare half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
		declare bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)		declare float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)		declare float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)		declare double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)		declare i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)		declare i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)		declare i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)		declare i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)		declare half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
		declare bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)		declare float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)		declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)		declare double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1>)		declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1>)		declare <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1>)		declare <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1>)		declare <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1>)
▲ Show 20 Lines • Show All 129 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll

	Show First 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
	; CHECK: mov z0.h, p0/m, h1			; CHECK: mov z0.h, p0/m, h1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> %a,			%out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> %a,
	<vscale x 8 x i1> %pg,			<vscale x 8 x i1> %pg,
	half %b)			half %b)
	ret <vscale x 8 x half> %out			ret <vscale x 8 x half> %out
	}			}

				define <vscale x 8 x bfloat> @dup_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, bfloat %b) #0 {
				; CHECK-LABEL: dup_bf16:
				; CHECK: mov z0.h, p0/m, h1
				; CHECK-NEXT: ret
				%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %a,
				<vscale x 8 x i1> %pg,
				bfloat %b)
				[inline review comment] c-rhodes: nit: alignment
				ret <vscale x 8 x bfloat> %out
				}

	define <vscale x 4 x float> @dup_f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, float %b) {			define <vscale x 4 x float> @dup_f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, float %b) {
	; CHECK-LABEL: dup_f32:			; CHECK-LABEL: dup_f32:
	; CHECK: mov z0.s, p0/m, s1			; CHECK: mov z0.s, p0/m, s1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> %a,			%out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> %a,
	<vscale x 4 x i1> %pg,			<vscale x 4 x i1> %pg,
	float %b)			float %b)
	ret <vscale x 4 x float> %out			ret <vscale x 4 x float> %out
	}			}

	define <vscale x 2 x double> @dup_f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, double %b) {			define <vscale x 2 x double> @dup_f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, double %b) {
	; CHECK-LABEL: dup_f64:			; CHECK-LABEL: dup_f64:
	; CHECK: mov z0.d, p0/m, d1			; CHECK: mov z0.d, p0/m, d1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> %a,			%out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> %a,
	<vscale x 2 x i1> %pg,			<vscale x 2 x i1> %pg,
	double %b)			double %b)
	ret <vscale x 2 x double> %out			ret <vscale x 2 x double> %out
	}			}

				define <vscale x 8 x bfloat> @test_svdup_n_bf16_z(<vscale x 8 x i1> %pg, bfloat %op) #0 {
				; CHECK-LABEL: test_svdup_n_bf16_z:
				; CHECK: mov z1.h, #0
				; CHECK: mov z1.h, p0/m, h0
				; CHECK: mov z0.d, z1.d
				; CHECK-NEXT: ret
				%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> %pg, bfloat %op)
				ret <vscale x 8 x bfloat> %out
				}

				define <vscale x 8 x bfloat> @test_svdup_n_bf16_m(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, bfloat %op) #0 {
				; CHECK-LABEL: test_svdup_n_bf16_m:
				; CHECK: mov z0.h, p0/m, h1
				; CHECK-NEXT: ret
				%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, bfloat %op)
				ret <vscale x 8 x bfloat> %out
				}


				define <vscale x 8 x bfloat> @test_svdup_n_bf16_x(<vscale x 8 x i1> %pg, bfloat %op) #0 {
				; CHECK-LABEL: test_svdup_n_bf16_x:
				; CHECK: mov z0.h, p0/m, h0
				; CHECK-NEXT: ret
				%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> %pg, bfloat %op)
				ret <vscale x 8 x bfloat> %out
				}

	declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)			declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
	declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)			declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
	declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)			declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
	declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)			declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)
	declare <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half)			declare <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half)
				declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat)
	declare <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float)			declare <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float)
	declare <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)			declare <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)

				; +bf16 is required for the bfloat version.
				attributes #0 = { "target-features"="+sve,+bf16" }

llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll

	Show First 20 Lines • Show All 159 Lines • ▼ Show 20 Lines
	define <vscale x 8 x half> @insr_f16(<vscale x 8 x half> %a, half %b) {			define <vscale x 8 x half> @insr_f16(<vscale x 8 x half> %a, half %b) {
	; CHECK-LABEL: insr_f16:			; CHECK-LABEL: insr_f16:
	; CHECK: insr z0.h, h1			; CHECK: insr z0.h, h1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half> %a, half %b)			%out = call <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half> %a, half %b)
	ret <vscale x 8 x half> %out			ret <vscale x 8 x half> %out
	}			}

				define <vscale x 8 x bfloat> @insr_bf16(<vscale x 8 x bfloat> %a, bfloat %b) #0 {
				; CHECK-LABEL: insr_bf16:
				; CHECK: insr z0.h, h1
				; CHECK-NEXT: ret
				%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat> %a, bfloat %b)
				ret <vscale x 8 x bfloat> %out
				}

	define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {			define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
	; CHECK-LABEL: insr_f32:			; CHECK-LABEL: insr_f32:
	; CHECK: insr z0.s, s1			; CHECK: insr z0.s, s1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float> %a, float %b)			%out = call <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float> %a, float %b)
	ret <vscale x 4 x float> %out			ret <vscale x 4 x float> %out
	}			}

	▲ Show 20 Lines • Show All 167 Lines • ▼ Show 20 Lines
	declare <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)			declare <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
	declare <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)			declare <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)

	declare <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8>, i8)			declare <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8>, i8)
	declare <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16>, i16)			declare <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16>, i16)
	declare <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32>, i32)			declare <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32>, i32)
	declare <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64>, i64)			declare <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64>, i64)
	declare <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half>, half)			declare <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half>, half)
				declare <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat>, bfloat)
	declare <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float>, float)			declare <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float>, float)
	declare <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double>, double)			declare <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double>, double)

	declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)			declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
	declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)			declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
	declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)			declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
	declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)			declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

	declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)			declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
	declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)			declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
	declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)			declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

	declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)			declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
	declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)			declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
	declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)			declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
	declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)			declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

	declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)			declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
	declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)			declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
	declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)			declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

				; +bf16 is required for the bfloat version.
				attributes #0 = { "target-features"="+sve,+bf16" }

llvm/test/CodeGen/AArch64/sve-vector-splat.ll

	Show First 20 Lines • Show All 166 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%ins = insertelement <vscale x 16 x i1> undef, i1 %val, i32 0			%ins = insertelement <vscale x 16 x i1> undef, i1 %val, i32 0
	%splat = shufflevector <vscale x 16 x i1> %ins, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer			%splat = shufflevector <vscale x 16 x i1> %ins, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
	ret <vscale x 16 x i1> %splat			ret <vscale x 16 x i1> %splat
	}			}

	;; Splats of legal floating point vector types			;; Splats of legal floating point vector types

				define <vscale x 8 x bfloat> @splat_nxv8bf16(bfloat %val) #0 {
				; CHECK-LABEL: splat_nxv8bf16:
				; CHECK: mov z0.h, h0
				; CHECK-NEXT: ret
				%1 = insertelement <vscale x 8 x bfloat> undef, bfloat %val, i32 0
				%2 = shufflevector <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> undef, <vscale x 8 x i32> zeroinitializer
				ret <vscale x 8 x bfloat> %2
				}

	define <vscale x 8 x half> @splat_nxv8f16(half %val) {			define <vscale x 8 x half> @splat_nxv8f16(half %val) {
	; CHECK-LABEL: splat_nxv8f16:			; CHECK-LABEL: splat_nxv8f16:
	; CHECK: mov z0.h, h0			; CHECK: mov z0.h, h0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%1 = insertelement <vscale x 8 x half> undef, half %val, i32 0			%1 = insertelement <vscale x 8 x half> undef, half %val, i32 0
	%2 = shufflevector <vscale x 8 x half> %1, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer			%2 = shufflevector <vscale x 8 x half> %1, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
	ret <vscale x 8 x half> %2			ret <vscale x 8 x half> %2
	}			}
	▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines

	define <vscale x 8 x half> @splat_nxv8f16_zero() {			define <vscale x 8 x half> @splat_nxv8f16_zero() {
	; CHECK-LABEL: splat_nxv8f16_zero:			; CHECK-LABEL: splat_nxv8f16_zero:
	; CHECK: mov z0.h, #0			; CHECK: mov z0.h, #0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	ret <vscale x 8 x half> zeroinitializer			ret <vscale x 8 x half> zeroinitializer
	}			}

				define <vscale x 8 x bfloat> @splat_nxv8bf16_zero() #0 {
				; CHECK-LABEL: splat_nxv8bf16_zero:
				; CHECK: mov z0.h, #0
				; CHECK-NEXT: ret
				ret <vscale x 8 x bfloat> zeroinitializer
				}

	define <vscale x 4 x half> @splat_nxv4f16_zero() {			define <vscale x 4 x half> @splat_nxv4f16_zero() {
	; CHECK-LABEL: splat_nxv4f16_zero:			; CHECK-LABEL: splat_nxv4f16_zero:
	; CHECK: mov z0.h, #0			; CHECK: mov z0.h, #0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	ret <vscale x 4 x half> zeroinitializer			ret <vscale x 4 x half> zeroinitializer
	}			}

	define <vscale x 2 x half> @splat_nxv2f16_zero() {			define <vscale x 2 x half> @splat_nxv2f16_zero() {
	▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines
	define <vscale x 2 x double> @splat_nxv2f64_imm() {			define <vscale x 2 x double> @splat_nxv2f64_imm() {
	; CHECK-LABEL: splat_nxv2f64_imm:			; CHECK-LABEL: splat_nxv2f64_imm:
	; CHECK: mov z0.d, #1.0			; CHECK: mov z0.d, #1.0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%1 = insertelement <vscale x 2 x double> undef, double 1.0, i32 0			%1 = insertelement <vscale x 2 x double> undef, double 1.0, i32 0
	%2 = shufflevector <vscale x 2 x double> %1, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer			%2 = shufflevector <vscale x 2 x double> %1, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
	ret <vscale x 2 x double> %2			ret <vscale x 2 x double> %2
	}			}

				; +bf16 is required for the bfloat version.
				attributes #0 = { "target-features"="+sve,+bf16" }

This is an archive of the discontinued LLVM Phabricator instance.

[sve][acle] Implement some of the C intrinsics for brain float.
Closed · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 274151

clang/include/clang/Basic/arm_sve.td

clang/lib/CodeGen/CGBuiltin.cpp

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta-bfloat.c

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb-bfloat.c

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll

llvm/test/CodeGen/AArch64/sve-vector-splat.ll

This is an archive of the discontinued LLVM Phabricator instance.

[sve][acle] Implement some of the C intrinsics for brain float. (Closed · Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 274151

clang/include/clang/Basic/arm_sve.td

clang/lib/CodeGen/CGBuiltin.cpp

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta-bfloat.c

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb-bfloat.c

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll

llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll

llvm/test/CodeGen/AArch64/sve-vector-splat.ll

[sve][acle] Implement some of the C intrinsics for brain float.
Closed · Public