Diff 293474

clang/include/clang/Basic/BuiltinsPPC.def

	Show First 20 Lines • Show All 384 Lines • ▼ Show 20 Lines
	BUILTIN(__builtin_altivec_vextdubvrx, "V2ULLiV16UcV16UcUi", "")			BUILTIN(__builtin_altivec_vextdubvrx, "V2ULLiV16UcV16UcUi", "")
	BUILTIN(__builtin_altivec_vextduhvlx, "V2ULLiV8UsV8UsUi", "")			BUILTIN(__builtin_altivec_vextduhvlx, "V2ULLiV8UsV8UsUi", "")
	BUILTIN(__builtin_altivec_vextduhvrx, "V2ULLiV8UsV8UsUi", "")			BUILTIN(__builtin_altivec_vextduhvrx, "V2ULLiV8UsV8UsUi", "")
	BUILTIN(__builtin_altivec_vextduwvlx, "V2ULLiV4UiV4UiUi", "")			BUILTIN(__builtin_altivec_vextduwvlx, "V2ULLiV4UiV4UiUi", "")
	BUILTIN(__builtin_altivec_vextduwvrx, "V2ULLiV4UiV4UiUi", "")			BUILTIN(__builtin_altivec_vextduwvrx, "V2ULLiV4UiV4UiUi", "")
	BUILTIN(__builtin_altivec_vextddvlx, "V2ULLiV2ULLiV2ULLiUi", "")			BUILTIN(__builtin_altivec_vextddvlx, "V2ULLiV2ULLiV2ULLiUi", "")
	BUILTIN(__builtin_altivec_vextddvrx, "V2ULLiV2ULLiV2ULLiUi", "")			BUILTIN(__builtin_altivec_vextddvrx, "V2ULLiV2ULLiV2ULLiUi", "")

				// P10 Vector rotate built-ins.
				// altivec.h implements the vector __int128 overloads of vec_rlmi and
				// vec_rlnm on top of these: vrlqmi is called with three vector operands,
				// vrlqnm with two.
				BUILTIN(__builtin_altivec_vrlqmi, "V1ULLLiV1ULLLiV1ULLLiV1ULLLi", "")
				BUILTIN(__builtin_altivec_vrlqnm, "V1ULLLiV1ULLLiV1ULLLi", "")

	// VSX built-ins.			// VSX built-ins.

	BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")			BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")
	BUILTIN(__builtin_vsx_lxvw4x, "V4iivC*", "")			BUILTIN(__builtin_vsx_lxvw4x, "V4iivC*", "")
	BUILTIN(__builtin_vsx_lxvd2x_be, "V2dSLLivC*", "")			BUILTIN(__builtin_vsx_lxvd2x_be, "V2dSLLivC*", "")
	BUILTIN(__builtin_vsx_lxvw4x_be, "V4iSLLivC*", "")			BUILTIN(__builtin_vsx_lxvw4x_be, "V4iSLLivC*", "")

	BUILTIN(__builtin_vsx_stxvd2x, "vV2div*", "")			BUILTIN(__builtin_vsx_stxvd2x, "vV2div*", "")
	▲ Show 20 Lines • Show All 210 Lines • Show Last 20 Lines

clang/lib/Headers/altivec.h

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 7,783 Lines • ▼ Show 20 Lines
	}			}

	static __inline__ vector unsigned long long __ATTRS_o_ai			static __inline__ vector unsigned long long __ATTRS_o_ai
	vec_rl(vector unsigned long long __a, vector unsigned long long __b) {			vec_rl(vector unsigned long long __a, vector unsigned long long __b) {
	return __builtin_altivec_vrld(__a, __b);			return __builtin_altivec_vrld(__a, __b);
	}			}
	#endif			#endif

				#ifdef __POWER10_VECTOR__
				// vec_rl for vector signed __int128: an open-coded 128-bit rotate built
				// from a left shift, a right shift by (128 - amount) and an OR. It is kept
				// as one expression so the emitted IR stays a plain shl/sub/lshr/or chain.
				// All arithmetic is done on vector unsigned __int128 so both operands of
				// every operator have the same vector type; the result is then cast
				// explicitly to the signed return type instead of relying on lax vector
				// conversions.
				// NOTE(review): as written (and preserved here) the value being rotated
				// is __b and the rotate amount is __a, whereas the vrld-based vec_rl
				// overload passes (__a, __b) straight to its builtin - confirm the
				// intended operand roles.
				static __inline__ vector signed __int128 __ATTRS_o_ai
				vec_rl(vector signed __int128 __a, vector unsigned __int128 __b) {
				  return (vector signed __int128)(
				      (__b << (vector unsigned __int128)__a) |
				      (__b >> ((__CHAR_BIT__ * sizeof(vector unsigned __int128)) -
				               (vector unsigned __int128)__a)));
				}

				// vec_rl for vector unsigned __int128: an open-coded 128-bit rotate built
				// from a left shift, a right shift by (128 - amount) and an OR. It is kept
				// as one expression so the emitted IR stays a plain shl/sub/lshr/or chain.
				// NOTE(review): as written (and preserved here) the value being rotated
				// is __b and the rotate amount is __a, whereas the vrld-based vec_rl
				// overload passes (__a, __b) straight to its builtin - confirm the
				// intended operand roles.
				static __inline__ vector unsigned __int128 __ATTRS_o_ai
				vec_rl(vector unsigned __int128 __a, vector unsigned __int128 __b) {
				  return (__b << __a) |
				         (__b >> ((__CHAR_BIT__ * sizeof(vector unsigned __int128)) - __a));
				}
				#endif

	/* vec_rlmi */			/* vec_rlmi */
	#ifdef __POWER9_VECTOR__			#ifdef __POWER9_VECTOR__
	static __inline__ vector unsigned int __ATTRS_o_ai			static __inline__ vector unsigned int __ATTRS_o_ai
	vec_rlmi(vector unsigned int __a, vector unsigned int __b,			vec_rlmi(vector unsigned int __a, vector unsigned int __b,
	vector unsigned int __c) {			vector unsigned int __c) {
	return __builtin_altivec_vrlwmi(__a, __c, __b);			return __builtin_altivec_vrlwmi(__a, __c, __b);
	}			}

	static __inline__ vector unsigned long long __ATTRS_o_ai			static __inline__ vector unsigned long long __ATTRS_o_ai
	vec_rlmi(vector unsigned long long __a, vector unsigned long long __b,			vec_rlmi(vector unsigned long long __a, vector unsigned long long __b,
	vector unsigned long long __c) {			vector unsigned long long __c) {
	return __builtin_altivec_vrldmi(__a, __c, __b);			return __builtin_altivec_vrldmi(__a, __c, __b);
	}			}
				#endif

				#ifdef __POWER10_VECTOR__
				// vec_rlmi for vector unsigned __int128, implemented with the vrlqmi
				// builtin. Note the operand order: the builtin receives (__a, __c, __b),
				// i.e. the second and third arguments are swapped, exactly as in the
				// vrlwmi- and vrldmi-based overloads of vec_rlmi.
				static __inline__ vector unsigned __int128 __ATTRS_o_ai
				vec_rlmi(vector unsigned __int128 __a, vector unsigned __int128 __b,
				vector unsigned __int128 __c) {
				return __builtin_altivec_vrlqmi(__a, __c, __b);
				}

				// vec_rlmi for vector signed __int128, implemented with the vrlqmi
				// builtin. The builtin is declared on vector unsigned __int128, so the
				// operands and the result are cast explicitly rather than relying on lax
				// vector conversions. The builtin receives (__a, __c, __b): the second and
				// third arguments are swapped, as in the other vec_rlmi overloads.
				static __inline__ vector signed __int128 __ATTRS_o_ai
				vec_rlmi(vector signed __int128 __a, vector signed __int128 __b,
				         vector signed __int128 __c) {
				  return (vector signed __int128)__builtin_altivec_vrlqmi(
				      (vector unsigned __int128)__a, (vector unsigned __int128)__c,
				      (vector unsigned __int128)__b);
				}
				#endif

	/* vec_rlnm */			/* vec_rlnm */
				#ifdef __POWER9_VECTOR__
	static __inline__ vector unsigned int __ATTRS_o_ai			static __inline__ vector unsigned int __ATTRS_o_ai
	vec_rlnm(vector unsigned int __a, vector unsigned int __b,			vec_rlnm(vector unsigned int __a, vector unsigned int __b,
	vector unsigned int __c) {			vector unsigned int __c) {
	vector unsigned int OneByte = { 0x8, 0x8, 0x8, 0x8 };			vector unsigned int OneByte = { 0x8, 0x8, 0x8, 0x8 };
	return __builtin_altivec_vrlwnm(__a, ((__c << OneByte) \| __b));			return __builtin_altivec_vrlwnm(__a, ((__c << OneByte) \| __b));
	}			}

	static __inline__ vector unsigned long long __ATTRS_o_ai			static __inline__ vector unsigned long long __ATTRS_o_ai
	vec_rlnm(vector unsigned long long __a, vector unsigned long long __b,			vec_rlnm(vector unsigned long long __a, vector unsigned long long __b,
	vector unsigned long long __c) {			vector unsigned long long __c) {
	vector unsigned long long OneByte = { 0x8, 0x8 };			vector unsigned long long OneByte = { 0x8, 0x8 };
	return __builtin_altivec_vrldnm(__a, ((__c << OneByte) \| __b));			return __builtin_altivec_vrldnm(__a, ((__c << OneByte) \| __b));
	}			}
	#endif			#endif

				#ifdef __POWER10_VECTOR__
				static __inline__ vector unsigned __int128 __ATTRS_o_ai
				vec_rlnm(vector unsigned __int128 __a, vector unsigned __int128 __b,
				vector unsigned __int128 __c) {
				// Merge __b and __c using an appropriate shuffle.
				vector unsigned char TmpB = (vector unsigned char)__b;
				vector unsigned char TmpC = (vector unsigned char)__c;
				vector unsigned char MaskAndShift =
				[Review comment - nemanjai] While correct, this implementation will require two constant pool loads (for the two shift amounts), then two `vrlq`s to shift the two vectors, and finally an `xxlor` to OR them together. We should be able to do this with a single constant pool load and a `vperm`. Presumably the implementation would be something like:

				    // Merge __b and __c using an appropriate shuffle.
				    vector unsigned char TmpB = (vector unsigned char)__b;
				    vector unsigned char TmpC = (vector unsigned char)__c;
				    vector unsigned char MaskAndShift =
				    #ifdef __LITTLE_ENDIAN__
				        __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 1, 0, -1, -1, -1, -1, -1);
				    #else
				        __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 30, 31, 15, -1, -1, -1, -1, -1, -1, -1, -1);
				    #endif
				    return __builtin_altivec_vrlqnm(__a, MaskAndShift);

				(But of course, double-check that the numbers are correct.)
				#ifdef __LITTLE_ENDIAN__
				__builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 1,
				0, -1, -1, -1, -1, -1);
				#else
				__builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 30, 31, 15, -1,
				-1, -1, -1, -1, -1, -1, -1);
				#endif
				return __builtin_altivec_vrlqnm(__a, MaskAndShift);
				[Review comment - nemanjai] Please add an explicit cast to `vector unsigned __int128` for `MaskAndShift`. Similarly below. I forgot to add that to my comment.
				}

				// vec_rlnm for vector signed __int128, implemented with the vrlqnm
				// builtin. The builtin takes the value to rotate plus a single vector
				// that carries both the shift and the mask information, so selected bytes
				// of __b and __c are merged into one vector with a byte shuffle.
				// The builtin is declared on vector unsigned __int128, so __a, the merged
				// vector and the result are all cast explicitly instead of relying on lax
				// vector conversions.
				static __inline__ vector signed __int128 __ATTRS_o_ai
				vec_rlnm(vector signed __int128 __a, vector signed __int128 __b,
				         vector signed __int128 __c) {
				  // Merge __b and __c using an appropriate shuffle.
				  vector unsigned char TmpB = (vector unsigned char)__b;
				  vector unsigned char TmpC = (vector unsigned char)__c;
				  // Only three result bytes are specified (positions 8-10 on little
				  // endian, 5-7 on big endian); every other position is -1.
				  // NOTE(review): the selected indices are carried over unchanged -
				  // confirm them against the operand layout vrlqnm expects.
				  vector unsigned char MaskAndShift =
				#ifdef __LITTLE_ENDIAN__
				      __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 1,
				                              0, -1, -1, -1, -1, -1);
				#else
				      __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 30, 31, 15, -1,
				                              -1, -1, -1, -1, -1, -1, -1);
				#endif
				  return (vector signed __int128)__builtin_altivec_vrlqnm(
				      (vector unsigned __int128)__a, (vector unsigned __int128)MaskAndShift);
				}
				#endif

	/* vec_vrlb */			/* vec_vrlb */

	static __inline__ vector signed char __ATTRS_o_ai			static __inline__ vector signed char __ATTRS_o_ai
	vec_vrlb(vector signed char __a, vector unsigned char __b) {			vec_vrlb(vector signed char __a, vector unsigned char __b) {
	return (vector signed char)__builtin_altivec_vrlb((vector char)__a, __b);			return (vector signed char)__builtin_altivec_vrlb((vector char)__a, __b);
	}			}

	static __inline__ vector unsigned char __ATTRS_o_ai			static __inline__ vector unsigned char __ATTRS_o_ai
	▲ Show 20 Lines • Show All 9,850 Lines • Show Last 20 Lines

clang/test/CodeGen/builtins-ppc-p10vector.c

	// REQUIRES: powerpc-registered-target			// REQUIRES: powerpc-registered-target
	// RUN: %clang_cc1 -target-feature +vsx \			// RUN: %clang_cc1 -target-feature +vsx \
	// RUN: -target-cpu pwr10 -triple powerpc64-unknown-unknown -emit-llvm %s \			// RUN: -target-cpu pwr10 -triple powerpc64-unknown-unknown -emit-llvm %s \
	// RUN: -o - \| FileCheck %s -check-prefixes=CHECK-BE,CHECK			// RUN: -o - \| FileCheck %s -check-prefixes=CHECK-BE,CHECK
				[Review comment - amyk] The `CHECK-COMMON` prefix should not be needed. You can just use the `CHECK` prefix in the tests, since we have already set up the check prefixes.
	// RUN: %clang_cc1 -target-feature +vsx \			// RUN: %clang_cc1 -target-feature +vsx \
	// RUN: -target-cpu pwr10 -triple powerpc64le-unknown-unknown -emit-llvm %s \			// RUN: -target-cpu pwr10 -triple powerpc64le-unknown-unknown -emit-llvm %s \
	// RUN: -o - \| FileCheck %s -check-prefixes=CHECK-LE,CHECK			// RUN: -o - \| FileCheck %s -check-prefixes=CHECK-LE,CHECK

	#include <altivec.h>			#include <altivec.h>

	vector signed __int128 vi128a;			vector signed __int128 vi128a;
	vector signed char vsca, vscb;			vector signed char vsca, vscb;
	vector unsigned char vuca, vucb, vucc;			vector unsigned char vuca, vucb, vucc;
	vector signed short vssa, vssb;			vector signed short vssa, vssb;
	vector unsigned short vusa, vusb, vusc;			vector unsigned short vusa, vusb, vusc;
	vector signed int vsia, vsib;			vector signed int vsia, vsib;
	vector unsigned int vuia, vuib, vuic;			vector unsigned int vuia, vuib, vuic;
	vector signed long long vslla, vsllb;			vector signed long long vslla, vsllb;
	vector unsigned long long vulla, vullb, vullc;			vector unsigned long long vulla, vullb, vullc;
	vector signed __int128 vsi128a, vsi128b;			vector signed __int128 vsi128a, vsi128b, vsi128c;
	vector unsigned __int128 vui128a, vui128b, vui128c;			vector unsigned __int128 vui128a, vui128b, vui128c;
	vector float vfa, vfb;			vector float vfa, vfb;
	vector double vda, vdb;			vector double vda, vdb;
	signed int *iap;			signed int *iap;
	unsigned int uia, uib, *uiap;			unsigned int uia, uib, *uiap;
	signed char *cap;			signed char *cap;
	unsigned char uca;			unsigned char uca;
	const unsigned char *ucap;			const unsigned char *ucap;
	▲ Show 20 Lines • Show All 1,123 Lines • ▼ Show 20 Lines
	}			}

	vector unsigned __int128 test_vec_xl_zext_i64(void) {			vector unsigned __int128 test_vec_xl_zext_i64(void) {
	// CHECK: load i64			// CHECK: load i64
	// CHECK: zext i64			// CHECK: zext i64
	// CHECK: ret <1 x i128>			// CHECK: ret <1 x i128>
	return vec_xl_zext(llb, ullap);			return vec_xl_zext(llb, ullap);
	}			}

				// vec_rl on 128-bit vectors called with two signed operands. No intrinsic
				// call is expected here; the checks look for the sub, lshr and or
				// operations on <1 x i128> as consecutive instructions, followed
				// directly by the return.
				vector signed __int128 test_vec_rl_s128(void) {
				// CHECK-LABEL: @test_vec_rl_s128(
				// CHECK: sub <1 x i128>
				// CHECK-NEXT: lshr <1 x i128>
				// CHECK-NEXT: or <1 x i128>
				// CHECK-NEXT: ret <1 x i128>
				return vec_rl(vsi128a, vsi128b);
				}

				// vec_rl on 128-bit vectors called with two unsigned operands. The checks
				// look for sub, lshr and or on <1 x i128> in that order (not necessarily
				// adjacent), with the return immediately after the or.
				vector unsigned __int128 test_vec_rl_u128(void) {
				// CHECK-LABEL: @test_vec_rl_u128(
				// CHECK: sub <1 x i128>
				// CHECK: lshr <1 x i128>
				// CHECK: or <1 x i128>
				// CHECK-NEXT: ret <1 x i128>
				return vec_rl(vui128a, vui128b);
				}

				vector signed __int128 test_vec_rlnm_s128(void) {
				// CHECK-LABEL: @test_vec_rlnm_s128(
				// CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>
				[Review comment - nemanjai] Please show the shift in the test case as well.
				[Review comment - nemanjai, follow-up] "Please show the shift in the test case as well." This was still not addressed. Please show the shuffle in the checks.
				// CHECK-NEXT: ret <1 x i128>
				return vec_rlnm(vsi128a, vsi128b, vsi128c);
				}

				// vec_rlnm on unsigned 128-bit vectors: expects a call to the
				// llvm.ppc.altivec.vrlqnm intrinsic immediately followed by the return.
				// NOTE(review): the shuffle that builds the intrinsic's second operand is
				// not matched by these checks.
				vector unsigned __int128 test_vec_rlnm_u128(void) {
				// CHECK-LABEL: @test_vec_rlnm_u128(
				// CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>
				// CHECK-NEXT: ret <1 x i128>
				return vec_rlnm(vui128a, vui128b, vui128c);
				}

				// vec_rlmi on signed 128-bit vectors: expects a call to the
				// llvm.ppc.altivec.vrlqmi intrinsic immediately followed by the return.
				// Only the first operand's type is matched; operand order is not checked.
				vector signed __int128 test_vec_rlmi_s128(void) {
				// CHECK-LABEL: @test_vec_rlmi_s128(
				// CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>
				// CHECK-NEXT: ret <1 x i128>
				return vec_rlmi(vsi128a, vsi128b, vsi128c);
				}

				// vec_rlmi on unsigned 128-bit vectors: expects a call to the
				// llvm.ppc.altivec.vrlqmi intrinsic immediately followed by the return.
				// Only the first operand's type is matched; operand order is not checked.
				vector unsigned __int128 test_vec_rlmi_u128(void) {
				// CHECK-LABEL: @test_vec_rlmi_u128(
				// CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>
				// CHECK-NEXT: ret <1 x i128>
				return vec_rlmi(vui128a, vui128b, vui128c);
				}

llvm/include/llvm/IR/IntrinsicsPowerPC.td

	Show First 20 Lines • Show All 996 Lines • ▼ Show 20 Lines
	def int_ppc_altivec_vrldnm :			def int_ppc_altivec_vrldnm :
	PowerPC_Vec_Intrinsic<"vrldnm", [llvm_v2i64_ty],			PowerPC_Vec_Intrinsic<"vrldnm", [llvm_v2i64_ty],
	[llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;			[llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
	def int_ppc_altivec_vrldmi :			def int_ppc_altivec_vrldmi :
	PowerPC_Vec_Intrinsic<"vrldmi", [llvm_v2i64_ty],			PowerPC_Vec_Intrinsic<"vrldmi", [llvm_v2i64_ty],
	[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],			[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
	[IntrNoMem]>;			[IntrNoMem]>;

				// 128-bit (v1i128) rotate intrinsics. vrlqnm takes two v1i128 operands
				// and vrlqmi takes three; both return v1i128 and are marked IntrNoMem.
				def int_ppc_altivec_vrlqnm :
				PowerPC_Vec_Intrinsic<"vrlqnm", [llvm_v1i128_ty],
				[llvm_v1i128_ty, llvm_v1i128_ty],
				[IntrNoMem]>;
				def int_ppc_altivec_vrlqmi :
				PowerPC_Vec_Intrinsic<"vrlqmi", [llvm_v1i128_ty],
				[llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
				[IntrNoMem]>;

	// Vector Divide Extended Intrinsics.			// Vector Divide Extended Intrinsics.
	def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">;			def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">;
	def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">;			def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">;
	def int_ppc_altivec_vdivesd : PowerPC_Vec_DDD_Intrinsic<"vdivesd">;			def int_ppc_altivec_vdivesd : PowerPC_Vec_DDD_Intrinsic<"vdivesd">;
	def int_ppc_altivec_vdiveud : PowerPC_Vec_DDD_Intrinsic<"vdiveud">;			def int_ppc_altivec_vdiveud : PowerPC_Vec_DDD_Intrinsic<"vdiveud">;

	// Vector Multiply High Intrinsics.			// Vector Multiply High Intrinsics.
	def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">;			def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">;
	▲ Show 20 Lines • Show All 287 Lines • Show Last 20 Lines

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 884 Lines • ▼ Show 20 Lines	if (Subtarget.isISA3_1()) {
setOperationAction(ISD::UDIV, MVT::v4i32, Legal);		setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
setOperationAction(ISD::SDIV, MVT::v4i32, Legal);		setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
setOperationAction(ISD::UREM, MVT::v2i64, Legal);		setOperationAction(ISD::UREM, MVT::v2i64, Legal);
setOperationAction(ISD::SREM, MVT::v2i64, Legal);		setOperationAction(ISD::SREM, MVT::v2i64, Legal);
setOperationAction(ISD::UREM, MVT::v4i32, Legal);		setOperationAction(ISD::UREM, MVT::v4i32, Legal);
setOperationAction(ISD::SREM, MVT::v4i32, Legal);		setOperationAction(ISD::SREM, MVT::v4i32, Legal);
setOperationAction(ISD::UDIV, MVT::v1i128, Legal);		setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
setOperationAction(ISD::SDIV, MVT::v1i128, Legal);		setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
		setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
}		}

setOperationAction(ISD::MUL, MVT::v8i16, Legal);		setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);		setOperationAction(ISD::MUL, MVT::v16i8, Custom);

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);		setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);		setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

▲ Show 20 Lines • Show All 15,787 Lines • Show Last 20 Lines

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

Show First 20 Lines • Show All 1,440 Lines • ▼ Show 20 Lines	let Predicates = [IsISA3_1] in {
def VMODUQ : VXForm_1<1547, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),		def VMODUQ : VXForm_1<1547, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmoduq $vD, $vA, $vB", IIC_VecGeneral, []>;		"vmoduq $vD, $vA, $vB", IIC_VecGeneral, []>;
def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB),		def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB),
"vextsd2q $vD, $vB", IIC_VecGeneral, []>;		"vextsd2q $vD, $vB", IIC_VecGeneral, []>;
def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),		def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
"vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;		"vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;
def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),		def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
"vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>;		"vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>;
def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm", []>;
def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
(ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
"vrlqmi $vD, $vA, $vB", IIC_VecFP, []>,
RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>;		def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>;
def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>;		def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>;
def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>;		def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>;
def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;		def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;
def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;		def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;
def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>;		def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>;
def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>;		def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>;
		def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
		[Review comment - amyk] If possible, I think it is better to leave the instruction definitions in the position they were in, and just add the patterns to them.
		// VRLQNM is selected directly from the int_ppc_altivec_vrlqnm intrinsic
		// on v1i128 operands.
		def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm",
		[(set v1i128:$vD,
		(int_ppc_altivec_vrlqnm v1i128:$vA,
		v1i128:$vB))]>;
		// VRLQMI is selected from int_ppc_altivec_vrlqmi. The intrinsic's third
		// operand maps to $vDi, which is tied to the result register $vD
		// (RegConstraint) and is not encoded separately (NoEncode).
		def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
		(ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
		"vrlqmi $vD, $vA, $vB", IIC_VecFP,
		[(set v1i128:$vD,
		(int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB,
		v1i128:$vDi))]>,
		RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
}		}

let Predicates = [IsISA3_1, HasVSX] in {		let Predicates = [IsISA3_1, HasVSX] in {
def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>;		def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>;
def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>;		def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>;
}		}

//---------------------------- Anonymous Patterns ----------------------------//		//---------------------------- Anonymous Patterns ----------------------------//
Show All 35 Lines	let Predicates = [IsISA3_1] in {
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),		def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
(v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;		(v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),		def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),
(v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;		(v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),		def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),
(v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;		(v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),		def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
(v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;		(v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;

		// Select a generic rotl on v1i128 as a single VRLQ.
		def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
		(v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
}		}

let Predicates = [IsISA3_1, HasVSX] in {		let Predicates = [IsISA3_1, HasVSX] in {
def : Pat<(v16i8 (int_ppc_vsx_xvcvspbf16 v16i8:$XA)),		def : Pat<(v16i8 (int_ppc_vsx_xvcvspbf16 v16i8:$XA)),
(COPY_TO_REGCLASS (XVCVSPBF16 RCCp.AToVSRC), VRRC)>;		(COPY_TO_REGCLASS (XVCVSPBF16 RCCp.AToVSRC), VRRC)>;
def : Pat<(v16i8 (int_ppc_vsx_xvcvbf16spn v16i8:$XA)),		def : Pat<(v16i8 (int_ppc_vsx_xvcvbf16spn v16i8:$XA)),
(COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>;		(COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>;
}		}
▲ Show 20 Lines • Show All 47 Lines • Show Last 20 Lines

llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
				; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s \| \
				[Review comment - amyk] Please also add the BE run line.
				; RUN: FileCheck %s

				; This test case aims to test the builtins for vector rotate instructions
				; on Power10.


				; Rotate of %y by %x written out as shl, lshr by (128 - %x) and or.
				; The whole sequence is expected to become a single vrlq.
				define <1 x i128> @test_vrlq(<1 x i128> %x, <1 x i128> %y) {
				; CHECK-LABEL: test_vrlq:
				; CHECK: # %bb.0:
				; CHECK-NEXT: vrlq v2, v3, v2
				; CHECK-NEXT: blr
				%shl.i = shl <1 x i128> %y, %x
				%sub.i = sub <1 x i128> <i128 128>, %x
				%lshr.i = lshr <1 x i128> %y, %sub.i
				%tmp = or <1 x i128> %shl.i, %lshr.i
				ret <1 x i128> %tmp
				}

				; Same shl/lshr/or rotate idiom as test_vrlq, but the value being rotated
				; is the constant 16 (a multiple of 8). The vrlq line is not required to
				; come right after the block label - presumably to allow for the
				; instructions that materialize the constant.
				define <1 x i128> @test_vrlq_cost_mult8(<1 x i128> %x) {
				; CHECK-LABEL: test_vrlq_cost_mult8:
				; CHECK: # %bb.0:
				; CHECK: vrlq v2, v3, v2
				; CHECK-NEXT: blr
				%shl.i = shl <1 x i128> <i128 16>, %x
				%sub.i = sub <1 x i128> <i128 128>, %x
				%lshr.i = lshr <1 x i128> <i128 16>, %sub.i
				%tmp = or <1 x i128> %shl.i, %lshr.i
				ret <1 x i128> %tmp
				}

				; Same shl/lshr/or rotate idiom as test_vrlq, but the value being rotated
				; is the constant 4 (not a multiple of 8). The vrlq line is not required
				; to come right after the block label - presumably to allow for the
				; instructions that materialize the constant.
				define <1 x i128> @test_vrlq_cost_non_mult8(<1 x i128> %x) {
				; CHECK-LABEL: test_vrlq_cost_non_mult8:
				; CHECK: # %bb.0:
				; CHECK: vrlq v2, v3, v2
				; CHECK-NEXT: blr
				%shl.i = shl <1 x i128> <i128 4>, %x
				%sub.i = sub <1 x i128> <i128 128>, %x
				%lshr.i = lshr <1 x i128> <i128 4>, %sub.i
				%tmp = or <1 x i128> %shl.i, %lshr.i
				ret <1 x i128> %tmp
				}

				; Function Attrs: nounwind readnone
				; Direct call to the vrlqmi intrinsic with operands (%a, %c, %b), the same
				; order the vec_rlmi wrapper in altivec.h uses. The expected output is one
				; vrlqmi whose destination is v3, followed by a copy of v3 into v2
				; (presumably because the third intrinsic operand is tied to the result
				; register while the return value has to be in v2).
				define <1 x i128> @test_vrlqmi(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
				; CHECK-LABEL: test_vrlqmi:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vrlqmi v3, v2, v4
				; CHECK-NEXT: vmr v2, v3
				; CHECK-NEXT: blr
				entry:
				%tmp = tail call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128> %a, <1 x i128> %c, <1 x i128> %b)
				ret <1 x i128> %tmp
				}

				; Function Attrs: nounwind readnone
				define <1 x i128> @test_vrlqnm(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
				[Review comment - nemanjai] Add a test case for this that was produced from `vec_rlnm` at -O2.
				; CHECK-LABEL: test_vrlqnm:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vrlqnm v2, v2, v3
				; CHECK-NEXT: xxland v2, v2, v4
				; CHECK-NEXT: blr
				entry:
				%0 = tail call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128> %a, <1 x i128> %b)
				%tmp = and <1 x i128> %0, %c
				ret <1 x i128> %tmp
				}

				; Function Attrs: nounwind readnone
				declare <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>, <1 x i128>, <1 x i128>)

				; Function Attrs: nounwind readnone
				declare <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>, <1 x i128>)

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC][Power10] Implementation of 128-bit Binary Vector Rotate builtins
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 293474

clang/include/clang/Basic/BuiltinsPPC.def

clang/lib/Headers/altivec.h

clang/test/CodeGen/builtins-ppc-p10vector.c

llvm/include/llvm/IR/IntrinsicsPowerPC.td

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC][Power10] Implementation of 128-bit Binary Vector Rotate builtinsClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 293474

clang/include/clang/Basic/BuiltinsPPC.def

clang/lib/Headers/altivec.h

clang/test/CodeGen/builtins-ppc-p10vector.c

llvm/include/llvm/IR/IntrinsicsPowerPC.td

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll

[PowerPC][Power10] Implementation of 128-bit Binary Vector Rotate builtins
ClosedPublic