Diff 298770

clang/include/clang/Basic/BuiltinsPPC.def

	Show First 20 Lines • Show All 416 Lines • ▼ Show 20 Lines
	BUILTIN(__builtin_altivec_vextdubvrx, "V2ULLiV16UcV16UcUi", "")			BUILTIN(__builtin_altivec_vextdubvrx, "V2ULLiV16UcV16UcUi", "")
	BUILTIN(__builtin_altivec_vextduhvlx, "V2ULLiV8UsV8UsUi", "")			BUILTIN(__builtin_altivec_vextduhvlx, "V2ULLiV8UsV8UsUi", "")
	BUILTIN(__builtin_altivec_vextduhvrx, "V2ULLiV8UsV8UsUi", "")			BUILTIN(__builtin_altivec_vextduhvrx, "V2ULLiV8UsV8UsUi", "")
	BUILTIN(__builtin_altivec_vextduwvlx, "V2ULLiV4UiV4UiUi", "")			BUILTIN(__builtin_altivec_vextduwvlx, "V2ULLiV4UiV4UiUi", "")
	BUILTIN(__builtin_altivec_vextduwvrx, "V2ULLiV4UiV4UiUi", "")			BUILTIN(__builtin_altivec_vextduwvrx, "V2ULLiV4UiV4UiUi", "")
	BUILTIN(__builtin_altivec_vextddvlx, "V2ULLiV2ULLiV2ULLiUi", "")			BUILTIN(__builtin_altivec_vextddvlx, "V2ULLiV2ULLiV2ULLiUi", "")
	BUILTIN(__builtin_altivec_vextddvrx, "V2ULLiV2ULLiV2ULLiUi", "")			BUILTIN(__builtin_altivec_vextddvrx, "V2ULLiV2ULLiV2ULLiUi", "")

				// P10 Vector rotate built-ins.
				// Every type in both signatures is V1ULLLi. vrlqmi lists four of them
				// (result plus three operands) and vrlqnm lists three (result plus two
				// operands), matching the operand counts of the corresponding
				// int_ppc_altivec_vrlqmi / int_ppc_altivec_vrlqnm intrinsics.
				BUILTIN(__builtin_altivec_vrlqmi, "V1ULLLiV1ULLLiV1ULLLiV1ULLLi", "")
				BUILTIN(__builtin_altivec_vrlqnm, "V1ULLLiV1ULLLiV1ULLLi", "")

	// VSX built-ins.			// VSX built-ins.

	BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")			BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")
	BUILTIN(__builtin_vsx_lxvw4x, "V4iivC*", "")			BUILTIN(__builtin_vsx_lxvw4x, "V4iivC*", "")
	BUILTIN(__builtin_vsx_lxvd2x_be, "V2dSLLivC*", "")			BUILTIN(__builtin_vsx_lxvd2x_be, "V2dSLLivC*", "")
	BUILTIN(__builtin_vsx_lxvw4x_be, "V4iSLLivC*", "")			BUILTIN(__builtin_vsx_lxvw4x_be, "V4iSLLivC*", "")

	BUILTIN(__builtin_vsx_stxvd2x, "vV2div*", "")			BUILTIN(__builtin_vsx_stxvd2x, "vV2div*", "")
	▲ Show 20 Lines • Show All 215 Lines • Show Last 20 Lines

clang/lib/Headers/altivec.h

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 7,921 Lines • ▼ Show 20 Lines
	}			}

	static __inline__ vector unsigned long long __ATTRS_o_ai			static __inline__ vector unsigned long long __ATTRS_o_ai
	vec_rl(vector unsigned long long __a, vector unsigned long long __b) {			vec_rl(vector unsigned long long __a, vector unsigned long long __b) {
	return __builtin_altivec_vrld(__a, __b);			return __builtin_altivec_vrld(__a, __b);
	}			}
	#endif			#endif

				#ifdef __POWER10_VECTOR__
				/* vec_rl, signed 128-bit overload.
				 *
				 * Open-coded rotate: the result is (__b << __a) OR'd with
				 * (__b >> (W - __a)), where W is __CHAR_BIT__ * sizeof(vector signed
				 * __int128). There is no builtin call here; the shl/lshr/or idiom is
				 * emitted directly.
				 *
				 * NOTE(review): in this expression __b is the value being shifted and
				 * __a supplies the shift count, whereas the 64-bit overload above passes
				 * (__a, __b) straight to __builtin_altivec_vrld. Confirm the operand
				 * roles are intended. Also note that __a == 0 makes the right-shift
				 * count equal to W.
				 */
				static __inline__ vector signed __int128 __ATTRS_o_ai
				vec_rl(vector signed __int128 __a, vector unsigned __int128 __b) {
				return (__b << __a) | (__b >> ((__CHAR_BIT__ * sizeof(vector signed __int128)) - __a));
				}

				/* vec_rl, unsigned 128-bit overload.
				 *
				 * Open-coded rotate: the result is (__b << __a) OR'd with
				 * (__b >> (W - __a)), where W is __CHAR_BIT__ * sizeof(vector unsigned
				 * __int128).
				 *
				 * NOTE(review): __b is the value being shifted and __a supplies the
				 * shift count; confirm this matches the operand roles of the other
				 * vec_rl overloads. __a == 0 makes the right-shift count equal to W.
				 */
				static __inline__ vector unsigned __int128 __ATTRS_o_ai
				vec_rl(vector unsigned __int128 __a, vector unsigned __int128 __b) {
				return (__b << __a) | (__b >> ((__CHAR_BIT__ * sizeof(vector unsigned __int128)) - __a));
				}
				#endif

	/* vec_rlmi */			/* vec_rlmi */
	#ifdef __POWER9_VECTOR__			#ifdef __POWER9_VECTOR__
	static __inline__ vector unsigned int __ATTRS_o_ai			static __inline__ vector unsigned int __ATTRS_o_ai
	vec_rlmi(vector unsigned int __a, vector unsigned int __b,			vec_rlmi(vector unsigned int __a, vector unsigned int __b,
	vector unsigned int __c) {			vector unsigned int __c) {
	return __builtin_altivec_vrlwmi(__a, __c, __b);			return __builtin_altivec_vrlwmi(__a, __c, __b);
	}			}

	static __inline__ vector unsigned long long __ATTRS_o_ai			static __inline__ vector unsigned long long __ATTRS_o_ai
	vec_rlmi(vector unsigned long long __a, vector unsigned long long __b,			vec_rlmi(vector unsigned long long __a, vector unsigned long long __b,
	vector unsigned long long __c) {			vector unsigned long long __c) {
	return __builtin_altivec_vrldmi(__a, __c, __b);			return __builtin_altivec_vrldmi(__a, __c, __b);
	}			}
				#endif

				#ifdef __POWER10_VECTOR__
				/* vec_rlmi, unsigned 128-bit overload.
				 * Thin wrapper over __builtin_altivec_vrlqmi. The builtin is called with
				 * (__a, __c, __b): the second and third parameters are swapped at the
				 * call site, the same ordering the vrlwmi/vrldmi overloads use. */
				static __inline__ vector unsigned __int128 __ATTRS_o_ai
				vec_rlmi(vector unsigned __int128 __a, vector unsigned __int128 __b,
				vector unsigned __int128 __c) {
				return __builtin_altivec_vrlqmi(__a, __c, __b);
				}

				/* vec_rlmi, signed 128-bit overload.
				 * Calls __builtin_altivec_vrlqmi with (__a, __c, __b), i.e. with the
				 * second and third parameters swapped. No explicit casts are written on
				 * the arguments or on the returned value. */
				static __inline__ vector signed __int128 __ATTRS_o_ai
				vec_rlmi(vector signed __int128 __a, vector signed __int128 __b,
				vector signed __int128 __c) {
				return __builtin_altivec_vrlqmi(__a, __c, __b);
				}
				#endif

	/* vec_rlnm */			/* vec_rlnm */
				#ifdef __POWER9_VECTOR__
	static __inline__ vector unsigned int __ATTRS_o_ai			static __inline__ vector unsigned int __ATTRS_o_ai
	vec_rlnm(vector unsigned int __a, vector unsigned int __b,			vec_rlnm(vector unsigned int __a, vector unsigned int __b,
	vector unsigned int __c) {			vector unsigned int __c) {
	vector unsigned int OneByte = { 0x8, 0x8, 0x8, 0x8 };			vector unsigned int OneByte = { 0x8, 0x8, 0x8, 0x8 };
	return __builtin_altivec_vrlwnm(__a, ((__c << OneByte) \| __b));			return __builtin_altivec_vrlwnm(__a, ((__c << OneByte) \| __b));
	}			}

	static __inline__ vector unsigned long long __ATTRS_o_ai			static __inline__ vector unsigned long long __ATTRS_o_ai
	vec_rlnm(vector unsigned long long __a, vector unsigned long long __b,			vec_rlnm(vector unsigned long long __a, vector unsigned long long __b,
	vector unsigned long long __c) {			vector unsigned long long __c) {
	vector unsigned long long OneByte = { 0x8, 0x8 };			vector unsigned long long OneByte = { 0x8, 0x8 };
	return __builtin_altivec_vrldnm(__a, ((__c << OneByte) \| __b));			return __builtin_altivec_vrldnm(__a, ((__c << OneByte) \| __b));
	}			}
	#endif			#endif

				#ifdef __POWER10_VECTOR__
				static __inline__ vector unsigned __int128 __ATTRS_o_ai
				vec_rlnm(vector unsigned __int128 __a, vector unsigned __int128 __b,
				vector unsigned __int128 __c) {
				// Merge __b and __c using an appropriate shuffle.
				vector unsigned char TmpB = (vector unsigned char)__b;
				vector unsigned char TmpC = (vector unsigned char)__c;
				vector unsigned char MaskAndShift =
				nemanjaiUnsubmitted Done Reply Inline Actions While correct, this implementation will require two constant pool loads (for the two shift amounts), then two `vrlq`'s to shift the two vectors and finally an `xxlor` to OR them together. We should be able to do this with a single constant pool load and `vperm`. Presumably the implementation would be something like: // Merge __b and __c using an appropriate shuffle. vector unsigned char TmpB = (vector unsigned char)__b; vector unsigned char TmpC = (vector unsigned char)__c; vector unsigned char MaskAndShift = #ifdef __LITTLE_ENDIAN__ __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 1, 0, -1, -1, -1, -1, -1); #else __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 30, 31, 15, -1, -1, -1, -1, -1, -1, -1, -1); #endif return __builtin_altivec_vrlqnm(__a, MaskAndShift); (but of course, double-check that the numbers are correct). nemanjai: While correct, this implementation will require two constant pool loads (for the two shift…
				#ifdef __LITTLE_ENDIAN__
				__builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 0,
				1, -1, -1, -1, -1, -1);
				#else
				__builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 31, 30, 15, -1,
				-1, -1, -1, -1, -1, -1, -1);
				#endif
				return __builtin_altivec_vrlqnm(__a, (vector unsigned __int128) MaskAndShift);
				nemanjaiUnsubmitted Done Reply Inline Actions Please add explicit cast to `vector unsigned __int128` for `MaskAndShift`. Similarly below. I forgot to add that to my comment. nemanjai: Please add explicit cast to `vector unsigned __int128` for `MaskAndShift`. Similarly below. I…
				}

				/* vec_rlnm, signed 128-bit overload.
				 * Builds the second operand of __builtin_altivec_vrlqnm by selecting three
				 * bytes from __b and __c with a single byte shuffle; every other lane is
				 * given index -1 (left unspecified). __a is passed through unchanged.
				 *
				 * NOTE(review): the little-endian mask takes one byte from __c and two
				 * from __b, while the big-endian mask takes two bytes from __c and one
				 * from __b. Confirm both agree with the operand layout vrlqnm expects. */
				static __inline__ vector signed __int128 __ATTRS_o_ai
				vec_rlnm(vector signed __int128 __a, vector signed __int128 __b,
				vector signed __int128 __c) {
				// Merge __b and __c using an appropriate shuffle.
				// Reinterpret both operands as 16 bytes so single bytes can be selected.
				vector unsigned char TmpB = (vector unsigned char)__b;
				vector unsigned char TmpC = (vector unsigned char)__c;
				vector unsigned char MaskAndShift =
				#ifdef __LITTLE_ENDIAN__
				// Result lanes 8, 9, 10 <- TmpC[0], TmpB[0], TmpB[1] (indices 16, 0, 1).
				__builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 0,
				1, -1, -1, -1, -1, -1);
				#else
				// Result lanes 5, 6, 7 <- TmpC[15], TmpC[14], TmpB[15] (indices 31, 30, 15).
				__builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 31, 30, 15, -1,
				-1, -1, -1, -1, -1, -1, -1);
				#endif
				// Cast the byte vector back to a 128-bit vector for the builtin call.
				return __builtin_altivec_vrlqnm(__a, (vector unsigned __int128) MaskAndShift);
				}
				#endif

	/* vec_vrlb */			/* vec_vrlb */

	static __inline__ vector signed char __ATTRS_o_ai			static __inline__ vector signed char __ATTRS_o_ai
	vec_vrlb(vector signed char __a, vector unsigned char __b) {			vec_vrlb(vector signed char __a, vector unsigned char __b) {
	return (vector signed char)__builtin_altivec_vrlb((vector char)__a, __b);			return (vector signed char)__builtin_altivec_vrlb((vector char)__a, __b);
	}			}

	static __inline__ vector unsigned char __ATTRS_o_ai			static __inline__ vector unsigned char __ATTRS_o_ai
	▲ Show 20 Lines • Show All 10,168 Lines • Show Last 20 Lines

clang/test/CodeGen/builtins-ppc-p10vector.c

	// REQUIRES: powerpc-registered-target			// REQUIRES: powerpc-registered-target
	// RUN: %clang_cc1 -target-feature +vsx \			// RUN: %clang_cc1 -target-feature +vsx \
	// RUN: -target-cpu pwr10 -triple powerpc64-unknown-unknown -emit-llvm %s \			// RUN: -target-cpu pwr10 -triple powerpc64-unknown-unknown -emit-llvm %s \
	// RUN: -o - \| FileCheck %s -check-prefixes=CHECK-BE,CHECK			// RUN: -o - \| FileCheck %s -check-prefixes=CHECK-BE,CHECK
				amykUnsubmitted Done Reply Inline Actions The `CHECK-COMMON` should not be needed. You can just use the `CHECK` prefix in the tests since we have set up check prefixes. amyk: The `CHECK-COMMON` should not be needed. You can just use the `CHECK` prefix in the tests since…
	// RUN: %clang_cc1 -target-feature +vsx \			// RUN: %clang_cc1 -target-feature +vsx \
	// RUN: -target-cpu pwr10 -triple powerpc64le-unknown-unknown -emit-llvm %s \			// RUN: -target-cpu pwr10 -triple powerpc64le-unknown-unknown -emit-llvm %s \
	// RUN: -o - \| FileCheck %s -check-prefixes=CHECK-LE,CHECK			// RUN: -o - \| FileCheck %s -check-prefixes=CHECK-LE,CHECK

	#include <altivec.h>			#include <altivec.h>

	vector signed __int128 vi128a;			vector signed __int128 vi128a;
	vector signed char vsca, vscb;			vector signed char vsca, vscb;
	vector unsigned char vuca, vucb, vucc;			vector unsigned char vuca, vucb, vucc;
	vector signed short vssa, vssb;			vector signed short vssa, vssb;
	vector unsigned short vusa, vusb, vusc;			vector unsigned short vusa, vusb, vusc;
	vector signed int vsia, vsib;			vector signed int vsia, vsib;
	vector unsigned int vuia, vuib, vuic;			vector unsigned int vuia, vuib, vuic;
	vector signed long long vslla, vsllb;			vector signed long long vslla, vsllb;
	vector unsigned long long vulla, vullb, vullc;			vector unsigned long long vulla, vullb, vullc;
	vector signed __int128 vsi128a, vsi128b;			vector signed __int128 vsi128a, vsi128b, vsi128c;
	vector unsigned __int128 vui128a, vui128b, vui128c;			vector unsigned __int128 vui128a, vui128b, vui128c;
	vector float vfa, vfb;			vector float vfa, vfb;
	vector double vda, vdb;			vector double vda, vdb;
	float fa;			float fa;
	double da;			double da;
	signed int sia;			signed int sia;
	signed int *iap;			signed int *iap;
	unsigned int uia, uib, *uiap;			unsigned int uia, uib, *uiap;
	▲ Show 20 Lines • Show All 1,591 Lines • ▼ Show 20 Lines
	vector signed __int128 test_vec_mod_s128(void) {			vector signed __int128 test_vec_mod_s128(void) {
	// CHECK: srem <1 x i128>			// CHECK: srem <1 x i128>
	// CHECK-NEXT: ret <1 x i128>			// CHECK-NEXT: ret <1 x i128>
	return vec_mod(vsi128a, vsi128b);			return vec_mod(vsi128a, vsi128b);
	}			}

	vector bool __int128 test_vec_cmpeq_s128(void) {			vector bool __int128 test_vec_cmpeq_s128(void) {
	// CHECK-LABEL: @test_vec_cmpeq_s128(			// CHECK-LABEL: @test_vec_cmpeq_s128(
	// CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpequq(<1 x i128>			// CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpequq(<1 x i128>
				nemanjaiUnsubmitted Done Reply Inline Actions Please show the shift in the test case as well. nemanjai: Please show the shift in the test case as well.
				nemanjaiUnsubmitted Done Reply Inline Actions Please show the shift in the test case as well. This was still not addressed. Please show the shuffle in the checks. nemanjai: > Please show the shift in the test case as well. This was still not addressed. Please show…
	// CHECK-NEXT: ret <1 x i128>			// CHECK-NEXT: ret <1 x i128>
	return vec_cmpeq(vsi128a, vsi128b);			return vec_cmpeq(vsi128a, vsi128b);
	}			}

	vector bool __int128 test_vec_cmpeq_u128(void) {			vector bool __int128 test_vec_cmpeq_u128(void) {
	// CHECK-LABEL: @test_vec_cmpeq_u128(			// CHECK-LABEL: @test_vec_cmpeq_u128(
	// CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpequq(<1 x i128>			// CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpequq(<1 x i128>
	// CHECK-NEXT: ret <1 x i128>			// CHECK-NEXT: ret <1 x i128>
	▲ Show 20 Lines • Show All 238 Lines • ▼ Show 20 Lines
	}			}

	int test_vec_all_ge_u128(void) {			int test_vec_all_ge_u128(void) {
	// CHECK-LABEL: @test_vec_all_ge_u128(			// CHECK-LABEL: @test_vec_all_ge_u128(
	// CHECK: call i32 @llvm.ppc.altivec.vcmpgtuq.p(i32 0, <1 x i128> %2, <1 x i128> %3)			// CHECK: call i32 @llvm.ppc.altivec.vcmpgtuq.p(i32 0, <1 x i128> %2, <1 x i128> %3)
	// CHECK-NEXT: ret i32			// CHECK-NEXT: ret i32
	return vec_all_ge(vui128a, vui128b);			return vec_all_ge(vui128a, vui128b);
	}			}

				// vec_rl on the signed __int128 globals. The directives require sub, lshr
				// and or on <1 x i128> as consecutive IR lines, directly followed by the
				// return.
				vector signed __int128 test_vec_rl_s128(void) {
				// CHECK-LABEL: @test_vec_rl_s128(
				// CHECK: sub <1 x i128>
				// CHECK-NEXT: lshr <1 x i128>
				// CHECK-NEXT: or <1 x i128>
				// CHECK-NEXT: ret <1 x i128>
				return vec_rl(vsi128a, vsi128b);
				}

				// vec_rl on the unsigned __int128 globals. Unlike the signed variant, the
				// sub / lshr / or lines are matched in order but not required to be
				// adjacent; only the return must directly follow the or.
				vector unsigned __int128 test_vec_rl_u128(void) {
				// CHECK-LABEL: @test_vec_rl_u128(
				// CHECK: sub <1 x i128>
				// CHECK: lshr <1 x i128>
				// CHECK: or <1 x i128>
				// CHECK-NEXT: ret <1 x i128>
				return vec_rl(vui128a, vui128b);
				}

				// vec_rlnm on the signed __int128 globals. The endian-specific directives
				// pin the byte shuffle mask (and the SSA value names %7 / %8); the common
				// directives then require the vrlqnm intrinsic call directly followed by
				// the return.
				vector signed __int128 test_vec_rlnm_s128(void) {
				// CHECK-LABEL: @test_vec_rlnm_s128(
				// CHECK-LE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				// CHECK-BE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				// CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>
				// CHECK-NEXT: ret <1 x i128>
				return vec_rlnm(vsi128a, vsi128b, vsi128c);
				}

				// vec_rlnm on the unsigned __int128 globals. Expects the same
				// endian-specific shuffle masks as the signed variant, then the vrlqnm
				// intrinsic call directly followed by the return.
				vector unsigned __int128 test_vec_rlnm_u128(void) {
				// CHECK-LABEL: @test_vec_rlnm_u128(
				// CHECK-LE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				// CHECK-BE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				// CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>
				// CHECK-NEXT: ret <1 x i128>
				return vec_rlnm(vui128a, vui128b, vui128c);
				}

				// vec_rlmi on the signed __int128 globals; expects a call to the vrlqmi
				// intrinsic directly followed by the return. Operand order is not pinned.
				vector signed __int128 test_vec_rlmi_s128(void) {
				// CHECK-LABEL: @test_vec_rlmi_s128(
				// CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>
				// CHECK-NEXT: ret <1 x i128>
				return vec_rlmi(vsi128a, vsi128b, vsi128c);
				}

				// vec_rlmi on the unsigned __int128 globals; expects a call to the vrlqmi
				// intrinsic directly followed by the return. Operand order is not pinned.
				vector unsigned __int128 test_vec_rlmi_u128(void) {
				// CHECK-LABEL: @test_vec_rlmi_u128(
				// CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>
				// CHECK-NEXT: ret <1 x i128>
				return vec_rlmi(vui128a, vui128b, vui128c);
				}

llvm/include/llvm/IR/IntrinsicsPowerPC.td

	Show First 20 Lines • Show All 1,051 Lines • ▼ Show 20 Lines
	def int_ppc_altivec_vrldnm :			def int_ppc_altivec_vrldnm :
	PowerPC_Vec_Intrinsic<"vrldnm", [llvm_v2i64_ty],			PowerPC_Vec_Intrinsic<"vrldnm", [llvm_v2i64_ty],
	[llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;			[llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
	def int_ppc_altivec_vrldmi :			def int_ppc_altivec_vrldmi :
	PowerPC_Vec_Intrinsic<"vrldmi", [llvm_v2i64_ty],			PowerPC_Vec_Intrinsic<"vrldmi", [llvm_v2i64_ty],
	[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],			[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
	[IntrNoMem]>;			[IntrNoMem]>;

				// vrlqnm intrinsic: one v1i128 result, two v1i128 operands, IntrNoMem.
				def int_ppc_altivec_vrlqnm :
				PowerPC_Vec_Intrinsic<"vrlqnm", [llvm_v1i128_ty],
				[llvm_v1i128_ty, llvm_v1i128_ty],
				[IntrNoMem]>;
				// vrlqmi intrinsic: one v1i128 result, three v1i128 operands, IntrNoMem.
				def int_ppc_altivec_vrlqmi :
				PowerPC_Vec_Intrinsic<"vrlqmi", [llvm_v1i128_ty],
				[llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
				[IntrNoMem]>;

	// Vector Divide Extended Intrinsics.			// Vector Divide Extended Intrinsics.
	def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">;			def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">;
	def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">;			def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">;
	def int_ppc_altivec_vdivesd : PowerPC_Vec_DDD_Intrinsic<"vdivesd">;			def int_ppc_altivec_vdivesd : PowerPC_Vec_DDD_Intrinsic<"vdivesd">;
	def int_ppc_altivec_vdiveud : PowerPC_Vec_DDD_Intrinsic<"vdiveud">;			def int_ppc_altivec_vdiveud : PowerPC_Vec_DDD_Intrinsic<"vdiveud">;
	def int_ppc_altivec_vdivesq : PowerPC_Vec_QQQ_Intrinsic<"vdivesq">;			def int_ppc_altivec_vdivesq : PowerPC_Vec_QQQ_Intrinsic<"vdivesq">;
	def int_ppc_altivec_vdiveuq : PowerPC_Vec_QQQ_Intrinsic<"vdiveuq">;			def int_ppc_altivec_vdiveuq : PowerPC_Vec_QQQ_Intrinsic<"vdiveuq">;

	▲ Show 20 Lines • Show All 328 Lines • Show Last 20 Lines

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 886 Lines • ▼ Show 20 Lines	if (Subtarget.isISA3_1()) {
setOperationAction(ISD::UREM, MVT::v2i64, Legal);		setOperationAction(ISD::UREM, MVT::v2i64, Legal);
setOperationAction(ISD::SREM, MVT::v2i64, Legal);		setOperationAction(ISD::SREM, MVT::v2i64, Legal);
setOperationAction(ISD::UREM, MVT::v4i32, Legal);		setOperationAction(ISD::UREM, MVT::v4i32, Legal);
setOperationAction(ISD::SREM, MVT::v4i32, Legal);		setOperationAction(ISD::SREM, MVT::v4i32, Legal);
setOperationAction(ISD::UREM, MVT::v1i128, Legal);		setOperationAction(ISD::UREM, MVT::v1i128, Legal);
setOperationAction(ISD::SREM, MVT::v1i128, Legal);		setOperationAction(ISD::SREM, MVT::v1i128, Legal);
setOperationAction(ISD::UDIV, MVT::v1i128, Legal);		setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
setOperationAction(ISD::SDIV, MVT::v1i128, Legal);		setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
		setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
}		}

setOperationAction(ISD::MUL, MVT::v8i16, Legal);		setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);		setOperationAction(ISD::MUL, MVT::v16i8, Custom);

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);		setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);		setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

▲ Show 20 Lines • Show All 16,019 Lines • Show Last 20 Lines

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

Show First 20 Lines • Show All 2,105 Lines • ▼ Show 20 Lines	def VMODUQ : VXForm_1<1547, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
[(set v1i128:$vD, (urem v1i128:$vA, v1i128:$vB))]>;		[(set v1i128:$vD, (urem v1i128:$vA, v1i128:$vB))]>;
def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB),		def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB),
"vextsd2q $vD, $vB", IIC_VecGeneral,		"vextsd2q $vD, $vB", IIC_VecGeneral,
[(set v1i128:$vD, (int_ppc_altivec_vextsd2q v2i64:$vB))]>;		[(set v1i128:$vD, (int_ppc_altivec_vextsd2q v2i64:$vB))]>;
def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),		def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
"vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;		"vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;
def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),		def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
"vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>;		"vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>;
def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm", []>;		def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm",
		[(set v1i128:$vD,
		(int_ppc_altivec_vrlqnm v1i128:$vA,
		v1i128:$vB))]>;
def VRLQMI : VXForm_1<69, (outs vrrc:$vD),		def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
(ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),		(ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
"vrlqmi $vD, $vA, $vB", IIC_VecFP, []>,		"vrlqmi $vD, $vA, $vB", IIC_VecFP,
		[(set v1i128:$vD,
		(int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB,
		v1i128:$vDi))]>,
RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;		RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>;		def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>;
def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>;		def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>;
def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>;		def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>;
def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;		def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;		def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;
def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;		def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;
def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>;		def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>;
def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>;		def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>;
}		}
		amykUnsubmitted Done Reply Inline Actions If possible, I think it is better to leave the instruction patterns in the position they were in, and just add the patterns to them. amyk: If possible, I think it is better to leave the instruction patterns in the position they were…

let Predicates = [IsISA3_1, HasVSX] in {		let Predicates = [IsISA3_1, HasVSX] in {
def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>;		def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>;
def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>;		def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>;
}		}

//---------------------------- Anonymous Patterns ----------------------------//		//---------------------------- Anonymous Patterns ----------------------------//
let Predicates = [IsISA3_1] in {		let Predicates = [IsISA3_1] in {
Show All 34 Lines	let Predicates = [IsISA3_1] in {
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),		def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
(v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;		(v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),		def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),
(v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;		(v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),		def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),
(v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;		(v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),		def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
(v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;		(v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;

		// Select a generic rotl of v1i128 by a v1i128 amount to the VRLQ instruction,
		// passing $vA and $vB through in the same order.
		def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
		(v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
}		}

let Predicates = [IsISA3_1, HasVSX] in {		let Predicates = [IsISA3_1, HasVSX] in {
def : Pat<(v16i8 (int_ppc_vsx_xvcvspbf16 v16i8:$XA)),		def : Pat<(v16i8 (int_ppc_vsx_xvcvspbf16 v16i8:$XA)),
(COPY_TO_REGCLASS (XVCVSPBF16 RCCp.AToVSRC), VRRC)>;		(COPY_TO_REGCLASS (XVCVSPBF16 RCCp.AToVSRC), VRRC)>;
def : Pat<(v16i8 (int_ppc_vsx_xvcvbf16spn v16i8:$XA)),		def : Pat<(v16i8 (int_ppc_vsx_xvcvbf16spn v16i8:$XA)),
(COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>;		(COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>;
}		}
▲ Show 20 Lines • Show All 47 Lines • Show Last 20 Lines

llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
				; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
				amykUnsubmitted Done Reply Inline Actions Please also add the BE run line. amyk: Please also add the BE run line.
				; RUN: FileCheck %s -check-prefixes=CHECK-LE,CHECK

				; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
				; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
				; RUN: FileCheck %s -check-prefixes=CHECK-BE,CHECK

				; This test case aims to test the builtins for vector rotate instructions
				; on Power10.


				; Rotate idiom written out as shl / lshr / or: %y is shifted left by %x and
				; right by (128 - %x). The expected output is a single vrlq followed by blr.
				define <1 x i128> @test_vrlq(<1 x i128> %x, <1 x i128> %y) {
				; CHECK-LABEL: test_vrlq:
				; CHECK: # %bb.0:
				; CHECK-NEXT: vrlq v2, v3, v2
				; CHECK-NEXT: blr
				%shl.i = shl <1 x i128> %y, %x
				%sub.i = sub <1 x i128> <i128 128>, %x
				%lshr.i = lshr <1 x i128> %y, %sub.i
				%tmp = or <1 x i128> %shl.i, %lshr.i
				ret <1 x i128> %tmp
				}

				; Same rotate idiom, but the rotated value is the constant 16 and only %x is
				; an argument. The vrlq line is matched without a -NEXT constraint, so any
				; instructions emitted before it are not pinned by this test.
				define <1 x i128> @test_vrlq_cost_mult8(<1 x i128> %x) {
				; CHECK-LABEL: test_vrlq_cost_mult8:
				; CHECK: # %bb.0:
				; CHECK: vrlq v2, v3, v2
				; CHECK-NEXT: blr
				%shl.i = shl <1 x i128> <i128 16>, %x
				%sub.i = sub <1 x i128> <i128 128>, %x
				%lshr.i = lshr <1 x i128> <i128 16>, %sub.i
				%tmp = or <1 x i128> %shl.i, %lshr.i
				ret <1 x i128> %tmp
				}

				; Same rotate idiom with the rotated value fixed to the constant 4 (not a
				; multiple of 8). As in the constant-16 variant, only the vrlq and the
				; following blr are pinned.
				define <1 x i128> @test_vrlq_cost_non_mult8(<1 x i128> %x) {
				; CHECK-LABEL: test_vrlq_cost_non_mult8:
				; CHECK: # %bb.0:
				; CHECK: vrlq v2, v3, v2
				; CHECK-NEXT: blr
				%shl.i = shl <1 x i128> <i128 4>, %x
				%sub.i = sub <1 x i128> <i128 128>, %x
				%lshr.i = lshr <1 x i128> <i128 4>, %sub.i
				%tmp = or <1 x i128> %shl.i, %lshr.i
				ret <1 x i128> %tmp
				}

				; Function Attrs: nounwind readnone
				; Calls the vrlqmi intrinsic with operands (%a, %c, %b). The expected code is
				; vrlqmi writing v3, then a vmr copying v3 into v2 before returning.
				define <1 x i128> @test_vrlqmi(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
				; CHECK-LABEL: test_vrlqmi:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vrlqmi v3, v2, v4
				; CHECK-NEXT: vmr v2, v3
				; CHECK-NEXT: blr
				entry:
				%tmp = tail call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128> %a, <1 x i128> %c, <1 x i128> %b)
				ret <1 x i128> %tmp
				}
				nemanjaiUnsubmitted Done Reply Inline Actions Add a test case for this that was produced from `vec_rlnm` at -O2. nemanjai: Add a test case for this that was produced from `vec_rlnm` at -O2.

				; Function Attrs: nounwind readnone
				; Bitcasts %b and %c to <16 x i8>, shuffles them (result lanes 8..10 take
				; elements 16, 0, 1; all other lanes are undef), bitcasts the result back to
				; <1 x i128> and passes it as the second operand of the vrlqnm intrinsic.
				; The same IR is used for both run lines; the big- and little-endian
				; expectations differ in the mask load (lxvx vs plxv) and in the order of the
				; first two vperm source registers.
				define <1 x i128> @test_vrlqnm(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
				; CHECK-LABEL: test_vrlqnm:
				; CHECK: # %bb.0: # %entry
				; CHECK-BE: lxvx v5
				; CHECK-BE-NEXT: vperm v3, v3, v4, v5
				; CHECK-LE-NEXT: plxv v5
				; CHECK-LE-NEXT: vperm v3, v4, v3, v5
				; CHECK-NEXT: vrlqnm v2, v2, v3
				; CHECK-NEXT: blr
				entry:
				%0 = bitcast <1 x i128> %b to <16 x i8>
				%1 = bitcast <1 x i128> %c to <16 x i8>
				%shuffle.i = shufflevector <16 x i8> %0, <16 x i8> %1, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%d = bitcast <16 x i8> %shuffle.i to <1 x i128>
				%tmp = tail call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128> %a, <1 x i128> %d)
				ret <1 x i128> %tmp
				}

				; Function Attrs: nounwind readnone
				declare <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>, <1 x i128>, <1 x i128>)

				; Function Attrs: nounwind readnone
				declare <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>, <1 x i128>)

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC][Power10] Implementation of 128-bit Binary Vector Rotate builtins
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 298770

clang/include/clang/Basic/BuiltinsPPC.def

clang/lib/Headers/altivec.h

clang/test/CodeGen/builtins-ppc-p10vector.c

llvm/include/llvm/IR/IntrinsicsPowerPC.td

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC][Power10] Implementation of 128-bit Binary Vector Rotate builtinsClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 298770

clang/include/clang/Basic/BuiltinsPPC.def

clang/lib/Headers/altivec.h

clang/test/CodeGen/builtins-ppc-p10vector.c

llvm/include/llvm/IR/IntrinsicsPowerPC.td

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll

[PowerPC][Power10] Implementation of 128-bit Binary Vector Rotate builtins
ClosedPublic