Index: include/clang/Basic/BuiltinsPPC.def =================================================================== --- include/clang/Basic/BuiltinsPPC.def +++ include/clang/Basic/BuiltinsPPC.def @@ -339,6 +339,9 @@ BUILTIN(__builtin_vsx_xvabssp, "V4fV4f", "") BUILTIN(__builtin_vsx_xvabsdp, "V2dV2d", "") +BUILTIN(__builtin_vsx_xxspltw, "V4iV4iUIi", "") +BUILTIN(__builtin_vsx_xxpermdi, "V2ULLiV2ULLiV2ULLiUIi", "") + // HTM builtins BUILTIN(__builtin_tbegin, "UiUIi", "") BUILTIN(__builtin_tend, "UiUIi", "") Index: lib/Headers/altivec.h =================================================================== --- lib/Headers/altivec.h +++ lib/Headers/altivec.h @@ -7721,6 +7721,83 @@ b0, b1, b0, b1, b0, b1)); } +#ifdef __VSX__ +static __inline__ vector signed int __ATTRS_o_ai +vec_splat(vector signed int __a, unsigned const int __b) { +#ifdef __LITTLE_ENDIAN__ + unsigned __elem = 3 - (__b & 0x3); +#else + unsigned __elem = __b & 0x3; +#endif + switch(__elem) { + case 0: + return __builtin_vsx_xxspltw(__a, 0); + case 1: + return __builtin_vsx_xxspltw(__a, 1); + case 2: + return __builtin_vsx_xxspltw(__a, 2); + case 3: + return __builtin_vsx_xxspltw(__a, 3); + } +} + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_splat(vector unsigned int __a, unsigned const int __b) { +#ifdef __LITTLE_ENDIAN__ + unsigned __elem = 3 - (__b & 0x3); +#else + unsigned __elem = __b & 0x3; +#endif + switch(__elem) { + case 0: + return __builtin_vsx_xxspltw(__a, 0); + case 1: + return __builtin_vsx_xxspltw(__a, 1); + case 2: + return __builtin_vsx_xxspltw(__a, 2); + case 3: + return __builtin_vsx_xxspltw(__a, 3); + } +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_splat(vector bool int __a, unsigned const int __b) { +#ifdef __LITTLE_ENDIAN__ + unsigned __elem = 3 - (__b & 0x3); +#else + unsigned __elem = __b & 0x3; +#endif + switch(__elem) { + case 0: + return __builtin_vsx_xxspltw(__a, 0); + case 1: + return __builtin_vsx_xxspltw(__a, 1); + case 2: + return __builtin_vsx_xxspltw(__a, 
2); + case 3: + return __builtin_vsx_xxspltw(__a, 3); + } +} + +static __inline__ vector float __ATTRS_o_ai vec_splat(vector float __a, + unsigned const int __b) { +#ifdef __LITTLE_ENDIAN__ + unsigned __elem = 3 - (__b & 0x3); +#else + unsigned __elem = __b & 0x3; +#endif + switch(__elem) { + case 0: + return __builtin_vsx_xxspltw(__a, 0); + case 1: + return __builtin_vsx_xxspltw(__a, 1); + case 2: + return __builtin_vsx_xxspltw(__a, 2); + case 3: + return __builtin_vsx_xxspltw(__a, 3); + } +} +#else static __inline__ vector signed int __ATTRS_o_ai vec_splat(vector signed int __a, unsigned const int __b) { unsigned char b0 = (__b & 0x03) * 4; @@ -7756,43 +7833,52 @@ (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, b2, b3)); } +#endif #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_splat(vector double __a, unsigned const int __b) { - unsigned char b0 = (__b & 0x01) * 8; - unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5, - b6 = b0 + 6, b7 = b0 + 7; - return vec_perm(__a, __a, - (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1, - b2, b3, b4, b5, b6, b7)); + unsigned __elem = __b & 1; +#ifdef __LITTLE_ENDIAN__ + return __elem ? __builtin_vsx_xxpermdi(__a, __a, 0) : + __builtin_vsx_xxpermdi(__a, __a, 3); +#else + return __elem ? __builtin_vsx_xxpermdi(__a, __a, 3) : + __builtin_vsx_xxpermdi(__a, __a, 0); +#endif } static __inline__ vector bool long long __ATTRS_o_ai vec_splat(vector bool long long __a, unsigned const int __b) { - unsigned char b0 = (__b & 0x01) * 8; - unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5, - b6 = b0 + 6, b7 = b0 + 7; - return vec_perm(__a, __a, - (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1, - b2, b3, b4, b5, b6, b7)); + unsigned __elem = __b & 1; +#ifdef __LITTLE_ENDIAN__ + return __elem ? __builtin_vsx_xxpermdi(__a, __a, 0) : + __builtin_vsx_xxpermdi(__a, __a, 3); +#else + return __elem ? 
__builtin_vsx_xxpermdi(__a, __a, 3) : + __builtin_vsx_xxpermdi(__a, __a, 0); +#endif } static __inline__ vector signed long long __ATTRS_o_ai vec_splat(vector signed long long __a, unsigned const int __b) { - unsigned char b0 = (__b & 0x01) * 8; - unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5, - b6 = b0 + 6, b7 = b0 + 7; - return vec_perm(__a, __a, - (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1, - b2, b3, b4, b5, b6, b7)); + unsigned __elem = __b & 1; +#ifdef __LITTLE_ENDIAN__ + return __elem ? __builtin_vsx_xxpermdi(__a, __a, 0) : + __builtin_vsx_xxpermdi(__a, __a, 3); +#else + return __elem ? __builtin_vsx_xxpermdi(__a, __a, 3) : + __builtin_vsx_xxpermdi(__a, __a, 0); +#endif } static __inline__ vector unsigned long long __ATTRS_o_ai vec_splat(vector unsigned long long __a, unsigned const int __b) { - unsigned char b0 = (__b & 0x01) * 8; - unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5, - b6 = b0 + 6, b7 = b0 + 7; - return vec_perm(__a, __a, - (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1, - b2, b3, b4, b5, b6, b7)); + unsigned __elem = __b & 1; +#ifdef __LITTLE_ENDIAN__ + return __elem ? __builtin_vsx_xxpermdi(__a, __a, 0) : + __builtin_vsx_xxpermdi(__a, __a, 3); +#else + return __elem ? 
__builtin_vsx_xxpermdi(__a, __a, 3) : + __builtin_vsx_xxpermdi(__a, __a, 0); +#endif } #endif Index: test/CodeGen/builtins-ppc-vsx.c =================================================================== --- test/CodeGen/builtins-ppc-vsx.c +++ test/CodeGen/builtins-ppc-vsx.c @@ -262,40 +262,20 @@ // CHECK-LE: @llvm.ppc.altivec.vperm res_vd = vec_splat(vd, 1); -// CHECK: [[T1:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32> -// CHECK: [[T2:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32> -// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> -// CHECK-LE: xor <16 x i8> -// CHECK-LE: [[T1:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32> -// CHECK-LE: [[T2:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32> -// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> +// CHECK: @llvm.ppc.vsx.xxpermdi(<2 x i64> {{%.+}}, <2 x i64> {{%.+}}, i32 3) +// CHECK-LE: @llvm.ppc.vsx.xxpermdi(<2 x i64> {{%.+}}, <2 x i64> {{%.+}}, i32 0) res_vbll = vec_splat(vbll, 1); -// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> -// CHECK-LE: xor <16 x i8> -// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> +// CHECK: @llvm.ppc.vsx.xxpermdi(<2 x i64> {{%.+}}, <2 x i64> {{%.+}}, i32 3) +// CHECK-LE: @llvm.ppc.vsx.xxpermdi(<2 x i64> {{%.+}}, <2 x i64> {{%.+}}, i32 0) res_vsll = vec_splat(vsll, 1); -// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> -// CHECK-LE: xor <16 x i8> -// CHECK-LE: [[T1:%.+]] = bitcast <2 x 
i64> {{.+}} to <4 x i32> -// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> +// CHECK: @llvm.ppc.vsx.xxpermdi(<2 x i64> {{%.+}}, <2 x i64> {{%.+}}, i32 3) +// CHECK-LE: @llvm.ppc.vsx.xxpermdi(<2 x i64> {{%.+}}, <2 x i64> {{%.+}}, i32 0) res_vull = vec_splat(vull, 1); -// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> -// CHECK-LE: xor <16 x i8> -// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> +// CHECK: @llvm.ppc.vsx.xxpermdi(<2 x i64> {{%.+}}, <2 x i64> {{%.+}}, i32 3) +// CHECK-LE: @llvm.ppc.vsx.xxpermdi(<2 x i64> {{%.+}}, <2 x i64> {{%.+}}, i32 0) res_vsi = vec_pack(vsll, vsll); // CHECK: @llvm.ppc.altivec.vperm Index: test/CodeGen/ppc-vsx-splat.c =================================================================== --- test/CodeGen/ppc-vsx-splat.c +++ test/CodeGen/ppc-vsx-splat.c @@ -0,0 +1,100 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec -triple \ +// RUN: powerpc64-unknown-unknown -S -faltivec %s -o - | FileCheck %s \ +// RUN: --check-prefix=CHECK-NOOPT + +// RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec -triple \ +// RUN: powerpc64le-unknown-unknown -S -faltivec %s -o - | FileCheck %s \ +// RUN: -check-prefix=CHECK-NOOPT + +// RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec -triple \ +// RUN: powerpc64-unknown-unknown -O2 -S -faltivec %s -o - | FileCheck %s + +// RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec -triple \ +// RUN: powerpc64le-unknown-unknown -O2 -S -faltivec %s -o - | FileCheck %s \ +// 
RUN: -check-prefix=CHECK-LE + +#include <altivec.h> +vector signed int spltwv(vector signed int a, unsigned b) { + return vec_splat(a, b); +// CHECK-LABEL: spltwv +// CHECK-LE-LABEL: spltwv +// CHECK-NOOPT-LABEL: spltwv +// CHECK-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 0 +// CHECK-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1 +// CHECK-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 2 +// CHECK-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 3 +// CHECK-LE-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 0 +// CHECK-LE-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1 +// CHECK-LE-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 2 +// CHECK-LE-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 3 +// CHECK-NOOPT-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 0 +// CHECK-NOOPT-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1 +// CHECK-NOOPT-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 2 +// CHECK-NOOPT-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 3 +} + +vector signed long long spltdv(vector signed long long a, unsigned b) { + return vec_splat(a, b); +// CHECK-LABEL: spltdv +// CHECK-LE-LABEL: spltdv +// CHECK-NOOPT-LABEL: spltdv +// CHECK-DAG: xxspltd {{[0-9]+}}, {{[0-9]+}}, 0 +// CHECK-DAG: xxspltd {{[0-9]+}}, {{[0-9]+}}, 1 +// CHECK-LE-DAG: xxspltd {{[0-9]+}}, {{[0-9]+}}, 0 +// CHECK-LE-DAG: xxspltd {{[0-9]+}}, {{[0-9]+}}, 1 +} + +vector signed int spltw0(vector signed int a) { + return vec_splat(a, 0); +// CHECK-LABEL: spltw0 +// CHECK-LE-LABEL: spltw0 +// CHECK-NOOPT-LABEL: spltw0 +// CHECK: xxspltw {{[0-9]+}}, {{[0-9]+}}, 0 +// CHECK-LE: xxspltw {{[0-9]+}}, {{[0-9]+}}, 3 +} + +vector signed int spltw1(vector signed int a) { + return vec_splat(a, 1); +// CHECK-LABEL: spltw1 +// CHECK-LE-LABEL: spltw1 +// CHECK-NOOPT-LABEL: spltw1 +// CHECK: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1 +// CHECK-LE: xxspltw {{[0-9]+}}, {{[0-9]+}}, 2 +} + +vector signed int spltw2(vector signed int a) { + return vec_splat(a, 2); +// CHECK-LABEL: spltw2 +// CHECK-LE-LABEL: spltw2 +// CHECK-NOOPT-LABEL: spltw2 +// CHECK: xxspltw {{[0-9]+}}, {{[0-9]+}}, 2 +// CHECK-LE: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1 +} + +vector 
signed int spltw3(vector signed int a) { + return vec_splat(a, 3); +// CHECK-LABEL: spltw3 +// CHECK-LE-LABEL: spltw3 +// CHECK-NOOPT-LABEL: spltw3 +// CHECK: xxspltw {{[0-9]+}}, {{[0-9]+}}, 3 +// CHECK-LE: xxspltw {{[0-9]+}}, {{[0-9]+}}, 0 +} + +vector signed long long spltd0(vector signed long long a, unsigned b) { + return vec_splat(a, 0); +// CHECK-LABEL: spltd0 +// CHECK-LE-LABEL: spltd0 +// CHECK-NOOPT-LABEL: spltd0 +// CHECK: xxspltd {{[0-9]+}}, {{[0-9]+}}, 0 +// CHECK-LE: xxspltd {{[0-9]+}}, {{[0-9]+}}, 1 +} + +vector signed long long spltd1(vector signed long long a, unsigned b) { + return vec_splat(a, 1); +// CHECK-LABEL: spltd1 +// CHECK-LE-LABEL: spltd1 +// CHECK-NOOPT-LABEL: spltd1 +// CHECK: xxspltd {{[0-9]+}}, {{[0-9]+}}, 1 +// CHECK-LE: xxspltd {{[0-9]+}}, {{[0-9]+}}, 0 +}