Index: lib/Headers/altivec.h =================================================================== --- lib/Headers/altivec.h +++ lib/Headers/altivec.h @@ -7723,76 +7723,88 @@ static __inline__ vector signed int __ATTRS_o_ai vec_splat(vector signed int __a, unsigned const int __b) { - unsigned char b0 = (__b & 0x03) * 4; - unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3; - return vec_perm(__a, __a, - (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, - b2, b3, b0, b1, b2, b3)); + const unsigned __elem = __b & 0x3; + switch(__elem) { + case 0: + return __builtin_shufflevector(__a, __a, 0, 0, 0, 0); + case 1: + return __builtin_shufflevector(__a, __a, 1, 1, 1, 1); + case 2: + return __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + case 3: + return __builtin_shufflevector(__a, __a, 3, 3, 3, 3); + } } static __inline__ vector unsigned int __ATTRS_o_ai vec_splat(vector unsigned int __a, unsigned const int __b) { - unsigned char b0 = (__b & 0x03) * 4; - unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3; - return vec_perm(__a, __a, - (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, - b2, b3, b0, b1, b2, b3)); + const unsigned __elem = __b & 0x3; + switch(__elem) { + case 0: + return __builtin_shufflevector(__a, __a, 0, 0, 0, 0); + case 1: + return __builtin_shufflevector(__a, __a, 1, 1, 1, 1); + case 2: + return __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + case 3: + return __builtin_shufflevector(__a, __a, 3, 3, 3, 3); + } } static __inline__ vector bool int __ATTRS_o_ai vec_splat(vector bool int __a, unsigned const int __b) { - unsigned char b0 = (__b & 0x03) * 4; - unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3; - return vec_perm(__a, __a, - (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, - b2, b3, b0, b1, b2, b3)); + const unsigned __elem = __b & 0x3; + switch(__elem) { + case 0: + return __builtin_shufflevector(__a, __a, 0, 0, 0, 0); + case 1: + return __builtin_shufflevector(__a, __a, 1, 1, 1, 1); + case 2: + 
return __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + case 3: + return __builtin_shufflevector(__a, __a, 3, 3, 3, 3); + } } static __inline__ vector float __ATTRS_o_ai vec_splat(vector float __a, unsigned const int __b) { - unsigned char b0 = (__b & 0x03) * 4; - unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3; - return vec_perm(__a, __a, - (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, - b2, b3, b0, b1, b2, b3)); + const unsigned __elem = __b & 0x3; + switch(__elem) { + case 0: + return __builtin_shufflevector(__a, __a, 0, 0, 0, 0); + case 1: + return __builtin_shufflevector(__a, __a, 1, 1, 1, 1); + case 2: + return __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + case 3: + return __builtin_shufflevector(__a, __a, 3, 3, 3, 3); + } } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_splat(vector double __a, unsigned const int __b) { - unsigned char b0 = (__b & 0x01) * 8; - unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5, - b6 = b0 + 6, b7 = b0 + 7; - return vec_perm(__a, __a, - (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1, - b2, b3, b4, b5, b6, b7)); + const unsigned __elem = __b & 0x1; + return __elem ? __builtin_shufflevector(__a, __a, 1, 1) : + __builtin_shufflevector(__a, __a, 0, 0); } static __inline__ vector bool long long __ATTRS_o_ai vec_splat(vector bool long long __a, unsigned const int __b) { - unsigned char b0 = (__b & 0x01) * 8; - unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5, - b6 = b0 + 6, b7 = b0 + 7; - return vec_perm(__a, __a, - (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1, - b2, b3, b4, b5, b6, b7)); + const unsigned __elem = __b & 0x1; + return __elem ? 
__builtin_shufflevector(__a, __a, 1, 1) : + __builtin_shufflevector(__a, __a, 0, 0); } static __inline__ vector signed long long __ATTRS_o_ai vec_splat(vector signed long long __a, unsigned const int __b) { - unsigned char b0 = (__b & 0x01) * 8; - unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5, - b6 = b0 + 6, b7 = b0 + 7; - return vec_perm(__a, __a, - (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1, - b2, b3, b4, b5, b6, b7)); + const unsigned __elem = __b & 0x1; + return __elem ? __builtin_shufflevector(__a, __a, 1, 1) : + __builtin_shufflevector(__a, __a, 0, 0); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_splat(vector unsigned long long __a, unsigned const int __b) { - unsigned char b0 = (__b & 0x01) * 8; - unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5, - b6 = b0 + 6, b7 = b0 + 7; - return vec_perm(__a, __a, - (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1, - b2, b3, b4, b5, b6, b7)); + const unsigned __elem = __b & 0x1; + return __elem ? 
__builtin_shufflevector(__a, __a, 1, 1) : + __builtin_shufflevector(__a, __a, 0, 0); } #endif Index: test/CodeGen/builtins-ppc-altivec.c =================================================================== --- test/CodeGen/builtins-ppc-altivec.c +++ test/CodeGen/builtins-ppc-altivec.c @@ -3995,20 +3995,44 @@ // CHECK-LE: @llvm.ppc.altivec.vperm res_vi = vec_splat(vi, 0); -// CHECK: @llvm.ppc.altivec.vperm -// CHECK-LE: @llvm.ppc.altivec.vperm +// CHECK: shufflevector <4 x i32> +// CHECK: shufflevector <4 x i32> +// CHECK: shufflevector <4 x i32> +// CHECK: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> res_vui = vec_splat(vui, 0); -// CHECK: @llvm.ppc.altivec.vperm -// CHECK-LE: @llvm.ppc.altivec.vperm +// CHECK: shufflevector <4 x i32> +// CHECK: shufflevector <4 x i32> +// CHECK: shufflevector <4 x i32> +// CHECK: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> res_vbi = vec_splat(vbi, 0); -// CHECK: @llvm.ppc.altivec.vperm -// CHECK-LE: @llvm.ppc.altivec.vperm +// CHECK: shufflevector <4 x i32> +// CHECK: shufflevector <4 x i32> +// CHECK: shufflevector <4 x i32> +// CHECK: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> res_vf = vec_splat(vf, 0); -// CHECK: @llvm.ppc.altivec.vperm -// CHECK-LE: @llvm.ppc.altivec.vperm +// CHECK: shufflevector <4 x float> +// CHECK: shufflevector <4 x float> +// CHECK: shufflevector <4 x float> +// CHECK: shufflevector <4 x float> +// CHECK-LE: shufflevector <4 x float> +// CHECK-LE: shufflevector <4 x float> +// CHECK-LE: shufflevector <4 x float> +// CHECK-LE: shufflevector <4 x float> res_vsc = vec_vspltb(vsc, 0); // CHECK: @llvm.ppc.altivec.vperm 
@@ -5430,12 +5454,18 @@ /* vec_sums */ res_vi = vec_sums(vi, vi); // CHECK: @llvm.ppc.altivec.vsumsws -// CHECK-LE: @llvm.ppc.altivec.vperm +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> // CHECK-LE: @llvm.ppc.altivec.vsumsws res_vi = vec_vsumsws(vi, vi); // CHECK: @llvm.ppc.altivec.vsumsws -// CHECK-LE: @llvm.ppc.altivec.vperm +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> +// CHECK-LE: shufflevector <4 x i32> // CHECK-LE: @llvm.ppc.altivec.vsumsws /* vec_trunc */ Index: test/CodeGen/builtins-ppc-vsx.c =================================================================== --- test/CodeGen/builtins-ppc-vsx.c +++ test/CodeGen/builtins-ppc-vsx.c @@ -262,40 +262,28 @@ // CHECK-LE: @llvm.ppc.altivec.vperm res_vd = vec_splat(vd, 1); -// CHECK: [[T1:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32> -// CHECK: [[T2:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32> -// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> -// CHECK-LE: xor <16 x i8> -// CHECK-LE: [[T1:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32> -// CHECK-LE: [[T2:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32> -// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> +// CHECK: shufflevector <2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x i32> +// CHECK: shufflevector <2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x i32> zeroinitializer +// CHECK-LE: shufflevector <2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x i32> +// CHECK-LE: shufflevector <2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x i32> zeroinitializer res_vbll = vec_splat(vbll, 1); -// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x 
i8> -// CHECK-LE: xor <16 x i8> -// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> +// CHECK: shufflevector <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i32> +// CHECK: shufflevector <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i32> zeroinitializer +// CHECK-LE: shufflevector <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i32> +// CHECK-LE: shufflevector <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i32> zeroinitializer res_vsll = vec_splat(vsll, 1); -// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> -// CHECK-LE: xor <16 x i8> -// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> +// CHECK: shufflevector <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i32> +// CHECK: shufflevector <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i32> zeroinitializer +// CHECK-LE: shufflevector <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i32> +// CHECK-LE: shufflevector <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i32> zeroinitializer res_vull = vec_splat(vull, 1); -// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> -// CHECK-LE: xor <16 x i8> -// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32> -// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8> +// CHECK: shufflevector <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, 
<2 x i32> +// CHECK: shufflevector <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i32> zeroinitializer +// CHECK-LE: shufflevector <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i32> +// CHECK-LE: shufflevector <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i32> zeroinitializer res_vsi = vec_pack(vsll, vsll); // CHECK: @llvm.ppc.altivec.vperm Index: test/CodeGen/ppc-vsx-splat.c =================================================================== --- test/CodeGen/ppc-vsx-splat.c +++ test/CodeGen/ppc-vsx-splat.c @@ -0,0 +1,80 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec -triple \ +// RUN: powerpc64-unknown-unknown -O2 -S -faltivec %s -o - | FileCheck %s + +// RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec -triple \ +// RUN: powerpc64le-unknown-unknown -O2 -S -faltivec %s -o - | FileCheck %s \ +// RUN: -check-prefix=CHECK-LE + +#include <altivec.h> +vector signed int spltwv(vector signed int a, unsigned b) { + return vec_splat(a, b); +// CHECK-LABEL: spltwv +// CHECK-LE-LABEL: spltwv +// CHECK-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 0 +// CHECK-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1 +// CHECK-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 2 +// CHECK-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 3 +// CHECK-LE-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 0 +// CHECK-LE-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1 +// CHECK-LE-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 2 +// CHECK-LE-DAG: xxspltw {{[0-9]+}}, {{[0-9]+}}, 3 +} + +vector signed long long spltdv(vector signed long long a, unsigned b) { + return vec_splat(a, b); +// CHECK-LABEL: spltdv +// CHECK-LE-LABEL: spltdv +// CHECK-DAG: xxspltd {{[0-9]+}}, {{[0-9]+}}, 0 +// CHECK-DAG: xxspltd {{[0-9]+}}, {{[0-9]+}}, 1 +// CHECK-LE-DAG: xxspltd {{[0-9]+}}, {{[0-9]+}}, 0 +// CHECK-LE-DAG: xxspltd {{[0-9]+}}, {{[0-9]+}}, 1 +} + +vector signed int spltw0(vector signed int a) { + return vec_splat(a, 0); +// CHECK-LABEL: spltw0 +// CHECK-LE-LABEL: spltw0 +// CHECK: xxspltw {{[0-9]+}}, {{[0-9]+}}, 0 +// 
CHECK-LE: xxspltw {{[0-9]+}}, {{[0-9]+}}, 3 +} + +vector signed int spltw1(vector signed int a) { + return vec_splat(a, 1); +// CHECK-LABEL: spltw1 +// CHECK-LE-LABEL: spltw1 +// CHECK: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1 +// CHECK-LE: xxspltw {{[0-9]+}}, {{[0-9]+}}, 2 +} + +vector signed int spltw2(vector signed int a) { + return vec_splat(a, 2); +// CHECK-LABEL: spltw2 +// CHECK-LE-LABEL: spltw2 +// CHECK: xxspltw {{[0-9]+}}, {{[0-9]+}}, 2 +// CHECK-LE: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1 +} + +vector signed int spltw3(vector signed int a) { + return vec_splat(a, 3); +// CHECK-LABEL: spltw3 +// CHECK-LE-LABEL: spltw3 +// CHECK: xxspltw {{[0-9]+}}, {{[0-9]+}}, 3 +// CHECK-LE: xxspltw {{[0-9]+}}, {{[0-9]+}}, 0 +} + +vector signed long long spltd0(vector signed long long a, unsigned b) { + return vec_splat(a, 0); +// CHECK-LABEL: spltd0 +// CHECK-LE-LABEL: spltd0 +// CHECK: xxspltd {{[0-9]+}}, {{[0-9]+}}, 0 +// CHECK-LE: xxspltd {{[0-9]+}}, {{[0-9]+}}, 1 +} + +vector signed long long spltd1(vector signed long long a, unsigned b) { + return vec_splat(a, 1); +// CHECK-LABEL: spltd1 +// CHECK-LE-LABEL: spltd1 +// CHECK: xxspltd {{[0-9]+}}, {{[0-9]+}}, 1 +// CHECK-LE: xxspltd {{[0-9]+}}, {{[0-9]+}}, 0 +}