Index: lib/Headers/altivec.h
===================================================================
--- lib/Headers/altivec.h
+++ lib/Headers/altivec.h
@@ -134,7 +134,7 @@
 #endif
 }
 
-#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+#ifdef __VSX__
 static __inline__ vector double __ATTRS_o_ai vec_abs(vector double __a) {
   return __builtin_vsx_xvabsdp(__a);
 }
@@ -305,6 +305,24 @@
 }
 #endif
 
+/* The word forms use only the low-order bit of each element of __c as the
+   carry-in. */
+static __inline__ vector signed int __ATTRS_o_ai
+vec_adde(vector signed int __a, vector signed int __b,
+         vector signed int __c) {
+  vector signed int __mask = {1, 1, 1, 1};
+  vector signed int __carry = __c & __mask;
+  return vec_add(vec_add(__a, __b), __carry);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_adde(vector unsigned int __a, vector unsigned int __b,
+         vector unsigned int __c) {
+  vector unsigned int __mask = {1, 1, 1, 1};
+  vector unsigned int __carry = __c & __mask;
+  return vec_add(vec_add(__a, __b), __carry);
+}
+
 /* vec_addec */
 
 #if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
@@ -319,6 +337,52 @@
           vector unsigned __int128 __c) {
   return __builtin_altivec_vaddecuq(__a, __b, __c);
 }
+
+/* Word forms: each result element is the carry-out (0 or 1) of the unsigned
+   32-bit sum __a + __b + (__c & 1), computed in 64-bit arithmetic. */
+static __inline__ vector signed int __ATTRS_o_ai
+vec_addec(vector signed int __a, vector signed int __b,
+          vector signed int __c) {
+
+  signed int __result[4];
+  for (int __i = 0; __i < 4; __i++) {
+    unsigned int __tempa = (unsigned int) __a[__i];
+    unsigned int __tempb = (unsigned int) __b[__i];
+    unsigned int __tempc = (unsigned int) __c[__i];
+    __tempc = __tempc & 0x00000001;
+    unsigned long long __longa = (unsigned long long) __tempa;
+    unsigned long long __longb = (unsigned long long) __tempb;
+    unsigned long long __longc = (unsigned long long) __tempc;
+    unsigned long long __sum = __longa + __longb + __longc;
+    unsigned long long __res = (__sum >> 32) & 0x01;
+    unsigned long long __tempres = (unsigned int) __res;
+    __result[__i] = (signed int) __tempres;
+  }
+
+  vector signed int __ret = { __result[0], __result[1], __result[2], __result[3] };
+  return __ret;
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_addec(vector unsigned int __a, vector unsigned int __b,
+          vector unsigned int __c) {
+
+  unsigned int __result[4];
+  for (int __i = 0; __i < 4; __i++) {
+    unsigned int __tempc = __c[__i] & 1;
+    unsigned long long __longa = (unsigned long long) __a[__i];
+    unsigned long long __longb = (unsigned long long) __b[__i];
+    unsigned long long __longc = (unsigned long long) __tempc;
+    unsigned long long __sum = __longa + __longb + __longc;
+    unsigned long long __res = (__sum >> 32) & 0x01;
+    unsigned long long __tempres = (unsigned int) __res;
+    __result[__i] = (unsigned int) __tempres;
+  }
+
+  vector unsigned int __ret = { __result[0], __result[1], __result[2], __result[3] };
+  return __ret;
+}
+
 #endif
 
 /* vec_vaddubm */
@@ -10161,6 +10225,11 @@
 
 /* vec_subc */
 
+static __inline__ vector signed int __ATTRS_o_ai
+vec_subc(vector signed int __a, vector signed int __b) {
+  return __builtin_altivec_vsubcuw(__a, __b);
+}
+
 static __inline__ vector unsigned int __ATTRS_o_ai
 vec_subc(vector unsigned int __a, vector unsigned int __b) {
   return __builtin_altivec_vsubcuw(__a, __b);
@@ -10406,6 +10475,18 @@
   return __builtin_altivec_vsubeuqm(__a, __b, __c);
 }
 
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_sube(vector signed __int128 __a, vector signed __int128 __b,
+         vector signed __int128 __c) {
+  return __builtin_altivec_vsubeuqm(__a, __b, __c);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_sube(vector unsigned __int128 __a, vector unsigned __int128 __b,
+         vector unsigned __int128 __c) {
+  return __builtin_altivec_vsubeuqm(__a, __b, __c);
+}
+
 /* vec_vsubcuq */
 
 static __inline__ vector signed __int128 __ATTRS_o_ai
@@ -10431,8 +10512,50 @@
              vector unsigned __int128 __c) {
   return __builtin_altivec_vsubecuq(__a, __b, __c);
 }
+
+/* Word forms: __a - __b with borrow is evaluated as __a + ~__b + carry-in, so
+   the carry-out comes from vec_addec. */
+static __inline__ vector signed int __ATTRS_o_ai
+vec_subec(vector signed int __a, vector signed int __b,
+          vector signed int __c) {
+  return vec_addec(__a, ~__b, __c);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_subec(vector unsigned int __a, vector unsigned int __b,
+          vector unsigned int __c) {
+  return vec_addec(__a, ~__b, __c);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_subec(vector signed __int128 __a, vector signed __int128 __b,
+          vector signed __int128 __c) {
+  return __builtin_altivec_vsubecuq(__a, __b, __c);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_subec(vector unsigned __int128 __a, vector unsigned __int128 __b,
+          vector unsigned __int128 __c) {
+  return __builtin_altivec_vsubecuq(__a, __b, __c);
+}
 #endif // defined(__POWER8_VECTOR__) && defined(__powerpc64__)
 
+/* Word forms: computes __a + ~__b + (__c & 1). */
+static __inline__ vector signed int __ATTRS_o_ai
+vec_sube(vector signed int __a, vector signed int __b,
+         vector signed int __c) {
+  vector signed int __mask = {1, 1, 1, 1};
+  vector signed int __carry = __c & __mask;
+  return vec_adde(__a, ~__b, __carry);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_sube(vector unsigned int __a, vector unsigned int __b,
+         vector unsigned int __c) {
+  vector unsigned int __mask = {1, 1, 1, 1};
+  vector unsigned int __carry = __c & __mask;
+  return vec_adde(__a, ~__b, __carry);
+}
 /* vec_sum4s */
 
 static __inline__ vector int __ATTRS_o_ai vec_sum4s(vector signed char __a,
@@ -15034,6 +15157,74 @@
 #endif
 #endif
 
+/* vec_neg */
+
+static __inline__ vector float __ATTRS_o_ai vec_neg(vector float __a) {
+  return -__a;
+}
+
+#ifdef __VSX__
+static __inline__ vector double __ATTRS_o_ai vec_neg(vector double __a) {
+  return -__a;
+}
+
+#endif
+
+#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+static __inline__ vector long long __ATTRS_o_ai vec_neg(vector long long __a) {
+  return -__a;
+}
+#endif
+
+static __inline__ vector signed int __ATTRS_o_ai
+vec_neg(vector signed int __a) {
+  return -__a;
+}
+
+static __inline__ vector signed short __ATTRS_o_ai
+vec_neg(vector signed short __a) {
+  return -__a;
+}
+
+static __inline__ vector signed char __ATTRS_o_ai
+vec_neg(vector signed char __a) {
+  return -__a;
+}
+
+/* vec_nabs */
+
+static __inline__ vector float __ATTRS_o_ai vec_nabs(vector float __a) {
+  return -vec_abs(__a);
+}
+
+#ifdef __VSX__
+static __inline__ vector double __ATTRS_o_ai vec_nabs(vector double __a) {
+  return -vec_abs(__a);
+}
+
+#endif
+
+#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+static __inline__ vector long long __ATTRS_o_ai vec_nabs(vector long long __a) {
+  return __builtin_altivec_vminsd(__a, -__a);
+}
+#endif
+
+static __inline__ vector signed int __ATTRS_o_ai
+vec_nabs(vector signed int __a) {
+  return __builtin_altivec_vminsw(__a, -__a);
+}
+
+static __inline__ vector signed short __ATTRS_o_ai
+vec_nabs(vector signed short __a) {
+  return __builtin_altivec_vminsh(__a, -__a);
+}
+
+static __inline__ vector signed char __ATTRS_o_ai
+vec_nabs(vector signed char __a) {
+  return __builtin_altivec_vminsb(__a, -__a);
+}
+
 #undef __ATTRS_o_ai
 
 #endif /* __ALTIVEC_H */
Index: test/CodeGen/builtins-ppc-altivec.c
===================================================================
--- test/CodeGen/builtins-ppc-altivec.c
+++ test/CodeGen/builtins-ppc-altivec.c
@@ -86,7 +86,43 @@
 // CHECK-LE: bitcast <4 x i32> %{{.*}} to <4 x float>
 // CHECK-LE: store <4 x float> %{{.*}}, <4 x float>* @vf
 // CHECK-NOALTIVEC: error: use of undeclared identifier 'vf'
-// CHECK-NOALTIVEC: vf = vec_abs(vf)
+// CHECK-NOALTIVEC: vf = vec_abs(vf)
+
+  vsc = vec_nabs(vsc);
+// CHECK: sub <16 x i8> zeroinitializer
+// CHECK: @llvm.ppc.altivec.vminsb
+// CHECK-LE: sub <16 x i8> zeroinitializer
+// CHECK-LE: @llvm.ppc.altivec.vminsb
+
+  vs = vec_nabs(vs);
+// CHECK: sub <8 x i16> zeroinitializer
+// CHECK: @llvm.ppc.altivec.vminsh
+// CHECK-LE: sub <8 x i16> zeroinitializer
+// CHECK-LE: @llvm.ppc.altivec.vminsh
+
+  vi = vec_nabs(vi);
+// CHECK: sub <4 x i32> zeroinitializer
+// CHECK: @llvm.ppc.altivec.vminsw
+// CHECK-LE: sub <4 x i32> zeroinitializer
+// CHECK-LE: @llvm.ppc.altivec.vminsw
+
+  res_vi = vec_neg(vi);
+// CHECK: sub <4 x i32> zeroinitializer, {{%[0-9]+}}
+// CHECK-LE: sub <4 x i32> zeroinitializer, {{%[0-9]+}}
+// CHECK-NOALTIVEC: error: use of undeclared identifier 'vi'
+// CHECK-NOALTIVEC: res_vi = vec_neg(vi);
+
+  res_vs = vec_neg(vs);
+// CHECK: sub <8 x i16> zeroinitializer, {{%[0-9]+}}
+// CHECK-LE: sub <8 x i16> zeroinitializer, {{%[0-9]+}}
+// CHECK-NOALTIVEC: error: use of undeclared identifier 'vs'
+// CHECK-NOALTIVEC: res_vs = vec_neg(vs);
+
+  res_vsc = vec_neg(vsc);
+// CHECK: sub <16 x i8> zeroinitializer, {{%[0-9]+}}
+// CHECK-LE: sub <16 x i8> zeroinitializer, {{%[0-9]+}}
+// CHECK-NOALTIVEC: error: use of undeclared identifier 'vsc'
+// CHECK-NOALTIVEC: res_vsc = vec_neg(vsc);
 
   /* vec_abs */
   vsc = vec_abss(vsc);
@@ -184,6 +220,22 @@
 // CHECK: fadd <4 x float>
 // CHECK-LE: fadd <4 x float>
 
+  res_vi = vec_adde(vi, vi, vi);
+// CHECK: and <4 x i32>
+// CHECK: add <4 x i32>
+// CHECK: add <4 x i32>
+// CHECK-LE: and <4 x i32>
+// CHECK-LE: add <4 x i32>
+// CHECK-LE: add <4 x i32>
+
+  res_vui = vec_adde(vui, vui, vui);
+// CHECK: and <4 x i32>
+// CHECK: add <4 x i32>
+// CHECK: add <4 x i32>
+// CHECK-LE: and <4 x i32>
+// CHECK-LE: add <4 x i32>
+// CHECK-LE: add <4 x i32>
+
   res_vsc = vec_vaddubm(vsc, vsc);
 // CHECK: add <16 x i8>
 // CHECK-LE: add <16 x i8>
@@ -5159,6 +5211,8 @@
 // CHECK: fsub <4 x float>
 // CHECK-LE: fsub <4 x float>
 
+
+
   res_vsc = vec_vsububm(vsc, vsc);
 // CHECK: sub <16 x i8>
 // CHECK-LE: sub <16 x i8>
@@ -5240,6 +5294,10 @@
 // CHECK: @llvm.ppc.altivec.vsubcuw
 // CHECK-LE: @llvm.ppc.altivec.vsubcuw
 
+  res_vi = vec_subc(vi, vi);
+// CHECK: @llvm.ppc.altivec.vsubcuw
+// CHECK-LE: @llvm.ppc.altivec.vsubcuw
+
   res_vui = vec_vsubcuw(vui, vui);
 // CHECK: @llvm.ppc.altivec.vsubcuw
 // CHECK-LE: @llvm.ppc.altivec.vsubcuw
@@ -5317,6 +5375,26 @@
 // CHECK: @llvm.ppc.altivec.vsubuws
 // CHECK-LE: @llvm.ppc.altivec.vsubuws
 
+  res_vi = vec_sube(vi, vi, vi);
+// CHECK: and <4 x i32>
+// CHECK: xor <4 x i32> {{%[0-9]+}},
+// CHECK: add <4 x i32>
+// CHECK: add <4 x i32>
+// CHECK-LE: and <4 x i32>
+// CHECK-LE: xor <4 x i32> {{%[0-9]+}},
+// CHECK-LE: add <4 x i32>
+// CHECK-LE: add <4 x i32>
+
+  res_vui = vec_sube(vui, vui, vui);
+// CHECK: and <4 x i32>
+// CHECK: xor <4 x i32> {{%[0-9]+}},
+// CHECK: add <4 x i32>
+// CHECK: add <4 x i32>
+// CHECK-LE: and <4 x i32>
+// CHECK-LE: xor <4 x i32> {{%[0-9]+}},
+// CHECK-LE: add <4 x i32>
+// CHECK-LE: add <4 x i32>
+
   res_vsc = vec_vsubsbs(vsc, vsc);
 // CHECK: @llvm.ppc.altivec.vsubsbs
 // CHECK-LE: @llvm.ppc.altivec.vsubsbs
Index: test/CodeGen/builtins-ppc-p8vector.c
===================================================================
--- test/CodeGen/builtins-ppc-p8vector.c
+++ test/CodeGen/builtins-ppc-p8vector.c
@@ -73,13 +73,6 @@
 // CHECK-LE: call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %{{[0-9]*}}, <2 x i64>
 // CHECK-PPC: error: call to 'vec_abs' is ambiguous
 
-  res_vd = vec_abs(vda);
-// CHECK: call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{.*}})
-// CHECK: store <2 x double> %{{.*}}, <2 x double>* @res_vd
-// CHECK-LE: call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{.*}})
-// CHECK-LE: store <2 x double> %{{.*}}, <2 x double>* @res_vd
-// CHECK-PPC: error: call to 'vec_abs' is ambiguous
-
   /* vec_add */
   res_vsll = vec_add(vsll, vsll);
 // CHECK: add <2 x i64>
@@ -1504,4 +1497,83 @@
 // CHECK: llvm.ppc.altivec.vbpermq
 // CHECK-LE: llvm.ppc.altivec.vbpermq
 // CHECK-PPC: warning: implicit declaration of function 'vec_bperm'
+
+  res_vsll = vec_neg(vsll);
+// CHECK: sub <2 x i64> zeroinitializer, {{%[0-9]+}}
+// CHECK-LE: sub <2 x i64> zeroinitializer, {{%[0-9]+}}
+// CHECK-PPC: call to 'vec_neg' is ambiguous
+
+
+}
+
+
+vector signed int test_vec_addec_signed (vector signed int a, vector signed int b, vector signed int c) {
+  return vec_addec(a, b, c);
+// CHECK-LABEL: @test_vec_addec_signed
+// CHECK-LABEL: for.cond.i:
+// CHECK: icmp slt i32 {{%[0-9]+}}, 4
+// CHECK-LABEL: for.body.i:
+// CHECK: extractelement
+// CHECK: extractelement
+// CHECK: extractelement
+// CHECK: and i32 {{%[0-9]+}}, 1
+// CHECK: zext
+// CHECK: zext
+// CHECK: zext
+// CHECK: add i64
+// CHECK: add i64
+// CHECK: lshr i64
+// CHECK: and i64
+// CHECK: trunc i64 {{%[0-9]+}} to i32
+// CHECK: zext i32
+// CHECK: trunc i64 {{%[0-9]+}} to i32
+// CHECK: sext i32
+// CHECK: add nsw i32
+// CHECK: br label
+// CHECK: ret <4 x i32>
+
+}
+
+
+vector unsigned int test_vec_addec_unsigned (vector unsigned int a, vector unsigned int b, vector unsigned int c) {
+  return vec_addec(a, b, c);
+
+// CHECK-LABEL: @test_vec_addec_unsigned
+// CHECK-LABEL: for.cond.i:
+// CHECK: icmp slt i32 {{%[0-9]+}}, 4
+// CHECK-LABEL: for.body.i:
+// CHECK: extractelement
+// CHECK: and i32
+// CHECK: extractelement
+// CHECK: zext i32
+// CHECK: extractelement
+// CHECK: zext i32
+// CHECK: zext i32
+// CHECK: add i64
+// CHECK: lshr i64
+// CHECK: and i64
+// CHECK: trunc i64 {{%[0-9]+}} to i32
+// CHECK: zext i32
+// CHECK: trunc i64 {{%[0-9]+}} to i32
+// CHECK: sext i32
+// CHECK: add nsw i32
+// CHECK: br label
+// CHECK: ret <4 x i32>
+}
+
+vector signed int test_vec_subec_signed (vector signed int a, vector signed int b, vector signed int c) {
+  return vec_subec(a, b, c);
+// CHECK-LABEL: @test_vec_subec_signed
+// CHECK: xor <4 x i32> {{%[0-9]+}},
+// CHECK-LABEL: for.cond.i.i:
+// CHECK: ret <4 x i32>
+}
+
+vector unsigned int test_vec_subec_unsigned (vector unsigned int a, vector unsigned int b, vector unsigned int c) {
+  return vec_subec(a, b, c);
+
+// CHECK-LABEL: @test_vec_subec_unsigned
+// CHECK: xor <4 x i32> {{%[0-9]+}},
+// CHECK-LABEL: for.cond.i.i:
+// CHECK: ret <4 x i32>
 }
Index: test/CodeGen/builtins-ppc-quadword.c
===================================================================
--- test/CodeGen/builtins-ppc-quadword.c
+++ test/CodeGen/builtins-ppc-quadword.c
@@ -119,11 +119,32 @@
 // CHECK-LE: @llvm.ppc.altivec.vsubeuqm
 // CHECK-PPC: error: assigning to '__vector __int128' (vector of 1 '__int128' value) from incompatible type 'int'
 
+  /* vec_sube */
+  res_vlll = vec_sube(vlll, vlll, vlll);
+// CHECK: @llvm.ppc.altivec.vsubeuqm
+// CHECK-LE: @llvm.ppc.altivec.vsubeuqm
+// CHECK-PPC: error: call to 'vec_sube' is ambiguous
+
+  res_vulll = vec_sube(vulll, vulll, vulll);
+// CHECK: @llvm.ppc.altivec.vsubeuqm
+// CHECK-LE: @llvm.ppc.altivec.vsubeuqm
+// CHECK-PPC: error: call to 'vec_sube' is ambiguous
+
+  res_vlll = vec_sube(vlll, vlll, vlll);
+// CHECK: @llvm.ppc.altivec.vsubeuqm
+// CHECK-LE: @llvm.ppc.altivec.vsubeuqm
+// CHECK-PPC: error: call to 'vec_sube' is ambiguous
+
   res_vulll = vec_vsubeuqm(vulll, vulll, vulll);
 // CHECK: @llvm.ppc.altivec.vsubeuqm
 // CHECK-LE: @llvm.ppc.altivec.vsubeuqm
 // CHECK-PPC: error: assigning to '__vector unsigned __int128' (vector of 1 'unsigned __int128' value) from incompatible type 'int'
-
+
+  res_vulll = vec_sube(vulll, vulll, vulll);
+// CHECK: @llvm.ppc.altivec.vsubeuqm
+// CHECK-LE: @llvm.ppc.altivec.vsubeuqm
+// CHECK-PPC: error: call to 'vec_sube' is ambiguous
+
   /* vec_subc */
   res_vlll = vec_subc(vlll, vlll);
 // CHECK: @llvm.ppc.altivec.vsubcuq
@@ -150,11 +171,21 @@
   res_vlll = vec_vsubecuq(vlll, vlll, vlll);
 // CHECK: @llvm.ppc.altivec.vsubecuq
 // CHECK-LE: @llvm.ppc.altivec.vsubecuq
-// CHECK-PPC: error: assigning to '__vector __int128' (vector of 1 '__int128' value) from incompatible type 'int'
+// CHECK-PPC: error: assigning to '__vector __int128' (vector of 1 '__int128' value) from incompatible type 'int'
 
   res_vulll = vec_vsubecuq(vulll, vulll, vulll);
 // CHECK: @llvm.ppc.altivec.vsubecuq
 // CHECK-LE: @llvm.ppc.altivec.vsubecuq
+// CHECK-PPC: error: assigning to '__vector unsigned __int128' (vector of 1 'unsigned __int128' value) from incompatible type 'int'
+
+  res_vlll = vec_subec(vlll, vlll, vlll);
+// CHECK: @llvm.ppc.altivec.vsubecuq
+// CHECK-LE: @llvm.ppc.altivec.vsubecuq
+// CHECK-PPC: error: assigning to '__vector __int128' (vector of 1 '__int128' value) from incompatible type 'int'
+
+  res_vulll = vec_subec(vulll, vulll, vulll);
+// CHECK: @llvm.ppc.altivec.vsubecuq
+// CHECK-LE: @llvm.ppc.altivec.vsubecuq
 // CHECK-PPC: error: assigning to '__vector unsigned __int128' (vector of 1 'unsigned __int128' value) from incompatible type 'int'
 
 }
Index: test/CodeGen/builtins-ppc-vsx.c
===================================================================
--- test/CodeGen/builtins-ppc-vsx.c
+++ test/CodeGen/builtins-ppc-vsx.c
@@ -69,6 +69,18 @@
 // CHECK: call <4 x float> @llvm.fabs.v4f32(<4 x float> %{{[0-9]*}})
 // CHECK-LE: call <4 x float> @llvm.fabs.v4f32(<4 x float> %{{[0-9]*}})
 
+  res_vd = vec_abs(vd);
+// CHECK: call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{[0-9]*}})
+// CHECK-LE: call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{[0-9]*}})
+
+  res_vf = vec_nabs(vf);
+// CHECK: [[VEC:%[0-9]+]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %{{[0-9]*}})
+// CHECK-NEXT: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[VEC]]
+
+  res_vd = vec_nabs(vd);
+// CHECK: [[VECD:%[0-9]+]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{[0-9]*}})
+// CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[VECD]]
+
   dummy();
 // CHECK: call void @dummy()
 // CHECK-LE: call void @dummy()
@@ -1080,4 +1092,12 @@
 // CHECK: fmul <2 x double>
 // CHECK-LE: uitofp <2 x i64> %{{.*}} to <2 x double>
 // CHECK-LE: fmul <2 x double>
+
+  res_vf = vec_neg(vf);
+// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, {{%[0-9]+}}
+// CHECK-LE: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, {{%[0-9]+}}
+
+  res_vd = vec_neg(vd);
+// CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, {{%[0-9]+}}
+// CHECK-LE: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, {{%[0-9]+}}
 }