Index: lib/Headers/altivec.h
===================================================================
--- lib/Headers/altivec.h
+++ lib/Headers/altivec.h
@@ -15034,6 +15034,120 @@
 #endif
 #endif
 
+static __inline__ vector bool char __ATTRS_o_ai
+vec_revb(vector bool char __a) {
+  return __a;
+}
+
+static __inline__ vector signed char __ATTRS_o_ai
+vec_revb(vector signed char __a) {
+  return __a;
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_revb(vector unsigned char __a) {
+  return __a;
+}
+
+static __inline__ vector bool short __ATTRS_o_ai
+vec_revb(vector bool short __a) {
+  vector unsigned char __indices =
+      { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 };
+  return vec_perm(__a, __a, __indices);
+}
+
+static __inline__ vector signed short __ATTRS_o_ai
+vec_revb(vector signed short __a) {
+  vector unsigned char __indices =
+      { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 };
+  return vec_perm(__a, __a, __indices);
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_revb(vector unsigned short __a) {
+  vector unsigned char __indices =
+      { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 };
+  return vec_perm(__a, __a, __indices);
+}
+
+static __inline__ vector bool int __ATTRS_o_ai
+vec_revb(vector bool int __a) {
+  vector unsigned char __indices =
+      { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+  return vec_perm(__a, __a, __indices);
+}
+
+static __inline__ vector signed int __ATTRS_o_ai
+vec_revb(vector signed int __a) {
+  vector unsigned char __indices =
+      { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+  return vec_perm(__a, __a, __indices);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_revb(vector unsigned int __a) {
+  vector unsigned char __indices =
+      { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+  return vec_perm(__a, __a, __indices);
+}
+
+static __inline__ vector float __ATTRS_o_ai
+vec_revb(vector float __a) {
+  vector unsigned char __indices =
+      { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+  return vec_perm(__a, __a, __indices);
+}
+
+#ifdef __VSX__
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_revb(vector bool long long __a) {
+  vector unsigned char __indices =
+      { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 };
+  return vec_perm(__a, __a, __indices);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_revb(vector signed long long __a) {
+  vector unsigned char __indices =
+      { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 };
+  return vec_perm(__a, __a, __indices);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_revb(vector unsigned long long __a) {
+  vector unsigned char __indices =
+      { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 };
+  return vec_perm(__a, __a, __indices);
+}
+
+static __inline__ vector double __ATTRS_o_ai
+vec_revb(vector double __a) {
+  vector unsigned char __indices =
+      { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 };
+  return vec_perm(__a, __a, __indices);
+}
+#endif /* End __VSX__ */
+
+#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_revb(vector signed __int128 __a) {
+  vector unsigned char __indices =
+      { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+  return (vector signed __int128)vec_perm((vector signed int)__a,
+                                          (vector signed int)__a,
+                                          __indices);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_revb(vector unsigned __int128 __a) {
+  vector unsigned char __indices =
+      { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+  return (vector unsigned __int128)vec_perm((vector signed int)__a,
+                                            (vector signed int)__a,
+                                            __indices);
+}
+#endif /* End __POWER8_VECTOR__ && __powerpc64__ */
+
 #undef __ATTRS_o_ai
 
 #endif /* __ALTIVEC_H */
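
For context, vec_revb reverses the byte order within each element of its argument, so a halfword holding 0x0102 yields 0x0201 regardless of target endianness. A minimal usage sketch of the new overloads (the main function, variable names, and build line are illustrative and not part of the patch; something like `clang -target powerpc64le-unknown-linux-gnu -maltivec` is assumed):

    #include <altivec.h>
    #include <stdio.h>

    int main(void) {
      /* Each halfword element holds 0x0102. */
      vector unsigned short v = {0x0102, 0x0102, 0x0102, 0x0102,
                                 0x0102, 0x0102, 0x0102, 0x0102};
      /* vec_revb swaps the two bytes of every halfword element. */
      vector unsigned short r = vec_revb(v);
      printf("0x%04x\n", (unsigned)r[0]); /* expected: 0x0201 */
      return 0;
    }
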
Index: test/CodeGen/builtins-ppc-altivec.c
===================================================================
--- test/CodeGen/builtins-ppc-altivec.c
+++ test/CodeGen/builtins-ppc-altivec.c
@@ -8996,3 +8996,96 @@
 // CHECK: @llvm.ppc.altivec.vcmpgefp.p(i32 2
 // CHECK-LE: @llvm.ppc.altivec.vcmpgefp.p(i32 2
 }
+
+/* ------------------------------ optional -------------------------------------- */
+// CHECK-LABEL: define void @test8
+// CHECK-LE-LABEL: define void @test8
+void test8() {
+
+  res_vbc = vec_revb(vbc);
+// CHECK: [[T1:%.+]] = load <16 x i8>, <16 x i8>* @vbc, align 16
+// CHECK: store <16 x i8> [[T1]], <16 x i8>* [[T2:%.+]], align 16
+// CHECK: [[T3:%.+]] = load <16 x i8>, <16 x i8>* [[T2]], align 16
+// CHECK: store <16 x i8> [[T3]], <16 x i8>* @res_vbc, align 16
+// CHECK-LE: [[T1:%.+]] = load <16 x i8>, <16 x i8>* @vbc, align 16
+// CHECK-LE: store <16 x i8> [[T1]], <16 x i8>* [[T2:%.+]], align 16
+// CHECK-LE: [[T3:%.+]] = load <16 x i8>, <16 x i8>* [[T2]], align 16
+// CHECK-LE: store <16 x i8> [[T3]], <16 x i8>* @res_vbc, align 16
+
+  res_vsc = vec_revb(vsc);
+// CHECK: [[T1:%.+]] = load <16 x i8>, <16 x i8>* @vsc, align 16
+// CHECK: store <16 x i8> [[T1]], <16 x i8>* [[T2:%.+]], align 16
+// CHECK: [[T3:%.+]] = load <16 x i8>, <16 x i8>* [[T2]], align 16
+// CHECK: store <16 x i8> [[T3]], <16 x i8>* @res_vsc, align 16
+// CHECK-LE: [[T1:%.+]] = load <16 x i8>, <16 x i8>* @vsc, align 16
+// CHECK-LE: store <16 x i8> [[T1]], <16 x i8>* [[T2:%.+]], align 16
+// CHECK-LE: [[T3:%.+]] = load <16 x i8>, <16 x i8>* [[T2]], align 16
+// CHECK-LE: store <16 x i8> [[T3]], <16 x i8>* @res_vsc, align 16
+
+  res_vuc = vec_revb(vuc);
+// CHECK: [[T1:%.+]] = load <16 x i8>, <16 x i8>* @vuc, align 16
+// CHECK: store <16 x i8> [[T1]], <16 x i8>* [[T2:%.+]], align 16
+// CHECK: [[T3:%.+]] = load <16 x i8>, <16 x i8>* [[T2]], align 16
+// CHECK: store <16 x i8> [[T3]], <16 x i8>* @res_vuc, align 16
+// CHECK-LE: [[T1:%.+]] = load <16 x i8>, <16 x i8>* @vuc, align 16
+// CHECK-LE: store <16 x i8> [[T1]], <16 x i8>* [[T2:%.+]], align 16
+// CHECK-LE: [[T3:%.+]] = load <16 x i8>, <16 x i8>* [[T2]], align 16
+// CHECK-LE: store <16 x i8> [[T3]], <16 x i8>* @res_vuc, align 16
+
+  res_vbs = vec_revb(vbs);
+// CHECK: store <16 x i8> <i8 1, i8 0, i8 3, i8 2, i8 5, i8 4, i8 7, i8 6, i8 9, i8 8, i8 11, i8 10, i8 13, i8 12, i8 15, i8 14>, <16 x i8>* {{%.+}}, align 16
+// CHECK: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+// CHECK-LE: store <16 x i8> <i8 1, i8 0, i8 3, i8 2, i8 5, i8 4, i8 7, i8 6, i8 9, i8 8, i8 11, i8 10, i8 13, i8 12, i8 15, i8 14>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: {{%.+}} = xor <16 x i8>
+// CHECK-LE: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+
+  res_vs = vec_revb(vs);
+// CHECK: store <16 x i8> <i8 1, i8 0, i8 3, i8 2, i8 5, i8 4, i8 7, i8 6, i8 9, i8 8, i8 11, i8 10, i8 13, i8 12, i8 15, i8 14>, <16 x i8>* {{%.+}}, align 16
+// CHECK: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+// CHECK-LE: store <16 x i8> <i8 1, i8 0, i8 3, i8 2, i8 5, i8 4, i8 7, i8 6, i8 9, i8 8, i8 11, i8 10, i8 13, i8 12, i8 15, i8 14>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: {{%.+}} = xor <16 x i8>
+// CHECK-LE: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+
+  res_vus = vec_revb(vus);
+// CHECK: store <16 x i8> <i8 1, i8 0, i8 3, i8 2, i8 5, i8 4, i8 7, i8 6, i8 9, i8 8, i8 11, i8 10, i8 13, i8 12, i8 15, i8 14>, <16 x i8>* {{%.+}}, align 16
+// CHECK: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+// CHECK-LE: store <16 x i8> <i8 1, i8 0, i8 3, i8 2, i8 5, i8 4, i8 7, i8 6, i8 9, i8 8, i8 11, i8 10, i8 13, i8 12, i8 15, i8 14>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: {{%.+}} = xor <16 x i8>
+// CHECK-LE: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+
+  res_vbi = vec_revb(vbi);
+// CHECK: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, <16 x i8>* {{%.+}}, align 16
+// CHECK: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+// CHECK-LE: {{%.+}} = load <4 x i32>, <4 x i32>* @vbi, align 16
+// CHECK-LE: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: {{%.+}} = xor <16 x i8>
+// CHECK-LE: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+
+  res_vi = vec_revb(vi);
+// CHECK: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, <16 x i8>* {{%.+}}, align 16
+// CHECK: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+// CHECK-LE: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: {{%.+}} = xor <16 x i8>
+// CHECK-LE: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+
+  res_vui = vec_revb(vui);
+// CHECK: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, <16 x i8>* {{%.+}}, align 16
+// CHECK: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+// CHECK-LE: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: {{%.+}} = xor <16 x i8>
+// CHECK-LE: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+
+  res_vf = vec_revb(vf);
+// CHECK: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, <16 x i8>* {{%.+}}, align 16
+// CHECK: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+// CHECK-LE: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: {{%.+}} = xor <16 x i8>
+// CHECK-LE: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+}
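
A note on the CHECK-LE expectations above: the vperm instruction numbers vector bytes in big-endian order, so on little-endian targets the vec_perm implementation in altivec.h swaps its two source operands and complements the permute control vector by xor-ing it with an all-ones vector. That is why each CHECK-LE block matches a second constant store (the all-ones vector) and an xor before the @llvm.ppc.altivec.vperm call. A scalar model of the complement step (illustrative only; vperm reads just the low five bits of each control byte):

    #include <stdio.h>

    int main(void) {
      /* The halfword-swap control vector from the header. */
      const unsigned char idx[16] = {1, 0, 3, 2, 5, 4, 7, 6,
                                     9, 8, 11, 10, 13, 12, 15, 14};
      /* On little endian the header emits idx ^ 0xFF; since vperm uses
         only the low 5 bits, the effective index becomes 31 - idx[i]. */
      for (int i = 0; i < 16; ++i)
        printf("%u ", (255u ^ idx[i]) & 31u);
      printf("\n"); /* prints: 30 31 28 29 26 27 24 25 22 23 20 21 18 19 16 17 */
      return 0;
    }
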
Index: test/CodeGen/builtins-ppc-quadword.c
===================================================================
--- test/CodeGen/builtins-ppc-quadword.c
+++ test/CodeGen/builtins-ppc-quadword.c
@@ -157,4 +157,12 @@
 // CHECK-LE: @llvm.ppc.altivec.vsubecuq
 // CHECK-PPC: error: assigning to '__vector unsigned __int128' (vector of 1 'unsigned __int128' value) from incompatible type 'int'
 
+  res_vulll = vec_revb(vulll);
+// CHECK: store <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8>* {{%.+}}, align 16
+// CHECK: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+// CHECK-LE: store <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: {{%.+}} = xor <16 x i8>
+// CHECK-LE: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+// CHECK-PPC: error: call to 'vec_revb' is ambiguous
 }
Index: test/CodeGen/builtins-ppc-vsx.c
===================================================================
--- test/CodeGen/builtins-ppc-vsx.c
+++ test/CodeGen/builtins-ppc-vsx.c
@@ -1080,4 +1080,36 @@
 // CHECK: fmul <2 x double>
 // CHECK-LE: uitofp <2 x i64> %{{.*}} to <2 x double>
 // CHECK-LE: fmul <2 x double>
+
+  res_vbll = vec_revb(vbll);
+// CHECK: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, <16 x i8>* {{%.+}}, align 16
+// CHECK: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+// CHECK-LE: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: {{%.+}} = xor <16 x i8>
+// CHECK-LE: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+
+  res_vsll = vec_revb(vsll);
+// CHECK: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, <16 x i8>* {{%.+}}, align 16
+// CHECK: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+// CHECK-LE: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: {{%.+}} = xor <16 x i8>
+// CHECK-LE: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+
+  res_vull = vec_revb(vull);
+// CHECK: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, <16 x i8>* {{%.+}}, align 16
+// CHECK: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+// CHECK-LE: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: {{%.+}} = xor <16 x i8>
+// CHECK-LE: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+
+  res_vd = vec_revb(vd);
+// CHECK: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, <16 x i8>* {{%.+}}, align 16
+// CHECK: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
+// CHECK-LE: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* {{%.+}}, align 16
+// CHECK-LE: {{%.+}} = xor <16 x i8>
+// CHECK-LE: {{%.+}} = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 }
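
To make the permute tables concrete, here is a scalar model of the big-endian vec_perm applied to the doubleword table used by the __VSX__ overloads: each 8-byte group is reversed in place, which is exactly a per-element byte reversal (illustrative sketch, not part of the patch):

    #include <stdio.h>

    int main(void) {
      /* Doubleword table from the __VSX__ overloads above. */
      const unsigned char idx[16] = {7, 6, 5, 4, 3, 2, 1, 0,
                                     15, 14, 13, 12, 11, 10, 9, 8};
      unsigned char in[16], out[16];
      for (int i = 0; i < 16; ++i)
        in[i] = (unsigned char)i; /* identity byte pattern 0..15 */
      /* Scalar vec_perm(a, a, idx): out[i] = in[idx[i]] for idx < 16. */
      for (int i = 0; i < 16; ++i)
        out[i] = in[idx[i]];
      for (int i = 0; i < 16; ++i)
        printf("%u ", out[i]); /* 7 6 5 4 3 2 1 0 15 14 13 12 11 10 9 8 */
      printf("\n");
      return 0;
    }
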