Index: include/clang/Basic/BuiltinsPPC.def =================================================================== --- include/clang/Basic/BuiltinsPPC.def +++ include/clang/Basic/BuiltinsPPC.def @@ -297,9 +297,13 @@ BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "") BUILTIN(__builtin_vsx_lxvw4x, "V4iivC*", "") +BUILTIN(__builtin_vsx_lxvd2x_be, "V2dSLLivC*", "") +BUILTIN(__builtin_vsx_lxvw4x_be, "V4iSLLivC*", "") BUILTIN(__builtin_vsx_stxvd2x, "vV2div*", "") BUILTIN(__builtin_vsx_stxvw4x, "vV4iiv*", "") +BUILTIN(__builtin_vsx_stxvd2x_be, "vV2dSLLivC*", "") +BUILTIN(__builtin_vsx_stxvw4x_be, "vV4iSLLivC*", "") BUILTIN(__builtin_vsx_xvmaxdp, "V2dV2dV2d", "") BUILTIN(__builtin_vsx_xvmaxsp, "V4fV4fV4f", "") Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -7935,7 +7935,7 @@ case PPC::BI__builtin_ppc_get_timebase: return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); - // vec_ld, vec_lvsl, vec_lvsr + // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr case PPC::BI__builtin_altivec_lvx: case PPC::BI__builtin_altivec_lvxl: case PPC::BI__builtin_altivec_lvebx: @@ -7945,6 +7945,8 @@ case PPC::BI__builtin_altivec_lvsr: case PPC::BI__builtin_vsx_lxvd2x: case PPC::BI__builtin_vsx_lxvw4x: + case PPC::BI__builtin_vsx_lxvd2x_be: + case PPC::BI__builtin_vsx_lxvw4x_be: { Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); @@ -7980,12 +7982,18 @@ case PPC::BI__builtin_vsx_lxvw4x: ID = Intrinsic::ppc_vsx_lxvw4x; break; + case PPC::BI__builtin_vsx_lxvd2x_be: + ID = Intrinsic::ppc_vsx_lxvd2x_be; + break; + case PPC::BI__builtin_vsx_lxvw4x_be: + ID = Intrinsic::ppc_vsx_lxvw4x_be; + break; } llvm::Function *F = CGM.getIntrinsic(ID); return Builder.CreateCall(F, Ops, ""); } - // vec_st + // vec_st, vec_xst_be case PPC::BI__builtin_altivec_stvx: case PPC::BI__builtin_altivec_stvxl: case PPC::BI__builtin_altivec_stvebx: @@ -7993,6 +8001,8 @@ case PPC::BI__builtin_altivec_stvewx: 
case PPC::BI__builtin_vsx_stxvd2x: case PPC::BI__builtin_vsx_stxvw4x: + case PPC::BI__builtin_vsx_stxvd2x_be: + case PPC::BI__builtin_vsx_stxvw4x_be: { Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); @@ -8021,6 +8031,12 @@ case PPC::BI__builtin_vsx_stxvw4x: ID = Intrinsic::ppc_vsx_stxvw4x; break; + case PPC::BI__builtin_vsx_stxvd2x_be: + ID = Intrinsic::ppc_vsx_stxvd2x_be; + break; + case PPC::BI__builtin_vsx_stxvw4x_be: + ID = Intrinsic::ppc_vsx_stxvw4x_be; + break; } llvm::Function *F = CGM.getIntrinsic(ID); return Builder.CreateCall(F, Ops, ""); Index: lib/Headers/altivec.h =================================================================== --- lib/Headers/altivec.h +++ lib/Headers/altivec.h @@ -15673,6 +15673,82 @@ } #endif +/* vec_xl_be */ + +#ifdef __LITTLE_ENDIAN__ +static __inline__ vector signed char __ATTRS_o_ai +vec_xl_be(signed long long __offset, signed char *__ptr) { + vector signed char __vec = __builtin_vsx_lxvd2x_be(__offset, __ptr); + return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, + 13, 12, 11, 10, 9, 8); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_xl_be(signed long long __offset, unsigned char *__ptr) { + vector unsigned char __vec = __builtin_vsx_lxvd2x_be(__offset, __ptr); + return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, + 13, 12, 11, 10, 9, 8); +} + +static __inline__ vector signed short __ATTRS_o_ai +vec_xl_be(signed long long __offset, signed short *__ptr) { + vector signed short __vec = __builtin_vsx_lxvd2x_be(__offset, __ptr); + return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); +} + +static __inline__ vector unsigned short __ATTRS_o_ai +vec_xl_be(signed long long __offset, unsigned short *__ptr) { + vector unsigned short __vec = __builtin_vsx_lxvd2x_be(__offset, __ptr); + return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); +} + +static __inline__ vector signed int __ATTRS_o_ai 
+vec_xl_be(signed long long __offset, signed int *__ptr) { + return (vector signed int)__builtin_vsx_lxvw4x_be(__offset, __ptr); +} + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_xl_be(signed long long __offset, unsigned int *__ptr) { + return (vector unsigned int)__builtin_vsx_lxvw4x_be(__offset, __ptr); +} + +static __inline__ vector float __ATTRS_o_ai +vec_xl_be(signed long long __offset, float *__ptr) { + return (vector float)__builtin_vsx_lxvw4x_be(__offset, __ptr); +} + +#ifdef __VSX__ +static __inline__ vector signed long long __ATTRS_o_ai +vec_xl_be(signed long long __offset, signed long long *__ptr) { + return (vector signed long long)__builtin_vsx_lxvd2x_be(__offset, __ptr); +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_xl_be(signed long long __offset, unsigned long long *__ptr) { + return (vector unsigned long long)__builtin_vsx_lxvd2x_be(__offset, __ptr); +} + +static __inline__ vector double __ATTRS_o_ai +vec_xl_be(signed long long __offset, double *__ptr) { + return (vector double)__builtin_vsx_lxvd2x_be(__offset, __ptr); +} +#endif + +#if defined(__POWER8_VECTOR__) && defined(__powerpc64__) +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_xl_be(signed long long __offset, signed __int128 *__ptr) { + return vec_xl(__offset, __ptr); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_xl_be(signed long long __offset, unsigned __int128 *__ptr) { + return vec_xl(__offset, __ptr); +} +#endif +#else + #define vec_xl_be vec_xl +#endif + /* vec_xst */ static inline __ATTRS_o_ai void vec_xst(vector signed char __vec, @@ -15750,6 +15826,99 @@ *(vector unsigned __int128 *)(__ptr + __offset) = __vec; } #endif + +/* vec_xst_be */ + +#ifdef __LITTLE_ENDIAN__ +static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed char __vec, + signed long long __offset, + signed char *__ptr) { + vector signed char __tmp = + __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, + 13, 12, 11, 10, 9, 8); 
+ __builtin_vsx_stxvd2x_be(__tmp, __offset, __ptr); +} + +static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned char __vec, + signed long long __offset, + unsigned char *__ptr) { + vector unsigned char __tmp = + __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, + 13, 12, 11, 10, 9, 8); + __builtin_vsx_stxvd2x_be(__tmp, __offset, __ptr); +} + +static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed short __vec, + signed long long __offset, + signed short *__ptr) { + vector signed short __tmp = + __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); + __builtin_vsx_stxvd2x_be(__tmp, __offset, __ptr); +} + +static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned short __vec, + signed long long __offset, + unsigned short *__ptr) { + vector unsigned short __tmp = + __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); + __builtin_vsx_stxvd2x_be(__tmp, __offset, __ptr); +} + +static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed int __vec, + signed long long __offset, + signed int *__ptr) { + __builtin_vsx_stxvw4x_be(__vec, __offset, __ptr); +} + +static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned int __vec, + signed long long __offset, + unsigned int *__ptr) { + __builtin_vsx_stxvw4x_be(__vec, __offset, __ptr); +} + +static __inline__ void __ATTRS_o_ai vec_xst_be(vector float __vec, + signed long long __offset, + float *__ptr) { + __builtin_vsx_stxvw4x_be(__vec, __offset, __ptr); +} + +#ifdef __VSX__ +static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed long long __vec, + signed long long __offset, + signed long long *__ptr) { + __builtin_vsx_stxvd2x_be(__vec, __offset, __ptr); +} + +static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned long long __vec, + signed long long __offset, + unsigned long long *__ptr) { + __builtin_vsx_stxvd2x_be(__vec, __offset, __ptr); +} + +static __inline__ void __ATTRS_o_ai vec_xst_be(vector double __vec, + signed long long __offset, + double 
*__ptr) { + __builtin_vsx_stxvd2x_be(__vec, __offset, __ptr); +} +#endif + +#if defined(__POWER8_VECTOR__) && defined(__powerpc64__) +static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed __int128 __vec, + signed long long __offset, + signed __int128 *__ptr) { + vec_xst(__vec, __offset, __ptr); +} + +static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned __int128 __vec, + signed long long __offset, + unsigned __int128 *__ptr) { + vec_xst(__vec, __offset, __ptr); +} +#endif +#else + #define vec_xst_be vec_xst +#endif + #undef __ATTRS_o_ai #endif /* __ALTIVEC_H */ Index: test/CodeGen/builtins-ppc-altivec.c =================================================================== --- test/CodeGen/builtins-ppc-altivec.c +++ test/CodeGen/builtins-ppc-altivec.c @@ -9259,7 +9259,7 @@ // CHECK-LE: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 16 } -/* ------------------------------ vec_xst ------------------------------------ */ +/* ------------------------------ vec_xst ----------------------------------- */ void test10() { // CHECK-LABEL: define void @test10 // CHECK-LE-LABEL: define void @test10 @@ -9291,3 +9291,78 @@ // CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 16 // CHECK-LE: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 16 } + +/* ----------------------------- vec_xl_be ---------------------------------- */ +void test11() { + // CHECK-LABEL: define void @test11 + // CHECK-LE-LABEL: define void @test11 + res_vsc = vec_xl_be(param_sll, &param_sc); + // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 16 + // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) + // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> + + res_vuc = vec_xl_be(param_sll, &param_uc); + // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 16 + // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) + // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 
x i32> + + res_vs = vec_xl_be(param_sll, &param_s); + // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 16 + // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) + // CHECK-LE: shufflevector <8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i32> + + res_vus = vec_xl_be(param_sll, &param_us); + // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 16 + // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) + // CHECK-LE: shufflevector <8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i32> + + res_vi = vec_xl_be(param_sll, &param_i); + // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 16 + // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %{{[0-9]+}}) + + res_vui = vec_xl_be(param_sll, &param_ui); + // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 16 + // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %{{[0-9]+}}) + + res_vf = vec_xl_be(param_sll, &param_f); + // CHECK: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 16 + // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %{{[0-9]+}}) +} + +/* ----------------------------- vec_xst_be --------------------------------- */ +void test12() { + // CHECK-LABEL: define void @test12 + // CHECK-LE-LABEL: define void @test12 + vec_xst_be(vsc, param_sll, &param_sc); + // CHECK: store <16 x i8> %{{[0-9]+}}, <16 x i8>* %{{[0-9]+}}, align 16 + // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> + // CHECK-LE: call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %{{[0-9]+}}, i8* %{{[0-9]+}}) + + vec_xst_be(vuc, param_sll, &param_uc); + // CHECK: store <16 x i8> %{{[0-9]+}}, <16 x i8>* %{{[0-9]+}}, align 16 + // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> + // CHECK-LE: call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %{{[0-9]+}}, i8* %{{[0-9]+}}) + + vec_xst_be(vs, param_sll, &param_s); + // CHECK: store <8 x i16> %{{[0-9]+}}, <8 x i16>* %{{[0-9]+}}, align 16 + // CHECK-LE: shufflevector <8 x i16> %{{[0-9]+}}, <8 x i16> 
%{{[0-9]+}}, <8 x i32> + // CHECK-LE: call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %{{[0-9]+}}, i8* %{{[0-9]+}}) + + vec_xst_be(vus, param_sll, &param_us); + // CHECK: store <8 x i16> %{{[0-9]+}}, <8 x i16>* %{{[0-9]+}}, align 16 + // CHECK-LE: shufflevector <8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i32> + // CHECK-LE: call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %{{[0-9]+}}, i8* %{{[0-9]+}}) + + vec_xst_be(vi, param_sll, &param_i); + // CHECK: store <4 x i32> %{{[0-9]+}}, <4 x i32>* %{{[0-9]+}}, align 16 + // CHECK-LE: call void @llvm.ppc.vsx.stxvw4x.be(<4 x i32> %{{[0-9]+}}, i8* %{{[0-9]+}}) + + vec_xst_be(vui, param_sll, &param_ui); + // CHECK: store <4 x i32> %{{[0-9]+}}, <4 x i32>* %{{[0-9]+}}, align 16 + // CHECK-LE: call void @llvm.ppc.vsx.stxvw4x.be(<4 x i32> %{{[0-9]+}}, i8* %{{[0-9]+}}) + + vec_xst_be(vf, param_sll, &param_f); + // CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 16 + // CHECK-LE: call void @llvm.ppc.vsx.stxvw4x.be(<4 x i32> %{{[0-9]+}}, i8* %{{[0-9]+}}) +} + Index: test/CodeGen/builtins-ppc-quadword.c =================================================================== --- test/CodeGen/builtins-ppc-quadword.c +++ test/CodeGen/builtins-ppc-quadword.c @@ -193,4 +193,26 @@ // CHECK: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 16 // CHECK-LE: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 16 // CHECK-PPC: error: call to 'vec_xst' is ambiguous + + /* vec_xl_be */ + res_vlll = vec_xl_be(param_sll, &param_lll); + // CHECK: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK-LE: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK-PPC: error: call to 'vec_xl' is ambiguous + + res_vulll = vec_xl_be(param_sll, &param_ulll); + // CHECK: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK-LE: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK-PPC: error: call to 'vec_xl' is ambiguous + + /* vec_xst_be */ + vec_xst_be(vlll, param_sll, &param_lll); + // CHECK: store <1 x 
i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK-LE: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK-PPC: error: call to 'vec_xst' is ambiguous + + vec_xst_be(vulll, param_sll, &param_ulll); + // CHECK: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK-LE: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK-PPC: error: call to 'vec_xst' is ambiguous } Index: test/CodeGen/builtins-ppc-vsx.c =================================================================== --- test/CodeGen/builtins-ppc-vsx.c +++ test/CodeGen/builtins-ppc-vsx.c @@ -1273,4 +1273,28 @@ vec_xst(vd, sll, ad); // CHECK: store <2 x double> %{{[0-9]+}}, <2 x double>* %{{[0-9]+}}, align 16 // CHECK-LE: store <2 x double> %{{[0-9]+}}, <2 x double>* %{{[0-9]+}}, align 16 + +res_vsll = vec_xl_be(sll, asll); +// CHECK: load <2 x i64>, <2 x i64>* %{{[0-9]+}}, align 16 +// CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) + +res_vull = vec_xl_be(sll, aull); +// CHECK: load <2 x i64>, <2 x i64>* %{{[0-9]+}}, align 16 +// CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) + +res_vd = vec_xl_be(sll, ad); +// CHECK: load <2 x double>, <2 x double>* %{{[0-9]+}}, align 16 +// CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) + +vec_xst_be(vsll, sll, asll); +// CHECK: store <2 x i64> %{{[0-9]+}}, <2 x i64>* %{{[0-9]+}}, align 16 +// CHECK-LE: call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %{{[0-9]+}}, i8* %{{[0-9]+}}) + +vec_xst_be(vull, sll, aull); +// CHECK: store <2 x i64> %{{[0-9]+}}, <2 x i64>* %{{[0-9]+}}, align 16 +// CHECK-LE: call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %{{[0-9]+}}, i8* %{{[0-9]+}}) + +vec_xst_be(vd, sll, ad); +// CHECK: store <2 x double> %{{[0-9]+}}, <2 x double>* %{{[0-9]+}}, align 16 +// CHECK-LE: call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %{{[0-9]+}}, i8* %{{[0-9]+}}) }