diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -151,9 +151,11 @@ TARGET_BUILTIN(__builtin_ppc_extract_sig, "ULLid", "", "power9-vector") BUILTIN(__builtin_ppc_mtfsb0, "vUIi", "") BUILTIN(__builtin_ppc_mtfsb1, "vUIi", "") +BUILTIN(__builtin_ppc_mffs, "d", "") TARGET_BUILTIN(__builtin_ppc_mffsl, "d", "", "isa-v30-instructions") BUILTIN(__builtin_ppc_mtfsf, "vUIiUi", "") BUILTIN(__builtin_ppc_mtfsfi, "vUIiUIi", "") +BUILTIN(__builtin_ppc_set_fpscr_rn, "di", "") TARGET_BUILTIN(__builtin_ppc_insert_exp, "ddULLi", "", "power9-vector") BUILTIN(__builtin_ppc_fmsub, "dddd", "") BUILTIN(__builtin_ppc_fmsubs, "ffff", "") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17009,6 +17009,11 @@ Value *Op1 = EmitScalarExpr(E->getArg(1)); return Builder.CreateFDiv(Op0, Op1, "swdiv"); } + case PPC::BI__builtin_ppc_set_fpscr_rn: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd), + {EmitScalarExpr(E->getArg(0))}); + case PPC::BI__builtin_ppc_mffs: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm)); } } diff --git a/clang/lib/Headers/ppc_wrappers/smmintrin.h b/clang/lib/Headers/ppc_wrappers/smmintrin.h --- a/clang/lib/Headers/ppc_wrappers/smmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/smmintrin.h @@ -14,7 +14,7 @@ #ifndef NO_WARN_X86_INTRINSICS /* This header is distributed to simplify porting x86_64 code that - makes explicit use of Intel intrinsics to powerp64/powerpc64le. + makes explicit use of Intel intrinsics to powerpc64/powerpc64le. It is the user's responsibility to determine if the results are acceptable and make additional changes as necessary. @@ -68,10 +68,10 @@ __asm__("mffsce %0" : "=f"(__fpscr_save.__fr)); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; #else - __fpscr_save.__fr = __builtin_mffs(); + __fpscr_save.__fr = __builtin_ppc_mffs(); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; __fpscr_save.__fpscr &= ~0xf8; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); #endif /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule @@ -83,10 +83,15 @@ switch (__rounding) { case _MM_FROUND_TO_NEAREST_INT: - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __attribute__((fallthrough)); case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: - __builtin_set_fpscr_rn(0b00); + __builtin_ppc_set_fpscr_rn(0b00); /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule a read/use of the variable before the FPSCR is modified, above. @@ -102,7 +107,7 @@ This can be removed if and when GCC PR102783 is fixed. */ __asm__("" : : "wa"(__r)); - __builtin_set_fpscr_rn(__fpscr_save.__fpscr); + __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr); break; case _MM_FROUND_TO_NEG_INF: case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: @@ -128,9 +133,14 @@ */ __asm__("" : : "wa"(__r)); /* Restore enabled exceptions. */ - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __fpscr_save.__fpscr |= __enables_save.__fpscr; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); } return (__m128d)__r; } @@ -159,10 +169,10 @@ __asm__("mffsce %0" : "=f"(__fpscr_save.__fr)); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; #else - __fpscr_save.__fr = __builtin_mffs(); + __fpscr_save.__fr = __builtin_ppc_mffs(); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; __fpscr_save.__fpscr &= ~0xf8; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); #endif /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule @@ -174,10 +184,15 @@ switch (__rounding) { case _MM_FROUND_TO_NEAREST_INT: - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __attribute__((fallthrough)); case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: - __builtin_set_fpscr_rn(0b00); + __builtin_ppc_set_fpscr_rn(0b00); /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule a read/use of the variable before the FPSCR is modified, above. @@ -193,7 +208,7 @@ This can be removed if and when GCC PR102783 is fixed. */ __asm__("" : : "wa"(__r)); - __builtin_set_fpscr_rn(__fpscr_save.__fpscr); + __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr); break; case _MM_FROUND_TO_NEG_INF: case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: @@ -219,9 +234,14 @@ */ __asm__("" : : "wa"(__r)); /* Restore enabled exceptions. */ - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __fpscr_save.__fpscr |= __enables_save.__fpscr; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); } return (__m128)__r; } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc.c b/clang/test/CodeGen/PowerPC/builtins-ppc.c --- a/clang/test/CodeGen/PowerPC/builtins-ppc.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc.c @@ -1,5 +1,8 @@ // REQUIRES: powerpc-registered-target -// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \ +// RUN: | FileCheck %s +// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \ +// RUN: -target-cpu pwr9 | FileCheck %s --check-prefixes=P9,CHECK void test_eh_return_data_regno() { @@ -26,6 +29,9 @@ // CHECK: call double @llvm.ppc.setrnd(i32 %2) res = __builtin_setrnd(x); + + // CHECK: call double @llvm.ppc.setrnd(i32 %4) + res = __builtin_ppc_set_fpscr_rn(x); } void test_builtin_ppc_flm() { @@ -33,7 +39,10 @@ // CHECK: call double @llvm.ppc.readflm() res = __builtin_readflm(); - // CHECK: call double @llvm.ppc.setflm(double %1) + // CHECK: call double @llvm.ppc.readflm() + res = __builtin_ppc_mffs(); + + // CHECK: call double @llvm.ppc.setflm(double %2) res = __builtin_setflm(res); #ifdef _ARCH_PWR9 diff --git a/clang/test/CodeGen/PowerPC/ppc-smmintrin.c b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c --- a/clang/test/CodeGen/PowerPC/ppc-smmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c @@ -239,44 +239,48 @@ // CHECK-LABEL: @test_round // CHECK-LABEL: define available_externally <4 x float> @_mm_round_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) -// CHECK: call signext i32 @__builtin_mffs() -// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.readflm() +// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0" -// CHECK: call signext i32 @__builtin_mffsl() -// CHECK: call signext i32 @__builtin_set_fpscr_rn(i32 noundef signext 0) +// CHECK: call double @llvm.ppc.readflm() +// P10: call double @llvm.ppc.mffsl() +// CHECK: call double @llvm.ppc.setrnd(i32 0) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0" // CHECK: call <4 x float> @vec_rint(float vector[4]) // CHECK: call void asm sideeffect "", "^wa" -// CHECK: call signext i32 @__builtin_set_fpscr_rn(i64 noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.setrnd(i32 %{{[0-9a-zA-Z_.]+}}) // CHECK: call <4 x float> @vec_floor(float vector[4]) // CHECK: call <4 x float> @vec_ceil(float vector[4]) // CHECK: call <4 x float> @vec_trunc(float vector[4]) // CHECK: call <4 x float> @vec_rint(float vector[4]) // CHECK: call void asm sideeffect "", "^wa" -// CHECK: call signext i32 @__builtin_mffsl() -// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.readflm() +// P10: call double @llvm.ppc.mffsl() +// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK-LABEL: define available_externally <4 x float> @_mm_round_ss(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) // CHECK: call <4 x float> @_mm_round_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) // CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 // CHECK-LABEL: define available_externally <2 x double> @_mm_round_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) -// CHECK: call signext i32 @__builtin_mffs() -// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.readflm() +// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0" -// CHECK: call signext i32 @__builtin_mffsl() -// CHECK: call signext i32 @__builtin_set_fpscr_rn(i32 noundef signext 0) +// CHECK: call double @llvm.ppc.readflm() +// P10: call double @llvm.ppc.mffsl() +// CHECK: call double @llvm.ppc.setrnd(i32 0) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0" // CHECK: call <2 x double> @vec_rint(double vector[2]) // CHECK: call void asm sideeffect "", "^wa" -// CHECK: call signext i32 @__builtin_set_fpscr_rn(i64 noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.setrnd(i32 %{{[0-9a-zA-Z_.]+}}) // CHECK: call <2 x double> @vec_floor(double vector[2]) // CHECK: call <2 x double> @vec_ceil(double vector[2]) // CHECK: call <2 x double> @vec_trunc(double vector[2]) // CHECK: call <2 x double> @vec_rint(double vector[2]) // CHECK: call void asm sideeffect "", "^wa" -// CHECK: call signext i32 @__builtin_mffsl() -// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.readflm() +// P10: call double @llvm.ppc.mffsl() +// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK-LABEL: define available_externally <2 x double> @_mm_round_sd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) // CHECK: call <2 x double> @_mm_round_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})