diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -8413,9 +8413,20 @@ } #ifdef __VSX__ +#ifdef __XL_COMPAT_ALTIVEC__ +static __inline__ vector double __ATTRS_o_ai vec_rint(vector double __a); /* forward declaration: vec_round below calls vec_rint before the vec_rint section */ +static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) { + double __fpscr = __builtin_readflm(); /* keep the value read here so it can be written back below */ + __builtin_setrnd(0); /* select rounding mode 0 (presumably round-to-nearest; confirm against the FPSCR RN encoding) */ + vector double __rounded = vec_rint(__a); /* vec_rint runs while mode 0 is in effect */ + __builtin_setflm(__fpscr); /* write back the value saved above */ + return __rounded; +} +#else static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) { return __builtin_vsx_xvrdpi(__a); } +#endif /* vec_rint */ diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c --- a/clang/test/CodeGen/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/builtins-ppc-vsx.c @@ -409,10 +409,6 @@ // CHECK: call <4 x float> @llvm.ppc.altivec.vrfin(<4 x float> // CHECK-LE: call <4 x float> @llvm.ppc.altivec.vrfin(<4 x float> - res_vd = vec_round(vd); -// CHECK: call <2 x double> @llvm.round.v2f64(<2 x double> -// CHECK-LE: call <2 x double> @llvm.round.v2f64(<2 x double> - res_vd = vec_perm(vd, vd, vuc); // CHECK: @llvm.ppc.altivec.vperm // CHECK-LE: @llvm.ppc.altivec.vperm diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat.c b/clang/test/CodeGen/builtins-ppc-xlcompat.c --- a/clang/test/CodeGen/builtins-ppc-xlcompat.c +++ b/clang/test/CodeGen/builtins-ppc-xlcompat.c @@ -5,11 +5,16 @@ // RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ // RUN: -triple powerpc64le-unknown-linux-gnu -emit-llvm %s -o - \ // RUN: -D__XL_COMPAT_ALTIVEC__ -target-cpu pwr8 | FileCheck %s +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -triple powerpc64le-unknown-linux-gnu -emit-llvm %s -o - \ +// RUN: -U__XL_COMPAT_ALTIVEC__ -target-cpu pwr8 | FileCheck \ +// RUN: --check-prefix=NOCOMPAT %s #include vector double vd = { 3.4e22, 1.8e-3 }; vector signed long long vsll = { -12345678999ll, 
12345678999 }; vector unsigned long long vull = { 11547229456923630743llu, 18014402265226391llu }; vector float res_vf; +vector double res_vd; vector signed int res_vsi; vector unsigned int res_vui; @@ -38,4 +43,11 @@ // CHECK: [[TMP8:%.*]] = load <2 x double>, <2 x double>* @vd, align 16 // CHECK-NEXT: fmul <2 x double> [[TMP8]], // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcvdpuxws(<2 x double> + + res_vd = vec_round(vd); +// CHECK: call double @llvm.ppc.readflm() +// CHECK: call double @llvm.ppc.setrnd(i32 0) +// CHECK: call <2 x double> @llvm.rint.v2f64(<2 x double> +// CHECK: call double @llvm.ppc.setflm(double +// NOCOMPAT: call <2 x double> @llvm.round.v2f64(<2 x double> } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -12122,6 +12122,7 @@ MachineFunction::iterator It = ++BB->getIterator(); MachineFunction *F = BB->getParent(); + MachineRegisterInfo &MRI = F->getRegInfo(); /* queried via use_empty() where the old FPSCR value is saved */ if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 || @@ -12727,7 +12728,10 @@ Register OldFPSCRReg = MI.getOperand(0).getReg(); // Save FPSCR value. - BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); + if (MRI.use_empty(OldFPSCRReg)) /* result has no uses: define it with IMPLICIT_DEF instead of emitting MFFS */ + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg); + else + BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); // The floating point rounding mode is in the bits 62:63 of FPCSR, and has // the following settings: @@ -12860,7 +12864,10 @@ // Result of setflm is previous FPSCR content, so we need to save it first. 
Register OldFPSCRReg = MI.getOperand(0).getReg(); - BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg); + if (MRI.use_empty(OldFPSCRReg)) /* result has no uses: define it with IMPLICIT_DEF instead of emitting MFFS */ + BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg); + else + BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg); // Put bits in 32:63 to FPSCR. Register NewFPSCRReg = MI.getOperand(1).getReg(); diff --git a/llvm/test/CodeGen/PowerPC/read-set-flm.ll b/llvm/test/CodeGen/PowerPC/read-set-flm.ll --- a/llvm/test/CodeGen/PowerPC/read-set-flm.ll +++ b/llvm/test/CodeGen/PowerPC/read-set-flm.ll @@ -11,7 +11,6 @@ ; CHECK-NEXT: xsdivdp 1, 1, 2 ; CHECK-NEXT: xsadddp 1, 1, 3 ; CHECK-NEXT: xsadddp 0, 1, 0 -; CHECK-NEXT: mffs 1 ; CHECK-NEXT: mtfsf 255, 4 ; CHECK-NEXT: xsdivdp 1, 3, 4 ; CHECK-NEXT: xsadddp 1, 1, 2 @@ -47,7 +46,6 @@ ; CHECK-NEXT: xsdivdp 1, 1, 2 ; CHECK-NEXT: xsadddp 1, 1, 3 ; CHECK-NEXT: xsadddp 0, 1, 0 -; CHECK-NEXT: mffs 1 ; CHECK-NEXT: mtfsf 255, 4 ; CHECK-NEXT: xsdivdp 1, 3, 4 ; CHECK-NEXT: xsadddp 1, 1, 2 @@ -96,7 +94,6 @@ ; CHECK-NEXT: nop ; CHECK-NEXT: mffs 0 ; CHECK-NEXT: stfd 0, 0(30) -; CHECK-NEXT: mffs 0 ; CHECK-NEXT: mtfsf 255, 31 ; CHECK-NEXT: addi 1, 1, 64 ; CHECK-NEXT: ld 0, 16(1) @@ -134,7 +131,6 @@ ; CHECK-NEXT: nop ; CHECK-NEXT: mffs 0 ; CHECK-NEXT: stfd 0, 0(30) -; CHECK-NEXT: mffs 0 ; CHECK-NEXT: mtfsf 255, 31 ; CHECK-NEXT: addi 1, 1, 64 ; CHECK-NEXT: ld 0, 16(1)