diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14273,8 +14273,8 @@ else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || BuiltinID == PPC::BI__builtin_vsx_xvrspic) ID = Builder.getIsFPConstrained() - ? Intrinsic::experimental_constrained_nearbyint - : Intrinsic::nearbyint; + ? Intrinsic::experimental_constrained_rint + : Intrinsic::rint; else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || BuiltinID == PPC::BI__builtin_vsx_xvrspip) ID = Builder.getIsFPConstrained() diff --git a/clang/test/CodeGen/builtins-ppc-fpconstrained.c b/clang/test/CodeGen/builtins-ppc-fpconstrained.c --- a/clang/test/CodeGen/builtins-ppc-fpconstrained.c +++ b/clang/test/CodeGen/builtins-ppc-fpconstrained.c @@ -59,14 +59,14 @@ vf = __builtin_vsx_xvrspic(vf); // CHECK-LABEL: try-xvrspic - // CHECK-UNCONSTRAINED: @llvm.nearbyint.v4f32(<4 x float> %{{.*}}) - // CHECK-CONSTRAINED: @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CHECK-UNCONSTRAINED: @llvm.rint.v4f32(<4 x float> %{{.*}}) + // CHECK-CONSTRAINED: @llvm.experimental.constrained.rint.v4f32(<4 x float> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: xvrspic vd = __builtin_vsx_xvrdpic(vd); // CHECK-LABEL: try-xvrdpic - // CHECK-UNCONSTRAINED: @llvm.nearbyint.v2f64(<2 x double> %{{.*}}) - // CHECK-CONSTRAINED: @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + // CHECK-UNCONSTRAINED: @llvm.rint.v2f64(<2 x double> %{{.*}}) + // CHECK-CONSTRAINED: @llvm.experimental.constrained.rint.v2f64(<2 x double> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: xvrdpic vf = __builtin_vsx_xvrspip(vf); diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c --- a/clang/test/CodeGen/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/builtins-ppc-vsx.c @@ -863,12 +863,12 @@ // CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvredp(<2 x double> res_vf = vec_rint(vf); -// CHECK: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}}) -// CHECK-LE: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}}) +// CHECK: call <4 x float> @llvm.rint.v4f32(<4 x float> %{{[0-9]+}}) +// CHECK-LE: call <4 x float> @llvm.rint.v4f32(<4 x float> %{{[0-9]+}}) res_vd = vec_rint(vd); -// CHECK: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}}) -// CHECK-LE: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}}) +// CHECK: call <2 x double> @llvm.rint.v2f64(<2 x double> %{{[0-9]+}}) +// CHECK-LE: call <2 x double> @llvm.rint.v2f64(<2 x double> %{{[0-9]+}}) res_vf = vec_rsqrte(vf); // CHECK: call <4 x float> @llvm.ppc.vsx.xvrsqrtesp(<4 x float> %{{[0-9]+}}) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -316,8 +316,10 @@ setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal); - if (Subtarget.hasVSX()) - setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Legal); + if (Subtarget.hasVSX()) { + setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal); + } if (Subtarget.hasFSQRT()) { setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); @@ -1059,7 +1061,7 @@ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); @@ -1073,7 +1075,7 @@ setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -890,15 +890,15 @@ def XSRDPIC : XX2Form<60, 107, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpic $XT, $XB", IIC_VecFP, - [(set f64:$XT, (any_fnearbyint f64:$XB))]>; + [(set f64:$XT, (fnearbyint f64:$XB))]>; def XVRDPIC : XX2Form<60, 235, (outs vsrc:$XT), (ins vsrc:$XB), "xvrdpic $XT, $XB", IIC_VecFP, - [(set v2f64:$XT, (any_fnearbyint v2f64:$XB))]>; + [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>; def XVRSPIC : XX2Form<60, 171, (outs vsrc:$XT), (ins vsrc:$XB), "xvrspic $XT, $XB", IIC_VecFP, - [(set v4f32:$XT, (any_fnearbyint v4f32:$XB))]>; + [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>; // Max/Min Instructions let isCommutable = 1 in { def XSMAXDP : XX3Form<60, 160, @@ -2681,7 +2681,7 @@ def : Pat<(f32 (any_fround f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(f32 (any_fnearbyint f32:$S)), +def : Pat<(f32 (fnearbyint f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPIC (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; def : Pat<(f32 (any_ffloor f32:$S)), @@ -2696,11 +2696,11 @@ def : Pat<(f32 (any_frint f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPIC (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>; +def : Pat<(v4f32 (any_frint v4f32:$S)), (v4f32 (XVRSPIC $S))>; // Rounding for double precision. -def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>; -def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>; +def : Pat<(f64 (any_frint f64:$S)), (f64 (XSRDPIC $S))>; +def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>; // Materialize a zero-vector of long long def : Pat<(v2i64 immAllZerosV), diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll @@ -170,12 +170,30 @@ define double @nearbyint_f64(double %f1, double %f2) { ; P8-LABEL: nearbyint_f64: ; P8: # %bb.0: -; P8-NEXT: xsrdpic f1, f1 +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl nearbyint +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 ; P8-NEXT: blr ; ; P9-LABEL: nearbyint_f64: ; P9: # %bb.0: -; P9-NEXT: xsrdpic f1, f1 +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl nearbyint +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 ; P9-NEXT: blr %res = call double @llvm.experimental.constrained.nearbyint.f64( double %f1, @@ -187,12 +205,104 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) { ; P8-LABEL: nearbyint_v4f32: ; P8: # %bb.0: -; P8-NEXT: xvrspic v2, v2 +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -176(r1) +; P8-NEXT: .cfi_def_cfa_offset 176 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: .cfi_offset v30, -32 +; P8-NEXT: .cfi_offset v31, -16 +; P8-NEXT: xxsldwi vs0, v2, v2, 3 +; P8-NEXT: li r3, 144 +; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; P8-NEXT: li r3, 160 +; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; P8-NEXT: vmr v31, v2 +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: bl nearbyintf +; P8-NEXT: nop +; P8-NEXT: xxsldwi vs0, v31, v31, 1 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: li r3, 128 +; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: bl nearbyintf +; P8-NEXT: nop +; P8-NEXT: li r3, 128 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload +; P8-NEXT: xxmrghd vs0, vs1, vs0 +; P8-NEXT: xscvspdpn f1, v31 +; P8-NEXT: xvcvdpsp v30, vs0 +; P8-NEXT: bl nearbyintf +; P8-NEXT: nop +; P8-NEXT: xxswapd vs0, v31 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: li r3, 128 +; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: bl nearbyintf +; P8-NEXT: nop +; P8-NEXT: li r3, 128 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 160 +; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 144 +; P8-NEXT: xxmrghd vs0, vs0, vs1 +; P8-NEXT: xvcvdpsp v2, vs0 +; P8-NEXT: vmrgew v2, v2, v30 +; P8-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; P8-NEXT: addi r1, r1, 176 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 ; P8-NEXT: blr ; ; P9-LABEL: nearbyint_v4f32: ; P9: # %bb.0: -; P9-NEXT: xvrspic v2, v2 +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -80(r1) +; P9-NEXT: .cfi_def_cfa_offset 80 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: .cfi_offset v30, -32 +; P9-NEXT: .cfi_offset v31, -16 +; P9-NEXT: xxsldwi vs0, v2, v2, 3 +; P9-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill +; P9-NEXT: xscvspdpn f1, vs0 +; P9-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill +; P9-NEXT: vmr v31, v2 +; P9-NEXT: bl nearbyintf +; P9-NEXT: nop +; P9-NEXT: xxsldwi vs0, v31, v31, 1 +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill +; P9-NEXT: xscvspdpn f1, vs0 +; P9-NEXT: bl nearbyintf +; P9-NEXT: nop +; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9-NEXT: xxmrghd vs0, vs1, vs0 +; P9-NEXT: xscvspdpn f1, v31 +; P9-NEXT: xvcvdpsp v30, vs0 +; P9-NEXT: bl nearbyintf +; P9-NEXT: nop +; P9-NEXT: xxswapd vs0, v31 +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill +; P9-NEXT: xscvspdpn f1, vs0 +; P9-NEXT: bl nearbyintf +; P9-NEXT: nop +; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload +; P9-NEXT: xxmrghd vs0, vs0, vs1 +; P9-NEXT: xvcvdpsp v2, vs0 +; P9-NEXT: vmrgew v2, v2, v30 +; P9-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload +; P9-NEXT: addi r1, r1, 80 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 ; P9-NEXT: blr %res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32( <4 x float> %vf1, @@ -204,12 +314,62 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) { ; P8-LABEL: nearbyint_v2f64: ; P8: # %bb.0: -; P8-NEXT: xvrdpic v2, v2 +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -160(r1) +; P8-NEXT: .cfi_def_cfa_offset 160 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: .cfi_offset v31, -16 +; P8-NEXT: li r3, 144 +; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; P8-NEXT: vmr v31, v2 +; P8-NEXT: xxlor f1, v31, v31 +; P8-NEXT: bl nearbyint +; P8-NEXT: nop +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: li r3, 128 +; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: xxswapd vs1, v31 +; P8-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; P8-NEXT: bl nearbyint +; P8-NEXT: nop +; P8-NEXT: li r3, 128 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 144 +; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; P8-NEXT: xxmrghd v2, vs0, vs1 +; P8-NEXT: addi r1, r1, 160 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 ; P8-NEXT: blr ; ; P9-LABEL: nearbyint_v2f64: ; P9: # %bb.0: -; P9-NEXT: xvrdpic v2, v2 +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -64(r1) +; P9-NEXT: .cfi_def_cfa_offset 64 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: .cfi_offset v31, -16 +; P9-NEXT: stxv v31, 48(r1) # 16-byte Folded Spill +; P9-NEXT: vmr v31, v2 +; P9-NEXT: xscpsgndp f1, v31, v31 +; P9-NEXT: bl nearbyint +; P9-NEXT: nop +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill +; P9-NEXT: xxswapd vs1, v31 +; P9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; P9-NEXT: bl nearbyint +; P9-NEXT: nop +; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload +; P9-NEXT: lxv v31, 48(r1) # 16-byte Folded Reload +; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9-NEXT: xxmrghd v2, vs0, vs1 +; P9-NEXT: addi r1, r1, 64 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 ; P9-NEXT: blr %res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( <2 x double> %vf1, diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -4899,19 +4899,50 @@ define <2 x double> @constrained_vector_nearbyint_v2f64() #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -64(1) ; PC64LE-NEXT: addis 3, 2, .LCPI81_0@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI81_0@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xvrdpic 34, 0 +; PC64LE-NEXT: lfd 1, .LCPI81_0@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI81_1@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI81_1@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxmrghd 34, 1, 0 +; PC64LE-NEXT: addi 1, 1, 64 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI81_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI81_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpic 34, 0 +; PC64LE9-NEXT: lfd 1, .LCPI81_0@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addis 3, 2, .LCPI81_1@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI81_1@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 34, 1, 0 +; PC64LE9-NEXT: addi 1, 1, 48 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( @@ -5010,31 +5041,72 @@ define <3 x double> @constrained_vector_nearby_v3f64() #0 { ; PC64LE-LABEL: constrained_vector_nearby_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI83_1@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI83_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI83_0@toc@l(3) -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xsrdpic 3, 1 -; PC64LE-NEXT: xvrdpic 2, 0 -; PC64LE-NEXT: xxswapd 1, 2 -; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 -; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI83_1@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addis 3, 2, .LCPI83_2@toc@ha +; PC64LE-NEXT: xxmrghd 63, 0, 1 +; PC64LE-NEXT: lfd 1, .LCPI83_2@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearby_v3f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI83_0@toc@ha -; PC64LE9-NEXT: lfd 0, .LCPI83_0@toc@l(3) +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI83_0@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI83_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI83_1@toc@l -; PC64LE9-NEXT: xsrdpic 3, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpic 2, 0 -; PC64LE9-NEXT: xxswapd 1, 2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI83_1@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addis 3, 2, .LCPI83_2@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 0, 1 +; PC64LE9-NEXT: lfd 1, .LCPI83_2@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: fmr 3, 1 +; PC64LE9-NEXT: xxswapd 1, 63 +; PC64LE9-NEXT: xscpsgndp 2, 63, 63 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64( @@ -5047,28 +5119,86 @@ define <4 x double> @constrained_vector_nearbyint_v4f64() #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI84_0@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI84_1@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI84_0@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: addi 3, 4, .LCPI84_1@toc@l -; PC64LE-NEXT: lxvd2x 1, 0, 3 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xxswapd 1, 1 -; PC64LE-NEXT: xvrdpic 35, 0 -; PC64LE-NEXT: xvrdpic 34, 1 +; PC64LE-NEXT: lfd 1, .LCPI84_0@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI84_1@toc@ha +; PC64LE-NEXT: lfd 1, .LCPI84_1@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addis 3, 2, .LCPI84_2@toc@ha +; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: lfd 1, .LCPI84_2@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI84_3@toc@ha +; PC64LE-NEXT: lfd 1, .LCPI84_3@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: vmr 2, 31 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxmrghd 35, 1, 0 +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI84_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI84_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI84_0@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI84_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI84_1@toc@l -; PC64LE9-NEXT: xvrdpic 35, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpic 34, 0 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI84_1@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addis 3, 2, .LCPI84_2@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: lfd 1, .LCPI84_2@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addis 3, 2, .LCPI84_3@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI84_3@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: vmr 2, 31 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 35, 1, 0 +; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(