diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -793,12 +793,16 @@ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); + setOperationAction(ISD::FRINT, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::f64, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); + setOperationAction(ISD::FRINT, MVT::v4f32, Legal); setOperationAction(ISD::FROUND, MVT::v4f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); setOperationAction(ISD::MUL, MVT::v2f64, Legal); setOperationAction(ISD::FMA, MVT::v2f64, Legal); diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2563,6 +2563,14 @@ def : Pat<(f32 (ftrunc f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPIZ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(f32 (frint f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPIC + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>; + +// Rounding for double precision. +def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>; +def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>; } // Materialize a zero-vector of long long diff --git a/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll b/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll --- a/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll +++ b/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll @@ -11,118 +11,34 @@ define double @splat_swap(<2 x double> %x, <2 x double> %y) nounwind { ; CHECK-LE-LABEL: splat_swap: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: mflr 0 -; CHECK-LE-NEXT: std 0, 16(1) -; CHECK-LE-NEXT: stdu 1, -80(1) -; CHECK-LE-NEXT: li 3, 64 -; CHECK-LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill -; CHECK-LE-NEXT: xvadddp 63, 34, 35 -; CHECK-LE-NEXT: xxlor 1, 63, 63 -; CHECK-LE-NEXT: bl rint -; CHECK-LE-NEXT: nop -; CHECK-LE-NEXT: xxswapd 0, 63 -; CHECK-LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; CHECK-LE-NEXT: li 3, 48 -; CHECK-LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; CHECK-LE-NEXT: fmr 1, 0 -; CHECK-LE-NEXT: bl rint -; CHECK-LE-NEXT: nop -; CHECK-LE-NEXT: li 3, 48 -; CHECK-LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; CHECK-LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; CHECK-LE-NEXT: li 3, 64 -; CHECK-LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; CHECK-LE-NEXT: xxmrghd 0, 0, 1 +; CHECK-LE-NEXT: xvadddp 0, 34, 35 +; CHECK-LE-NEXT: xvrdpic 0, 0 ; CHECK-LE-NEXT: xxswapd 1, 0 ; CHECK-LE-NEXT: xssubdp 1, 1, 0 -; CHECK-LE-NEXT: addi 1, 1, 80 -; CHECK-LE-NEXT: ld 0, 16(1) -; CHECK-LE-NEXT: mtlr 0 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: splat_swap: ; CHECK-BE: # %bb.0: -; CHECK-BE-NEXT: mflr 0 -; CHECK-BE-NEXT: std 0, 16(1) -; CHECK-BE-NEXT: stdu 1, -160(1) -; CHECK-BE-NEXT: li 3, 144 -; CHECK-BE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill -; CHECK-BE-NEXT: xvadddp 63, 34, 35 -; CHECK-BE-NEXT: xxlor 1, 63, 63 -; CHECK-BE-NEXT: bl rint -; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; CHECK-BE-NEXT: li 3, 128 -; CHECK-BE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; CHECK-BE-NEXT: xxswapd 1, 63 -; CHECK-BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; CHECK-BE-NEXT: bl rint -; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: li 3, 128 -; CHECK-BE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; CHECK-BE-NEXT: li 3, 144 -; CHECK-BE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; CHECK-BE-NEXT: xxmrghd 0, 0, 1 +; CHECK-BE-NEXT: xvadddp 0, 34, 35 +; CHECK-BE-NEXT: xvrdpic 0, 0 ; CHECK-BE-NEXT: xxswapd 1, 0 ; CHECK-BE-NEXT: xssubdp 1, 0, 1 -; CHECK-BE-NEXT: addi 1, 1, 160 -; CHECK-BE-NEXT: ld 0, 16(1) -; CHECK-BE-NEXT: mtlr 0 ; CHECK-BE-NEXT: blr ; ; CHECK-P9LE-LABEL: splat_swap: ; CHECK-P9LE: # %bb.0: -; CHECK-P9LE-NEXT: mflr 0 -; CHECK-P9LE-NEXT: std 0, 16(1) -; CHECK-P9LE-NEXT: stdu 1, -64(1) -; CHECK-P9LE-NEXT: stxv 63, 48(1) # 16-byte Folded Spill -; CHECK-P9LE-NEXT: xvadddp 63, 34, 35 -; CHECK-P9LE-NEXT: xscpsgndp 1, 63, 63 -; CHECK-P9LE-NEXT: bl rint -; CHECK-P9LE-NEXT: nop -; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; CHECK-P9LE-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; CHECK-P9LE-NEXT: xxswapd 1, 63 -; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; CHECK-P9LE-NEXT: bl rint -; CHECK-P9LE-NEXT: nop -; CHECK-P9LE-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; CHECK-P9LE-NEXT: xxmrghd 0, 0, 1 -; CHECK-P9LE-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; CHECK-P9LE-NEXT: xvadddp 0, 34, 35 +; CHECK-P9LE-NEXT: xvrdpic 0, 0 ; CHECK-P9LE-NEXT: xxswapd 1, 0 ; CHECK-P9LE-NEXT: xssubdp 1, 1, 0 -; CHECK-P9LE-NEXT: addi 1, 1, 64 -; CHECK-P9LE-NEXT: ld 0, 16(1) -; CHECK-P9LE-NEXT: mtlr 0 ; CHECK-P9LE-NEXT: blr ; ; CHECK-P9BE-LABEL: splat_swap: ; CHECK-P9BE: # %bb.0: -; CHECK-P9BE-NEXT: mflr 0 -; CHECK-P9BE-NEXT: std 0, 16(1) -; CHECK-P9BE-NEXT: stdu 1, -144(1) -; CHECK-P9BE-NEXT: stxv 63, 128(1) # 16-byte Folded Spill -; CHECK-P9BE-NEXT: xvadddp 63, 34, 35 -; CHECK-P9BE-NEXT: xscpsgndp 1, 63, 63 -; CHECK-P9BE-NEXT: bl rint -; CHECK-P9BE-NEXT: nop -; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; CHECK-P9BE-NEXT: stxv 1, 112(1) # 16-byte Folded Spill -; CHECK-P9BE-NEXT: xxswapd 1, 63 -; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; CHECK-P9BE-NEXT: bl rint -; CHECK-P9BE-NEXT: nop -; CHECK-P9BE-NEXT: lxv 0, 112(1) # 16-byte Folded Reload -; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; CHECK-P9BE-NEXT: xxmrghd 0, 0, 1 -; CHECK-P9BE-NEXT: lxv 63, 128(1) # 16-byte Folded Reload +; CHECK-P9BE-NEXT: xvadddp 0, 34, 35 +; CHECK-P9BE-NEXT: xvrdpic 0, 0 ; CHECK-P9BE-NEXT: xxswapd 1, 0 ; CHECK-P9BE-NEXT: xssubdp 1, 0, 1 -; CHECK-P9BE-NEXT: addi 1, 1, 144 -; CHECK-P9BE-NEXT: ld 0, 16(1) -; CHECK-P9BE-NEXT: mtlr 0 ; CHECK-P9BE-NEXT: blr %added = fadd <2 x double> %x, %y %call = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %added) nounwind readnone diff --git a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll --- a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll @@ -559,3 +559,47 @@ } declare float @llvm.ceil.f32(float) + +define dso_local double @test_rint(double %d) local_unnamed_addr { +; BE-LABEL: test_rint: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpic f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_rint: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpic f1, f1 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_rint: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpic f1, f1 +; FAST-NEXT: blr +entry: + %0 = tail call double @llvm.rint.f64(double %d) + ret double %0 +} + +declare double @llvm.rint.f64(double) + +define dso_local float @test_rintf(float %f) local_unnamed_addr { +; BE-LABEL: test_rintf: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpic f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_rintf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpic f1, f1 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_rintf: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpic f1, f1 +; FAST-NEXT: blr +entry: + %0 = tail call float @llvm.rint.f32(float %f) + ret float %0 +} + +declare float @llvm.rint.f32(float) diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -4748,34 +4748,20 @@ define <1 x float> @constrained_vector_rint_v1f32() #0 { ; PC64LE-LABEL: constrained_vector_rint_v1f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -32(1) ; PC64LE-NEXT: addis 3, 2, .LCPI75_0@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI75_0@toc@l(3) -; PC64LE-NEXT: bl rintf -; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 +; PC64LE-NEXT: lfs 0, .LCPI75_0@toc@l(3) +; PC64LE-NEXT: xsrdpic 0, 0 +; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 -; PC64LE-NEXT: addi 1, 1, 32 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_rint_v1f32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -32(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI75_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI75_0@toc@l(3) -; PC64LE9-NEXT: bl rintf -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: lfs 0, .LCPI75_0@toc@l(3) +; PC64LE9-NEXT: xsrdpic 0, 0 +; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 -; PC64LE9-NEXT: addi 1, 1, 32 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32( @@ -4788,50 +4774,19 @@ define <2 x double> @constrained_vector_rint_v2f64() #0 { ; PC64LE-LABEL: constrained_vector_rint_v2f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -64(1) ; PC64LE-NEXT: addis 3, 2, .LCPI76_0@toc@ha -; PC64LE-NEXT: lfd 1, .LCPI76_0@toc@l(3) -; PC64LE-NEXT: bl rint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI76_1@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI76_1@toc@l(3) -; PC64LE-NEXT: bl rint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 1, 0 -; PC64LE-NEXT: addi 1, 1, 64 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: addi 3, 3, .LCPI76_0@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xvrdpic 34, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_rint_v2f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI76_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI76_0@toc@l(3) -; PC64LE9-NEXT: bl rint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI76_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI76_1@toc@l(3) -; PC64LE9-NEXT: bl rint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 1, 0 -; PC64LE9-NEXT: addi 1, 1, 48 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: addi 3, 3, .LCPI76_0@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: xvrdpic 34, 0 ; PC64LE9-NEXT: blr entry: %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64( @@ -4844,80 +4799,50 @@ define <3 x float> @constrained_vector_rint_v3f32() #0 { ; PC64LE-LABEL: constrained_vector_rint_v3f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: stfd 30, -16(1) # 8-byte Folded Spill -; PC64LE-NEXT: stfd 31, -8(1) # 8-byte Folded Spill -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -48(1) -; PC64LE-NEXT: addis 3, 2, .LCPI77_0@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI77_0@toc@l(3) -; PC64LE-NEXT: bl rintf -; PC64LE-NEXT: nop -; PC64LE-NEXT: addis 3, 2, .LCPI77_1@toc@ha -; PC64LE-NEXT: fmr 31, 1 -; PC64LE-NEXT: lfs 1, .LCPI77_1@toc@l(3) -; PC64LE-NEXT: bl rintf -; PC64LE-NEXT: nop ; PC64LE-NEXT: addis 3, 2, .LCPI77_2@toc@ha -; PC64LE-NEXT: fmr 30, 1 -; PC64LE-NEXT: lfs 1, .LCPI77_2@toc@l(3) -; PC64LE-NEXT: bl rintf -; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 30 +; PC64LE-NEXT: addis 4, 2, .LCPI77_1@toc@ha +; PC64LE-NEXT: lfs 0, .LCPI77_2@toc@l(3) +; PC64LE-NEXT: lfs 1, .LCPI77_1@toc@l(4) +; PC64LE-NEXT: addis 3, 2, .LCPI77_0@toc@ha +; PC64LE-NEXT: xsrdpic 0, 0 +; PC64LE-NEXT: lfs 2, .LCPI77_0@toc@l(3) ; PC64LE-NEXT: addis 3, 2, .LCPI77_3@toc@ha -; PC64LE-NEXT: xscvdpspn 1, 1 +; PC64LE-NEXT: xsrdpic 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI77_3@toc@l +; PC64LE-NEXT: xsrdpic 2, 2 +; PC64LE-NEXT: xscvdpspn 0, 0 +; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 -; PC64LE-NEXT: xscvdpspn 0, 31 +; PC64LE-NEXT: xscvdpspn 0, 2 ; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 +; PC64LE-NEXT: vmrglw 2, 3, 2 ; PC64LE-NEXT: lvx 3, 0, 3 ; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 ; PC64LE-NEXT: vperm 2, 4, 2, 3 -; PC64LE-NEXT: addi 1, 1, 48 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: lfd 31, -8(1) # 8-byte Folded Reload -; PC64LE-NEXT: lfd 30, -16(1) # 8-byte Folded Reload -; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_rint_v3f32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: stfd 30, -16(1) # 8-byte Folded Spill -; PC64LE9-NEXT: stfd 31, -8(1) # 8-byte Folded Spill -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI77_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI77_0@toc@l(3) -; PC64LE9-NEXT: bl rintf -; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lfs 0, .LCPI77_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI77_1@toc@ha -; PC64LE9-NEXT: fmr 31, 1 ; PC64LE9-NEXT: lfs 1, .LCPI77_1@toc@l(3) -; PC64LE9-NEXT: bl rintf -; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI77_2@toc@ha -; PC64LE9-NEXT: fmr 30, 1 -; PC64LE9-NEXT: lfs 1, .LCPI77_2@toc@l(3) -; PC64LE9-NEXT: bl rintf -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 -; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 -; PC64LE9-NEXT: xscvdpspn 0, 31 +; PC64LE9-NEXT: xsrdpic 0, 0 +; PC64LE9-NEXT: lfs 2, .LCPI77_2@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI77_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI77_3@toc@l +; PC64LE9-NEXT: xsrdpic 1, 1 +; PC64LE9-NEXT: xsrdpic 2, 2 +; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: xscvdpspn 1, 1 +; PC64LE9-NEXT: xscvdpspn 2, 2 +; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1 +; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1 ; PC64LE9-NEXT: vmrglw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 -; PC64LE9-NEXT: addi 1, 1, 48 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: lfd 31, -8(1) # 8-byte Folded Reload -; PC64LE9-NEXT: lfd 30, -16(1) # 8-byte Folded Reload -; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32( @@ -4930,72 +4855,31 @@ define <3 x double> @constrained_vector_rint_v3f64() #0 { ; PC64LE-LABEL: constrained_vector_rint_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI78_1@toc@ha +; PC64LE-NEXT: addi 3, 3, .LCPI78_1@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: addis 3, 2, .LCPI78_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI78_0@toc@l(3) -; PC64LE-NEXT: bl rint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI78_1@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI78_1@toc@l(3) -; PC64LE-NEXT: bl rint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI78_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 0, 1 -; PC64LE-NEXT: lfd 1, .LCPI78_2@toc@l(3) -; PC64LE-NEXT: bl rint -; PC64LE-NEXT: nop -; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: xxlor 2, 63, 63 -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xsrdpic 3, 1 +; PC64LE-NEXT: xvrdpic 2, 0 +; PC64LE-NEXT: xxswapd 1, 2 +; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_rint_v3f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI78_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI78_0@toc@l(3) -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: bl rint -; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lfd 0, .LCPI78_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI78_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI78_1@toc@l(3) -; PC64LE9-NEXT: bl rint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addis 3, 2, .LCPI78_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 0, 1 -; PC64LE9-NEXT: lfd 1, .LCPI78_2@toc@l(3) -; PC64LE9-NEXT: bl rint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xxswapd 1, 63 -; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addi 3, 3, .LCPI78_1@toc@l +; PC64LE9-NEXT: xsrdpic 3, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: xvrdpic 2, 0 +; PC64LE9-NEXT: xxswapd 1, 2 ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 64 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 ; PC64LE9-NEXT: blr entry: %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64( @@ -5008,86 +4892,28 @@ define <4 x double> @constrained_vector_rint_v4f64() #0 { ; PC64LE-LABEL: constrained_vector_rint_v4f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI79_0@toc@ha -; PC64LE-NEXT: lfd 1, .LCPI79_0@toc@l(3) -; PC64LE-NEXT: bl rint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI79_1@toc@ha -; PC64LE-NEXT: lfd 1, .LCPI79_1@toc@l(3) -; PC64LE-NEXT: bl rint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI79_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 -; PC64LE-NEXT: lfd 1, .LCPI79_2@toc@l(3) -; PC64LE-NEXT: bl rint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI79_3@toc@ha -; PC64LE-NEXT: lfd 1, .LCPI79_3@toc@l(3) -; PC64LE-NEXT: bl rint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: vmr 2, 31 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 35, 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: addis 4, 2, .LCPI79_1@toc@ha +; PC64LE-NEXT: addi 3, 3, .LCPI79_0@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: addi 3, 4, .LCPI79_1@toc@l +; PC64LE-NEXT: lxvd2x 1, 0, 3 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xxswapd 1, 1 +; PC64LE-NEXT: xvrdpic 34, 0 +; PC64LE-NEXT: xvrdpic 35, 1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_rint_v4f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI79_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI79_0@toc@l(3) -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: bl rint -; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 3, 3, .LCPI79_0@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI79_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfd 1, .LCPI79_1@toc@l(3) -; PC64LE9-NEXT: bl rint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addis 3, 2, .LCPI79_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 -; PC64LE9-NEXT: lfd 1, .LCPI79_2@toc@l(3) -; PC64LE9-NEXT: bl rint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI79_3@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfd 1, .LCPI79_3@toc@l(3) -; PC64LE9-NEXT: bl rint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: vmr 2, 31 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 35, 1, 0 -; PC64LE9-NEXT: addi 1, 1, 64 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: addi 3, 3, .LCPI79_1@toc@l +; PC64LE9-NEXT: xvrdpic 34, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: xvrdpic 35, 0 ; PC64LE9-NEXT: blr entry: %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64( diff --git a/llvm/test/CodeGen/PowerPC/vector-rounding-ops.ll b/llvm/test/CodeGen/PowerPC/vector-rounding-ops.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/vector-rounding-ops.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s | \ +; RUN: FileCheck %s --check-prefix=P9 +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s | \ +; RUN: FileCheck %s +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s \ +; RUN: --enable-unsafe-fp-math | FileCheck %s --check-prefix=FAST + +define dso_local <2 x double> @test_rint_v2f64(<2 x double> %d) local_unnamed_addr { +; P9-LABEL: test_rint_v2f64: +; P9: # %bb.0: # %entry +; P9-NEXT: xvrdpic v2, v2 +; P9-NEXT: blr +; +; CHECK-LABEL: test_rint_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrdpic v2, v2 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_rint_v2f64: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xvrdpic v2, v2 +; FAST-NEXT: blr +entry: + %0 = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %d) + ret <2 x double> %0 +} + +declare <2 x double> @llvm.rint.v2f64(<2 x double>) + + +define dso_local <4 x float> @test_rint_v4f32(<4 x float> %d) local_unnamed_addr { +; P9-LABEL: test_rint_v4f32: +; P9: # %bb.0: # %entry +; P9-NEXT: xvrspic v2, v2 +; P9-NEXT: blr +; +; CHECK-LABEL: test_rint_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrspic v2, v2 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_rint_v4f32: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xvrspic v2, v2 +; FAST-NEXT: blr +entry: + %0 = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %d) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.rint.v4f32(<4 x float>)