diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -390,6 +390,16 @@ setOperationAction(ISD::BITCAST, MVT::i32, Legal); setOperationAction(ISD::BITCAST, MVT::i64, Legal); setOperationAction(ISD::BITCAST, MVT::f64, Legal); + /* With unsafe FP math only, mark scalar f32/f64 lrint, llrint, lround and llround Legal so they are instruction-selected; the non-fast test runs in this patch still call the libm routines. */ if (TM.Options.UnsafeFPMath) { + setOperationAction(ISD::LRINT, MVT::f64, Legal); + setOperationAction(ISD::LRINT, MVT::f32, Legal); + setOperationAction(ISD::LLRINT, MVT::f64, Legal); + setOperationAction(ISD::LLRINT, MVT::f32, Legal); + setOperationAction(ISD::LROUND, MVT::f64, Legal); + setOperationAction(ISD::LROUND, MVT::f32, Legal); + setOperationAction(ISD::LLROUND, MVT::f64, Legal); + setOperationAction(ISD::LLROUND, MVT::f32, Legal); + } } else { setOperationAction(ISD::BITCAST, MVT::f32, Expand); setOperationAction(ISD::BITCAST, MVT::i32, Expand); @@ -777,8 +787,13 @@ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + /* Scalar f64 nearbyint and round become Legal; the tests in this patch expect xsrdpic and xsrdpi for them. NOTE(review): unlike the lrint/lround group this is not gated on UnsafeFPMath, and the constrained nearbyint tests now select xsrdpic/xvrdpic - confirm those instructions cannot raise inexact, which nearbyint is specified not to do. */ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + setOperationAction(ISD::FROUND, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + /* Same two operations for scalar f32; the same review note applies. */ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + setOperationAction(ISD::FROUND, MVT::f32, Legal); setOperationAction(ISD::MUL, MVT::v2f64, Legal); setOperationAction(ISD::FMA, MVT::v2f64, Legal); diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2526,6 +2526,43 @@ // (move to FPR, nothing else needed) def : Pat<(f64 (bitconvert i64:$S)), (f64 (MTVSRD $S))>; + +// Rounding to integer. 
+/* lrint/llrint: apply FCTID to the input and let MFVSRD produce the i64 result; an f32 input is first copied to F8RC. */ def : Pat<(i64 (lrint f64:$S)), + (i64 (MFVSRD (FCTID $S)))>; +def : Pat<(i64 (lrint f32:$S)), + (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>; +def : Pat<(i64 (llrint f64:$S)), + (i64 (MFVSRD (FCTID $S)))>; +def : Pat<(i64 (llrint f32:$S)), + (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>; +/* lround/llround: same sequence with XSRDPI applied before FCTID; an f32 input is first copied to VSFRC. */ def : Pat<(i64 (lround f64:$S)), + (i64 (MFVSRD (FCTID (XSRDPI $S))))>; +def : Pat<(i64 (lround f32:$S)), + (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; +def : Pat<(i64 (llround f64:$S)), + (i64 (MFVSRD (FCTID (XSRDPI $S))))>; +def : Pat<(i64 (llround f32:$S)), + (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; +} + +let Predicates = [HasVSX] in { +// Rounding for single precision. +/* Each f32 pattern below copies the input to VSFRC, applies one XSRDPI* instruction (XSRDPI, XSRDPIC, XSRDPIM, XSRDPIP or XSRDPIZ), and copies the result back to VSSRC. */ def : Pat<(f32 (fround f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPI + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +/* NOTE(review): confirm XSRDPIC cannot raise inexact before using it for fnearbyint outside unsafe FP math. */ def : Pat<(f32 (fnearbyint f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPIC + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(f32 (ffloor f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPIM + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(f32 (fceil f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPIP + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(f32 (ftrunc f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPIZ + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; } // Materialize a zero-vector of long long diff --git a/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll b/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll --- a/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll +++ b/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll @@ -29,7 +29,7 @@ define float @f_i128_fi_nsz(float %v) #0 { ; CHECK-LABEL: f_i128_fi_nsz: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: friz 0, 1 +; CHECK-NEXT: xsrdpiz 0, 1 ; CHECK-NEXT: fmr 1, 0 ; CHECK-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/rounding-ops.ll b/llvm/test/CodeGen/PowerPC/rounding-ops.ll --- a/llvm/test/CodeGen/PowerPC/rounding-ops.ll +++ b/llvm/test/CodeGen/PowerPC/rounding-ops.ll @@ -10,7 +10,7 @@ ; CHECK-LABEL: 
test1: ; CHECK: frim 1, 1 ; CHECK-VSX-LABEL: test1: -; CHECK-VSX: frim 1, 1 +; CHECK-VSX: xsrdpim 1, 1 } declare float @floorf(float) nounwind readnone @@ -34,7 +34,7 @@ ; CHECK-LABEL: test3: ; CHECK: frin 1, 1 ; CHECK-VSX-LABEL: test3: -; CHECK-VSX: frin 1, 1 +; CHECK-VSX: xsrdpi 1, 1 } declare float @roundf(float) nounwind readnone @@ -58,7 +58,7 @@ ; CHECK-LABEL: test5: ; CHECK: frip 1, 1 ; CHECK-VSX-LABEL: test5: -; CHECK-VSX: frip 1, 1 +; CHECK-VSX: xsrdpip 1, 1 } declare float @ceilf(float) nounwind readnone @@ -82,7 +82,7 @@ ; CHECK-LABEL: test9: ; CHECK: friz 1, 1 ; CHECK-VSX-LABEL: test9: -; CHECK-VSX: friz 1, 1 +; CHECK-VSX: xsrdpiz 1, 1 } declare float @truncf(float) nounwind readnone diff --git a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll @@ -0,0 +1,561 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64-unknown-unknown -verify-machineinstrs < %s | \ +; RUN: FileCheck %s --check-prefix=BE +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s | \ +; RUN: FileCheck %s +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s \ +; RUN: --enable-unsafe-fp-math | FileCheck %s --check-prefix=FAST +define dso_local i64 @test_lrint(double %d) local_unnamed_addr { +; BE-LABEL: test_lrint: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: std r0, 16(r1) +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_lrint: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_lrint: +; FAST: # %bb.0: # %entry +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: blr +entry: + %0 = tail call i64 @llvm.lrint.i64.f64(double %d) + ret i64 %0 +} + +declare i64 @llvm.lrint.i64.f64(double) + +define dso_local i64 @test_lrintf(float %f) local_unnamed_addr { +; BE-LABEL: test_lrintf: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: std r0, 16(r1) +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_lrintf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_lrintf: +; FAST: # %bb.0: # %entry +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: blr +entry: + %0 = tail call i64 @llvm.lrint.i64.f32(float %f) + ret i64 %0 +} + +declare i64 @llvm.lrint.i64.f32(float) + +define dso_local i64 @test_llrint(double %d) local_unnamed_addr { +; BE-LABEL: test_llrint: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: std r0, 16(r1) +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_llrint: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_llrint: +; FAST: # %bb.0: # %entry +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: blr +entry: + %0 = tail call i64 @llvm.llrint.i64.f64(double %d) + ret i64 %0 +} + +declare i64 @llvm.llrint.i64.f64(double) + +define dso_local i64 @test_llrintf(float %f) local_unnamed_addr { +; BE-LABEL: test_llrintf: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: std r0, 16(r1) +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_llrintf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_llrintf: +; FAST: # %bb.0: # %entry +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: blr +entry: + %0 = tail call i64 @llvm.llrint.i64.f32(float %f) + ret i64 %0 +} + +declare i64 @llvm.llrint.i64.f32(float) + +define dso_local i64 @test_lround(double %d) local_unnamed_addr { +; BE-LABEL: test_lround: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: std r0, 16(r1) +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl lround +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; 
CHECK-LABEL: test_lround: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl lround +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_lround: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpi f0, f1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: blr +entry: + %0 = tail call i64 @llvm.lround.i64.f64(double %d) + ret i64 %0 +} + +declare i64 @llvm.lround.i64.f64(double) + +define dso_local i64 @test_lroundf(float %f) local_unnamed_addr { +; BE-LABEL: test_lroundf: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: std r0, 16(r1) +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl lroundf +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_lroundf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl lroundf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_lroundf: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpi f0, f1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: blr +entry: + %0 = tail call i64 @llvm.lround.i64.f32(float %f) + ret i64 %0 +} + +declare i64 @llvm.lround.i64.f32(float) + +define dso_local i64 @test_llround(double %d) local_unnamed_addr { +; BE-LABEL: test_llround: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: std r0, 16(r1) +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl llround +; BE-NEXT: nop +; BE-NEXT: addi 
r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_llround: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl llround +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_llround: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpi f0, f1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: blr +entry: + %0 = tail call i64 @llvm.llround.i64.f64(double %d) + ret i64 %0 +} + +declare i64 @llvm.llround.i64.f64(double) + +define dso_local i64 @test_llroundf(float %f) local_unnamed_addr { +; BE-LABEL: test_llroundf: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: std r0, 16(r1) +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl llroundf +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_llroundf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl llroundf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_llroundf: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpi f0, f1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: blr +entry: + %0 = tail call i64 @llvm.llround.i64.f32(float %f) + ret i64 %0 +} + +declare i64 @llvm.llround.i64.f32(float) + +define dso_local double @test_nearbyint(double %d) local_unnamed_addr { +; BE-LABEL: test_nearbyint: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpic f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_nearbyint: +; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: xsrdpic f1, f1 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_nearbyint: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpic f1, f1 +; FAST-NEXT: blr +entry: + %0 = tail call double @llvm.nearbyint.f64(double %d) + ret double %0 +} + +declare double @llvm.nearbyint.f64(double) + +define dso_local float @test_nearbyintf(float %f) local_unnamed_addr { +; BE-LABEL: test_nearbyintf: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpic f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_nearbyintf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpic f1, f1 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_nearbyintf: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpic f1, f1 +; FAST-NEXT: blr +entry: + %0 = tail call float @llvm.nearbyint.f32(float %f) + ret float %0 +} + +declare float @llvm.nearbyint.f32(float) + +define dso_local double @test_round(double %d) local_unnamed_addr { +; BE-LABEL: test_round: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpi f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_round: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpi f1, f1 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_round: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpi f1, f1 +; FAST-NEXT: blr +entry: + %0 = tail call double @llvm.round.f64(double %d) + ret double %0 +} + +declare double @llvm.round.f64(double) + +define dso_local float @test_roundf(float %f) local_unnamed_addr { +; BE-LABEL: test_roundf: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpi f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_roundf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpi f1, f1 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_roundf: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpi f1, f1 +; FAST-NEXT: blr +entry: + %0 = tail call float @llvm.round.f32(float %f) + ret float %0 +} + +declare float @llvm.round.f32(float) + +define dso_local double @test_trunc(double %d) local_unnamed_addr { +; BE-LABEL: test_trunc: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpiz f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_trunc: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: xsrdpiz f1, f1 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_trunc: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpiz f1, f1 +; FAST-NEXT: blr +entry: + %0 = tail call double @llvm.trunc.f64(double %d) + ret double %0 +} + +declare double @llvm.trunc.f64(double) + +define dso_local float @test_truncf(float %f) local_unnamed_addr { +; BE-LABEL: test_truncf: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpiz f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_truncf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpiz f1, f1 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_truncf: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpiz f1, f1 +; FAST-NEXT: blr +entry: + %0 = tail call float @llvm.trunc.f32(float %f) + ret float %0 +} + +declare float @llvm.trunc.f32(float) + +define dso_local double @test_floor(double %d) local_unnamed_addr { +; BE-LABEL: test_floor: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpim f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_floor: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpim f1, f1 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_floor: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpim f1, f1 +; FAST-NEXT: blr +entry: + %0 = tail call double @llvm.floor.f64(double %d) + ret double %0 +} + +declare double @llvm.floor.f64(double) + +define dso_local float @test_floorf(float %f) local_unnamed_addr { +; BE-LABEL: test_floorf: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpim f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_floorf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpim f1, f1 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_floorf: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpim f1, f1 +; FAST-NEXT: blr +entry: + %0 = tail call float @llvm.floor.f32(float %f) + ret float %0 +} + +declare float @llvm.floor.f32(float) + +define dso_local double @test_ceil(double %d) local_unnamed_addr { +; BE-LABEL: test_ceil: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpip f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_ceil: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: xsrdpip f1, f1 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_ceil: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpip f1, f1 +; FAST-NEXT: blr +entry: + %0 = tail call double @llvm.ceil.f64(double %d) + ret double %0 +} + +declare double @llvm.ceil.f64(double) + +define dso_local float @test_ceilf(float %f) local_unnamed_addr { +; BE-LABEL: test_ceilf: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpip f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_ceilf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpip f1, f1 +; CHECK-NEXT: blr +; +; FAST-LABEL: test_ceilf: +; FAST: # %bb.0: # %entry +; FAST-NEXT: xsrdpip f1, f1 +; FAST-NEXT: blr +entry: + %0 = tail call float @llvm.ceil.f32(float %f) + ret float %0 +} + +declare float @llvm.ceil.f32(float) diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -5101,34 +5101,20 @@ define <1 x float> @constrained_vector_nearbyint_v1f32() #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v1f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -32(1) ; PC64LE-NEXT: addis 3, 2, .LCPI80_0@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI80_0@toc@l(3) -; PC64LE-NEXT: bl nearbyintf -; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 +; PC64LE-NEXT: lfs 0, .LCPI80_0@toc@l(3) +; PC64LE-NEXT: xsrdpic 0, 0 +; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 -; PC64LE-NEXT: addi 1, 1, 32 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v1f32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -32(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI80_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI80_0@toc@l(3) -; PC64LE9-NEXT: bl nearbyintf -; PC64LE9-NEXT: nop -; 
PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: lfs 0, .LCPI80_0@toc@l(3) +; PC64LE9-NEXT: xsrdpic 0, 0 +; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 -; PC64LE9-NEXT: addi 1, 1, 32 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32( @@ -5141,50 +5127,19 @@ define <2 x double> @constrained_vector_nearbyint_v2f64() #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -64(1) ; PC64LE-NEXT: addis 3, 2, .LCPI81_0@toc@ha -; PC64LE-NEXT: lfd 1, .LCPI81_0@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI81_1@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI81_1@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 1, 0 -; PC64LE-NEXT: addi 1, 1, 64 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: addi 3, 3, .LCPI81_0@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xvrdpic 34, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI81_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI81_0@toc@l(3) -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI81_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI81_1@toc@l(3) -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 
16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 1, 0 -; PC64LE9-NEXT: addi 1, 1, 48 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: addi 3, 3, .LCPI81_0@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: xvrdpic 34, 0 ; PC64LE9-NEXT: blr entry: %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( @@ -5197,80 +5152,50 @@ define <3 x float> @constrained_vector_nearbyint_v3f32() #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v3f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: stfd 30, -16(1) # 8-byte Folded Spill -; PC64LE-NEXT: stfd 31, -8(1) # 8-byte Folded Spill -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -48(1) -; PC64LE-NEXT: addis 3, 2, .LCPI82_0@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI82_0@toc@l(3) -; PC64LE-NEXT: bl nearbyintf -; PC64LE-NEXT: nop -; PC64LE-NEXT: addis 3, 2, .LCPI82_1@toc@ha -; PC64LE-NEXT: fmr 31, 1 -; PC64LE-NEXT: lfs 1, .LCPI82_1@toc@l(3) -; PC64LE-NEXT: bl nearbyintf -; PC64LE-NEXT: nop ; PC64LE-NEXT: addis 3, 2, .LCPI82_2@toc@ha -; PC64LE-NEXT: fmr 30, 1 -; PC64LE-NEXT: lfs 1, .LCPI82_2@toc@l(3) -; PC64LE-NEXT: bl nearbyintf -; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 30 +; PC64LE-NEXT: addis 4, 2, .LCPI82_1@toc@ha +; PC64LE-NEXT: lfs 0, .LCPI82_2@toc@l(3) +; PC64LE-NEXT: lfs 1, .LCPI82_1@toc@l(4) +; PC64LE-NEXT: addis 3, 2, .LCPI82_0@toc@ha +; PC64LE-NEXT: xsrdpic 0, 0 +; PC64LE-NEXT: lfs 2, .LCPI82_0@toc@l(3) ; PC64LE-NEXT: addis 3, 2, .LCPI82_3@toc@ha -; PC64LE-NEXT: xscvdpspn 1, 1 +; PC64LE-NEXT: xsrdpic 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI82_3@toc@l +; PC64LE-NEXT: xsrdpic 2, 2 +; PC64LE-NEXT: xscvdpspn 0, 0 +; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 -; PC64LE-NEXT: xscvdpspn 0, 31 +; PC64LE-NEXT: xscvdpspn 0, 2 ; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 2, 3 +; PC64LE-NEXT: vmrglw 2, 3, 2 ; PC64LE-NEXT: lvx 3, 0, 3 ; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 ; 
PC64LE-NEXT: vperm 2, 4, 2, 3 -; PC64LE-NEXT: addi 1, 1, 48 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: lfd 31, -8(1) # 8-byte Folded Reload -; PC64LE-NEXT: lfd 30, -16(1) # 8-byte Folded Reload -; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v3f32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: stfd 30, -16(1) # 8-byte Folded Spill -; PC64LE9-NEXT: stfd 31, -8(1) # 8-byte Folded Spill -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI82_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI82_0@toc@l(3) -; PC64LE9-NEXT: bl nearbyintf -; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lfs 0, .LCPI82_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI82_1@toc@ha -; PC64LE9-NEXT: fmr 31, 1 ; PC64LE9-NEXT: lfs 1, .LCPI82_1@toc@l(3) -; PC64LE9-NEXT: bl nearbyintf -; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI82_2@toc@ha -; PC64LE9-NEXT: fmr 30, 1 -; PC64LE9-NEXT: lfs 1, .LCPI82_2@toc@l(3) -; PC64LE9-NEXT: bl nearbyintf -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 -; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 -; PC64LE9-NEXT: xscvdpspn 0, 31 +; PC64LE9-NEXT: xsrdpic 0, 0 +; PC64LE9-NEXT: lfs 2, .LCPI82_2@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI82_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI82_3@toc@l +; PC64LE9-NEXT: xsrdpic 1, 1 +; PC64LE9-NEXT: xsrdpic 2, 2 +; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: xscvdpspn 1, 1 +; PC64LE9-NEXT: xscvdpspn 2, 2 +; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 +; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1 +; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1 ; PC64LE9-NEXT: vmrglw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 -; PC64LE9-NEXT: addi 1, 1, 48 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: lfd 31, -8(1) # 8-byte Folded Reload -; PC64LE9-NEXT: lfd 30, -16(1) # 8-byte Folded Reload -; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call 
<3 x float> @llvm.experimental.constrained.nearbyint.v3f32( @@ -5283,72 +5208,31 @@ define <3 x double> @constrained_vector_nearby_v3f64() #0 { ; PC64LE-LABEL: constrained_vector_nearby_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha +; PC64LE-NEXT: addi 3, 3, .LCPI83_1@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: addis 3, 2, .LCPI83_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI83_0@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI83_1@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI83_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 0, 1 -; PC64LE-NEXT: lfd 1, .LCPI83_2@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: xxlor 2, 63, 63 -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xsrdpic 3, 1 +; PC64LE-NEXT: xvrdpic 2, 0 +; PC64LE-NEXT: xxswapd 1, 2 +; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearby_v3f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI83_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI83_0@toc@l(3) -; 
PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lfd 0, .LCPI83_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI83_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI83_1@toc@l(3) -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addis 3, 2, .LCPI83_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 0, 1 -; PC64LE9-NEXT: lfd 1, .LCPI83_2@toc@l(3) -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xxswapd 1, 63 -; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addi 3, 3, .LCPI83_1@toc@l +; PC64LE9-NEXT: xsrdpic 3, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: xvrdpic 2, 0 +; PC64LE9-NEXT: xxswapd 1, 2 ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 64 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 ; PC64LE9-NEXT: blr entry: %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64( @@ -5361,86 +5245,28 @@ define <4 x double> @constrained_vector_nearbyint_v4f64() #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI84_0@toc@ha -; PC64LE-NEXT: lfd 1, .LCPI84_0@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI84_1@toc@ha -; PC64LE-NEXT: lfd 1, .LCPI84_1@toc@l(3) -; PC64LE-NEXT: bl 
nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI84_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 -; PC64LE-NEXT: lfd 1, .LCPI84_2@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI84_3@toc@ha -; PC64LE-NEXT: lfd 1, .LCPI84_3@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: vmr 2, 31 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 35, 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: addis 4, 2, .LCPI84_1@toc@ha +; PC64LE-NEXT: addi 3, 3, .LCPI84_0@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: addi 3, 4, .LCPI84_1@toc@l +; PC64LE-NEXT: lxvd2x 1, 0, 3 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xxswapd 1, 1 +; PC64LE-NEXT: xvrdpic 34, 0 +; PC64LE-NEXT: xvrdpic 35, 1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI84_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI84_0@toc@l(3) -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 3, 3, .LCPI84_0@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI84_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfd 1, .LCPI84_1@toc@l(3) -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) 
# 16-byte Folded Reload -; PC64LE9-NEXT: addis 3, 2, .LCPI84_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 -; PC64LE9-NEXT: lfd 1, .LCPI84_2@toc@l(3) -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI84_3@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfd 1, .LCPI84_3@toc@l(3) -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: vmr 2, 31 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 35, 1, 0 -; PC64LE9-NEXT: addi 1, 1, 64 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: addi 3, 3, .LCPI84_1@toc@l +; PC64LE9-NEXT: xvrdpic 34, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: xvrdpic 35, 0 ; PC64LE9-NEXT: blr entry: %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64( @@ -6548,7 +6374,7 @@ ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI103_0@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI103_0@toc@l(3) -; PC64LE-NEXT: frip 0, 0 +; PC64LE-NEXT: xsrdpip 0, 0 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: blr @@ -6557,7 +6383,7 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI103_0@toc@ha ; PC64LE9-NEXT: lfs 0, .LCPI103_0@toc@l(3) -; PC64LE9-NEXT: frip 0, 0 +; PC64LE9-NEXT: xsrdpip 0, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: blr @@ -6600,12 +6426,12 @@ ; PC64LE-NEXT: lfs 0, .LCPI105_2@toc@l(3) ; PC64LE-NEXT: lfs 1, .LCPI105_1@toc@l(4) ; PC64LE-NEXT: addis 3, 2, .LCPI105_0@toc@ha -; PC64LE-NEXT: frip 0, 0 +; PC64LE-NEXT: xsrdpip 0, 0 ; PC64LE-NEXT: lfs 2, .LCPI105_0@toc@l(3) ; PC64LE-NEXT: addis 3, 2, .LCPI105_3@toc@ha -; PC64LE-NEXT: frip 1, 1 +; PC64LE-NEXT: xsrdpip 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI105_3@toc@l 
-; PC64LE-NEXT: frip 2, 2 +; PC64LE-NEXT: xsrdpip 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 @@ -6624,12 +6450,12 @@ ; PC64LE9-NEXT: addis 3, 2, .LCPI105_1@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI105_1@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI105_2@toc@ha -; PC64LE9-NEXT: frip 0, 0 +; PC64LE9-NEXT: xsrdpip 0, 0 ; PC64LE9-NEXT: lfs 2, .LCPI105_2@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI105_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI105_3@toc@l -; PC64LE9-NEXT: frip 1, 1 -; PC64LE9-NEXT: frip 2, 2 +; PC64LE9-NEXT: xsrdpip 1, 1 +; PC64LE9-NEXT: xsrdpip 2, 2 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 2, 2 @@ -6688,7 +6514,7 @@ ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI107_0@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI107_0@toc@l(3) -; PC64LE-NEXT: frim 0, 0 +; PC64LE-NEXT: xsrdpim 0, 0 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: blr @@ -6697,7 +6523,7 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI107_0@toc@ha ; PC64LE9-NEXT: lfs 0, .LCPI107_0@toc@l(3) -; PC64LE9-NEXT: frim 0, 0 +; PC64LE9-NEXT: xsrdpim 0, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: blr @@ -6741,12 +6567,12 @@ ; PC64LE-NEXT: lfs 0, .LCPI109_2@toc@l(3) ; PC64LE-NEXT: lfs 1, .LCPI109_1@toc@l(4) ; PC64LE-NEXT: addis 3, 2, .LCPI109_0@toc@ha -; PC64LE-NEXT: frim 0, 0 +; PC64LE-NEXT: xsrdpim 0, 0 ; PC64LE-NEXT: lfs 2, .LCPI109_0@toc@l(3) ; PC64LE-NEXT: addis 3, 2, .LCPI109_3@toc@ha -; PC64LE-NEXT: frim 1, 1 +; PC64LE-NEXT: xsrdpim 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI109_3@toc@l -; PC64LE-NEXT: frim 2, 2 +; PC64LE-NEXT: xsrdpim 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 @@ -6765,12 +6591,12 @@ ; PC64LE9-NEXT: addis 3, 2, .LCPI109_1@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI109_1@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI109_2@toc@ha -; PC64LE9-NEXT: frim 0, 0 +; 
PC64LE9-NEXT: xsrdpim 0, 0 ; PC64LE9-NEXT: lfs 2, .LCPI109_2@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI109_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI109_3@toc@l -; PC64LE9-NEXT: frim 1, 1 -; PC64LE9-NEXT: frim 2, 2 +; PC64LE9-NEXT: xsrdpim 1, 1 +; PC64LE9-NEXT: xsrdpim 2, 2 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 2, 2 @@ -6829,7 +6655,7 @@ ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI111_0@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI111_0@toc@l(3) -; PC64LE-NEXT: frin 0, 0 +; PC64LE-NEXT: xsrdpi 0, 0 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: blr @@ -6838,7 +6664,7 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI111_0@toc@ha ; PC64LE9-NEXT: lfs 0, .LCPI111_0@toc@l(3) -; PC64LE9-NEXT: frin 0, 0 +; PC64LE9-NEXT: xsrdpi 0, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: blr @@ -6881,12 +6707,12 @@ ; PC64LE-NEXT: lfs 0, .LCPI113_2@toc@l(3) ; PC64LE-NEXT: lfs 1, .LCPI113_1@toc@l(4) ; PC64LE-NEXT: addis 3, 2, .LCPI113_0@toc@ha -; PC64LE-NEXT: frin 0, 0 +; PC64LE-NEXT: xsrdpi 0, 0 ; PC64LE-NEXT: lfs 2, .LCPI113_0@toc@l(3) ; PC64LE-NEXT: addis 3, 2, .LCPI113_3@toc@ha -; PC64LE-NEXT: frin 1, 1 +; PC64LE-NEXT: xsrdpi 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI113_3@toc@l -; PC64LE-NEXT: frin 2, 2 +; PC64LE-NEXT: xsrdpi 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 @@ -6905,12 +6731,12 @@ ; PC64LE9-NEXT: addis 3, 2, .LCPI113_1@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI113_1@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI113_2@toc@ha -; PC64LE9-NEXT: frin 0, 0 +; PC64LE9-NEXT: xsrdpi 0, 0 ; PC64LE9-NEXT: lfs 2, .LCPI113_2@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI113_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI113_3@toc@l -; PC64LE9-NEXT: frin 1, 1 -; PC64LE9-NEXT: frin 2, 2 +; PC64LE9-NEXT: xsrdpi 1, 1 +; PC64LE9-NEXT: xsrdpi 2, 2 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; 
PC64LE9-NEXT: xscvdpspn 2, 2 @@ -6970,7 +6796,7 @@ ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI115_0@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI115_0@toc@l(3) -; PC64LE-NEXT: friz 0, 0 +; PC64LE-NEXT: xsrdpiz 0, 0 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: blr @@ -6979,7 +6805,7 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI115_0@toc@ha ; PC64LE9-NEXT: lfs 0, .LCPI115_0@toc@l(3) -; PC64LE9-NEXT: friz 0, 0 +; PC64LE9-NEXT: xsrdpiz 0, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: blr @@ -7022,12 +6848,12 @@ ; PC64LE-NEXT: lfs 0, .LCPI117_2@toc@l(3) ; PC64LE-NEXT: lfs 1, .LCPI117_1@toc@l(4) ; PC64LE-NEXT: addis 3, 2, .LCPI117_0@toc@ha -; PC64LE-NEXT: friz 0, 0 +; PC64LE-NEXT: xsrdpiz 0, 0 ; PC64LE-NEXT: lfs 2, .LCPI117_0@toc@l(3) ; PC64LE-NEXT: addis 3, 2, .LCPI117_3@toc@ha -; PC64LE-NEXT: friz 1, 1 +; PC64LE-NEXT: xsrdpiz 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI117_3@toc@l -; PC64LE-NEXT: friz 2, 2 +; PC64LE-NEXT: xsrdpiz 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 @@ -7046,12 +6872,12 @@ ; PC64LE9-NEXT: addis 3, 2, .LCPI117_1@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI117_1@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI117_2@toc@ha -; PC64LE9-NEXT: friz 0, 0 +; PC64LE9-NEXT: xsrdpiz 0, 0 ; PC64LE9-NEXT: lfs 2, .LCPI117_2@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI117_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI117_3@toc@l -; PC64LE9-NEXT: friz 1, 1 -; PC64LE9-NEXT: friz 2, 2 +; PC64LE9-NEXT: xsrdpiz 1, 1 +; PC64LE9-NEXT: xsrdpiz 2, 2 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 2, 2