diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -319,14 +319,18 @@ setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal); - setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); - setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal); + + if (!Subtarget.hasSPE()) { + setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal); + } + if (Subtarget.hasVSX()) { setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal); @@ -472,6 +476,10 @@ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); + + // SPE supports signaling compare of f32/f64. + setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); } else { // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -4118,57 +4118,57 @@ // after the inclusion of the instruction sets. let Predicates = [HasSPE] in { // SETCC for f32. -def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)), +def : Pat<(i1 (any_fsetccs f32:$s1, f32:$s2, SETOLT)), (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)), +def : Pat<(i1 (any_fsetccs f32:$s1, f32:$s2, SETLT)), (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)), +def : Pat<(i1 (any_fsetccs f32:$s1, f32:$s2, SETOGT)), (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)), +def : Pat<(i1 (any_fsetccs f32:$s1, f32:$s2, SETGT)), (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)), +def : Pat<(i1 (any_fsetccs f32:$s1, f32:$s2, SETOEQ)), (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)), +def : Pat<(i1 (any_fsetccs f32:$s1, f32:$s2, SETEQ)), (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), +defm : CRNotPat<(i1 (any_fsetccs f32:$s1, f32:$s2, SETUGE)), (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), +defm : CRNotPat<(i1 (any_fsetccs f32:$s1, f32:$s2, SETGE)), (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), +defm : CRNotPat<(i1 (any_fsetccs f32:$s1, f32:$s2, SETULE)), (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), +defm : CRNotPat<(i1 (any_fsetccs f32:$s1, f32:$s2, SETLE)), (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), +defm : CRNotPat<(i1 (any_fsetccs f32:$s1, f32:$s2, SETUNE)), (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), +defm : CRNotPat<(i1 (any_fsetccs f32:$s1, f32:$s2, SETNE)), (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; // SETCC for f64. -def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)), +def : Pat<(i1 (any_fsetccs f64:$s1, f64:$s2, SETOLT)), (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)), +def : Pat<(i1 (any_fsetccs f64:$s1, f64:$s2, SETLT)), (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)), +def : Pat<(i1 (any_fsetccs f64:$s1, f64:$s2, SETOGT)), (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)), +def : Pat<(i1 (any_fsetccs f64:$s1, f64:$s2, SETGT)), (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)), +def : Pat<(i1 (any_fsetccs f64:$s1, f64:$s2, SETOEQ)), (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)), +def : Pat<(i1 (any_fsetccs f64:$s1, f64:$s2, SETEQ)), (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), +defm : CRNotPat<(i1 (any_fsetccs f64:$s1, f64:$s2, SETUGE)), (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), +defm : CRNotPat<(i1 (any_fsetccs f64:$s1, f64:$s2, SETGE)), (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), +defm : CRNotPat<(i1 (any_fsetccs f64:$s1, f64:$s2, SETULE)), (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), +defm : CRNotPat<(i1 (any_fsetccs f64:$s1, f64:$s2, SETLE)), (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), +defm : CRNotPat<(i1 (any_fsetccs f64:$s1, f64:$s2, SETUNE)), (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), +defm : CRNotPat<(i1 (any_fsetccs f64:$s1, f64:$s2, SETNE)), (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; } // match select on i1 variables: diff --git a/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/llvm/lib/Target/PowerPC/PPCInstrSPE.td --- a/llvm/lib/Target/PowerPC/PPCInstrSPE.td +++ b/llvm/lib/Target/PowerPC/PPCInstrSPE.td @@ -147,11 +147,11 @@ def EFDADD : EFXForm_1<736, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), "efdadd $RT, $RA, $RB", IIC_FPAddSub, - [(set f64:$RT, (fadd f64:$RA, f64:$RB))]>; + [(set f64:$RT, (any_fadd f64:$RA, f64:$RB))]>; def EFDCFS : EFXForm_2a<751, (outs sperc:$RT), (ins spe4rc:$RB), "efdcfs $RT, $RB", IIC_FPDGeneral, - [(set f64:$RT, (fpextend f32:$RB))]>; + [(set f64:$RT, (any_fpextend f32:$RB))]>; def EFDCFSF : EFXForm_2a<755, (outs sperc:$RT), (ins spe4rc:$RB), "efdcfsf $RT, $RB", IIC_FPDGeneral, []>; @@ -216,11 +216,11 @@ def EFDDIV : EFXForm_1<745, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), "efddiv $RT, $RA, $RB", IIC_FPDivD, - [(set f64:$RT, (fdiv f64:$RA, f64:$RB))]>; + [(set f64:$RT, (any_fdiv f64:$RA, f64:$RB))]>; def EFDMUL : EFXForm_1<744, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), "efdmul $RT, $RA, $RB", IIC_FPDGeneral, - [(set f64:$RT, (fmul f64:$RA, f64:$RB))]>; + [(set f64:$RT, (any_fmul f64:$RA, f64:$RB))]>; def EFDNABS : EFXForm_2<741, (outs sperc:$RT), (ins sperc:$RA), "efdnabs $RT, $RA", IIC_FPDGeneral, @@ -232,7 +232,7 @@ def EFDSUB : EFXForm_1<737, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), "efdsub $RT, $RA, $RB", IIC_FPDGeneral, - [(set f64:$RT, (fsub f64:$RA, f64:$RB))]>; + [(set f64:$RT, (any_fsub f64:$RA, f64:$RB))]>; let isCompare = 1 in { def EFDTSTEQ : EFXForm_3<766, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), @@ -250,11 +250,11 @@ def EFSADD : EFXForm_1<704, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB), "efsadd $RT, $RA, $RB", IIC_FPAddSub, - [(set f32:$RT, (fadd f32:$RA, f32:$RB))]>; + [(set f32:$RT, (any_fadd f32:$RA, f32:$RB))]>; def EFSCFD : EFXForm_2a<719, (outs spe4rc:$RT), (ins sperc:$RB), "efscfd $RT, $RB", IIC_FPSGeneral, - [(set f32:$RT, (fpround f64:$RB))]>; + [(set f32:$RT, (any_fpround f64:$RB))]>; def EFSCFSF : EFXForm_2a<723, (outs spe4rc:$RT), (ins spe4rc:$RB), "efscfsf $RT, $RB", IIC_FPSGeneral, []>; @@ -303,11 +303,11 @@ def EFSDIV : EFXForm_1<713, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB), "efsdiv $RT, $RA, $RB", IIC_FPDivD, - [(set f32:$RT, (fdiv f32:$RA, f32:$RB))]>; + [(set f32:$RT, (any_fdiv f32:$RA, f32:$RB))]>; def EFSMUL : EFXForm_1<712, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB), "efsmul $RT, $RA, $RB", IIC_FPGeneral, - [(set f32:$RT, (fmul f32:$RA, f32:$RB))]>; + [(set f32:$RT, (any_fmul f32:$RA, f32:$RB))]>; def EFSNABS : EFXForm_2<709, (outs spe4rc:$RT), (ins spe4rc:$RA), "efsnabs $RT, $RA", IIC_FPGeneral, @@ -319,7 +319,7 @@ def EFSSUB : EFXForm_1<705, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB), "efssub $RT, $RA, $RB", IIC_FPSGeneral, - [(set f32:$RT, (fsub f32:$RA, f32:$RB))]>; + [(set f32:$RT, (any_fsub f32:$RA, f32:$RB))]>; let isCompare = 1 in { def EFSTSTEQ : EFXForm_3<734, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll @@ -0,0 +1,493 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc-unknown-linux -mattr=spe | FileCheck %s -check-prefix=SPE + +define i32 @test_f32_oeq_s(i32 %a, i32 %b, float %f1, float %f2) #0 { +; SPE-LABEL: test_f32_oeq_s: +; SPE: # %bb.0: +; SPE-NEXT: efscmpeq cr0, r5, r6 +; SPE-NEXT: bclr 12, gt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"oeq", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f32_ogt_s(i32 %a, i32 %b, float %f1, float %f2) #0 { +; SPE-LABEL: test_f32_ogt_s: +; SPE: # %bb.0: +; SPE-NEXT: efscmpgt cr0, r5, r6 +; SPE-NEXT: bclr 12, gt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"ogt", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f32_oge_s(i32 %a, i32 %b, float %f1, float %f2) #0 { +; SPE-LABEL: test_f32_oge_s: +; SPE: # %bb.0: +; SPE-NEXT: efscmplt cr0, r5, r6 +; SPE-NEXT: efscmplt cr1, r5, r6 +; SPE-NEXT: crnor 4*cr5+lt, gt, 4*cr1+gt +; SPE-NEXT: efscmpeq cr0, r6, r6 +; SPE-NEXT: efscmpeq cr1, r5, r5 +; SPE-NEXT: crand 4*cr5+gt, 4*cr1+gt, gt +; SPE-NEXT: crand 4*cr5+lt, 4*cr5+lt, 4*cr5+gt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"oge", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f32_olt_s(i32 %a, i32 %b, float %f1, float %f2) #0 { +; SPE-LABEL: test_f32_olt_s: +; SPE: # %bb.0: +; SPE-NEXT: efscmplt cr0, r5, r6 +; SPE-NEXT: bclr 12, gt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"olt", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f32_ole_s(i32 %a, i32 %b, float %f1, float %f2) #0 { +; SPE-LABEL: test_f32_ole_s: +; SPE: # %bb.0: +; SPE-NEXT: efscmpgt cr0, r5, r6 +; SPE-NEXT: efscmpgt cr1, r5, r6 +; SPE-NEXT: crnor 4*cr5+lt, gt, 4*cr1+gt +; SPE-NEXT: efscmpeq cr0, r6, r6 +; SPE-NEXT: efscmpeq cr1, r5, r5 +; SPE-NEXT: crand 4*cr5+gt, 4*cr1+gt, gt +; SPE-NEXT: crand 4*cr5+lt, 4*cr5+lt, 4*cr5+gt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"ole", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f32_one_s(i32 %a, i32 %b, float %f1, float %f2) #0 { +; SPE-LABEL: test_f32_one_s: +; SPE: # %bb.0: +; SPE-NEXT: efscmplt cr0, r5, r6 +; SPE-NEXT: efscmpgt cr1, r5, r6 +; SPE-NEXT: cror 4*cr5+lt, 4*cr1+gt, gt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"one", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f32_ord_s(i32 %a, i32 %b, float %f1, float %f2) #0 { +; SPE-LABEL: test_f32_ord_s: +; SPE: # %bb.0: +; SPE-NEXT: efscmpeq cr0, r6, r6 +; SPE-NEXT: efscmpeq cr1, r5, r5 +; SPE-NEXT: crand 4*cr5+lt, 4*cr1+gt, gt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"ord", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f32_ueq_s(i32 %a, i32 %b, float %f1, float %f2) #0 { +; SPE-LABEL: test_f32_ueq_s: +; SPE: # %bb.0: +; SPE-NEXT: efscmplt cr0, r5, r6 +; SPE-NEXT: efscmpgt cr1, r5, r6 +; SPE-NEXT: cror 4*cr5+lt, 4*cr1+gt, gt +; SPE-NEXT: bc 12, 4*cr5+lt, .LBB7_1 +; SPE-NEXT: blr +; SPE-NEXT: .LBB7_1: +; SPE-NEXT: addi r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"ueq", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f32_ugt_s(i32 %a, i32 %b, float %f1, float %f2) #0 { +; SPE-LABEL: test_f32_ugt_s: +; SPE: # %bb.0: +; SPE-NEXT: efscmpeq cr0, r6, r6 +; SPE-NEXT: efscmpeq cr1, r6, r6 +; SPE-NEXT: crnor 4*cr5+lt, gt, 4*cr1+gt +; SPE-NEXT: efscmpeq cr0, r5, r5 +; SPE-NEXT: efscmpeq cr1, r5, r5 +; SPE-NEXT: crnor 4*cr5+gt, gt, 4*cr1+gt +; SPE-NEXT: cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt +; SPE-NEXT: efscmpgt cr0, r5, r6 +; SPE-NEXT: cror 4*cr5+lt, gt, 4*cr5+lt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"ugt", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f32_uge_s(i32 %a, i32 %b, float %f1, float %f2) #0 { +; SPE-LABEL: test_f32_uge_s: +; SPE: # %bb.0: +; SPE-NEXT: efscmplt cr0, r5, r6 +; SPE-NEXT: efscmplt cr1, r5, r6 +; SPE-NEXT: crnor 4*cr5+lt, gt, 4*cr1+gt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"uge", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f32_ult_s(i32 %a, i32 %b, float %f1, float %f2) #0 { +; SPE-LABEL: test_f32_ult_s: +; SPE: # %bb.0: +; SPE-NEXT: efscmpeq cr0, r6, r6 +; SPE-NEXT: efscmpeq cr1, r6, r6 +; SPE-NEXT: crnor 4*cr5+lt, gt, 4*cr1+gt +; SPE-NEXT: efscmpeq cr0, r5, r5 +; SPE-NEXT: efscmpeq cr1, r5, r5 +; SPE-NEXT: crnor 4*cr5+gt, gt, 4*cr1+gt +; SPE-NEXT: cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt +; SPE-NEXT: efscmplt cr0, r5, r6 +; SPE-NEXT: cror 4*cr5+lt, gt, 4*cr5+lt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"ult", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f32_ule_s(i32 %a, i32 %b, float %f1, float %f2) #0 { +; SPE-LABEL: test_f32_ule_s: +; SPE: # %bb.0: +; SPE-NEXT: efscmpgt cr0, r5, r6 +; SPE-NEXT: efscmpgt cr1, r5, r6 +; SPE-NEXT: crnor 4*cr5+lt, gt, 4*cr1+gt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"ule", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f32_une_s(i32 %a, i32 %b, float %f1, float %f2) #0 { +; SPE-LABEL: test_f32_une_s: +; SPE: # %bb.0: +; SPE-NEXT: efscmpeq cr0, r5, r6 +; SPE-NEXT: efscmpeq cr1, r5, r6 +; SPE-NEXT: crnor 4*cr5+lt, gt, 4*cr1+gt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"une", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f32_uno_s(i32 %a, i32 %b, float %f1, float %f2) #0 { +; SPE-LABEL: test_f32_uno_s: +; SPE: # %bb.0: +; SPE-NEXT: efscmpeq cr0, r6, r6 +; SPE-NEXT: efscmpeq cr1, r6, r6 +; SPE-NEXT: crnor 4*cr5+lt, gt, 4*cr1+gt +; SPE-NEXT: efscmpeq cr0, r5, r5 +; SPE-NEXT: efscmpeq cr1, r5, r5 +; SPE-NEXT: crnor 4*cr5+gt, gt, 4*cr1+gt +; SPE-NEXT: cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"uno", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f64_oeq_s(i32 %a, i32 %b, double %f1, double %f2) #0 { +; SPE-LABEL: test_f64_oeq_s: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: efdcmpeq cr0, r5, r7 +; SPE-NEXT: bclr 12, gt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"oeq", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f64_ogt_s(i32 %a, i32 %b, double %f1, double %f2) #0 { +; SPE-LABEL: test_f64_ogt_s: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: efdcmpgt cr0, r5, r7 +; SPE-NEXT: bclr 12, gt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"ogt", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f64_oge_s(i32 %a, i32 %b, double %f1, double %f2) #0 { +; SPE-LABEL: test_f64_oge_s: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: efdcmplt cr0, r5, r7 +; SPE-NEXT: efdcmplt cr1, r5, r7 +; SPE-NEXT: efdcmpeq cr5, r7, r7 +; SPE-NEXT: efdcmpeq cr6, r5, r5 +; SPE-NEXT: crnor 4*cr7+lt, gt, 4*cr1+gt +; SPE-NEXT: crand 4*cr5+lt, 4*cr6+gt, 4*cr5+gt +; SPE-NEXT: crand 4*cr5+lt, 4*cr7+lt, 4*cr5+lt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"oge", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f64_olt_s(i32 %a, i32 %b, double %f1, double %f2) #0 { +; SPE-LABEL: test_f64_olt_s: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: efdcmplt cr0, r5, r7 +; SPE-NEXT: bclr 12, gt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"olt", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f64_ole_s(i32 %a, i32 %b, double %f1, double %f2) #0 { +; SPE-LABEL: test_f64_ole_s: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: efdcmpgt cr0, r5, r7 +; SPE-NEXT: efdcmpgt cr1, r5, r7 +; SPE-NEXT: efdcmpeq cr5, r7, r7 +; SPE-NEXT: efdcmpeq cr6, r5, r5 +; SPE-NEXT: crnor 4*cr7+lt, gt, 4*cr1+gt +; SPE-NEXT: crand 4*cr5+lt, 4*cr6+gt, 4*cr5+gt +; SPE-NEXT: crand 4*cr5+lt, 4*cr7+lt, 4*cr5+lt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"ole", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f64_one_s(i32 %a, i32 %b, double %f1, double %f2) #0 { +; SPE-LABEL: test_f64_one_s: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: efdcmplt cr0, r5, r7 +; SPE-NEXT: efdcmpgt cr1, r5, r7 +; SPE-NEXT: cror 4*cr5+lt, 4*cr1+gt, gt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"one", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f64_ord_s(i32 %a, i32 %b, double %f1, double %f2) #0 { +; SPE-LABEL: test_f64_ord_s: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r6, r7, r8 +; SPE-NEXT: efdcmpeq cr0, r6, r6 +; SPE-NEXT: efdcmpeq cr1, r5, r5 +; SPE-NEXT: crand 4*cr5+lt, 4*cr1+gt, gt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"ord", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f64_ueq_s(i32 %a, i32 %b, double %f1, double %f2) #0 { +; SPE-LABEL: test_f64_ueq_s: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: efdcmplt cr0, r5, r7 +; SPE-NEXT: efdcmpgt cr1, r5, r7 +; SPE-NEXT: cror 4*cr5+lt, 4*cr1+gt, gt +; SPE-NEXT: bc 12, 4*cr5+lt, .LBB21_1 +; SPE-NEXT: blr +; SPE-NEXT: .LBB21_1: +; SPE-NEXT: addi r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"ueq", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f64_ugt_s(i32 %a, i32 %b, double %f1, double %f2) #0 { +; SPE-LABEL: test_f64_ugt_s: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r6, r7, r8 +; SPE-NEXT: efdcmpeq cr0, r6, r6 +; SPE-NEXT: efdcmpeq cr1, r6, r6 +; SPE-NEXT: efdcmpeq cr5, r5, r5 +; SPE-NEXT: efdcmpeq cr6, r5, r5 +; SPE-NEXT: efdcmpgt cr7, r5, r6 +; SPE-NEXT: crnor 4*cr1+lt, gt, 4*cr1+gt +; SPE-NEXT: crnor 4*cr5+lt, 4*cr5+gt, 4*cr6+gt +; SPE-NEXT: cror 4*cr5+lt, 4*cr5+lt, 4*cr1+lt +; SPE-NEXT: cror 4*cr5+lt, 4*cr7+gt, 4*cr5+lt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"ugt", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f64_uge_s(i32 %a, i32 %b, double %f1, double %f2) #0 { +; SPE-LABEL: test_f64_uge_s: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: efdcmplt cr0, r5, r7 +; SPE-NEXT: efdcmplt cr1, r5, r7 +; SPE-NEXT: crnor 4*cr5+lt, gt, 4*cr1+gt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"uge", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f64_ult_s(i32 %a, i32 %b, double %f1, double %f2) #0 { +; SPE-LABEL: test_f64_ult_s: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r6, r7, r8 +; SPE-NEXT: efdcmpeq cr0, r6, r6 +; SPE-NEXT: efdcmpeq cr1, r6, r6 +; SPE-NEXT: efdcmpeq cr5, r5, r5 +; SPE-NEXT: efdcmpeq cr6, r5, r5 +; SPE-NEXT: efdcmplt cr7, r5, r6 +; SPE-NEXT: crnor 4*cr1+lt, gt, 4*cr1+gt +; SPE-NEXT: crnor 4*cr5+lt, 4*cr5+gt, 4*cr6+gt +; SPE-NEXT: cror 4*cr5+lt, 4*cr5+lt, 4*cr1+lt +; SPE-NEXT: cror 4*cr5+lt, 4*cr7+gt, 4*cr5+lt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"ult", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f64_ule_s(i32 %a, i32 %b, double %f1, double %f2) #0 { +; SPE-LABEL: test_f64_ule_s: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: efdcmpgt cr0, r5, r7 +; SPE-NEXT: efdcmpgt cr1, r5, r7 +; SPE-NEXT: crnor 4*cr5+lt, gt, 4*cr1+gt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"ule", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f64_une_s(i32 %a, i32 %b, double %f1, double %f2) #0 { +; SPE-LABEL: test_f64_une_s: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: efdcmpeq cr0, r5, r7 +; SPE-NEXT: efdcmpeq cr1, r5, r7 +; SPE-NEXT: crnor 4*cr5+lt, gt, 4*cr1+gt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"une", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @test_f64_uno_s(i32 %a, i32 %b, double %f1, double %f2) #0 { +; SPE-LABEL: test_f64_uno_s: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r6, r7, r8 +; SPE-NEXT: efdcmpeq cr0, r6, r6 +; SPE-NEXT: efdcmpeq cr1, r6, r6 +; SPE-NEXT: efdcmpeq cr5, r5, r5 +; SPE-NEXT: efdcmpeq cr6, r5, r5 +; SPE-NEXT: crnor 4*cr7+lt, gt, 4*cr1+gt +; SPE-NEXT: crnor 4*cr5+lt, 4*cr5+gt, 4*cr6+gt +; SPE-NEXT: cror 4*cr5+lt, 4*cr5+lt, 4*cr7+lt +; SPE-NEXT: bclr 12, 4*cr5+lt, 0 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: ori r3, r4, 0 +; SPE-NEXT: blr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"uno", metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +attributes #0 = { strictfp nounwind } + +declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) diff --git a/llvm/test/CodeGen/PowerPC/fp-strict.ll b/llvm/test/CodeGen/PowerPC/fp-strict.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict.ll @@ -2,6 +2,7 @@ ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc-unknown-linux -mattr=spe | FileCheck %s -check-prefix=SPE declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) @@ -43,6 +44,11 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fadds f1, f1, f2 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fadd_f32: +; SPE: # %bb.0: +; SPE-NEXT: efsadd r3, r3, r4 +; SPE-NEXT: blr %res = call float @llvm.experimental.constrained.fadd.f32( float %f1, float %f2, metadata !"round.dynamic", @@ -60,6 +66,16 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fadd f1, f1, f2 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fadd_f64: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: efdadd r4, r3, r5 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr %res = call double @llvm.experimental.constrained.fadd.f64( double %f1, double %f2, metadata !"round.dynamic", @@ -98,6 +114,14 @@ ; NOVSX-NEXT: stfs f0, -16(r1) ; NOVSX-NEXT: lvx v2, 0, r3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fadd_v4f32: +; SPE: # %bb.0: +; SPE-NEXT: efsadd r6, r6, r10 +; SPE-NEXT: efsadd r5, r5, r9 +; SPE-NEXT: efsadd r4, r4, r8 +; SPE-NEXT: efsadd r3, r3, r7 +; SPE-NEXT: blr %res = call <4 x float> @llvm.experimental.constrained.fadd.v4f32( <4 x float> %vf1, <4 x float> %vf2, metadata !"round.dynamic", @@ -116,6 +140,19 @@ ; NOVSX-NEXT: fadd f2, f2, f4 ; NOVSX-NEXT: fadd f1, f1, f3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fadd_v2f64: +; SPE: # %bb.0: +; SPE-NEXT: evldd r4, 8(r1) +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r8, r9, r10 +; SPE-NEXT: li r9, 8 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: efdadd r4, r7, r4 +; SPE-NEXT: evstddx r4, r3, r9 +; SPE-NEXT: efdadd r4, r5, r8 +; SPE-NEXT: evstdd r4, 0(r3) +; SPE-NEXT: blr %res = call <2 x double> @llvm.experimental.constrained.fadd.v2f64( <2 x double> %vf1, <2 x double> %vf2, metadata !"round.dynamic", @@ -133,6 +170,11 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fsubs f1, f1, f2 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fsub_f32: +; SPE: # %bb.0: +; SPE-NEXT: efssub r3, r3, r4 +; SPE-NEXT: blr %res = call float @llvm.experimental.constrained.fsub.f32( float %f1, float %f2, @@ -151,6 +193,16 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fsub f1, f1, f2 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fsub_f64: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: efdsub r4, r3, r5 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr %res = call double @llvm.experimental.constrained.fsub.f64( double %f1, double %f2, @@ -190,6 +242,14 @@ ; NOVSX-NEXT: stfs f0, -16(r1) ; NOVSX-NEXT: lvx v2, 0, r3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fsub_v4f32: +; SPE: # %bb.0: +; SPE-NEXT: efssub r6, r6, r10 +; SPE-NEXT: efssub r5, r5, r9 +; SPE-NEXT: efssub r4, r4, r8 +; SPE-NEXT: efssub r3, r3, r7 +; SPE-NEXT: blr %res = call <4 x float> @llvm.experimental.constrained.fsub.v4f32( <4 x float> %vf1, <4 x float> %vf2, metadata !"round.dynamic", @@ -208,6 +268,19 @@ ; NOVSX-NEXT: fsub f2, f2, f4 ; NOVSX-NEXT: fsub f1, f1, f3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fsub_v2f64: +; SPE: # %bb.0: +; SPE-NEXT: evldd r4, 8(r1) +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r8, r9, r10 +; SPE-NEXT: li r9, 8 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: efdsub r4, r7, r4 +; SPE-NEXT: evstddx r4, r3, r9 +; SPE-NEXT: efdsub r4, r5, r8 +; SPE-NEXT: evstdd r4, 0(r3) +; SPE-NEXT: blr %res = call <2 x double> @llvm.experimental.constrained.fsub.v2f64( <2 x double> %vf1, <2 x double> %vf2, metadata !"round.dynamic", @@ -225,6 +298,11 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fmuls f1, f1, f2 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fmul_f32: +; SPE: # %bb.0: +; SPE-NEXT: efsmul r3, r3, r4 +; SPE-NEXT: blr %res = call float @llvm.experimental.constrained.fmul.f32( float %f1, float %f2, @@ -243,6 +321,16 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fmul f1, f1, f2 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fmul_f64: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: efdmul r4, r3, r5 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr %res = call double @llvm.experimental.constrained.fmul.f64( double %f1, double %f2, @@ -282,6 +370,14 @@ ; NOVSX-NEXT: stfs f0, -16(r1) ; NOVSX-NEXT: lvx v2, 0, r3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fmul_v4f32: +; SPE: # %bb.0: +; SPE-NEXT: efsmul r6, r6, r10 +; SPE-NEXT: efsmul r5, r5, r9 +; SPE-NEXT: efsmul r4, r4, r8 +; SPE-NEXT: efsmul r3, r3, r7 +; SPE-NEXT: blr %res = call <4 x float> @llvm.experimental.constrained.fmul.v4f32( <4 x float> %vf1, <4 x float> %vf2, metadata !"round.dynamic", @@ -300,6 +396,19 @@ ; NOVSX-NEXT: fmul f2, f2, f4 ; NOVSX-NEXT: fmul f1, f1, f3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fmul_v2f64: +; SPE: # %bb.0: +; SPE-NEXT: evldd r4, 8(r1) +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r8, r9, r10 +; SPE-NEXT: li r9, 8 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: efdmul r4, r7, r4 +; SPE-NEXT: evstddx r4, r3, r9 +; SPE-NEXT: efdmul r4, r5, r8 +; SPE-NEXT: evstdd r4, 0(r3) +; SPE-NEXT: blr %res = call <2 x double> @llvm.experimental.constrained.fmul.v2f64( <2 x double> %vf1, <2 x double> %vf2, metadata !"round.dynamic", @@ -317,6 +426,11 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fdivs f1, f1, f2 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fdiv_f32: +; SPE: # %bb.0: +; SPE-NEXT: efsdiv r3, r3, r4 +; SPE-NEXT: blr %res = call float @llvm.experimental.constrained.fdiv.f32( float %f1, float %f2, @@ -335,6 +449,16 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fdiv f1, f1, f2 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fdiv_f64: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: efddiv r4, r3, r5 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr %res = call double @llvm.experimental.constrained.fdiv.f64( double %f1, double %f2, @@ -374,6 +498,14 @@ ; NOVSX-NEXT: stfs f0, -16(r1) ; NOVSX-NEXT: lvx v2, 0, r3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fdiv_v4f32: +; SPE: # %bb.0: +; SPE-NEXT: efsdiv r6, r6, r10 +; SPE-NEXT: efsdiv r5, r5, r9 +; SPE-NEXT: efsdiv r4, r4, r8 +; SPE-NEXT: efsdiv r3, r3, r7 +; SPE-NEXT: blr %res = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32( <4 x float> %vf1, <4 x float> %vf2, metadata !"round.dynamic", @@ -392,6 +524,19 @@ ; NOVSX-NEXT: fdiv f2, f2, f4 ; NOVSX-NEXT: fdiv f1, f1, f3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fdiv_v2f64: +; SPE: # %bb.0: +; SPE-NEXT: evldd r4, 8(r1) +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r8, r9, r10 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: efddiv r4, r7, r4 +; SPE-NEXT: li r7, 8 +; SPE-NEXT: evstddx r4, r3, r7 +; SPE-NEXT: efddiv r4, r5, r8 +; SPE-NEXT: evstdd r4, 0(r3) +; SPE-NEXT: blr %res = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64( <2 x double> %vf1, <2 x double> %vf2, metadata !"round.dynamic", @@ -411,6 +556,18 @@ ; NOVSX-NEXT: fmul f0, f1, f2 ; NOVSX-NEXT: fadd f1, f0, f3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: no_fma_fold: +; SPE: # %bb.0: +; SPE-NEXT: evmergelo r7, r7, r8 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: efdmul r3, r3, r5 +; SPE-NEXT: efdadd r4, r3, r7 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr %mul = call double @llvm.experimental.constrained.fmul.f64( double %f1, double %f2, metadata !"round.dynamic", @@ -433,6 +590,19 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fmadds f1, f1, f2, f3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fmadd_f32: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl fmaf +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %res = call float @llvm.experimental.constrained.fma.f32( float %f0, float %f1, float %f2, metadata !"round.dynamic", @@ -451,6 +621,35 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fmadd f1, f1, f2, f3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fmadd_f64: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r8, r7, r8 +; SPE-NEXT: evmergelo r6, r5, r6 +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: evmergehi r5, r6, r6 +; SPE-NEXT: evmergehi r7, r8, r8 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 +; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 +; SPE-NEXT: bl fma +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %res = call double @llvm.experimental.constrained.fma.f64( double %f0, double %f1, double %f2, metadata !"round.dynamic", @@ -496,6 +695,109 @@ ; NOVSX-NEXT: stfs f0, -16(r1) ; NOVSX-NEXT: lvx v2, 0, r3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fmadd_v4f32: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -144(r1) +; SPE-NEXT: .cfi_def_cfa_offset 144 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: .cfi_offset r21, -44 +; SPE-NEXT: .cfi_offset r22, -40 +; SPE-NEXT: .cfi_offset r23, -36 +; SPE-NEXT: .cfi_offset r24, -32 +; SPE-NEXT: .cfi_offset r25, -28 +; SPE-NEXT: .cfi_offset r26, -24 +; SPE-NEXT: .cfi_offset r27, -20 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: .cfi_offset r21, -136 +; SPE-NEXT: .cfi_offset r22, -128 +; SPE-NEXT: .cfi_offset r23, -120 +; SPE-NEXT: .cfi_offset r24, -112 +; SPE-NEXT: .cfi_offset r25, -104 +; SPE-NEXT: .cfi_offset r26, -96 +; SPE-NEXT: .cfi_offset r27, -88 +; SPE-NEXT: .cfi_offset r28, -80 +; SPE-NEXT: .cfi_offset r29, -72 +; SPE-NEXT: .cfi_offset r30, -64 +; SPE-NEXT: stw r27, 124(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r27, r5 +; SPE-NEXT: lwz r5, 164(r1) +; SPE-NEXT: stw r25, 116(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r26, 120(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r25, r3 +; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r26, r4 +; SPE-NEXT: mr r3, r6 +; SPE-NEXT: mr r4, r10 +; SPE-NEXT: stw r21, 100(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r22, 104(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r23, 108(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r24, 112(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r28, 128(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r29, 132(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r30, 136(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r28, r7 +; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r29, r8 +; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r30, r9 +; SPE-NEXT: lwz r24, 152(r1) +; SPE-NEXT: lwz r23, 156(r1) +; SPE-NEXT: lwz r22, 160(r1) +; SPE-NEXT: bl fmaf +; SPE-NEXT: mr r21, r3 +; SPE-NEXT: mr r3, r27 +; SPE-NEXT: mr r4, r30 +; SPE-NEXT: mr r5, r22 +; SPE-NEXT: bl fmaf +; SPE-NEXT: mr r30, r3 +; SPE-NEXT: mr r3, r26 +; SPE-NEXT: mr r4, r29 +; SPE-NEXT: mr r5, r23 +; SPE-NEXT: bl fmaf +; SPE-NEXT: mr r29, r3 +; SPE-NEXT: mr r3, r25 +; SPE-NEXT: mr r4, r28 +; SPE-NEXT: mr r5, r24 +; SPE-NEXT: bl fmaf +; SPE-NEXT: mr r4, r29 +; SPE-NEXT: mr r5, r30 +; SPE-NEXT: mr r6, r21 +; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 136(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 132(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 128(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r27, 124(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r26, 120(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r25, 116(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r24, 112(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r23, 108(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r22, 104(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r21, 100(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 148(r1) +; SPE-NEXT: addi r1, r1, 144 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32( <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2, metadata !"round.dynamic", @@ -515,6 +817,80 @@ ; NOVSX-NEXT: fmadd f2, f2, f4, f6 ; NOVSX-NEXT: fmadd f1, f1, f3, f5 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fmadd_v2f64: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -96(r1) +; SPE-NEXT: .cfi_def_cfa_offset 96 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: .cfi_offset r26, -24 +; SPE-NEXT: .cfi_offset r27, -20 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: .cfi_offset r26, -80 +; SPE-NEXT: .cfi_offset r27, -72 +; SPE-NEXT: .cfi_offset r28, -64 +; SPE-NEXT: .cfi_offset r29, -56 +; SPE-NEXT: .cfi_offset r30, -48 +; SPE-NEXT: stw r26, 72(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r27, 76(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r28, 80(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r29, 84(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r30, 88(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: evmergelo r27, r7, r8 +; SPE-NEXT: evmergelo r9, r9, r10 +; SPE-NEXT: evmergelo r4, r5, r6 +; SPE-NEXT: mr r30, r3 +; SPE-NEXT: evldd r8, 112(r1) +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: evmergehi r5, r9, r9 +; SPE-NEXT: mr r6, r9 +; SPE-NEXT: evldd r29, 120(r1) +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: evmergehi r7, r8, r8 +; SPE-NEXT: evldd r28, 104(r1) +; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 +; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 +; SPE-NEXT: bl fma +; SPE-NEXT: evmergelo r26, r3, r4 +; SPE-NEXT: evmergehi r3, r27, r27 +; SPE-NEXT: evmergehi r5, r28, r28 +; SPE-NEXT: evmergehi r7, r29, r29 +; SPE-NEXT: mr r4, r27 +; SPE-NEXT: mr r6, r28 +; SPE-NEXT: mr r8, r29 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 +; SPE-NEXT: bl fma +; SPE-NEXT: li r5, 8 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: evstddx r3, r30, r5 +; SPE-NEXT: evstdd r26, 0(r30) +; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 88(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 84(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 80(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r27, 76(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r26, 72(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 100(r1) +; SPE-NEXT: addi r1, r1, 96 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64( <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2, metadata !"round.dynamic", @@ -533,6 +909,20 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fmsubs f1, f1, f2, f3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fmsub_f32: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: efsneg r5, r5 +; SPE-NEXT: bl fmaf +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %neg = fneg float %f2 %res = call float @llvm.experimental.constrained.fma.f32( float %f0, float %f1, float %neg, @@ -552,6 +942,36 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fmsub f1, f1, f2, f3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fmsub_f64: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r6, r5, r6 +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergelo r3, r7, r8 +; SPE-NEXT: efdneg r8, r3 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: evmergehi r5, r6, r6 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 +; SPE-NEXT: evmergehi r7, r8, r8 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 +; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 +; SPE-NEXT: bl fma +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %neg = fneg double %f2 %res = call double @llvm.experimental.constrained.fma.f64( double %f0, double %f1, double %neg, @@ -601,6 +1021,113 @@ ; NOVSX-NEXT: stfs f0, -16(r1) ; NOVSX-NEXT: lvx v2, 0, r3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fmsub_v4f32: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -144(r1) +; SPE-NEXT: .cfi_def_cfa_offset 144 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: .cfi_offset r21, -44 +; SPE-NEXT: .cfi_offset r22, -40 +; SPE-NEXT: .cfi_offset r23, -36 +; SPE-NEXT: .cfi_offset r24, -32 +; SPE-NEXT: .cfi_offset r25, -28 +; SPE-NEXT: .cfi_offset r26, -24 +; SPE-NEXT: .cfi_offset r27, -20 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: .cfi_offset r21, -136 +; SPE-NEXT: .cfi_offset r22, -128 +; SPE-NEXT: .cfi_offset r23, -120 +; SPE-NEXT: .cfi_offset r24, -112 +; SPE-NEXT: .cfi_offset r25, -104 +; SPE-NEXT: .cfi_offset r26, -96 +; SPE-NEXT: .cfi_offset r27, -88 +; SPE-NEXT: .cfi_offset r28, -80 +; SPE-NEXT: .cfi_offset r29, -72 +; SPE-NEXT: .cfi_offset r30, -64 +; SPE-NEXT: stw r25, 116(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r26, 120(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r27, 124(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r28, 128(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r25, r3 +; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r26, r4 +; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r27, r5 +; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r28, r7 +; SPE-NEXT: lwz r3, 160(r1) +; SPE-NEXT: lwz r4, 152(r1) +; SPE-NEXT: lwz r5, 156(r1) +; SPE-NEXT: lwz r7, 164(r1) +; SPE-NEXT: stw r22, 104(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r23, 108(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r24, 112(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill +; SPE-NEXT: efsneg r22, r3 +; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill +; SPE-NEXT: efsneg r23, r5 +; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill +; SPE-NEXT: efsneg r24, r4 +; SPE-NEXT: efsneg r5, r7 +; SPE-NEXT: mr r3, r6 +; SPE-NEXT: mr r4, r10 +; SPE-NEXT: stw r21, 100(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r29, 132(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r30, 136(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r29, r8 +; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r30, r9 +; SPE-NEXT: bl fmaf +; SPE-NEXT: mr r21, r3 +; SPE-NEXT: mr r3, r27 +; SPE-NEXT: mr r4, r30 +; SPE-NEXT: mr r5, r22 +; SPE-NEXT: bl fmaf +; SPE-NEXT: mr r30, r3 +; SPE-NEXT: mr r3, r26 +; SPE-NEXT: mr r4, r29 +; SPE-NEXT: mr r5, r23 +; SPE-NEXT: bl fmaf +; SPE-NEXT: mr r29, r3 +; SPE-NEXT: mr r3, r25 +; SPE-NEXT: mr r4, r28 +; SPE-NEXT: mr r5, r24 +; SPE-NEXT: bl fmaf +; SPE-NEXT: mr r4, r29 +; SPE-NEXT: mr r5, r30 +; SPE-NEXT: mr r6, r21 +; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 136(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 132(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 128(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r27, 124(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r26, 120(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r25, 116(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r24, 112(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r23, 108(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r22, 104(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r21, 100(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 148(r1) +; SPE-NEXT: addi r1, r1, 144 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %neg = fneg <4 x float> %vf2 %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32( <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg, @@ -621,6 +1148,82 @@ ; NOVSX-NEXT: fmsub f2, f2, f4, f6 ; NOVSX-NEXT: fmsub f1, f1, f3, f5 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fmsub_v2f64: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -96(r1) +; SPE-NEXT: .cfi_def_cfa_offset 96 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: .cfi_offset r26, -24 +; SPE-NEXT: .cfi_offset r27, -20 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: .cfi_offset r26, -80 +; SPE-NEXT: .cfi_offset r27, -72 +; SPE-NEXT: .cfi_offset r28, -64 +; SPE-NEXT: .cfi_offset r29, -56 +; SPE-NEXT: .cfi_offset r30, -48 +; SPE-NEXT: stw r30, 88(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r30, r3 +; SPE-NEXT: evldd r3, 112(r1) +; SPE-NEXT: evldd r11, 120(r1) +; SPE-NEXT: stw r26, 72(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r27, 76(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r28, 80(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r29, 84(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill +; SPE-NEXT: efdneg r27, r11 +; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill +; SPE-NEXT: evmergelo r29, r7, r8 +; SPE-NEXT: evmergelo r9, r9, r10 +; SPE-NEXT: evmergelo r4, r5, r6 +; SPE-NEXT: efdneg r8, r3 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: evmergehi r5, r9, r9 +; SPE-NEXT: evmergehi r7, r8, r8 +; SPE-NEXT: mr r6, r9 +; SPE-NEXT: evldd r28, 104(r1) +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 +; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 +; SPE-NEXT: bl fma +; SPE-NEXT: evmergelo r26, r3, r4 +; SPE-NEXT: evmergehi r3, r29, r29 +; SPE-NEXT: evmergehi r5, r28, r28 +; SPE-NEXT: evmergehi r7, r27, r27 +; SPE-NEXT: mr r4, r29 +; SPE-NEXT: mr r6, r28 +; SPE-NEXT: mr r8, r27 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 +; SPE-NEXT: bl fma +; SPE-NEXT: li r5, 8 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: evstddx r3, r30, r5 +; SPE-NEXT: evstdd r26, 0(r30) +; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 88(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 84(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 80(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r27, 76(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r26, 72(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 100(r1) +; SPE-NEXT: addi r1, r1, 96 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %neg = fneg <2 x double> %vf2 %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64( <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg, @@ -640,6 +1243,20 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fnmadds f1, f1, f2, f3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fnmadd_f32: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl fmaf +; SPE-NEXT: efsneg r3, r3 +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %fma = call float @llvm.experimental.constrained.fma.f32( float %f0, float %f1, float %f2, metadata !"round.dynamic", @@ -659,6 +1276,36 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fnmadd f1, f1, f2, f3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fnmadd_f64: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r8, r7, r8 +; SPE-NEXT: evmergelo r6, r5, r6 +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: evmergehi r5, r6, r6 +; SPE-NEXT: evmergehi r7, r8, r8 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 +; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 +; SPE-NEXT: bl fma +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: efdneg r4, r3 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %fma = call double @llvm.experimental.constrained.fma.f64( double %f0, double %f1, double %f2, metadata !"round.dynamic", @@ -708,6 +1355,110 @@ ; NOVSX-NEXT: lvx v3, 0, r3 ; NOVSX-NEXT: vsubfp v2, v2, v3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fnmadd_v4f32: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -144(r1) +; SPE-NEXT: .cfi_def_cfa_offset 144 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: .cfi_offset r21, -44 +; SPE-NEXT: .cfi_offset r22, -40 +; SPE-NEXT: .cfi_offset r23, -36 +; SPE-NEXT: .cfi_offset r24, -32 +; SPE-NEXT: .cfi_offset r25, -28 +; SPE-NEXT: .cfi_offset r26, -24 +; SPE-NEXT: .cfi_offset r27, -20 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: .cfi_offset r21, -136 +; SPE-NEXT: .cfi_offset r22, -128 +; SPE-NEXT: .cfi_offset r23, -120 +; SPE-NEXT: .cfi_offset r24, -112 +; SPE-NEXT: .cfi_offset r25, -104 +; SPE-NEXT: .cfi_offset r26, -96 +; SPE-NEXT: .cfi_offset r27, -88 +; SPE-NEXT: .cfi_offset r28, -80 +; SPE-NEXT: .cfi_offset r29, -72 +; SPE-NEXT: .cfi_offset r30, -64 +; SPE-NEXT: stw r27, 124(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r27, r5 +; SPE-NEXT: lwz r5, 164(r1) +; SPE-NEXT: stw r25, 116(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r26, 120(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r25, r3 +; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r26, r4 +; SPE-NEXT: mr r3, r6 +; SPE-NEXT: mr r4, r10 +; SPE-NEXT: stw r21, 100(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r22, 104(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r23, 108(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r24, 112(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r28, 128(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r29, 132(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r30, 136(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r28, r7 +; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r29, r8 +; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r30, r9 +; SPE-NEXT: lwz r24, 152(r1) +; SPE-NEXT: lwz r23, 156(r1) +; SPE-NEXT: lwz r22, 160(r1) +; SPE-NEXT: bl fmaf +; SPE-NEXT: mr r21, r3 +; SPE-NEXT: mr r3, r27 +; SPE-NEXT: mr r4, r30 +; SPE-NEXT: mr r5, r22 +; SPE-NEXT: bl fmaf +; SPE-NEXT: mr r30, r3 +; SPE-NEXT: mr r3, r26 +; SPE-NEXT: mr r4, r29 +; SPE-NEXT: mr r5, r23 +; SPE-NEXT: bl fmaf +; SPE-NEXT: mr r29, r3 +; SPE-NEXT: mr r3, r25 +; SPE-NEXT: mr r4, r28 +; SPE-NEXT: mr r5, r24 +; SPE-NEXT: bl fmaf +; SPE-NEXT: efsneg r4, r29 +; SPE-NEXT: efsneg r5, r30 +; SPE-NEXT: efsneg r6, r21 +; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload +; SPE-NEXT: efsneg r3, r3 +; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 136(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 132(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 128(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r27, 124(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r26, 120(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r25, 116(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r24, 112(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r23, 108(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r22, 104(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r21, 100(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 148(r1) +; SPE-NEXT: addi r1, r1, 144 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32( <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2, metadata !"round.dynamic", @@ -728,6 +1479,82 @@ ; NOVSX-NEXT: fnmadd f2, f2, f4, f6 ; NOVSX-NEXT: fnmadd f1, f1, f3, f5 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fnmadd_v2f64: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -96(r1) +; SPE-NEXT: .cfi_def_cfa_offset 96 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: .cfi_offset r26, -24 +; SPE-NEXT: .cfi_offset r27, -20 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: .cfi_offset r26, -80 +; SPE-NEXT: .cfi_offset r27, -72 +; SPE-NEXT: .cfi_offset r28, -64 +; SPE-NEXT: .cfi_offset r29, -56 +; SPE-NEXT: .cfi_offset r30, -48 +; SPE-NEXT: stw r26, 72(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r27, 76(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r28, 80(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r29, 84(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r30, 88(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: evmergelo r27, r7, r8 +; SPE-NEXT: evmergelo r9, r9, r10 +; SPE-NEXT: evmergelo r4, r5, r6 +; SPE-NEXT: mr r30, r3 +; SPE-NEXT: evldd r8, 112(r1) +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: evmergehi r5, r9, r9 +; SPE-NEXT: mr r6, r9 +; SPE-NEXT: evldd r29, 120(r1) +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: evmergehi r7, r8, r8 +; SPE-NEXT: evldd r28, 104(r1) +; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 +; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 +; SPE-NEXT: bl fma +; SPE-NEXT: evmergelo r26, r3, r4 +; SPE-NEXT: evmergehi r3, r27, r27 +; SPE-NEXT: evmergehi r5, r28, r28 +; SPE-NEXT: evmergehi r7, r29, r29 +; SPE-NEXT: mr r4, r27 +; SPE-NEXT: mr r6, r28 +; SPE-NEXT: mr r8, r29 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 +; SPE-NEXT: bl fma +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: li r5, 8 +; SPE-NEXT: efdneg r3, r3 +; SPE-NEXT: evstddx r3, r30, r5 +; SPE-NEXT: efdneg r3, r26 +; SPE-NEXT: evstdd r3, 0(r30) +; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 88(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 84(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 80(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r27, 76(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r26, 72(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 100(r1) +; SPE-NEXT: addi r1, r1, 96 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64( <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2, metadata !"round.dynamic", @@ -747,6 +1574,21 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fnmsubs f1, f1, f2, f3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fnmsub_f32: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: efsneg r5, r5 +; SPE-NEXT: bl fmaf +; SPE-NEXT: efsneg r3, r3 +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %neg = fneg float %f2 %fma = call float @llvm.experimental.constrained.fma.f32( float %f0, float %f1, float %neg, @@ -767,6 +1609,37 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fnmsub f1, f1, f2, f3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fnmsub_f64: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r6, r5, r6 +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergelo r3, r7, r8 +; SPE-NEXT: efdneg r8, r3 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: evmergehi r5, r6, r6 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 +; SPE-NEXT: evmergehi r7, r8, r8 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 +; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 +; SPE-NEXT: bl fma +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: efdneg r4, r3 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %neg = fneg double %f2 %fma = call double @llvm.experimental.constrained.fma.f64( double %f0, double %f1, double %neg, @@ -818,6 +1691,114 @@ ; NOVSX-NEXT: lvx v2, 0, r3 ; NOVSX-NEXT: vsubfp v2, v5, v2 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fnmsub_v4f32: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -144(r1) +; SPE-NEXT: .cfi_def_cfa_offset 144 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: .cfi_offset r21, -44 +; SPE-NEXT: .cfi_offset r22, -40 +; SPE-NEXT: .cfi_offset r23, -36 +; SPE-NEXT: .cfi_offset r24, -32 +; SPE-NEXT: .cfi_offset r25, -28 +; SPE-NEXT: .cfi_offset r26, -24 +; SPE-NEXT: .cfi_offset r27, -20 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: .cfi_offset r21, -136 +; SPE-NEXT: .cfi_offset r22, -128 +; SPE-NEXT: .cfi_offset r23, -120 +; SPE-NEXT: .cfi_offset r24, -112 +; SPE-NEXT: .cfi_offset r25, -104 +; SPE-NEXT: .cfi_offset r26, -96 +; SPE-NEXT: .cfi_offset r27, -88 +; SPE-NEXT: .cfi_offset r28, -80 +; SPE-NEXT: .cfi_offset r29, -72 +; SPE-NEXT: .cfi_offset r30, -64 +; SPE-NEXT: stw r25, 116(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r26, 120(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r27, 124(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r28, 128(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r25, r3 +; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r26, r4 +; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r27, r5 +; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r28, r7 +; SPE-NEXT: lwz r3, 160(r1) +; SPE-NEXT: lwz r4, 152(r1) +; SPE-NEXT: lwz r5, 156(r1) +; SPE-NEXT: lwz r7, 164(r1) +; SPE-NEXT: stw r22, 104(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r23, 108(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r24, 112(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill +; SPE-NEXT: efsneg r22, r3 +; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill +; SPE-NEXT: efsneg r23, r5 +; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill +; SPE-NEXT: efsneg r24, r4 +; SPE-NEXT: efsneg r5, r7 +; SPE-NEXT: mr r3, r6 +; SPE-NEXT: mr r4, r10 +; SPE-NEXT: stw r21, 100(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r29, 132(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r30, 136(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r29, r8 +; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r30, r9 +; SPE-NEXT: bl fmaf +; SPE-NEXT: mr r21, r3 +; SPE-NEXT: mr r3, r27 +; SPE-NEXT: mr r4, r30 +; SPE-NEXT: mr r5, r22 +; SPE-NEXT: bl fmaf +; SPE-NEXT: mr r30, r3 +; SPE-NEXT: mr r3, r26 +; SPE-NEXT: mr r4, r29 +; SPE-NEXT: mr r5, r23 +; SPE-NEXT: bl fmaf +; SPE-NEXT: mr r29, r3 +; SPE-NEXT: mr r3, r25 +; SPE-NEXT: mr r4, r28 +; SPE-NEXT: mr r5, r24 +; SPE-NEXT: bl fmaf +; SPE-NEXT: efsneg r4, r29 +; SPE-NEXT: efsneg r5, r30 +; SPE-NEXT: efsneg r6, r21 +; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload +; SPE-NEXT: efsneg r3, r3 +; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 136(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 132(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 128(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r27, 124(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r26, 120(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r25, 116(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r24, 112(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r23, 108(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r22, 104(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r21, 100(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 148(r1) +; SPE-NEXT: addi r1, r1, 144 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %neg = fneg <4 x float> %vf2 %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32( <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg, @@ -839,6 +1820,84 @@ ; NOVSX-NEXT: fnmsub f2, f2, f4, f6 ; NOVSX-NEXT: fnmsub f1, f1, f3, f5 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fnmsub_v2f64: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -96(r1) +; SPE-NEXT: .cfi_def_cfa_offset 96 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: .cfi_offset r26, -24 +; SPE-NEXT: .cfi_offset r27, -20 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: .cfi_offset r26, -80 +; SPE-NEXT: .cfi_offset r27, -72 +; SPE-NEXT: .cfi_offset r28, -64 +; SPE-NEXT: .cfi_offset r29, -56 +; SPE-NEXT: .cfi_offset r30, -48 +; SPE-NEXT: stw r30, 88(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r30, r3 +; SPE-NEXT: evldd r3, 112(r1) +; SPE-NEXT: evldd r11, 120(r1) +; SPE-NEXT: stw r26, 72(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r27, 76(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r28, 80(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r29, 84(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill +; SPE-NEXT: efdneg r27, r11 +; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill +; SPE-NEXT: evmergelo r29, r7, r8 +; SPE-NEXT: evmergelo r9, r9, r10 +; SPE-NEXT: evmergelo r4, r5, r6 +; SPE-NEXT: efdneg r8, r3 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: evmergehi r5, r9, r9 +; SPE-NEXT: evmergehi r7, r8, r8 +; SPE-NEXT: mr r6, r9 +; SPE-NEXT: evldd r28, 104(r1) +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 +; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 +; SPE-NEXT: bl fma +; SPE-NEXT: evmergelo r26, r3, r4 +; SPE-NEXT: evmergehi r3, r29, r29 +; SPE-NEXT: evmergehi r5, r28, r28 +; SPE-NEXT: evmergehi r7, r27, r27 +; SPE-NEXT: mr r4, r29 +; SPE-NEXT: mr r6, r28 +; SPE-NEXT: mr r8, r27 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 +; SPE-NEXT: bl fma +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: li r5, 8 +; SPE-NEXT: efdneg r3, r3 +; SPE-NEXT: evstddx r3, r30, r5 +; SPE-NEXT: efdneg r3, r26 +; SPE-NEXT: evstdd r3, 0(r30) +; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 88(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 84(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 80(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r27, 76(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r26, 72(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 100(r1) +; SPE-NEXT: addi r1, r1, 96 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %neg = fneg <2 x double> %vf2 %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64( <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg, @@ -858,6 +1917,19 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fsqrts f1, f1 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fsqrt_f32: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl sqrtf +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %res = call float @llvm.experimental.constrained.sqrt.f32( float %f1, metadata !"round.dynamic", @@ -875,6 +1947,27 @@ ; NOVSX: # %bb.0: ; NOVSX-NEXT: fsqrt f1, f1 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fsqrt_f64: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: bl sqrt +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %res = call double @llvm.experimental.constrained.sqrt.f64( double %f1, metadata !"round.dynamic", @@ -907,6 +2000,59 @@ ; NOVSX-NEXT: stfs f0, -16(r1) ; NOVSX-NEXT: lvx v2, 0, r3 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fsqrt_v4f32: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -80(r1) +; SPE-NEXT: .cfi_def_cfa_offset 80 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: .cfi_offset r27, -20 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: .cfi_offset r27, -72 +; SPE-NEXT: .cfi_offset r28, -64 +; SPE-NEXT: .cfi_offset r29, -56 +; SPE-NEXT: .cfi_offset r30, -48 +; SPE-NEXT: stw r28, 64(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r28, 16(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r28, r3 +; SPE-NEXT: mr r3, r6 +; SPE-NEXT: stw r27, 60(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r29, 68(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r27, 8(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r29, 24(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r29, r4 +; SPE-NEXT: evstdd r30, 32(r1) # 8-byte Folded Spill +; SPE-NEXT: mr r30, r5 +; SPE-NEXT: bl sqrtf +; SPE-NEXT: mr r27, r3 +; SPE-NEXT: mr r3, r30 +; SPE-NEXT: bl sqrtf +; SPE-NEXT: mr r30, r3 +; SPE-NEXT: mr r3, r29 +; SPE-NEXT: bl sqrtf +; SPE-NEXT: mr r29, r3 +; SPE-NEXT: mr r3, r28 +; SPE-NEXT: bl sqrtf +; SPE-NEXT: mr r4, r29 +; SPE-NEXT: mr r5, r30 +; SPE-NEXT: mr r6, r27 +; SPE-NEXT: evldd r30, 32(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r29, 24(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r28, 16(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r27, 8(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 84(r1) +; SPE-NEXT: addi r1, r1, 80 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %res = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32( <4 x float> %vf1, metadata !"round.dynamic", @@ -925,6 +2071,52 @@ ; NOVSX-NEXT: fsqrt f2, f2 ; NOVSX-NEXT: fsqrt f1, f1 ; NOVSX-NEXT: blr +; +; SPE-LABEL: fsqrt_v2f64: +; SPE: # %bb.0: +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -64(r1) +; SPE-NEXT: .cfi_def_cfa_offset 64 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: .cfi_offset r28, -16 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: .cfi_offset r28, -48 +; SPE-NEXT: .cfi_offset r29, -40 +; SPE-NEXT: .cfi_offset r30, -32 +; SPE-NEXT: stw r28, 48(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill +; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill +; SPE-NEXT: evstdd r28, 16(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r29, 24(r1) # 8-byte Folded Spill +; SPE-NEXT: evstdd r30, 32(r1) # 8-byte Folded Spill +; SPE-NEXT: evmergelo r29, r7, r8 +; SPE-NEXT: evmergelo r4, r5, r6 +; SPE-NEXT: mr r30, r3 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: bl sqrt +; SPE-NEXT: evmergelo r28, r3, r4 +; SPE-NEXT: evmergehi r3, r29, r29 +; SPE-NEXT: mr r4, r29 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: bl sqrt +; SPE-NEXT: li r5, 8 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: evstddx r3, r30, r5 +; SPE-NEXT: evstdd r28, 0(r30) +; SPE-NEXT: evldd r30, 32(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r29, 24(r1) # 8-byte Folded Reload +; SPE-NEXT: evldd r28, 16(r1) # 8-byte Folded Reload +; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload +; SPE-NEXT: lwz r0, 68(r1) +; SPE-NEXT: addi r1, r1, 64 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr %res = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64( <2 x double> %vf1, metadata !"round.dynamic",