Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -51,6 +51,11 @@
       ///
       FSEL,
 
+      /// XSMAXCDP, XSMINCDP - C-type min/max instructions. They are produced
+      /// when lowering a select of the form (a > b) ? a : b or
+      /// (a < b) ? a : b on subtargets with P9 vector support.
+      XSMAXCDP, XSMINCDP,
+
       /// FCFID - The FCFID instruction, taking an f64 operand and producing
       /// and f64 value containing the FP representation of the integer that
       /// was temporarily in the f64 operand.
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -548,6 +548,13 @@
       setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
     }
 
+    if (Subtarget.hasVSX()) {
+      setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
+      setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
+      setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
+      setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
+    }
+
     if (Subtarget.hasAltivec()) {
       // First set operation action for all vector types to expand. Then we
       // will selectively turn on ones that can be effectively codegen'd.
@@ -1294,6 +1301,8 @@
   switch ((PPCISD::NodeType)Opcode) {
   case PPCISD::FIRST_NUMBER:    break;
   case PPCISD::FSEL:            return "PPCISD::FSEL";
+  case PPCISD::XSMAXCDP:        return "PPCISD::XSMAXCDP";
+  case PPCISD::XSMINCDP:        return "PPCISD::XSMINCDP";
   case PPCISD::FCFID:           return "PPCISD::FCFID";
   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
@@ -7191,8 +7200,10 @@
   // We might be able to do better than this under some circumstances, but in
   // general, fsel-based lowering of select is a finite-math-only optimization.
   // For more information, see section F.3 of the 2.06 ISA specification.
-  if (!DAG.getTarget().Options.NoInfsFPMath ||
-      !DAG.getTarget().Options.NoNaNsFPMath)
+  // With P9 vector support, we have xsmaxcdp/xsmincdp which are OK to emit
+  // even in the presence of infinities.
+  if (!Subtarget.hasP9Vector() && (!DAG.getTarget().Options.NoInfsFPMath ||
+                                   !DAG.getTarget().Options.NoNaNsFPMath))
     return Op;
   // TODO: Propagate flags from the select rather than global settings.
   SDNodeFlags Flags;
@@ -7207,6 +7218,28 @@
   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
   SDLoc dl(Op);
 
+  // On subtargets with P9 vector support, (a > b) ? a : b and
+  // (a < b) ? a : b map directly onto the C-type max/min instructions,
+  // which, unlike fsel, are OK to emit without finite math.
+  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
+    switch (CC) {
+    default:
+      // Not a min/max; fsel may still apply under finite math (checked below).
+      break;
+    case ISD::SETOGT:
+      return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
+    case ISD::SETOLT:
+      return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
+    }
+  }
+
+  // The fsel-based lowering that follows is a finite-math-only optimization.
+  // The early exit above skips this check when P9 vector support is present,
+  // so repeat it here for selects that were not turned into a min/max.
+  if (!DAG.getTarget().Options.NoInfsFPMath ||
+      !DAG.getTarget().Options.NoNaNsFPMath)
+    return Op;
+
   // If the RHS of the comparison is a 0.0, we don't need to do the
   // subtraction at all.
   SDValue Sel1;
Index: lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.td
+++ lib/Target/PowerPC/PPCInstrInfo.td
@@ -117,6 +117,10 @@
   SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisInt<2>
 ]>;
 
+def SDT_FPMinMax : SDTypeProfile<1, 2, [
+  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>
+]>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC specific DAG Nodes.
 //
@@ -165,7 +169,8 @@
                       // Type constraint for fsel.
                       SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
                                            SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
-
+def PPCxsmaxc : SDNode<"PPCISD::XSMAXCDP", SDT_FPMinMax, []>;
+def PPCxsminc : SDNode<"PPCISD::XSMINCDP", SDT_FPMinMax, []>;
 def PPChi       : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
 def PPClo       : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
 def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -990,6 +990,19 @@
                     (and v4i32:$B, v4i32:$C))),
           (v4i32 (XXSEL $A, $B, $C))>;
 
+def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
+          (f32 (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC),
+                                          (COPY_TO_REGCLASS $B, VSFRC)),
+                                 VSSRC))>;
+def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
+          (f32 (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC),
+                                          (COPY_TO_REGCLASS $B, VSFRC)),
+                                 VSSRC))>;
+def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
+          (f64 (XSMINDP $A, $B))>;
+def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
+          (f64 (XSMAXDP $A, $B))>;
+
 let Predicates = [IsBigEndian] in {
 def : Pat<(v2f64 (scalar_to_vector f64:$A)),
           (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
@@ -2884,13 +2897,14 @@
 
   //===--------------------------------------------------------------------===//
   // Maximum/Minimum Type-C/Type-J DP
-  // XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU, so we use vsrc for XT
-  def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsrc, vsfrc, vsfrc,
-                                 IIC_VecFP, []>;
+  def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc,
+                                 IIC_VecFP,
+                                 [(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>;
   def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc,
                                  IIC_VecFP, []>;
-  def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsrc, vsfrc, vsfrc,
-                                 IIC_VecFP, []>;
+  def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsfrc, vsfrc, vsfrc,
+                                 IIC_VecFP,
+                                 [(set f64:$XT, (PPCxsminc f64:$XA, f64:$XB))]>;
   def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc,
                                  IIC_VecFP, []>;
 
@@ -3697,6 +3711,15 @@
   def : Pat<(f128 (fpextend f32:$src)),
             (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>;
 
+  def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)),
+            (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC),
+                                             (COPY_TO_REGCLASS $XB, VSSRC)),
+                                   VSSRC))>;
+  def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)),
+            (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC),
+                                             (COPY_TO_REGCLASS $XB, VSSRC)),
+                                   VSSRC))>;
+
 } // end HasP9Vector, AddedComplexity
 
 let AddedComplexity = 400 in {
Index: test/CodeGen/PowerPC/scalar-min-max.ll
===================================================================
--- test/CodeGen/PowerPC/scalar-min-max.ll
+++ test/CodeGen/PowerPC/scalar-min-max.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names --enable-unsafe-fp-math \
+; RUN:   -verify-machineinstrs --enable-no-signed-zeros-fp-math \
+; RUN:   --enable-no-nans-fp-math \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
+; RUN:   --check-prefix=NO-FAST-P9
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
+; RUN:   --check-prefix=NO-FAST-P8
+define dso_local float @testfmax(float %a, float %b) local_unnamed_addr #0 {
+; CHECK-LABEL: testfmax:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmaxdp f1, f1, f2
+; CHECK-NEXT:    blr
+;
+; NO-FAST-P9-LABEL: testfmax:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmaxcdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testfmax:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
+; NO-FAST-P8-NEXT:    bgtlr cr0
+; NO-FAST-P8-NEXT:  # %bb.1: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %cmp = fcmp ogt float %a, %b
+  %cond = select i1 %cmp, float %a, float %b
+  ret float %cond
+}
+
+define dso_local double @testdmax(double %a, double %b) local_unnamed_addr #0 {
+; CHECK-LABEL: testdmax:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmaxdp f1, f1, f2
+; CHECK-NEXT:    blr
+;
+; NO-FAST-P9-LABEL: testdmax:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmaxcdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testdmax:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f2
+; NO-FAST-P8-NEXT:    bgtlr cr0
+; NO-FAST-P8-NEXT:  # %bb.1: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %cmp = fcmp ogt double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+}
+
+define dso_local float @testfmin(float %a, float %b) local_unnamed_addr #0 {
+; CHECK-LABEL: testfmin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmindp f1, f1, f2
+; CHECK-NEXT:    blr
+;
+; NO-FAST-P9-LABEL: testfmin:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmincdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testfmin:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
+; NO-FAST-P8-NEXT:    bltlr cr0
+; NO-FAST-P8-NEXT:  # %bb.1: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %cmp = fcmp olt float %a, %b
+  %cond = select i1 %cmp, float %a, float %b
+  ret float %cond
+}
+
+define dso_local double @testdmin(double %a, double %b) local_unnamed_addr #0 {
+; CHECK-LABEL: testdmin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmindp f1, f1, f2
+; CHECK-NEXT:    blr
+;
+; NO-FAST-P9-LABEL: testdmin:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmincdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testdmin:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f2
+; NO-FAST-P8-NEXT:    bltlr cr0
+; NO-FAST-P8-NEXT:  # %bb.1: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %cmp = fcmp olt double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+}