Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -51,6 +51,9 @@ /// FSEL, + /// XSMAXCDP, XSMINCDP - C-type min/max instructions. + XSMAXCDP, XSMINCDP, + /// FCFID - The FCFID instruction, taking an f64 operand and producing /// and f64 value containing the FP representation of the integer that /// was temporarily in the f64 operand. Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -548,6 +548,13 @@ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } + if (Subtarget.hasVSX()) { + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); + } + if (Subtarget.hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. @@ -1294,6 +1301,8 @@ switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; case PPCISD::FSEL: return "PPCISD::FSEL"; + case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP"; + case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP"; case PPCISD::FCFID: return "PPCISD::FCFID"; case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; @@ -7188,17 +7197,15 @@ !Op.getOperand(2).getValueType().isFloatingPoint()) return Op; + bool HasNoInfs = DAG.getTarget().Options.NoInfsFPMath; + bool HasNoNaNs = DAG.getTarget().Options.NoNaNsFPMath; // We might be able to do better than this under some circumstances, but in // general, fsel-based lowering of select is a finite-math-only optimization. // For more information, see section F.3 of the 2.06 ISA specification. 
- if (!DAG.getTarget().Options.NoInfsFPMath || - !DAG.getTarget().Options.NoNaNsFPMath) + // With ISA 3.0, we have xsmaxcdp/xsmincdp which are OK to emit even in the + // presence of infinities. + if (!Subtarget.hasP9Vector() && (!HasNoInfs || !HasNoNaNs)) return Op; - // TODO: Propagate flags from the select rather than global settings. - SDNodeFlags Flags; - Flags.setNoInfs(true); - Flags.setNoNaNs(true); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); EVT ResVT = Op.getValueType(); @@ -7207,6 +7214,32 @@ SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); SDLoc dl(Op); + if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) { + switch (CC) { + default: + // Not a min/max but with finite math, we may still be able to use fsel. + if (HasNoInfs && HasNoNaNs) + break; + return Op; + case ISD::SETOGT: + case ISD::SETGT: + return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS); + case ISD::SETOLT: + case ISD::SETLT: + return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS); + } + } + + // The fsel-based lowering below is finite-math-only. P9 targets skip the + // early exit above, so re-check before emitting fsel. + if (!HasNoInfs || !HasNoNaNs) + return Op; + + // TODO: Propagate flags from the select rather than global settings. + SDNodeFlags Flags; + Flags.setNoInfs(true); + Flags.setNoNaNs(true); + // If the RHS of the comparison is a 0.0, we don't need to do the // subtraction at all. SDValue Sel1; Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -117,6 +117,10 @@ SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisInt<2> ]>; +def SDT_PPCFPMinMax : SDTypeProfile<1, 2, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0> +]>; + //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. // @@ -165,7 +169,8 @@ // Type constraint for fsel. 
SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0>, SDTCisVT<1, f64>]>, []>; - +def PPCxsmaxc : SDNode<"PPCISD::XSMAXCDP", SDT_PPCFPMinMax, []>; +def PPCxsminc : SDNode<"PPCISD::XSMINCDP", SDT_PPCFPMinMax, []>; def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>; def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>; def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -1255,6 +1255,55 @@ } // AddedComplexity } // HasVSX +def FpMinMax { + dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC), + (COPY_TO_REGCLASS $B, VSFRC)), + VSSRC); + dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC), + (COPY_TO_REGCLASS $B, VSFRC)), + VSSRC); +} + +let AddedComplexity = 400, Predicates = [HasVSX] in { + // f32 Min. + def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)), + (f32 FpMinMax.F32Min)>; + def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)), + (f32 FpMinMax.F32Min)>; + def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))), + (f32 FpMinMax.F32Min)>; + def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))), + (f32 FpMinMax.F32Min)>; + // F32 Max. + def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)), + (f32 FpMinMax.F32Max)>; + def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)), + (f32 FpMinMax.F32Max)>; + def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))), + (f32 FpMinMax.F32Max)>; + def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))), + (f32 FpMinMax.F32Max)>; + + // f64 Min. 
+ def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)), + (f64 (XSMINDP $A, $B))>; + def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)), + (f64 (XSMINDP $A, $B))>; + def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))), + (f64 (XSMINDP $A, $B))>; + def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))), + (f64 (XSMINDP $A, $B))>; + // f64 Max. + def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)), + (f64 (XSMAXDP $A, $B))>; + def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)), + (f64 (XSMAXDP $A, $B))>; + def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))), + (f64 (XSMAXDP $A, $B))>; + def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))), + (f64 (XSMAXDP $A, $B))>; +} + def ScalarLoads { dag Li8 = (i32 (extloadi8 xoaddr:$src)); dag ZELi8 = (i32 (zextloadi8 xoaddr:$src)); @@ -2884,13 +2933,14 @@ //===--------------------------------------------------------------------===// // Maximum/Minimum Type-C/Type-J DP - // XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU, so we use vsrc for XT - def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsrc, vsfrc, vsfrc, - IIC_VecFP, []>; + def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc, + IIC_VecFP, + [(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>; def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc, IIC_VecFP, []>; - def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsrc, vsfrc, vsfrc, - IIC_VecFP, []>; + def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsfrc, vsfrc, vsfrc, + IIC_VecFP, + [(set f64:$XT, (PPCxsminc f64:$XA, f64:$XB))]>; def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc, IIC_VecFP, []>; @@ -3697,6 +3747,15 @@ def : Pat<(f128 (fpextend f32:$src)), (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>; + def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)), + (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC), + (COPY_TO_REGCLASS $XB, VSSRC)), + VSSRC))>; + def : Pat<(f32 (PPCxsminc 
f32:$XA, f32:$XB)), + (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC), + (COPY_TO_REGCLASS $XB, VSSRC)), + VSSRC))>; + } // end HasP9Vector, AddedComplexity let AddedComplexity = 400 in { Index: test/CodeGen/PowerPC/ctr-minmaxnum.ll =================================================================== --- test/CodeGen/PowerPC/ctr-minmaxnum.ll +++ test/CodeGen/PowerPC/ctr-minmaxnum.ll @@ -36,8 +36,8 @@ ; CHECK-LABEL: test1: ; CHECK-NOT: mtctr -; CHECK: bl fminf -; CHECK-NOT: bl fminf +; CHECK: xsmindp +; CHECK-NOT: xsmindp ; CHECK-NOT: mtctr ; CHECK: blr @@ -59,9 +59,9 @@ ; CHECK-LABEL: test1v: ; CHECK: xvminsp -; CHECK-NOT: bl fminf +; CHECK-NOT: xsmindp ; CHECK: mtctr -; CHECK-NOT: bl fminf +; CHECK-NOT: xsmindp ; CHECK: blr ; QPX-LABEL: test1v: @@ -87,8 +87,8 @@ ; CHECK-LABEL: test1a: ; CHECK-NOT: mtctr -; CHECK: bl fminf -; CHECK-NOT: bl fminf +; CHECK: xsmindp +; CHECK-NOT: xsmindp ; CHECK-NOT: mtctr ; CHECK: blr @@ -110,8 +110,8 @@ ; CHECK-LABEL: test2: ; CHECK-NOT: mtctr -; CHECK: bl fmaxf -; CHECK-NOT: bl fmaxf +; CHECK: xsmaxdp +; CHECK-NOT: xsmaxdp ; CHECK-NOT: mtctr ; CHECK: blr @@ -134,9 +134,9 @@ ; CHECK-LABEL: test2v: ; CHECK: xvmaxdp ; CHECK: xvmaxdp -; CHECK-NOT: bl fmax +; CHECK-NOT: xsmaxdp ; CHECK: mtctr -; CHECK-NOT: bl fmax +; CHECK-NOT: xsmaxdp ; CHECK: blr ; QPX-LABEL: test2v: @@ -162,8 +162,8 @@ ; CHECK-LABEL: test2a: ; CHECK-NOT: mtctr -; CHECK: bl fmaxf -; CHECK-NOT: bl fmaxf +; CHECK: xsmaxdp +; CHECK-NOT: xsmaxdp ; CHECK-NOT: mtctr ; CHECK: blr @@ -185,8 +185,8 @@ ; CHECK-LABEL: test3: ; CHECK-NOT: mtctr -; CHECK: bl fmin -; CHECK-NOT: bl fmin +; CHECK: xsmindp +; CHECK-NOT: xsmindp ; CHECK-NOT: mtctr ; CHECK: blr @@ -208,8 +208,8 @@ ; CHECK-LABEL: test3a: ; CHECK-NOT: mtctr -; CHECK: bl fmin -; CHECK-NOT: bl fmin +; CHECK: xsmindp +; CHECK-NOT: xsmindp ; CHECK-NOT: mtctr ; CHECK: blr @@ -231,8 +231,8 @@ ; CHECK-LABEL: test4: ; CHECK-NOT: mtctr -; CHECK: bl fmax -; CHECK-NOT: bl fmax +; CHECK: xsmaxdp +; CHECK-NOT: xsmaxdp ; 
CHECK-NOT: mtctr ; CHECK: blr @@ -254,8 +254,8 @@ ; CHECK-LABEL: test4a: ; CHECK-NOT: mtctr -; CHECK: bl fmax -; CHECK-NOT: bl fmax +; CHECK: xsmaxdp +; CHECK-NOT: xsmaxdp ; CHECK-NOT: mtctr ; CHECK: blr Index: test/CodeGen/PowerPC/scalar-min-max.ll =================================================================== --- test/CodeGen/PowerPC/scalar-min-max.ll +++ test/CodeGen/PowerPC/scalar-min-max.ll @@ -0,0 +1,203 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ +; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ +; RUN: --enable-no-nans-fp-math \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ +; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ +; RUN: --enable-no-nans-fp-math \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ +; RUN: --check-prefix=NO-FAST-P9 +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ +; RUN: --check-prefix=NO-FAST-P8 +define dso_local float @testfmax(float %a, float %b) local_unnamed_addr { +; CHECK-LABEL: testfmax: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmaxdp f1, f1, f2 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testfmax: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmaxcdp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testfmax: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2 +; NO-FAST-P8-NEXT: bgtlr cr0 +; NO-FAST-P8-NEXT: # %bb.1: # %entry +; NO-FAST-P8-NEXT: fmr f1, f2 +; NO-FAST-P8-NEXT: blr +entry: + %cmp = fcmp ogt float %a, %b + %cond = select i1 %cmp, float %a, float %b + ret float %cond +} + +define dso_local double 
@testdmax(double %a, double %b) local_unnamed_addr { +; CHECK-LABEL: testdmax: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmaxdp f1, f1, f2 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testdmax: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmaxcdp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testdmax: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f2 +; NO-FAST-P8-NEXT: bgtlr cr0 +; NO-FAST-P8-NEXT: # %bb.1: # %entry +; NO-FAST-P8-NEXT: fmr f1, f2 +; NO-FAST-P8-NEXT: blr +entry: + %cmp = fcmp ogt double %a, %b + %cond = select i1 %cmp, double %a, double %b + ret double %cond +} + +define dso_local float @testfmin(float %a, float %b) local_unnamed_addr { +; CHECK-LABEL: testfmin: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmindp f1, f1, f2 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testfmin: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmincdp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testfmin: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2 +; NO-FAST-P8-NEXT: bltlr cr0 +; NO-FAST-P8-NEXT: # %bb.1: # %entry +; NO-FAST-P8-NEXT: fmr f1, f2 +; NO-FAST-P8-NEXT: blr +entry: + %cmp = fcmp olt float %a, %b + %cond = select i1 %cmp, float %a, float %b + ret float %cond +} + +define dso_local double @testdmin(double %a, double %b) local_unnamed_addr { +; CHECK-LABEL: testdmin: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmindp f1, f1, f2 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testdmin: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmincdp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testdmin: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f2 +; NO-FAST-P8-NEXT: bltlr cr0 +; NO-FAST-P8-NEXT: # %bb.1: # %entry +; NO-FAST-P8-NEXT: fmr f1, f2 +; NO-FAST-P8-NEXT: blr +entry: + %cmp = fcmp olt double %a, %b + %cond = select i1 %cmp, double %a, double %b + ret double %cond +} + +define dso_local float @testfmax_fast(float %a, float %b) 
local_unnamed_addr { +; CHECK-LABEL: testfmax_fast: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmaxdp f1, f1, f2 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testfmax_fast: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmaxcdp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testfmax_fast: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2 +; NO-FAST-P8-NEXT: bgtlr cr0 +; NO-FAST-P8-NEXT: # %bb.1: # %entry +; NO-FAST-P8-NEXT: fmr f1, f2 +; NO-FAST-P8-NEXT: blr +entry: + %cmp = fcmp fast ogt float %a, %b + %cond = select i1 %cmp, float %a, float %b + ret float %cond +} +define dso_local double @testdmax_fast(double %a, double %b) local_unnamed_addr { +; CHECK-LABEL: testdmax_fast: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmaxdp f1, f1, f2 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testdmax_fast: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmaxcdp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testdmax_fast: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f2 +; NO-FAST-P8-NEXT: bgtlr cr0 +; NO-FAST-P8-NEXT: # %bb.1: # %entry +; NO-FAST-P8-NEXT: fmr f1, f2 +; NO-FAST-P8-NEXT: blr +entry: + %cmp = fcmp fast ogt double %a, %b + %cond = select i1 %cmp, double %a, double %b + ret double %cond +} +define dso_local float @testfmin_fast(float %a, float %b) local_unnamed_addr { +; CHECK-LABEL: testfmin_fast: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmindp f1, f1, f2 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testfmin_fast: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmincdp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testfmin_fast: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2 +; NO-FAST-P8-NEXT: bltlr cr0 +; NO-FAST-P8-NEXT: # %bb.1: # %entry +; NO-FAST-P8-NEXT: fmr f1, f2 +; NO-FAST-P8-NEXT: blr +entry: + %cmp = fcmp fast olt float %a, %b + %cond = select i1 %cmp, float %a, float %b + ret float %cond +} +define dso_local double 
@testdmin_fast(double %a, double %b) local_unnamed_addr { +; CHECK-LABEL: testdmin_fast: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmindp f1, f1, f2 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testdmin_fast: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmincdp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testdmin_fast: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f2 +; NO-FAST-P8-NEXT: bltlr cr0 +; NO-FAST-P8-NEXT: # %bb.1: # %entry +; NO-FAST-P8-NEXT: fmr f1, f2 +; NO-FAST-P8-NEXT: blr +entry: + %cmp = fcmp fast olt double %a, %b + %cond = select i1 %cmp, double %a, double %b + ret double %cond +}