diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -291,11 +291,18 @@
   setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
   setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
   setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
+  setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
   setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
   setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
   setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
   setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+  setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
+
+  if (Subtarget.hasFSQRT()) {
+    setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
+  }
 
   // We don't support sin/cos/sqrt/fmod/pow
   setOperationAction(ISD::FSIN , MVT::f64, Expand);
@@ -933,11 +940,19 @@
       setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
       setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
       setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
       setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
       setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
       setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
       setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
+      setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
+      setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+      setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
+      setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
 
       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
     }
@@ -1001,6 +1016,8 @@
       setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
       setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
       setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
+      setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
+      setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
     }
     setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
     setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2576,10 +2576,10 @@
 defm FSQRT  : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
                         "fsqrt", "$frD, $frB", IIC_FPSqrtD,
-                        [(set f64:$frD, (fsqrt f64:$frB))]>;
+                        [(set f64:$frD, (any_fsqrt f64:$frB))]>;
 defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
                         "fsqrts", "$frD, $frB", IIC_FPSqrtS,
-                        [(set f32:$frD, (fsqrt f32:$frB))]>;
+                        [(set f32:$frD, (any_fsqrt f32:$frB))]>;
 }
 }
 }
@@ -3001,40 +3001,40 @@
 defm FMADD : AForm_1r<63, 29,
                       (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
                       "fmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
-                      [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
+                      [(set f64:$FRT, (any_fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
 defm FMADDS : AForm_1r<59, 29,
                        (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
                        "fmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
-                       [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
+                       [(set f32:$FRT, (any_fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
 defm FMSUB  : AForm_1r<63, 28,
                        (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
                        "fmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
                        [(set f64:$FRT,
-                            (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
+                            (any_fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
 defm FMSUBS : AForm_1r<59, 28,
                        (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
                        "fmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
                        [(set f32:$FRT,
-                            (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
+                            (any_fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
 defm FNMADD : AForm_1r<63, 31,
                        (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
                        "fnmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
                        [(set f64:$FRT,
-                            (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
+                            (fneg (any_fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
 defm FNMADDS : AForm_1r<59, 31,
                         (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
                         "fnmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
                         [(set f32:$FRT,
-                             (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
+                             (fneg (any_fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
 defm FNMSUB : AForm_1r<63, 30,
                        (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
                        "fnmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
-                       [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
+                       [(set f64:$FRT, (fneg (any_fma f64:$FRA, f64:$FRC,
                                                   (fneg f64:$FRB))))]>;
 defm FNMSUBS : AForm_1r<59, 30,
                         (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
                         "fnmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
-                        [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
+                        [(set f32:$FRT, (fneg (any_fma f32:$FRA, f32:$FRC,
                                                    (fneg f32:$FRB))))]>;
 } // isCommutable
 }
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -409,7 +409,7 @@
   def XSMADDADP : XX3Form<60, 33,
                           (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                           "xsmaddadp $XT, $XA, $XB", IIC_VecFP,
-                          [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>,
+                          [(set f64:$XT, (any_fma f64:$XA, f64:$XB, f64:$XTi))]>,
                           RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                           AltVSXFMARel;
   let IsVSXFMAAlt = 1 in
@@ -425,7 +425,7 @@
   def XSMSUBADP : XX3Form<60, 49,
                           (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                           "xsmsubadp $XT, $XA, $XB", IIC_VecFP,
-                          [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
+                          [(set f64:$XT, (any_fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
                           RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                           AltVSXFMARel;
   let IsVSXFMAAlt = 1 in
@@ -441,7 +441,7 @@
   def XSNMADDADP : XX3Form<60, 161,
                            (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                            "xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
-                           [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>,
+                           [(set f64:$XT, (fneg (any_fma f64:$XA, f64:$XB, f64:$XTi)))]>,
                            RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                            AltVSXFMARel;
   let IsVSXFMAAlt = 1 in
@@ -457,7 +457,7 @@
   def XSNMSUBADP : XX3Form<60, 177,
                            (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                            "xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
-                           [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
+                           [(set f64:$XT, (fneg (any_fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
                            RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                            AltVSXFMARel;
   let IsVSXFMAAlt = 1 in
@@ -473,7 +473,7 @@
   def XVMADDADP : XX3Form<60, 97,
                           (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                           "xvmaddadp $XT, $XA, $XB", IIC_VecFP,
-                          [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
+                          [(set v2f64:$XT, (any_fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
                           RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                           AltVSXFMARel;
   let IsVSXFMAAlt = 1 in
@@ -489,7 +489,7 @@
   def XVMADDASP : XX3Form<60, 65,
                           (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                           "xvmaddasp $XT, $XA, $XB", IIC_VecFP,
-                          [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
+                          [(set v4f32:$XT, (any_fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
                           RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                           AltVSXFMARel;
   let IsVSXFMAAlt = 1 in
@@ -505,7 +505,7 @@
   def XVMSUBADP : XX3Form<60, 113,
                           (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                           "xvmsubadp $XT, $XA, $XB", IIC_VecFP,
-                          [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
+                          [(set v2f64:$XT, (any_fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
                           RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                           AltVSXFMARel;
   let IsVSXFMAAlt = 1 in
@@ -521,7 +521,7 @@
   def XVMSUBASP : XX3Form<60, 81,
                           (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                           "xvmsubasp $XT, $XA, $XB", IIC_VecFP,
-                          [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
+                          [(set v4f32:$XT, (any_fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
                           RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                           AltVSXFMARel;
   let IsVSXFMAAlt = 1 in
@@ -537,7 +537,7 @@
   def XVNMADDADP : XX3Form<60, 225,
                            (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                            "xvnmaddadp $XT, $XA, $XB", IIC_VecFP,
-                           [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
+                           [(set v2f64:$XT, (fneg (any_fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
                            RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                            AltVSXFMARel;
   let IsVSXFMAAlt = 1 in
@@ -569,7 +569,7 @@
   def XVNMSUBADP : XX3Form<60, 241,
                            (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                            "xvnmsubadp $XT, $XA, $XB", IIC_VecFP,
-                           [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
+                           [(set v2f64:$XT, (fneg (any_fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
                            RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                            AltVSXFMARel;
   let IsVSXFMAAlt = 1 in
@@ -585,7 +585,7 @@
   def XVNMSUBASP : XX3Form<60, 209,
                            (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                            "xvnmsubasp $XT, $XA, $XB", IIC_VecFP,
-                           [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
+                           [(set v4f32:$XT, (fneg (any_fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
                            RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                            AltVSXFMARel;
   let IsVSXFMAAlt = 1 in
@@ -604,7 +604,7 @@
   def XSSQRTDP : XX2Form<60, 75,
                          (outs vsfrc:$XT), (ins vsfrc:$XB),
                          "xssqrtdp $XT, $XB", IIC_FPSqrtD,
-                         [(set f64:$XT, (fsqrt f64:$XB))]>;
+                         [(set f64:$XT, (any_fsqrt f64:$XB))]>;
 
   def XSREDP : XX2Form<60, 90,
                        (outs vsfrc:$XT), (ins vsfrc:$XB),
@@ -634,11 +634,11 @@
   def XVSQRTDP : XX2Form<60, 203,
                          (outs vsrc:$XT), (ins vsrc:$XB),
                          "xvsqrtdp $XT, $XB", IIC_FPSqrtD,
-                         [(set v2f64:$XT, (fsqrt v2f64:$XB))]>;
+                         [(set v2f64:$XT, (any_fsqrt v2f64:$XB))]>;
 
   def XVSQRTSP : XX2Form<60, 139,
                          (outs vsrc:$XT), (ins vsrc:$XB),
                          "xvsqrtsp $XT, $XB", IIC_FPSqrtS,
-                         [(set v4f32:$XT, (fsqrt v4f32:$XB))]>;
+                         [(set v4f32:$XT, (any_fsqrt v4f32:$XB))]>;
 
   def XVTDIVDP : XX3Form_1<60, 125,
                            (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
@@ -1157,7 +1157,6 @@
                         (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
                         "xsdivsp $XT, $XA, $XB", IIC_FPDivS,
                         [(set f32:$XT, (any_fdiv f32:$XA, f32:$XB))]>;
-  } // mayRaiseFPException
 
   def XSRESP : XX2Form<60, 26,
                        (outs vssrc:$XT), (ins vssrc:$XB),
@@ -1171,7 +1170,7 @@
   def XSSQRTSP : XX2Form<60, 11,
                          (outs vssrc:$XT), (ins vssrc:$XB),
                          "xssqrtsp $XT, $XB", IIC_FPSqrtS,
-                         [(set f32:$XT, (fsqrt f32:$XB))]>;
+                         [(set f32:$XT, (any_fsqrt f32:$XB))]>;
   def XSRSQRTESP : XX2Form<60, 10,
                            (outs vssrc:$XT), (ins vssrc:$XB),
                            "xsrsqrtesp $XT, $XB", IIC_VecFP,
@@ -1184,7 +1183,7 @@
                             (outs vssrc:$XT),
                             (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                             "xsmaddasp $XT, $XA, $XB", IIC_VecFP,
-                            [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>,
+                            [(set f32:$XT, (any_fma f32:$XA, f32:$XB, f32:$XTi))]>,
                             RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                             AltVSXFMARel;
 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
@@ -1203,7 +1202,7 @@
                             (outs vssrc:$XT),
                             (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                             "xsmsubasp $XT, $XA, $XB", IIC_VecFP,
-                            [(set f32:$XT, (fma f32:$XA, f32:$XB,
+                            [(set f32:$XT, (any_fma f32:$XA, f32:$XB,
                                             (fneg f32:$XTi)))]>,
                             RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                             AltVSXFMARel;
@@ -1223,7 +1222,7 @@
                             (outs vssrc:$XT),
                             (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                             "xsnmaddasp $XT, $XA, $XB", IIC_VecFP,
-                            [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+                            [(set f32:$XT, (fneg (any_fma f32:$XA, f32:$XB,
                                                   f32:$XTi)))]>,
                             RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                             AltVSXFMARel;
@@ -1243,7 +1242,7 @@
                             (outs vssrc:$XT),
                             (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                             "xsnmsubasp $XT, $XA, $XB", IIC_VecFP,
-                            [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+                            [(set f32:$XT, (fneg (any_fma f32:$XA, f32:$XB,
                                                   (fneg f32:$XTi))))]>,
                             RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                             AltVSXFMARel;
@@ -1272,6 +1271,7 @@
                           "xscvdpspn $XT, $XB", IIC_VecFP, []>;
   def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
                           "xscvspdpn $XT, $XB", IIC_VecFP, []>;
+  } // mayRaiseFPException
 
   let Predicates = [HasVSX, HasDirectMove] in {
   // VSX direct move instructions
@@ -1367,27 +1367,25 @@
                                 [(set f128:$vT, (any_fsub f128:$vA, f128:$vB))]>;
   def XSDIVQP  : X_VT5_VA5_VB5 <63, 548, "xsdivqp",
                                 [(set f128:$vT, (any_fdiv f128:$vA, f128:$vB))]>;
-  } // mayRaiseFPException
 
   // Square-Root
   def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp",
-                                [(set f128:$vT, (fsqrt f128:$vB))]>;
+                                [(set f128:$vT, (any_fsqrt f128:$vB))]>;
 
   // (Negative) Multiply-{Add/Subtract}
   def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
                                     [(set f128:$vT,
-                                          (fma f128:$vA, f128:$vB,
-                                               f128:$vTi))]>;
+                                          (any_fma f128:$vA, f128:$vB, f128:$vTi))]>;
   def XSMSUBQP  : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp"  ,
                                      [(set f128:$vT,
-                                           (fma f128:$vA, f128:$vB,
-                                                (fneg f128:$vTi)))]>;
+                                           (any_fma f128:$vA, f128:$vB,
+                                                    (fneg f128:$vTi)))]>;
   def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
                                      [(set f128:$vT,
-                                           (fneg (fma f128:$vA, f128:$vB,
-                                                      f128:$vTi)))]>;
+                                           (fneg (any_fma f128:$vA, f128:$vB,
+                                                          f128:$vTi)))]>;
   def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
                                      [(set f128:$vT,
-                                           (fneg (fma f128:$vA, f128:$vB,
-                                                      (fneg f128:$vTi))))]>;
+                                           (fneg (any_fma f128:$vA, f128:$vB,
+                                                          (fneg f128:$vTi))))]>;
   let isCommutable = 1 in {
   def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo",
@@ -1429,6 +1427,7 @@
                                          [(set f128:$vT,
                                                (fneg (int_ppc_fmaf128_round_to_odd
                                                       f128:$vA, f128:$vB, (fneg f128:$vTi))))]>;
+  } // mayRaiseFPException
 
 // FIXME: Setting the hasSideEffects flag here to match current behaviour.
 // QP Compare Ordered/Unordered
@@ -2594,13 +2593,13 @@
 def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
           (XXSEL $vC, $vB, $vA)>;
 
-def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)),
+def : Pat<(v4f32 (any_fmaxnum v4f32:$src1, v4f32:$src2)),
           (v4f32 (XVMAXSP $src1, $src2))>;
-def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)),
+def : Pat<(v4f32 (any_fminnum v4f32:$src1, v4f32:$src2)),
          (v4f32 (XVMINSP $src1, $src2))>;
-def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)),
+def : Pat<(v2f64 (any_fmaxnum v2f64:$src1, v2f64:$src2)),
          (v2f64 (XVMAXDP $src1, $src2))>;
-def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)),
+def : Pat<(v2f64 (any_fminnum v2f64:$src1, v2f64:$src2)),
          (v2f64 (XVMINDP $src1, $src2))>;
 
 // f32 Min.
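A quick sketch of what these pattern changes buy (illustrative IR, not part of the patch; the function name is hypothetical, the declare matches the one added to fp-strict.ll below): the `any_fma`/`any_fsqrt`/`any_fmaxnum`/`any_fminnum` PatFrags match both the ordinary DAG node and its `STRICT_` counterpart, so once the strict opcodes are marked `Legal` above, the existing instruction patterns cover constrained intrinsics with no new instruction definitions. For example:

  declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)

  define double @fma_strict_sketch(double %a, double %b, double %c) {
    %r = call double @llvm.experimental.constrained.fma.f64(
                         double %a, double %b, double %c,
                         metadata !"round.dynamic",
                         metadata !"fpexcept.strict")
    ret double %r
  }

should now select directly to xsmaddadp (or fmadd without VSX) while keeping its strict exception semantics, rather than relying on the generic fallback handling; this is exactly what the new fmadd_f64 test below checks.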
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
--- a/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
@@ -6,6 +6,9 @@
 declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata)
 
+declare fp128 @llvm.experimental.constrained.fma.f128(fp128, fp128, fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata)
+
 define fp128 @fadd_f128(fp128 %f1, fp128 %f2) {
 ; CHECK-LABEL: fadd_f128:
 ; CHECK:       # %bb.0:
@@ -53,3 +56,72 @@
     metadata !"fpexcept.strict")
   ret fp128 %res
 }
+
+define fp128 @fmadd_f128(fp128 %f0, fp128 %f1, fp128 %f2) {
+; CHECK-LABEL: fmadd_f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xsmaddqp v4, v2, v3
+; CHECK-NEXT:    vmr v2, v4
+; CHECK-NEXT:    blr
+  %res = call fp128 @llvm.experimental.constrained.fma.f128(
+                        fp128 %f0, fp128 %f1, fp128 %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret fp128 %res
+}
+
+define fp128 @fmsub_f128(fp128 %f0, fp128 %f1, fp128 %f2) {
+; CHECK-LABEL: fmsub_f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xsmsubqp v4, v2, v3
+; CHECK-NEXT:    vmr v2, v4
+; CHECK-NEXT:    blr
+  %neg = fneg fp128 %f2
+  %res = call fp128 @llvm.experimental.constrained.fma.f128(
+                        fp128 %f0, fp128 %f1, fp128 %neg,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret fp128 %res
+}
+
+define fp128 @fnmadd_f128(fp128 %f0, fp128 %f1, fp128 %f2) {
+; CHECK-LABEL: fnmadd_f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xsnmaddqp v4, v2, v3
+; CHECK-NEXT:    vmr v2, v4
+; CHECK-NEXT:    blr
+  %fma = call fp128 @llvm.experimental.constrained.fma.f128(
+                        fp128 %f0, fp128 %f1, fp128 %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %res = fneg fp128 %fma
+  ret fp128 %res
+}
+
+define fp128 @fnmsub_f128(fp128 %f0, fp128 %f1, fp128 %f2) {
+; CHECK-LABEL: fnmsub_f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xsnmsubqp v4, v2, v3
+; CHECK-NEXT:    vmr v2, v4
+; CHECK-NEXT:    blr
+  %neg = fneg fp128 %f2
+  %fma = call fp128 @llvm.experimental.constrained.fma.f128(
+                        fp128 %f0, fp128 %f1, fp128 %neg,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %res = fneg fp128 %fma
+  ret fp128 %res
+}
+
+
+define fp128 @fsqrt_f128(fp128 %f1) {
+; CHECK-LABEL: fsqrt_f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xssqrtqp v2, v2
+; CHECK-NEXT:    blr
+  %res = call fp128 @llvm.experimental.constrained.sqrt.f128(
+                        fp128 %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret fp128 %res
+}
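One signature detail worth noting in the new test below: unlike the fma/sqrt intrinsics above, the constrained maxnum/minnum intrinsics carry only the exception-behavior metadata operand and no rounding-mode operand, because min/max results are exact and never round. A call therefore has the shape (illustrative operands; the declared signatures are the ones in the test):

  %m = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
                             <2 x double> %a, <2 x double> %b,
                             metadata !"fpexcept.strict")

which the new `any_fmaxnum`/`any_fminnum` patterns in PPCInstrVSX.td now select to xvmaxdp/xvmindp (and xvmaxsp/xvminsp for v4f32).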
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-minmax.ll b/llvm/test/CodeGen/PowerPC/fp-strict-minmax.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-minmax.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
+
+declare <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float>, <4 x float>, metadata)
+declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata)
+declare <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float>, <4 x float>, metadata)
+declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata)
+
+define <4 x float> @fmaxnum_v4f32(<4 x float> %vf0, <4 x float> %vf1) {
+; CHECK-LABEL: fmaxnum_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmaxsp v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = call <4 x float> @llvm.experimental.constrained.maxnum.v4f32(
+                        <4 x float> %vf0, <4 x float> %vf1,
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res
+}
+
+define <2 x double> @fmaxnum_v2f64(<2 x double> %vf0, <2 x double> %vf1) {
+; CHECK-LABEL: fmaxnum_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmaxdp v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
+                        <2 x double> %vf0, <2 x double> %vf1,
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res
+}
+
+
+define <4 x float> @fminnum_v4f32(<4 x float> %vf0, <4 x float> %vf1) {
+; CHECK-LABEL: fminnum_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvminsp v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = call <4 x float> @llvm.experimental.constrained.minnum.v4f32(
+                        <4 x float> %vf0, <4 x float> %vf1,
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res
+}
+
+define <2 x double> @fminnum_v2f64(<2 x double> %vf0, <2 x double> %vf1) {
+; CHECK-LABEL: fminnum_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmindp v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
+                        <2 x double> %vf0, <2 x double> %vf1,
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict.ll b/llvm/test/CodeGen/PowerPC/fp-strict.ll
--- a/llvm/test/CodeGen/PowerPC/fp-strict.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict.ll
@@ -23,6 +23,16 @@
 declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
 
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
+
+declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
+declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
+
 define float @fadd_f32(float %f1, float %f2) {
 ; CHECK-LABEL: fadd_f32:
 ; CHECK:       # %bb.0:
@@ -411,3 +421,513 @@
     metadata !"fpexcept.strict")
   ret double %add
 }
+
+define float @fmadd_f32(float %f0, float %f1, float %f2) {
+; CHECK-LABEL: fmadd_f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xsmaddasp f3, f1, f2
+; CHECK-NEXT:    fmr f1, f3
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fmadd_f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fmadds f1, f1, f2, f3
+; NOVSX-NEXT:    blr
+  %res = call float @llvm.experimental.constrained.fma.f32(
+                        float %f0, float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define double @fmadd_f64(double %f0, double %f1, double %f2) {
+; CHECK-LABEL: fmadd_f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xsmaddadp f3, f1, f2
+; CHECK-NEXT:    fmr f1, f3
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fmadd_f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fmadd f1, f1, f2, f3
+; NOVSX-NEXT:    blr
+  %res = call double @llvm.experimental.constrained.fma.f64(
+                        double %f0, double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define <4 x float> @fmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) {
+; CHECK-LABEL: fmadd_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmaddasp v4, v2, v3
+; CHECK-NEXT:    vmr v2, v4
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fmadd_v4f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addi r3, r1, -32
+; NOVSX-NEXT:    addi r4, r1, -48
+; NOVSX-NEXT:    stvx v4, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -64
+; NOVSX-NEXT:    stvx v3, 0, r4
+; NOVSX-NEXT:    stvx v2, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -16
+; NOVSX-NEXT:    lfs f0, -20(r1)
+; NOVSX-NEXT:    lfs f1, -36(r1)
+; NOVSX-NEXT:    lfs f2, -52(r1)
+; NOVSX-NEXT:    fmadds f0, f2, f1, f0
+; NOVSX-NEXT:    lfs f1, -40(r1)
+; NOVSX-NEXT:    lfs f2, -56(r1)
+; NOVSX-NEXT:    stfs f0, -4(r1)
+; NOVSX-NEXT:    lfs f0, -24(r1)
+; NOVSX-NEXT:    fmadds f0, f2, f1, f0
+; NOVSX-NEXT:    lfs f1, -44(r1)
+; NOVSX-NEXT:    lfs f2, -60(r1)
+; NOVSX-NEXT:    stfs f0, -8(r1)
+; NOVSX-NEXT:    lfs f0, -28(r1)
+; NOVSX-NEXT:    fmadds f0, f2, f1, f0
+; NOVSX-NEXT:    lfs f1, -48(r1)
+; NOVSX-NEXT:    lfs f2, -64(r1)
+; NOVSX-NEXT:    stfs f0, -12(r1)
+; NOVSX-NEXT:    lfs f0, -32(r1)
+; NOVSX-NEXT:    fmadds f0, f2, f1, f0
+; NOVSX-NEXT:    stfs f0, -16(r1)
+; NOVSX-NEXT:    lvx v2, 0, r3
+; NOVSX-NEXT:    blr
+  %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res
+}
+
+define <2 x double> @fmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) {
+; CHECK-LABEL: fmadd_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmaddadp v4, v2, v3
+; CHECK-NEXT:    vmr v2, v4
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fmadd_v2f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fmadd f2, f2, f4, f6
+; NOVSX-NEXT:    fmadd f1, f1, f3, f5
+; NOVSX-NEXT:    blr
+  %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res
+}
+
+define float @fmsub_f32(float %f0, float %f1, float %f2) {
+; CHECK-LABEL: fmsub_f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xsmsubasp f3, f1, f2
+; CHECK-NEXT:    fmr f1, f3
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fmsub_f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fmsubs f1, f1, f2, f3
+; NOVSX-NEXT:    blr
+  %neg = fneg float %f2
+  %res = call float @llvm.experimental.constrained.fma.f32(
+                        float %f0, float %f1, float %neg,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define double @fmsub_f64(double %f0, double %f1, double %f2) {
+; CHECK-LABEL: fmsub_f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xsmsubadp f3, f1, f2
+; CHECK-NEXT:    fmr f1, f3
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fmsub_f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fmsub f1, f1, f2, f3
+; NOVSX-NEXT:    blr
+  %neg = fneg double %f2
+  %res = call double @llvm.experimental.constrained.fma.f64(
+                        double %f0, double %f1, double %neg,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) {
+; CHECK-LABEL: fmsub_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmsubasp v4, v2, v3
+; CHECK-NEXT:    vmr v2, v4
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fmsub_v4f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    vspltisb v5, -1
+; NOVSX-NEXT:    addi r3, r1, -48
+; NOVSX-NEXT:    addi r4, r1, -64
+; NOVSX-NEXT:    stvx v3, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -32
+; NOVSX-NEXT:    stvx v2, 0, r4
+; NOVSX-NEXT:    vslw v5, v5, v5
+; NOVSX-NEXT:    vsubfp v4, v5, v4
+; NOVSX-NEXT:    stvx v4, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -16
+; NOVSX-NEXT:    lfs f0, -36(r1)
+; NOVSX-NEXT:    lfs f1, -52(r1)
+; NOVSX-NEXT:    lfs f2, -20(r1)
+; NOVSX-NEXT:    fmadds f0, f1, f0, f2
+; NOVSX-NEXT:    lfs f1, -56(r1)
+; NOVSX-NEXT:    lfs f2, -24(r1)
+; NOVSX-NEXT:    stfs f0, -4(r1)
+; NOVSX-NEXT:    lfs f0, -40(r1)
+; NOVSX-NEXT:    fmadds f0, f1, f0, f2
+; NOVSX-NEXT:    lfs f1, -60(r1)
+; NOVSX-NEXT:    lfs f2, -28(r1)
+; NOVSX-NEXT:    stfs f0, -8(r1)
+; NOVSX-NEXT:    lfs f0, -44(r1)
+; NOVSX-NEXT:    fmadds f0, f1, f0, f2
+; NOVSX-NEXT:    lfs f1, -64(r1)
+; NOVSX-NEXT:    lfs f2, -32(r1)
+; NOVSX-NEXT:    stfs f0, -12(r1)
+; NOVSX-NEXT:    lfs f0, -48(r1)
+; NOVSX-NEXT:    fmadds f0, f1, f0, f2
+; NOVSX-NEXT:    stfs f0, -16(r1)
+; NOVSX-NEXT:    lvx v2, 0, r3
+; NOVSX-NEXT:    blr
+  %neg = fneg <4 x float> %vf2
+  %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res
+}
+
+define <2 x double> @fmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) {
+; CHECK-LABEL: fmsub_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmsubadp v4, v2, v3
+; CHECK-NEXT:    vmr v2, v4
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fmsub_v2f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fmsub f2, f2, f4, f6
+; NOVSX-NEXT:    fmsub f1, f1, f3, f5
+; NOVSX-NEXT:    blr
+  %neg = fneg <2 x double> %vf2
+  %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res
+}
+
+define float @fnmadd_f32(float %f0, float %f1, float %f2) {
+; CHECK-LABEL: fnmadd_f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xsnmaddasp f3, f1, f2
+; CHECK-NEXT:    fmr f1, f3
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fnmadd_f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fnmadds f1, f1, f2, f3
+; NOVSX-NEXT:    blr
+  %fma = call float @llvm.experimental.constrained.fma.f32(
+                        float %f0, float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %res = fneg float %fma
+  ret float %res
+}
+
+define double @fnmadd_f64(double %f0, double %f1, double %f2) {
+; CHECK-LABEL: fnmadd_f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xsnmaddadp f3, f1, f2
+; CHECK-NEXT:    fmr f1, f3
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fnmadd_f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fnmadd f1, f1, f2, f3
+; NOVSX-NEXT:    blr
+  %fma = call double @llvm.experimental.constrained.fma.f64(
+                        double %f0, double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %res = fneg double %fma
+  ret double %res
+}
+
+define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) {
+; CHECK-LABEL: fnmadd_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmaddasp v4, v2, v3
+; CHECK-NEXT:    xvnegsp v2, v4
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fnmadd_v4f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addi r3, r1, -32
+; NOVSX-NEXT:    addi r4, r1, -48
+; NOVSX-NEXT:    stvx v4, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -64
+; NOVSX-NEXT:    stvx v3, 0, r4
+; NOVSX-NEXT:    stvx v2, 0, r3
+; NOVSX-NEXT:    vspltisb v2, -1
+; NOVSX-NEXT:    addi r3, r1, -16
+; NOVSX-NEXT:    lfs f0, -20(r1)
+; NOVSX-NEXT:    lfs f1, -36(r1)
+; NOVSX-NEXT:    lfs f2, -52(r1)
+; NOVSX-NEXT:    vslw v2, v2, v2
+; NOVSX-NEXT:    fmadds f0, f2, f1, f0
+; NOVSX-NEXT:    lfs f1, -40(r1)
+; NOVSX-NEXT:    lfs f2, -56(r1)
+; NOVSX-NEXT:    stfs f0, -4(r1)
+; NOVSX-NEXT:    lfs f0, -24(r1)
+; NOVSX-NEXT:    fmadds f0, f2, f1, f0
+; NOVSX-NEXT:    lfs f1, -44(r1)
+; NOVSX-NEXT:    lfs f2, -60(r1)
+; NOVSX-NEXT:    stfs f0, -8(r1)
+; NOVSX-NEXT:    lfs f0, -28(r1)
+; NOVSX-NEXT:    fmadds f0, f2, f1, f0
+; NOVSX-NEXT:    lfs f1, -48(r1)
+; NOVSX-NEXT:    lfs f2, -64(r1)
+; NOVSX-NEXT:    stfs f0, -12(r1)
+; NOVSX-NEXT:    lfs f0, -32(r1)
+; NOVSX-NEXT:    fmadds f0, f2, f1, f0
+; NOVSX-NEXT:    stfs f0, -16(r1)
+; NOVSX-NEXT:    lvx v3, 0, r3
+; NOVSX-NEXT:    vsubfp v2, v2, v3
+; NOVSX-NEXT:    blr
+  %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %res = fneg <4 x float> %fma
+  ret <4 x float> %res
+}
+
+define <2 x double> @fnmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) {
+; CHECK-LABEL: fnmadd_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvnmaddadp v4, v2, v3
+; CHECK-NEXT:    vmr v2, v4
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fnmadd_v2f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fnmadd f2, f2, f4, f6
+; NOVSX-NEXT:    fnmadd f1, f1, f3, f5
+; NOVSX-NEXT:    blr
+  %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %res = fneg <2 x double> %fma
+  ret <2 x double> %res
+}
+
+define float @fnmsub_f32(float %f0, float %f1, float %f2) {
+; CHECK-LABEL: fnmsub_f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xsnmsubasp f3, f1, f2
+; CHECK-NEXT:    fmr f1, f3
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fnmsub_f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fnmsubs f1, f1, f2, f3
+; NOVSX-NEXT:    blr
+  %neg = fneg float %f2
+  %fma = call float @llvm.experimental.constrained.fma.f32(
+                        float %f0, float %f1, float %neg,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %res = fneg float %fma
+  ret float %res
+}
+
+define double @fnmsub_f64(double %f0, double %f1, double %f2) {
+; CHECK-LABEL: fnmsub_f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xsnmsubadp f3, f1, f2
+; CHECK-NEXT:    fmr f1, f3
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fnmsub_f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fnmsub f1, f1, f2, f3
+; NOVSX-NEXT:    blr
+  %neg = fneg double %f2
+  %fma = call double @llvm.experimental.constrained.fma.f64(
+                        double %f0, double %f1, double %neg,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %res = fneg double %fma
+  ret double %res
+}
+
+define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) {
+; CHECK-LABEL: fnmsub_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvnmsubasp v4, v2, v3
+; CHECK-NEXT:    vmr v2, v4
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fnmsub_v4f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    vspltisb v5, -1
+; NOVSX-NEXT:    addi r3, r1, -48
+; NOVSX-NEXT:    addi r4, r1, -64
+; NOVSX-NEXT:    stvx v3, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -32
+; NOVSX-NEXT:    stvx v2, 0, r4
+; NOVSX-NEXT:    vslw v5, v5, v5
+; NOVSX-NEXT:    vsubfp v4, v5, v4
+; NOVSX-NEXT:    stvx v4, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -16
+; NOVSX-NEXT:    lfs f0, -36(r1)
+; NOVSX-NEXT:    lfs f1, -52(r1)
+; NOVSX-NEXT:    lfs f2, -20(r1)
+; NOVSX-NEXT:    fmadds f0, f1, f0, f2
+; NOVSX-NEXT:    lfs f1, -56(r1)
+; NOVSX-NEXT:    lfs f2, -24(r1)
+; NOVSX-NEXT:    stfs f0, -4(r1)
+; NOVSX-NEXT:    lfs f0, -40(r1)
+; NOVSX-NEXT:    fmadds f0, f1, f0, f2
+; NOVSX-NEXT:    lfs f1, -60(r1)
+; NOVSX-NEXT:    lfs f2, -28(r1)
+; NOVSX-NEXT:    stfs f0, -8(r1)
+; NOVSX-NEXT:    lfs f0, -44(r1)
+; NOVSX-NEXT:    fmadds f0, f1, f0, f2
+; NOVSX-NEXT:    lfs f1, -64(r1)
+; NOVSX-NEXT:    lfs f2, -32(r1)
+; NOVSX-NEXT:    stfs f0, -12(r1)
+; NOVSX-NEXT:    lfs f0, -48(r1)
+; NOVSX-NEXT:    fmadds f0, f1, f0, f2
+; NOVSX-NEXT:    stfs f0, -16(r1)
+; NOVSX-NEXT:    lvx v2, 0, r3
+; NOVSX-NEXT:    vsubfp v2, v5, v2
+; NOVSX-NEXT:    blr
+  %neg = fneg <4 x float> %vf2
+  %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %res = fneg <4 x float> %fma
+  ret <4 x float> %res
+}
+
+define <2 x double> @fnmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) {
+; CHECK-LABEL: fnmsub_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvnmsubadp v4, v2, v3
+; CHECK-NEXT:    vmr v2, v4
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fnmsub_v2f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fnmsub f2, f2, f4, f6
+; NOVSX-NEXT:    fnmsub f1, f1, f3, f5
+; NOVSX-NEXT:    blr
+  %neg = fneg <2 x double> %vf2
+  %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %res = fneg <2 x double> %fma
+  ret <2 x double> %res
+}
+
+define float @fsqrt_f32(float %f1) {
+; CHECK-LABEL: fsqrt_f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xssqrtsp f1, f1
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fsqrt_f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fsqrts f1, f1
+; NOVSX-NEXT:    blr
+  %res = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define double @fsqrt_f64(double %f1) {
+; CHECK-LABEL: fsqrt_f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xssqrtdp f1, f1
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fsqrt_f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fsqrt f1, f1
+; NOVSX-NEXT:    blr
+  %res = call double @llvm.experimental.constrained.sqrt.f64(
+                        double %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define <4 x float> @fsqrt_v4f32(<4 x float> %vf1) {
+; CHECK-LABEL: fsqrt_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvsqrtsp v2, v2
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fsqrt_v4f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addi r3, r1, -32
+; NOVSX-NEXT:    stvx v2, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -16
+; NOVSX-NEXT:    lfs f0, -20(r1)
+; NOVSX-NEXT:    fsqrts f0, f0
+; NOVSX-NEXT:    stfs f0, -4(r1)
+; NOVSX-NEXT:    lfs f0, -24(r1)
+; NOVSX-NEXT:    fsqrts f0, f0
+; NOVSX-NEXT:    stfs f0, -8(r1)
+; NOVSX-NEXT:    lfs f0, -28(r1)
+; NOVSX-NEXT:    fsqrts f0, f0
+; NOVSX-NEXT:    stfs f0, -12(r1)
+; NOVSX-NEXT:    lfs f0, -32(r1)
+; NOVSX-NEXT:    fsqrts f0, f0
+; NOVSX-NEXT:    stfs f0, -16(r1)
+; NOVSX-NEXT:    lvx v2, 0, r3
+; NOVSX-NEXT:    blr
+  %res = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
+                        <4 x float> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res
+}
+
+define <2 x double> @fsqrt_v2f64(<2 x double> %vf1) {
+; CHECK-LABEL: fsqrt_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvsqrtdp v2, v2
+; CHECK-NEXT:    blr
+;
+; NOVSX-LABEL: fsqrt_v2f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    fsqrt f2, f2
+; NOVSX-NEXT:    fsqrt f1, f1
+; NOVSX-NEXT:    blr
+  %res = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
+                        <2 x double> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res
+}
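The churn in vector-constrained-fp-intrinsics.ll below falls into two groups. In constrained_vector_sqrt_v4f64 the two xvsqrtdp results merely land in swapped registers (34/35), which looks like incidental register-assignment churn rather than a functional change. In the maxnum/minnum tests, STRICT_FMAXNUM/STRICT_FMINNUM on v2f64 now select xvmaxdp/xvmindp directly, so the fmax/fmin libcalls, their spill slots, and most of the stack-frame setup disappear; in the v3f64 cases one scalar libcall remains for the odd element, and the frame shrinks accordingly (e.g. stdu 1, -80(1) becomes stdu 1, -32(1)).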
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -1445,8 +1445,8 @@
 ; PC64LE-NEXT:    lxvd2x 1, 0, 4
 ; PC64LE-NEXT:    xxswapd 0, 0
 ; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvsqrtdp 34, 0
-; PC64LE-NEXT:    xvsqrtdp 35, 1
+; PC64LE-NEXT:    xvsqrtdp 35, 0
+; PC64LE-NEXT:    xvsqrtdp 34, 1
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sqrt_v4f64:
@@ -1456,9 +1456,9 @@
 ; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI29_1@toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI29_1@toc@l
-; PC64LE9-NEXT:    xvsqrtdp 34, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    xvsqrtdp 35, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvsqrtdp 34, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
@@ -5323,58 +5323,26 @@
 define <2 x double> @constrained_vector_maxnum_v2f64() #0 {
 ; PC64LE-LABEL: constrained_vector_maxnum_v2f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
 ; PC64LE-NEXT:    addis 3, 2, .LCPI86_0@toc@ha
 ; PC64LE-NEXT:    addis 4, 2, .LCPI86_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI86_0@toc@l(3)
-; PC64LE-NEXT:    lfs 2, .LCPI86_1@toc@l(4)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI86_3@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI86_2@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI86_3@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI86_2@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    addi 3, 3, .LCPI86_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI86_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvmaxdp 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_maxnum_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI86_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI86_0@toc@l(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI86_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI86_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI86_1@toc@l(3)
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI86_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI86_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI86_3@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI86_3@toc@l(3)
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    addi 3, 3, .LCPI86_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvmaxdp 34, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
@@ -5491,41 +5459,27 @@
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    addis 4, 2, .LCPI88_1@toc@ha
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    stdu 1, -32(1)
 ; PC64LE-NEXT:    addis 3, 2, .LCPI88_0@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI88_1@toc@l(4)
+; PC64LE-NEXT:    addis 4, 2, .LCPI88_1@toc@ha
 ; PC64LE-NEXT:    lfs 1, .LCPI88_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI88_1@toc@l(4)
 ; PC64LE-NEXT:    bl fmax
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI88_3@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI88_2@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI88_3@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI88_2@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    addis 4, 2, .LCPI88_5@toc@ha
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI88_4@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI88_5@toc@l(4)
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfs 1, .LCPI88_4@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 4, 2, .LCPI88_3@toc@ha
 ; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI88_2@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI88_3@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 2, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 2, 2
+; PC64LE-NEXT:    xvmaxdp 2, 2, 0
+; PC64LE-NEXT:    xxswapd 0, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    addi 1, 1, 32
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -5534,37 +5488,25 @@
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI88_0@toc@ha
 ; PC64LE9-NEXT:    lfs 1, .LCPI88_0@toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI88_1@toc@ha
 ; PC64LE9-NEXT:    lfs 2, .LCPI88_1@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    bl fmax
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI88_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI88_2@toc@l(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI88_2@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI88_3@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI88_3@toc@l(3)
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI88_4@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfs 1, .LCPI88_4@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI88_5@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI88_5@toc@l(3)
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addi 3, 3, .LCPI88_3@toc@l
 ; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvmaxdp 2, 1, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    addi 1, 1, 32
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -5579,102 +5521,42 @@
 define <4 x double> @constrained_vector_maxnum_v4f64() #0 {
 ; PC64LE-LABEL: constrained_vector_maxnum_v4f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    addis 4, 2, .LCPI89_1@toc@ha
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI89_0@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI89_1@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI89_0@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI89_3@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI89_2@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI89_3@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI89_2@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    addis 4, 2, .LCPI89_5@toc@ha
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI89_4@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI89_5@toc@l(4)
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfs 1, .LCPI89_4@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI89_7@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI89_6@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI89_7@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI89_6@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 2, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 35, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    addis 4, 2, .LCPI89_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI89_2@toc@ha
+; PC64LE-NEXT:    addis 6, 2, .LCPI89_3@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI89_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI89_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    addi 3, 5, .LCPI89_2@toc@l
+; PC64LE-NEXT:    addi 4, 6, .LCPI89_3@toc@l
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    lxvd2x 3, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xxswapd 2, 2
+; PC64LE-NEXT:    xxswapd 3, 3
+; PC64LE-NEXT:    xvmaxdp 34, 1, 0
+; PC64LE-NEXT:    xvmaxdp 35, 3, 2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_maxnum_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI89_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI89_0@toc@l(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI89_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI89_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI89_1@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addi 3, 3, .LCPI89_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI89_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI89_2@toc@l(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI89_2@toc@l
+; PC64LE9-NEXT:    xvmaxdp 34, 1, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI89_3@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI89_3@toc@l(3)
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI89_4@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfs 1, .LCPI89_4@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI89_5@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI89_5@toc@l(3)
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI89_6@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI89_6@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI89_7@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI89_7@toc@l(3)
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 2, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    addi 3, 3, .LCPI89_3@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvmaxdp 35, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
@@ -5732,58 +5614,26 @@
 define <2 x double> @constrained_vector_minnum_v2f64() #0 {
 ; PC64LE-LABEL: constrained_vector_minnum_v2f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
 ; PC64LE-NEXT:    addis 3, 2, .LCPI91_0@toc@ha
 ; PC64LE-NEXT:    addis 4, 2, .LCPI91_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI91_0@toc@l(3)
-; PC64LE-NEXT:    lfs 2, .LCPI91_1@toc@l(4)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI91_3@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI91_2@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI91_3@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI91_2@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    addi 3, 3, .LCPI91_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI91_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvmindp 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_minnum_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI91_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI91_0@toc@l(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI91_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI91_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI91_1@toc@l(3)
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI91_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI91_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI91_3@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI91_3@toc@l(3)
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    addi 3, 3, .LCPI91_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvmindp 34, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
@@ -5900,41 +5750,27 @@
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    addis 4, 2, .LCPI93_1@toc@ha
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    stdu 1, -32(1)
 ; PC64LE-NEXT:    addis 3, 2, .LCPI93_0@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI93_1@toc@l(4)
+; PC64LE-NEXT:    addis 4, 2, .LCPI93_1@toc@ha
 ; PC64LE-NEXT:    lfs 1, .LCPI93_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI93_1@toc@l(4)
 ; PC64LE-NEXT:    bl fmin
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI93_3@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI93_2@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI93_3@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI93_2@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    addis 4, 2, .LCPI93_5@toc@ha
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI93_4@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI93_5@toc@l(4)
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfs 1, .LCPI93_4@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 4, 2, .LCPI93_3@toc@ha
 ; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI93_2@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI93_3@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 2, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 2, 2
+; PC64LE-NEXT:    xvmindp 2, 2, 0
+; PC64LE-NEXT:    xxswapd 0, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    addi 1, 1, 32
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -5943,37 +5779,25 @@
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI93_0@toc@ha
 ; PC64LE9-NEXT:    lfs 1, .LCPI93_0@toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI93_1@toc@ha
 ; PC64LE9-NEXT:    lfs 2, .LCPI93_1@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    bl fmin
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI93_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI93_2@toc@l(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI93_2@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI93_3@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI93_3@toc@l(3)
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI93_4@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfs 1, .LCPI93_4@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI93_5@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI93_5@toc@l(3)
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addi 3, 3, .LCPI93_3@toc@l
 ; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvmindp 2, 1, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    addi 1, 1, 32
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -5988,102 +5812,42 @@
 define <4 x double> @constrained_vector_minnum_v4f64() #0 {
 ; PC64LE-LABEL: constrained_vector_minnum_v4f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    addis 4, 2, .LCPI94_1@toc@ha
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI94_0@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI94_1@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI94_0@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI94_3@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI94_2@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI94_3@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI94_2@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    addis 4, 2, .LCPI94_5@toc@ha
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI94_4@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI94_5@toc@l(4)
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfs 1, .LCPI94_4@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI94_7@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI94_6@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI94_7@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI94_6@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 2, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 35, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    addis 4, 2, .LCPI94_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI94_2@toc@ha
+; PC64LE-NEXT:    addis 6, 2, .LCPI94_3@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI94_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI94_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    addi 3, 5, .LCPI94_2@toc@l
+; PC64LE-NEXT:    addi 4, 6, .LCPI94_3@toc@l
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    lxvd2x 3, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xxswapd 2, 2
+; PC64LE-NEXT:    xxswapd 3, 3
+; PC64LE-NEXT:    xvmindp 34, 1, 0
+; PC64LE-NEXT:    xvmindp 35, 3, 2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_minnum_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI94_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI94_0@toc@l(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI94_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI94_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI94_1@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addi 3, 3, .LCPI94_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI94_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI94_2@toc@l(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI94_2@toc@l
+; PC64LE9-NEXT:    xvmindp 34, 1, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI94_3@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI94_3@toc@l(3)
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI94_4@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfs 1, .LCPI94_4@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI94_5@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI94_5@toc@l(3)
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI94_6@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI94_6@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI94_7@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI94_7@toc@l(3)
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 2, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    addi 3, 3, .LCPI94_3@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvmindp 35, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(