Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -274,11 +274,32 @@
     setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
     setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
     setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
+    if (Subtarget.hasFSQRT())
+      setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
+    if (Subtarget.hasFPRND()) {
+      setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
+      setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
+      setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
+      setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
+    }
     setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
     setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
     setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
     setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
+    if (Subtarget.hasFSQRT())
+      setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
+    if (Subtarget.hasVSX())
+      setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Legal);
+    if (Subtarget.hasFPRND()) {
+      setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
+      setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
+      setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
+      setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
+    }

     // We don't support sin/cos/sqrt/fmod/pow
     setOperationAction(ISD::FSIN , MVT::f64, Expand);
@@ -332,6 +353,10 @@
     setOperationAction(ISD::FROUND, MVT::f32, Legal);
   }

+  if (Subtarget.hasVSX()) {
+    setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+  }
+
   // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
   // to speed up scalar BSWAP64.
   // CTPOP or CTTZ were introduced in P8/P9 respectively
@@ -865,11 +890,29 @@
       setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
       setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
       setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
       setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
       setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
       setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
       setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
+      setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
+      setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+      setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
+      setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
+      setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
+      setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
+      setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
+      setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
+      setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);

       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
     }
@@ -933,6 +976,17 @@
       setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
       setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
       setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
+      setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
+      setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
+      setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
+      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
+      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
+      setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
+      setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
+      setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
+      setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
+      setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
+      setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
     }

     setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
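[Illustration, not part of the patch: with STRICT_FSQRT marked Legal above, a
constrained intrinsic like the one below can be selected directly to the
hardware square root (fsqrt, or xssqrtdp under VSX) instead of being expanded;
the function name strict_sqrt is made up for this sketch.]

; Minimal IR sketch of the kind of input these hooks legalize:
declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)

define double @strict_sqrt(double %a) {
  %r = call double @llvm.experimental.constrained.sqrt.f64(
                        double %a,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict")
  ret double %r
}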
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2383,7 +2383,7 @@
 def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB),
                       "ftsqrt $crD, $fB", IIC_FPCompare>;

-let Uses = [RM] in {
+let Uses = [RM], mayRaiseFPException = 1 in {
   let hasSideEffects = 0 in {
   defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
                          "fctiw", "$frD, $frB", IIC_FPGeneral,
@@ -2397,46 +2397,46 @@
   defm FRSP   : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
                           "frsp", "$frD, $frB", IIC_FPGeneral,
-                          [(set f32:$frD, (fpround f64:$frB))]>;
+                          [(set f32:$frD, (any_fpround f64:$frB))]>;

   let Interpretation64Bit = 1, isCodeGenOnly = 1 in
   defm FRIND  : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
                           "frin", "$frD, $frB", IIC_FPGeneral,
-                          [(set f64:$frD, (fround f64:$frB))]>;
+                          [(set f64:$frD, (any_fround f64:$frB))]>;
   defm FRINS  : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
                           "frin", "$frD, $frB", IIC_FPGeneral,
-                          [(set f32:$frD, (fround f32:$frB))]>;
+                          [(set f32:$frD, (any_fround f32:$frB))]>;
   }

   let hasSideEffects = 0 in {
   let Interpretation64Bit = 1, isCodeGenOnly = 1 in
   defm FRIPD  : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
                           "frip", "$frD, $frB", IIC_FPGeneral,
-                          [(set f64:$frD, (fceil f64:$frB))]>;
+                          [(set f64:$frD, (any_fceil f64:$frB))]>;
   defm FRIPS  : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB),
                           "frip", "$frD, $frB", IIC_FPGeneral,
-                          [(set f32:$frD, (fceil f32:$frB))]>;
+                          [(set f32:$frD, (any_fceil f32:$frB))]>;
   let Interpretation64Bit = 1, isCodeGenOnly = 1 in
   defm FRIZD  : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB),
                           "friz", "$frD, $frB", IIC_FPGeneral,
-                          [(set f64:$frD, (ftrunc f64:$frB))]>;
+                          [(set f64:$frD, (any_ftrunc f64:$frB))]>;
   defm FRIZS  : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB),
                           "friz", "$frD, $frB", IIC_FPGeneral,
-                          [(set f32:$frD, (ftrunc f32:$frB))]>;
+                          [(set f32:$frD, (any_ftrunc f32:$frB))]>;
   let Interpretation64Bit = 1, isCodeGenOnly = 1 in
   defm FRIMD  : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB),
                           "frim", "$frD, $frB", IIC_FPGeneral,
-                          [(set f64:$frD, (ffloor f64:$frB))]>;
+                          [(set f64:$frD, (any_ffloor f64:$frB))]>;
   defm FRIMS  : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB),
                           "frim", "$frD, $frB", IIC_FPGeneral,
-                          [(set f32:$frD, (ffloor f32:$frB))]>;
+                          [(set f32:$frD, (any_ffloor f32:$frB))]>;

   defm FSQRT  : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
                           "fsqrt", "$frD, $frB", IIC_FPSqrtD,
-                          [(set f64:$frD, (fsqrt f64:$frB))]>;
+                          [(set f64:$frD, (any_fsqrt f64:$frB))]>;
   defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
                           "fsqrts", "$frD, $frB", IIC_FPSqrtS,
-                          [(set f32:$frD, (fsqrt f32:$frB))]>;
+                          [(set f32:$frD, (any_fsqrt f32:$frB))]>;
   }
  }
 }
@@ -2861,45 +2861,45 @@
 // this type.
 //
 let PPC970_Unit = 3, hasSideEffects = 0, Predicates = [HasFPU] in {  // FPU Operations.
-let Uses = [RM] in {
+let Uses = [RM], mayRaiseFPException = 1 in {
 let isCommutable = 1 in {
   defm FMADD : AForm_1r<63, 29,
                       (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
                       "fmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
-                      [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
+                      [(set f64:$FRT, (any_fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
   defm FMADDS : AForm_1r<59, 29,
                       (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
                       "fmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
-                      [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
+                      [(set f32:$FRT, (any_fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
   defm FMSUB : AForm_1r<63, 28,
                       (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
                       "fmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
                       [(set f64:$FRT,
-                          (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
+                          (any_fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
   defm FMSUBS : AForm_1r<59, 28,
                       (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
                       "fmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
                       [(set f32:$FRT,
-                          (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
+                          (any_fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
   defm FNMADD : AForm_1r<63, 31,
                       (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
                       "fnmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
                       [(set f64:$FRT,
-                          (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
+                          (fneg (any_fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
   defm FNMADDS : AForm_1r<59, 31,
                       (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
                       "fnmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
                       [(set f32:$FRT,
-                          (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
+                          (fneg (any_fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
   defm FNMSUB : AForm_1r<63, 30,
                       (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
                       "fnmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
-                      [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
+                      [(set f64:$FRT, (fneg (any_fma f64:$FRA, f64:$FRC,
                                              (fneg f64:$FRB))))]>;
   defm FNMSUBS : AForm_1r<59, 30,
                       (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
                       "fnmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
-                      [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
+                      [(set f32:$FRT, (fneg (any_fma f32:$FRA, f32:$FRC,
                                              (fneg f32:$FRB))))]>;
 } // isCommutable
 }
@@ -3215,13 +3215,13 @@
 let Predicates = [HasFPU] in {
 // Additional FNMSUB patterns: -a*c + b == -(a*c - b)
-def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+def : Pat<(any_fma (fneg f64:$A), f64:$C, f64:$B),
           (FNMSUB $A, $C, $B)>;
-def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+def : Pat<(any_fma f64:$A, (fneg f64:$C), f64:$B),
           (FNMSUB $A, $C, $B)>;
-def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
+def : Pat<(any_fma (fneg f32:$A), f32:$C, f32:$B),
           (FNMSUBS $A, $C, $B)>;
-def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
+def : Pat<(any_fma f32:$A, (fneg f32:$C), f32:$B),
           (FNMSUBS $A, $C, $B)>;

 // FCOPYSIGN's operand types need not agree.
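[Illustration, not part of the patch: because the patterns above now match
any_fma, the -a*c + b == -(a*c - b) folding keeps firing for constrained FMA
as well; strict_fnmsub is a hypothetical name for this sketch.]

declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)

define double @strict_fnmsub(double %a, double %c, double %b) {
  ; (fneg %a) * %c + %b matches the FNMSUB pattern even in strict mode
  %na = fneg double %a
  %r = call double @llvm.experimental.constrained.fma.f64(
                        double %na, double %c, double %b,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict")
  ret double %r
}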
Index: llvm/lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -252,7 +252,7 @@
 def XSMADDADP : XX3Form<60, 33,
                         (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                         "xsmaddadp $XT, $XA, $XB", IIC_VecFP,
-                        [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>,
+                        [(set f64:$XT, (any_fma f64:$XA, f64:$XB, f64:$XTi))]>,
                         RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                         AltVSXFMARel;
 let IsVSXFMAAlt = 1 in
@@ -268,7 +268,7 @@
 def XSMSUBADP : XX3Form<60, 49,
                         (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                         "xsmsubadp $XT, $XA, $XB", IIC_VecFP,
-                        [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
+                        [(set f64:$XT, (any_fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
                         RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                         AltVSXFMARel;
 let IsVSXFMAAlt = 1 in
@@ -284,7 +284,7 @@
 def XSNMADDADP : XX3Form<60, 161,
                          (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                          "xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
-                         [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>,
+                         [(set f64:$XT, (fneg (any_fma f64:$XA, f64:$XB, f64:$XTi)))]>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
 let IsVSXFMAAlt = 1 in
@@ -300,7 +300,7 @@
 def XSNMSUBADP : XX3Form<60, 177,
                          (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
                          "xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
-                         [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
+                         [(set f64:$XT, (fneg (any_fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
 let IsVSXFMAAlt = 1 in
@@ -316,7 +316,7 @@
 def XVMADDADP : XX3Form<60, 97,
                         (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                         "xvmaddadp $XT, $XA, $XB", IIC_VecFP,
-                        [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
+                        [(set v2f64:$XT, (any_fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
                         RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                         AltVSXFMARel;
 let IsVSXFMAAlt = 1 in
@@ -332,7 +332,7 @@
 def XVMADDASP : XX3Form<60, 65,
                         (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                         "xvmaddasp $XT, $XA, $XB", IIC_VecFP,
-                        [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
+                        [(set v4f32:$XT, (any_fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
                         RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                         AltVSXFMARel;
 let IsVSXFMAAlt = 1 in
@@ -348,7 +348,7 @@
 def XVMSUBADP : XX3Form<60, 113,
                         (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                         "xvmsubadp $XT, $XA, $XB", IIC_VecFP,
-                        [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
+                        [(set v2f64:$XT, (any_fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
                         RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                         AltVSXFMARel;
 let IsVSXFMAAlt = 1 in
@@ -364,7 +364,7 @@
 def XVMSUBASP : XX3Form<60, 81,
                         (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                         "xvmsubasp $XT, $XA, $XB", IIC_VecFP,
-                        [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
+                        [(set v4f32:$XT, (any_fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
                         RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                         AltVSXFMARel;
 let IsVSXFMAAlt = 1 in
@@ -380,7 +380,7 @@
 def XVNMADDADP : XX3Form<60, 225,
                          (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                          "xvnmaddadp $XT, $XA, $XB", IIC_VecFP,
-                         [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
+                         [(set v2f64:$XT, (fneg (any_fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
 let IsVSXFMAAlt = 1 in
@@ -396,7 +396,7 @@
 def XVNMADDASP : XX3Form<60, 193,
                          (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                          "xvnmaddasp $XT, $XA, $XB", IIC_VecFP,
-                         [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>,
+                         [(set v4f32:$XT, (fneg (any_fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
 let IsVSXFMAAlt = 1 in
@@ -412,7 +412,7 @@
 def XVNMSUBADP : XX3Form<60, 241,
                          (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                          "xvnmsubadp $XT, $XA, $XB", IIC_VecFP,
-                         [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
+                         [(set v2f64:$XT, (fneg (any_fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
 let IsVSXFMAAlt = 1 in
@@ -428,7 +428,7 @@
 def XVNMSUBASP : XX3Form<60, 209,
                          (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
                          "xvnmsubasp $XT, $XA, $XB", IIC_VecFP,
-                         [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
+                         [(set v4f32:$XT, (fneg (any_fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
 let IsVSXFMAAlt = 1 in
@@ -447,7 +447,7 @@
 def XSSQRTDP : XX2Form<60, 75,
                        (outs vsfrc:$XT), (ins vsfrc:$XB),
                        "xssqrtdp $XT, $XB", IIC_FPSqrtD,
-                       [(set f64:$XT, (fsqrt f64:$XB))]>;
+                       [(set f64:$XT, (any_fsqrt f64:$XB))]>;

 def XSREDP : XX2Form<60, 90,
                      (outs vsfrc:$XT), (ins vsfrc:$XB),
@@ -477,11 +477,11 @@
 def XVSQRTDP : XX2Form<60, 203,
                        (outs vsrc:$XT), (ins vsrc:$XB),
                        "xvsqrtdp $XT, $XB", IIC_FPSqrtD,
-                       [(set v2f64:$XT, (fsqrt v2f64:$XB))]>;
+                       [(set v2f64:$XT, (any_fsqrt v2f64:$XB))]>;

 def XVSQRTSP : XX2Form<60, 139,
                        (outs vsrc:$XT), (ins vsrc:$XB),
                        "xvsqrtsp $XT, $XB", IIC_FPSqrtS,
-                       [(set v4f32:$XT, (fsqrt v4f32:$XB))]>;
+                       [(set v4f32:$XT, (any_fsqrt v4f32:$XB))]>;

 def XVTDIVDP : XX3Form_1<60, 125,
                          (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
@@ -725,65 +725,65 @@
 def XSRDPI : XX2Form<60, 73,
                      (outs vsfrc:$XT), (ins vsfrc:$XB),
                      "xsrdpi $XT, $XB", IIC_VecFP,
-                     [(set f64:$XT, (fround f64:$XB))]>;
+                     [(set f64:$XT, (any_fround f64:$XB))]>;
 def XSRDPIC : XX2Form<60, 107,
                       (outs vsfrc:$XT), (ins vsfrc:$XB),
                       "xsrdpic $XT, $XB", IIC_VecFP,
-                      [(set f64:$XT, (fnearbyint f64:$XB))]>;
+                      [(set f64:$XT, (any_fnearbyint f64:$XB))]>;
 def XSRDPIM : XX2Form<60, 121,
                       (outs vsfrc:$XT), (ins vsfrc:$XB),
                       "xsrdpim $XT, $XB", IIC_VecFP,
-                      [(set f64:$XT, (ffloor f64:$XB))]>;
+                      [(set f64:$XT, (any_ffloor f64:$XB))]>;
 def XSRDPIP : XX2Form<60, 105,
                       (outs vsfrc:$XT), (ins vsfrc:$XB),
                       "xsrdpip $XT, $XB", IIC_VecFP,
-                      [(set f64:$XT, (fceil f64:$XB))]>;
+                      [(set f64:$XT, (any_fceil f64:$XB))]>;
 def XSRDPIZ : XX2Form<60, 89,
                       (outs vsfrc:$XT), (ins vsfrc:$XB),
                       "xsrdpiz $XT, $XB", IIC_VecFP,
-                      [(set f64:$XT, (ftrunc f64:$XB))]>;
+                      [(set f64:$XT, (any_ftrunc f64:$XB))]>;

 def XVRDPI : XX2Form<60, 201,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvrdpi $XT, $XB", IIC_VecFP,
-                     [(set v2f64:$XT, (fround v2f64:$XB))]>;
+                     [(set v2f64:$XT, (any_fround v2f64:$XB))]>;
 def XVRDPIC : XX2Form<60, 235,
                       (outs vsrc:$XT), (ins vsrc:$XB),
                       "xvrdpic $XT, $XB", IIC_VecFP,
-                      [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
+                      [(set v2f64:$XT, (any_fnearbyint v2f64:$XB))]>;
 def XVRDPIM : XX2Form<60, 249,
                       (outs vsrc:$XT), (ins vsrc:$XB),
                       "xvrdpim $XT, $XB", IIC_VecFP,
-                      [(set v2f64:$XT, (ffloor v2f64:$XB))]>;
+                      [(set v2f64:$XT, (any_ffloor v2f64:$XB))]>;
 def XVRDPIP : XX2Form<60, 233,
                       (outs vsrc:$XT), (ins vsrc:$XB),
                       "xvrdpip $XT, $XB", IIC_VecFP,
-                      [(set v2f64:$XT, (fceil v2f64:$XB))]>;
+                      [(set v2f64:$XT, (any_fceil v2f64:$XB))]>;
 def XVRDPIZ : XX2Form<60, 217,
                       (outs vsrc:$XT), (ins vsrc:$XB),
                       "xvrdpiz $XT, $XB", IIC_VecFP,
-                      [(set v2f64:$XT, (ftrunc v2f64:$XB))]>;
+                      [(set v2f64:$XT, (any_ftrunc v2f64:$XB))]>;

 def XVRSPI : XX2Form<60, 137,
                      (outs vsrc:$XT), (ins vsrc:$XB),
                      "xvrspi $XT, $XB", IIC_VecFP,
-                     [(set v4f32:$XT, (fround v4f32:$XB))]>;
+                     [(set v4f32:$XT, (any_fround v4f32:$XB))]>;
 def XVRSPIC : XX2Form<60, 171,
                       (outs vsrc:$XT), (ins vsrc:$XB),
                       "xvrspic $XT, $XB", IIC_VecFP,
-                      [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
+                      [(set v4f32:$XT, (any_fnearbyint v4f32:$XB))]>;
 def XVRSPIM : XX2Form<60, 185,
                       (outs vsrc:$XT), (ins vsrc:$XB),
                       "xvrspim $XT, $XB", IIC_VecFP,
-                      [(set v4f32:$XT, (ffloor v4f32:$XB))]>;
+                      [(set v4f32:$XT, (any_ffloor v4f32:$XB))]>;
 def XVRSPIP : XX2Form<60, 169,
                       (outs vsrc:$XT), (ins vsrc:$XB),
                       "xvrspip $XT, $XB", IIC_VecFP,
-                      [(set v4f32:$XT, (fceil v4f32:$XB))]>;
+                      [(set v4f32:$XT, (any_fceil v4f32:$XB))]>;
 def XVRSPIZ : XX2Form<60, 153,
                       (outs vsrc:$XT), (ins vsrc:$XB),
                       "xvrspiz $XT, $XB", IIC_VecFP,
-                      [(set v4f32:$XT, (ftrunc v4f32:$XB))]>;
+                      [(set v4f32:$XT, (any_ftrunc v4f32:$XB))]>;

 // Max/Min Instructions
 let isCommutable = 1 in {
@@ -998,19 +998,19 @@
 }

 // Additional fnmsub patterns: -a*c + b == -(a*c - b)
-def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+def : Pat<(any_fma (fneg f64:$A), f64:$C, f64:$B),
           (XSNMSUBADP $B, $C, $A)>;
-def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+def : Pat<(any_fma f64:$A, (fneg f64:$C), f64:$B),
           (XSNMSUBADP $B, $C, $A)>;
-def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B),
+def : Pat<(any_fma (fneg v2f64:$A), v2f64:$C, v2f64:$B),
           (XVNMSUBADP $B, $C, $A)>;
-def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B),
+def : Pat<(any_fma v2f64:$A, (fneg v2f64:$C), v2f64:$B),
           (XVNMSUBADP $B, $C, $A)>;
-def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
+def : Pat<(any_fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
           (XVNMSUBASP $B, $C, $A)>;
-def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
+def : Pat<(any_fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
           (XVNMSUBASP $B, $C, $A)>;

 def : Pat<(v2f64 (bitconvert v4f32:$A)),
@@ -1193,13 +1193,13 @@
 def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
           (XXSEL $vC, $vB, $vA)>;

-def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)),
+def : Pat<(v4f32 (any_fmaxnum v4f32:$src1, v4f32:$src2)),
           (v4f32 (XVMAXSP $src1, $src2))>;
-def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)),
+def : Pat<(v4f32 (any_fminnum v4f32:$src1, v4f32:$src2)),
           (v4f32 (XVMINSP $src1, $src2))>;
-def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)),
+def : Pat<(v2f64 (any_fmaxnum v2f64:$src1, v2f64:$src2)),
           (v2f64 (XVMAXDP $src1, $src2))>;
-def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)),
+def : Pat<(v2f64 (any_fminnum v2f64:$src1, v2f64:$src2)),
           (v2f64 (XVMINDP $src1, $src2))>;

 let Predicates = [IsLittleEndian] in {
@@ -1386,18 +1386,19 @@
                        (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
                        "xsdivsp $XT, $XA, $XB", IIC_FPDivS,
                        [(set f32:$XT, (any_fdiv f32:$XA, f32:$XB))]>;
-  } // mayRaiseFPException
+
   def XSRESP : XX2Form<60, 26,
                        (outs vssrc:$XT), (ins vssrc:$XB),
                        "xsresp $XT, $XB", IIC_VecFP,
                        [(set f32:$XT, (PPCfre f32:$XB))]>;
   def XSRSP : XX2Form<60, 281,
                       (outs vssrc:$XT), (ins vsfrc:$XB),
-                      "xsrsp $XT, $XB", IIC_VecFP, []>;
+                      "xsrsp $XT, $XB", IIC_VecFP,
+                      [(set f32:$XT, (any_fpround f64:$XB))]>;
   def XSSQRTSP : XX2Form<60, 11,
                          (outs vssrc:$XT), (ins vssrc:$XB),
                          "xssqrtsp $XT, $XB", IIC_FPSqrtS,
-                         [(set f32:$XT, (fsqrt f32:$XB))]>;
+                         [(set f32:$XT, (any_fsqrt f32:$XB))]>;
   def XSRSQRTESP : XX2Form<60, 10,
                            (outs vssrc:$XT), (ins vssrc:$XB),
                            "xsrsqrtesp $XT, $XB", IIC_VecFP,
@@ -1410,7 +1411,7 @@
                         (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                         "xsmaddasp $XT, $XA, $XB", IIC_VecFP,
-                        [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>,
+                        [(set f32:$XT, (any_fma f32:$XA, f32:$XB, f32:$XTi))]>,
                         RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                         AltVSXFMARel;
 let IsVSXFMAAlt = 1 in
@@ -1428,7 +1429,7 @@
                         (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                         "xsmsubasp $XT, $XA, $XB", IIC_VecFP,
-                        [(set f32:$XT, (fma f32:$XA, f32:$XB,
+                        [(set f32:$XT, (any_fma f32:$XA, f32:$XB,
                                         (fneg f32:$XTi)))]>,
                         RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                         AltVSXFMARel;
@@ -1447,7 +1448,7 @@
                         (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                         "xsnmaddasp $XT, $XA, $XB", IIC_VecFP,
-                        [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+                        [(set f32:$XT, (fneg (any_fma f32:$XA, f32:$XB,
                                               f32:$XTi)))]>,
                         RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                         AltVSXFMARel;
@@ -1466,7 +1467,7 @@
                         (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                         "xsnmsubasp $XT, $XA, $XB", IIC_VecFP,
-                        [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+                        [(set f32:$XT, (fneg (any_fma f32:$XA, f32:$XB,
                                               (fneg f32:$XTi))))]>,
                         RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                         AltVSXFMARel;
@@ -1494,7 +1495,7 @@
                       "xscvdpspn $XT, $XB", IIC_VecFP, []>;
   def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
                           "xscvspdpn $XT, $XB", IIC_VecFP, []>;
-
+  } // mayRaiseFPException
   let Predicates = [IsLittleEndian] in {
   def : Pat;
@@ -2524,26 +2525,25 @@
                                 [(set f128:$vT, (any_fsub f128:$vA, f128:$vB))]>;
   def XSDIVQP  : X_VT5_VA5_VB5 <63, 548, "xsdivqp",
                                 [(set f128:$vT, (any_fdiv f128:$vA, f128:$vB))]>;
-  }

   // Square-Root
   def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp",
-                                [(set f128:$vT, (fsqrt f128:$vB))]>;
+                                [(set f128:$vT, (any_fsqrt f128:$vB))]>;

   // (Negative) Multiply-{Add/Subtract}
   def XSMADDQP  : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
                                      [(set f128:$vT,
-                                           (fma f128:$vA, f128:$vB,
+                                           (any_fma f128:$vA, f128:$vB,
                                                 f128:$vTi))]>;
   def XSMSUBQP  : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" ,
                                      [(set f128:$vT,
-                                           (fma f128:$vA, f128:$vB,
+                                           (any_fma f128:$vA, f128:$vB,
                                                 (fneg f128:$vTi)))]>;
   def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
                                      [(set f128:$vT,
-                                           (fneg (fma f128:$vA, f128:$vB,
+                                           (fneg (any_fma f128:$vA, f128:$vB,
                                                       f128:$vTi)))]>;
   def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
                                      [(set f128:$vT,
-                                           (fneg (fma f128:$vA, f128:$vB,
+                                           (fneg (any_fma f128:$vA, f128:$vB,
                                                       (fneg f128:$vTi))))]>;

   let isCommutable = 1 in {
@@ -2586,10 +2586,10 @@
                                            [(set f128:$vT,
                                                  (fneg (int_ppc_fmaf128_round_to_odd
                                                         f128:$vA, f128:$vB, (fneg f128:$vTi))))]>;
-
+  }
   // Additional fnmsub patterns: -a*c + b == -(a*c - b)
-  def : Pat<(fma (fneg f128:$A), f128:$C, f128:$B), (XSNMSUBQP $B, $C, $A)>;
-  def : Pat<(fma f128:$A, (fneg f128:$C), f128:$B), (XSNMSUBQP $B, $C, $A)>;
+  def : Pat<(any_fma (fneg f128:$A), f128:$C, f128:$B), (XSNMSUBQP $B, $C, $A)>;
+  def : Pat<(any_fma f128:$A, (fneg f128:$C), f128:$B), (XSNMSUBQP $B, $C, $A)>;

   //===--------------------------------------------------------------------===//
   // Quad/Double-Precision Compare Instructions:
@@ -2626,11 +2626,13 @@
   // Quad-Precision Floating-Point Conversion Instructions:

   // Convert DP -> QP
+  let mayRaiseFPException = 1 in
   def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc,
-                                    [(set f128:$vT, (fpextend f64:$vB))]>;
-
+                                    [(set f128:$vT, (any_fpextend f64:$vB))]>;
+  let mayRaiseFPException = 1 in {
   // Round & Convert QP -> DP (dword[1] is set to zero)
   def XSCVQPDP  : X_VT5_XO5_VB5_VSFR<63, 20, 836, "xscvqpdp" , []>;
+  }
   def XSCVQPDPO : X_VT5_XO5_VB5_VSFR_Ro<63, 20, 836, "xscvqpdpo",
                                         [(set f64:$vT,
                                         (int_ppc_truncf128_round_to_odd
@@ -2696,24 +2698,24 @@
                        !strconcat(opc, " $r, $vT, $vB, $rmc"), IIC_VecFP,
                        pattern> {
     let RC = ex;
   }
-
+  let mayRaiseFPException = 1 in {
   // Round to Quad-Precision Integer [with Inexact]
   def XSRQPI   : Z23_VT5_R1_VB5_RMC2_EX1<63,  5, 0, "xsrqpi" , []>;
   def XSRQPIX  : Z23_VT5_R1_VB5_RMC2_EX1<63,  5, 1, "xsrqpix", []>;
-
+  }
   // Use current rounding mode
-  def : Pat<(f128 (fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>;
+  def : Pat<(f128 (any_fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>;
   // Round to nearest, ties away from zero
-  def : Pat<(f128 (fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>;
+  def : Pat<(f128 (any_fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>;
   // Round towards Zero
-  def : Pat<(f128 (ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>;
+  def : Pat<(f128 (any_ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>;
   // Round towards +Inf
-  def : Pat<(f128 (fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>;
+  def : Pat<(f128 (any_fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>;
   // Round towards -Inf
-  def : Pat<(f128 (ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>;
+  def : Pat<(f128 (any_ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>;
   // Use current rounding mode, [with Inexact]
-  def : Pat<(f128 (frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>;
+  def : Pat<(f128 (any_frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>;

   // Round Quad-Precision to Double-Extended Precision (fp80)
   def XSRQPXP  : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>;
@@ -3611,11 +3613,11 @@
             (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;

   // Round & Convert QP -> DP/SP
-  def : Pat<(f64 (fpround f128:$src)), (f64 (XSCVQPDP $src))>;
-  def : Pat<(f32 (fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>;
+  def : Pat<(f64 (any_fpround f128:$src)), (f64 (XSCVQPDP $src))>;
+  def : Pat<(f32 (any_fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>;

   // Convert SP -> QP
-  def : Pat<(f128 (fpextend f32:$src)),
+  def : Pat<(f128 (any_fpextend f32:$src)),
             (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>;
 } // end HasP9Vector, AddedComplexity
@@ -3657,10 +3659,10 @@
 }

 def DblToFlt {
-  dag A0 = (f32 (fpround (f64 (extractelt v2f64:$A, 0))));
-  dag A1 = (f32 (fpround (f64 (extractelt v2f64:$A, 1))));
-  dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0))));
-  dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
+  dag A0 = (f32 (any_fpround (f64 (extractelt v2f64:$A, 0))));
+  dag A1 = (f32 (any_fpround (f64 (extractelt v2f64:$A, 1))));
+  dag B0 = (f32 (any_fpround (f64 (extractelt v2f64:$B, 0))));
+  dag B1 = (f32 (any_fpround (f64 (extractelt v2f64:$B, 1))));
 }

 def ExtDbl {
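[Illustration, not part of the patch: the patterns above keep the rint/nearbyint
distinction for f128 -- rint may raise the inexact exception, so it selects
XSRQPIX, while nearbyint must not, so it selects XSRQPI; strict_rint is a
hypothetical name for this sketch.]

declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)

define fp128 @strict_rint(fp128 %a) {
  ; expected to select: xsrqpix 0, v2, v2, 3 (current mode, record inexact)
  %r = call fp128 @llvm.experimental.constrained.rint.f128(
                       fp128 %a,
                       metadata !"round.dynamic",
                       metadata !"fpexcept.strict")
  ret fp128 %r
}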
Index: llvm/test/CodeGen/PowerPC/build-vector-tests.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -1584,6 +1584,7 @@
 ; P9BE-NEXT:    lfsux f0, r3, r4
 ; P9BE-NEXT:    lfs f1, 12(r3)
 ; P9BE-NEXT:    lfs f2, 4(r3)
+; P9BE-NEXT:    xsrsp f0, f0
 ; P9BE-NEXT:    xxmrghd vs1, vs2, vs1
 ; P9BE-NEXT:    xvcvdpsp v2, vs1
 ; P9BE-NEXT:    lfs f1, 8(r3)
@@ -1598,6 +1599,7 @@
 ; P9LE-NEXT:    sldi r4, r4, 2
 ; P9LE-NEXT:    lfsux f0, r3, r4
 ; P9LE-NEXT:    lfs f1, 8(r3)
+; P9LE-NEXT:    xsrsp f0, f0
 ; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
 ; P9LE-NEXT:    lfs f1, 12(r3)
 ; P9LE-NEXT:    xvcvdpsp v2, vs0
@@ -1612,6 +1614,7 @@
 ; P8BE: # %bb.0: # %entry
 ; P8BE-NEXT:    sldi r4, r4, 2
 ; P8BE-NEXT:    lfsux f0, r3, r4
+; P8BE-NEXT:    xsrsp f0, f0
 ; P8BE-NEXT:    lfs f1, 12(r3)
 ; P8BE-NEXT:    lfs f2, 4(r3)
 ; P8BE-NEXT:    lfs f3, 8(r3)
@@ -1627,6 +1630,7 @@
 ; P8LE: # %bb.0: # %entry
 ; P8LE-NEXT:    sldi r4, r4, 2
 ; P8LE-NEXT:    lfsux f0, r3, r4
+; P8LE-NEXT:    xsrsp f0, f0
 ; P8LE-NEXT:    lfs f1, 8(r3)
 ; P8LE-NEXT:    lfs f2, 4(r3)
 ; P8LE-NEXT:    lfs f3, 12(r3)
@@ -1673,6 +1677,7 @@
 ; P9BE-NEXT:    lfsux f0, r3, r4
 ; P9BE-NEXT:    lfs f1, -12(r3)
 ; P9BE-NEXT:    lfs f2, -4(r3)
+; P9BE-NEXT:    xsrsp f0, f0
 ; P9BE-NEXT:    xxmrghd vs1, vs2, vs1
 ; P9BE-NEXT:    xvcvdpsp v2, vs1
 ; P9BE-NEXT:    lfs f1, -8(r3)
@@ -1687,6 +1692,7 @@
 ; P9LE-NEXT:    sldi r4, r4, 2
 ; P9LE-NEXT:    lfsux f0, r3, r4
 ; P9LE-NEXT:    lfs f1, -8(r3)
+; P9LE-NEXT:    xsrsp f0, f0
 ; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
 ; P9LE-NEXT:    lfs f1, -12(r3)
 ; P9LE-NEXT:    xvcvdpsp v2, vs0
@@ -1701,6 +1707,7 @@
 ; P8BE: # %bb.0: # %entry
 ; P8BE-NEXT:    sldi r4, r4, 2
 ; P8BE-NEXT:    lfsux f0, r3, r4
+; P8BE-NEXT:    xsrsp f0, f0
 ; P8BE-NEXT:    lfs f1, -12(r3)
 ; P8BE-NEXT:    lfs f2, -4(r3)
 ; P8BE-NEXT:    lfs f3, -8(r3)
@@ -1716,6 +1723,7 @@
 ; P8LE: # %bb.0: # %entry
 ; P8LE-NEXT:    sldi r4, r4, 2
 ; P8LE-NEXT:    lfsux f0, r3, r4
+; P8LE-NEXT:    xsrsp f0, f0
 ; P8LE-NEXT:    lfs f1, -8(r3)
 ; P8LE-NEXT:    lfs f2, -4(r3)
 ; P8LE-NEXT:    lfs f3, -12(r3)
@@ -3168,6 +3176,7 @@
 ; P9BE-NEXT:    lfsux f0, r3, r4
 ; P9BE-NEXT:    lfs f1, 12(r3)
 ; P9BE-NEXT:    lfs f2, 4(r3)
+; P9BE-NEXT:    xsrsp f0, f0
 ; P9BE-NEXT:    xxmrghd vs1, vs2, vs1
 ; P9BE-NEXT:    xvcvdpsp v2, vs1
 ; P9BE-NEXT:    lfs f1, 8(r3)
@@ -3182,6 +3191,7 @@
 ; P9LE-NEXT:    sldi r4, r4, 2
 ; P9LE-NEXT:    lfsux f0, r3, r4
 ; P9LE-NEXT:    lfs f1, 8(r3)
+; P9LE-NEXT:    xsrsp f0, f0
 ; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
 ; P9LE-NEXT:    lfs f1, 12(r3)
 ; P9LE-NEXT:    xvcvdpsp v2, vs0
@@ -3196,6 +3206,7 @@
 ; P8BE: # %bb.0: # %entry
 ; P8BE-NEXT:    sldi r4, r4, 2
 ; P8BE-NEXT:    lfsux f0, r3, r4
+; P8BE-NEXT:    xsrsp f0, f0
 ; P8BE-NEXT:    lfs f1, 12(r3)
 ; P8BE-NEXT:    lfs f2, 4(r3)
 ; P8BE-NEXT:    lfs f3, 8(r3)
@@ -3211,6 +3222,7 @@
 ; P8LE: # %bb.0: # %entry
 ; P8LE-NEXT:    sldi r4, r4, 2
 ; P8LE-NEXT:    lfsux f0, r3, r4
+; P8LE-NEXT:    xsrsp f0, f0
 ; P8LE-NEXT:    lfs f1, 8(r3)
 ; P8LE-NEXT:    lfs f2, 4(r3)
 ; P8LE-NEXT:    lfs f3, 12(r3)
@@ -3257,6 +3269,7 @@
 ; P9BE-NEXT:    lfsux f0, r3, r4
 ; P9BE-NEXT:    lfs f1, -12(r3)
 ; P9BE-NEXT:    lfs f2, -4(r3)
+; P9BE-NEXT:    xsrsp f0, f0
 ; P9BE-NEXT:    xxmrghd vs1, vs2, vs1
 ; P9BE-NEXT:    xvcvdpsp v2, vs1
 ; P9BE-NEXT:    lfs f1, -8(r3)
@@ -3271,6 +3284,7 @@
 ; P9LE-NEXT:    sldi r4, r4, 2
 ; P9LE-NEXT:    lfsux f0, r3, r4
 ; P9LE-NEXT:    lfs f1, -8(r3)
+; P9LE-NEXT:    xsrsp f0, f0
 ; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
 ; P9LE-NEXT:    lfs f1, -12(r3)
 ; P9LE-NEXT:    xvcvdpsp v2, vs0
@@ -3285,6 +3299,7 @@
 ; P8BE: # %bb.0: # %entry
 ; P8BE-NEXT:    sldi r4, r4, 2
 ; P8BE-NEXT:    lfsux f0, r3, r4
+; P8BE-NEXT:    xsrsp f0, f0
 ; P8BE-NEXT:    lfs f1, -12(r3)
 ; P8BE-NEXT:    lfs f2, -4(r3)
 ; P8BE-NEXT:    lfs f3, -8(r3)
@@ -3300,6 +3315,7 @@
 ; P8LE: # %bb.0: # %entry
 ; P8LE-NEXT:    sldi r4, r4, 2
 ; P8LE-NEXT:    lfsux f0, r3, r4
+; P8LE-NEXT:    xsrsp f0, f0
 ; P8LE-NEXT:    lfs f1, -8(r3)
 ; P8LE-NEXT:    lfs f2, -4(r3)
 ; P8LE-NEXT:    lfs f3, -12(r3)
Index: llvm/test/CodeGen/PowerPC/fp-strict-fceil-f128.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-fceil-f128.ll
@@ -0,0 +1,16 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 -enable-ppc-quad-precision=true | FileCheck %s
+
+
+declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata, metadata)
+
+define fp128 @f1(fp128 %f1) {
+; CHECK-LABEL: f1:
+; CHECK: xsrqpi 1, v2, v2, 2
+; CHECK-NEXT: blr
+  %res = call fp128 @llvm.experimental.constrained.ceil.f128(
+                        fp128 %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret fp128 %res;
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-fceil.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-fceil.ll
@@ -0,0 +1,62 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX
+
+declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata)
+declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
+
+define float @f1(float %f1) {
+; CHECK-LABEL: f1:
+; CHECK: frip f1, f1
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f1:
+; NOVSX: frip f1, f1
+; NOVSX-NEXT: blr
+  %res = call float @llvm.experimental.constrained.ceil.f32(
+                        float %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res;
+}
+
+define double @f2(double %f1) {
+; CHECK-LABEL: f2:
+; CHECK: xsrdpip f1, f1
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f2:
+; NOVSX: frip f1, f1
+; NOVSX-NEXT: blr
+  %res = call double @llvm.experimental.constrained.ceil.f64(
+                        double %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res;
+}
+
+define <4 x float> @f3(<4 x float> %vf1) {
+; CHECK-LABEL: f3:
+; CHECK: xvrspip v2, v2
+; CHECK-NEXT: blr
+  %res = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(
+                        <4 x float> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res;
+}
+
+define <2 x double> @f4(<2 x double> %vf1) {
+; CHECK-LABEL: f4:
+; CHECK: xvrdpip v2, v2
+; CHECK-NEXT: blr
+  %res = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
+                        <2 x double> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res;
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-ffloor-f128.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-ffloor-f128.ll
@@ -0,0 +1,16 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 -enable-ppc-quad-precision=true | FileCheck %s
+
+
+declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata, metadata)
+
+define fp128 @f1(fp128 %f1) {
+; CHECK-LABEL: f1:
+; CHECK: xsrqpi 1, v2, v2, 3
+; CHECK-NEXT: blr
+  %res = call fp128 @llvm.experimental.constrained.floor.f128(
+                        fp128 %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret fp128 %res;
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-ffloor.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-ffloor.ll
@@ -0,0 +1,62 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX
+
+declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata)
+declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
+
+define float @f1(float %f1) {
+; CHECK-LABEL: f1:
+; CHECK: frim f1, f1
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f1:
+; NOVSX: frim f1, f1
+; NOVSX-NEXT: blr
+  %res = call float @llvm.experimental.constrained.floor.f32(
+                        float %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res;
+}
+
+define double @f2(double %f1) {
+; CHECK-LABEL: f2:
+; CHECK: xsrdpim f1, f1
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f2:
+; NOVSX: frim f1, f1
+; NOVSX-NEXT: blr
+  %res = call double @llvm.experimental.constrained.floor.f64(
+                        double %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res;
+}
+
+define <4 x float> @f3(<4 x float> %vf1) {
+; CHECK-LABEL: f3:
+; CHECK: xvrspim v2, v2
+; CHECK-NEXT: blr
+  %res = call <4 x float> @llvm.experimental.constrained.floor.v4f32(
+                        <4 x float> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res;
+}
+
+define <2 x double> @f4(<2 x double> %vf1) {
+; CHECK-LABEL: f4:
+; CHECK: xvrdpim v2, v2
+; CHECK-NEXT: blr
+  %res = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
+                        <2 x double> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res;
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-fma-f128.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-fma-f128.ll
@@ -0,0 +1,57 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 -enable-ppc-quad-precision=true | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fma.f128(fp128, fp128, fp128, metadata, metadata)
+
+define fp128 @f4(fp128 %f0, fp128 %f1, fp128 %f2) {
+; CHECK-LABEL: f4:
+; CHECK: xsmaddqp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+  %res = call fp128 @llvm.experimental.constrained.fma.f128(
+                        fp128 %f0, fp128 %f1, fp128 %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret fp128 %res;
+}
+
+define fp128 @f4_1(fp128 %f0, fp128 %f1, fp128 %f2) {
+; CHECK-LABEL: f4_1:
+; CHECK: xsmsubqp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+  %f2_1 = fneg fp128 %f2
+  %res = call fp128 @llvm.experimental.constrained.fma.f128(
+                        fp128 %f0, fp128 %f1, fp128 %f2_1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret fp128 %res;
+}
+
+define fp128 @f4_2(fp128 %f0, fp128 %f1, fp128 %f2) {
+; CHECK-LABEL: f4_2:
+; CHECK: xsnmaddqp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+  %f3 = call fp128 @llvm.experimental.constrained.fma.f128(
+                        fp128 %f0, fp128 %f1, fp128 %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %f4 = fneg fp128 %f3
+  ret fp128 %f4;
+}
+
+define fp128 @f4_3(fp128 %f0, fp128 %f1, fp128 %f2) {
+; CHECK-LABEL: f4_3:
+; CHECK: xsnmsubqp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+  %f2_1 = fneg fp128 %f2
+  %f3 = call fp128 @llvm.experimental.constrained.fma.f128(
+                        fp128 %f0, fp128 %f1, fp128 %f2_1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %f4 = fneg fp128 %f3
+  ret fp128 %f4;
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-fma.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-fma.ll
@@ -0,0 +1,248 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX
+
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
+
+define float @f1(float %f0, float %f1, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK: xsmaddasp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f1:
+; NOVSX: fmadds f1, f1, f2, f3
+; NOVSX-NEXT: blr
+  %res = call float @llvm.experimental.constrained.fma.f32(
+                        float %f0, float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res;
+}
+
+define double @f2(double %f0, double %f1, double %f2) {
+; CHECK-LABEL: f2:
+; CHECK: xsmaddadp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f2:
+; NOVSX: fmadd f1, f1, f2, f3
+; NOVSX-NEXT: blr
+  %res = call double @llvm.experimental.constrained.fma.f64(
+                        double %f0, double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res;
+}
+
+define <4 x float> @f3(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) {
+; CHECK-LABEL: f3:
+; CHECK: xvmaddasp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+  %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res;
+}
+
+define <2 x double> @f4(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) {
+; CHECK-LABEL: f4:
+; CHECK: xvmaddadp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+  %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res;
+}
+
+define float @f1_1(float %f0, float %f1, float %f2) {
+; CHECK-LABEL: f1_1:
+; CHECK: xsmsubasp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f1_1:
+; NOVSX: fmsubs f1, f1, f2, f3
+; NOVSX-NEXT: blr
+  %f2_1 = fneg float %f2
+  %res = call float @llvm.experimental.constrained.fma.f32(
+                        float %f0, float %f1, float %f2_1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res;
+}
+
+define double @f2_1(double %f0, double %f1, double %f2) {
+; CHECK-LABEL: f2_1:
+; CHECK: xsmsubadp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f2_1:
+; NOVSX: fmsub f1, f1, f2, f3
+; NOVSX-NEXT: blr
+  %f2_1 = fneg double %f2
+  %res = call double @llvm.experimental.constrained.fma.f64(
+                        double %f0, double %f1, double %f2_1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res;
+}
+
+define <4 x float> @f3_1(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) {
+; CHECK-LABEL: f3_1:
+; CHECK: xvmsubasp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+  %vf2_1 = fneg <4 x float> %vf2
+  %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2_1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res;
+}
+
+define <2 x double> @f4_1(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) {
+; CHECK-LABEL: f4_1:
+; CHECK: xvmsubadp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+  %vf2_1 = fneg <2 x double> %vf2
+  %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2_1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res;
+}
+
+define float @f1_2(float %f0, float %f1, float %f2) {
+; CHECK-LABEL: f1_2:
+; CHECK: xsnmaddasp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f1_2:
+; NOVSX: fnmadds f1, f1, f2, f3
+; NOVSX-NEXT: blr
+  %f3 = call float @llvm.experimental.constrained.fma.f32(
+                        float %f0, float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %f4 = fneg float %f3
+  ret float %f4
+}
+
+define double @f2_2(double %f0, double %f1, double %f2) {
+; CHECK-LABEL: f2_2:
+; CHECK: xsnmaddadp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f2_2:
+; NOVSX: fnmadd f1, f1, f2, f3
+; NOVSX-NEXT: blr
+  %f3 = call double @llvm.experimental.constrained.fma.f64(
+                        double %f0, double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %f4 = fneg double %f3
+  ret double %f4;
+}
+
+define <4 x float> @f3_2(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) {
+; CHECK-LABEL: f3_2:
+; CHECK: xvnmaddasp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+  %vf3 = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %vf4 = fneg <4 x float> %vf3
+  ret <4 x float> %vf4;
+}
+
+define <2 x double> @f4_2(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) {
+; CHECK-LABEL: f4_2:
+; CHECK: xvnmaddadp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+  %vf3 = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %vf4 = fneg <2 x double> %vf3
+  ret <2 x double> %vf4;
+}
+
+define float @f1_3(float %f0, float %f1, float %f2) {
+; CHECK-LABEL: f1_3:
+; CHECK: xsnmsubasp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f1_3:
+; NOVSX: fnmsubs f1, f1, f2, f3
+; NOVSX-NEXT: blr
+  %f2_1 = fneg float %f2
+  %f3 = call float @llvm.experimental.constrained.fma.f32(
+                        float %f0, float %f1, float %f2_1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %f4 = fneg float %f3
+  ret float %f4
+}
+
+define double @f2_3(double %f0, double %f1, double %f2) {
+; CHECK-LABEL: f2_3:
+; CHECK: xsnmsubadp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f2_3:
+; NOVSX: fnmsub f1, f1, f2, f3
+; NOVSX-NEXT: blr
+  %f2_1 = fneg double %f2
+  %f3 = call double @llvm.experimental.constrained.fma.f64(
+                        double %f0, double %f1, double %f2_1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %f4 = fneg double %f3
+  ret double %f4;
+}
+
+define <4 x float> @f3_3(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) {
+; CHECK-LABEL: f3_3:
+; CHECK: xvnmsubasp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+  %vf2_1 = fneg <4 x float> %vf2
+  %vf3 = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2_1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %vf4 = fneg <4 x float> %vf3
+  ret <4 x float> %vf4;
+}
+
+define <2 x double> @f4_3(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) {
+; CHECK-LABEL: f4_3:
+; CHECK: xvnmsubadp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+  %vf2_1 = fneg <2 x double> %vf2
+  %vf3 = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2_1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %vf4 = fneg <2 x double> %vf3
+  ret <2 x double> %vf4;
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-fmaxnum.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-fmaxnum.ll
@@ -0,0 +1,29 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
+
+declare <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+define <4 x float> @f3(<4 x float> %vf0, <4 x float> %vf1) {
+; CHECK-LABEL: f3:
+; CHECK: xvmaxsp v2, v2, v3
+; CHECK-NEXT: blr
+  %res = call <4 x float> @llvm.experimental.constrained.maxnum.v4f32(
+                        <4 x float> %vf0, <4 x float> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res;
+}
+
+define <2 x double> @f4(<2 x double> %vf0, <2 x double> %vf1) {
+; CHECK-LABEL: f4:
+; CHECK: xvmaxdp v2, v2, v3
+; CHECK-NEXT: blr
+  %res = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
+                        <2 x double> %vf0, <2 x double> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res;
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-fminnum.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-fminnum.ll
@@ -0,0 +1,29 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
+
+declare <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+define <4 x float> @f3(<4 x float> %vf0, <4 x float> %vf1) {
+; CHECK-LABEL: f3:
+; CHECK: xvminsp v2, v2, v3
+; CHECK-NEXT: blr
+  %res = call <4 x float> @llvm.experimental.constrained.minnum.v4f32(
+                        <4 x float> %vf0, <4 x float> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res;
+}
+
+define <2 x double> @f4(<2 x double> %vf0, <2 x double> %vf1) {
+; CHECK-LABEL: f4:
+; CHECK: xvmindp v2, v2, v3
+; CHECK-NEXT: blr
+  %res = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
+                        <2 x double> %vf0, <2 x double> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res;
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-fnearbyint-f128.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-fnearbyint-f128.ll
@@ -0,0 +1,16 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 -enable-ppc-quad-precision=true | FileCheck %s
+
+
+declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
+
+define fp128 @f1(fp128 %f1) {
+; CHECK-LABEL: f1:
+; CHECK: xsrqpi 0, v2, v2, 3
+; CHECK-NEXT: blr
+  %res = call fp128 @llvm.experimental.constrained.nearbyint.f128(
+                        fp128 %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret fp128 %res;
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-fnearbyint.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-fnearbyint.ll
@@ -0,0 +1,42 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
+
+declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
+
+define double @f2(double %f1, double %f2) {
+; CHECK-LABEL: f2:
+; CHECK: xsrdpic f1, f1
+; CHECK-NEXT: blr
+  %res = call double @llvm.experimental.constrained.nearbyint.f64(
+                        double %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define <4 x float> @f3(<4 x float> %vf1, <4 x float> %vf2) {
+; CHECK-LABEL: f3:
+; CHECK: xvrspic v2, v2
+; CHECK-NEXT: blr
+  %res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(
+                        <4 x float> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res
+}
+
+define <2 x double> @f4(<2 x double> %vf1, <2 x double> %vf2) {
+; CHECK-LABEL: f4:
+; CHECK: xvrdpic v2, v2
+; CHECK-NEXT: blr
+  %res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
+                        <2 x double> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-fpext-f128.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-fpext-f128.ll
@@ -0,0 +1,27 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 -enable-ppc-quad-precision=true | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata)
+declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata)
+
+define fp128 @f1(float %f1) {
+; CHECK-LABEL: f1:
+; CHECK: xscpsgndp v2, f1, f1
+; CHECK-NEXT: xscvdpqp v2, v2
+; CHECK-NEXT: blr
+  %res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(
+                        float %f1,
+                        metadata !"fpexcept.strict")
+  ret fp128 %res;
+}
+
+define fp128 @f2(double %f1) {
+; CHECK-LABEL: f2:
+; CHECK: xscpsgndp v2, f1, f1
+; CHECK-NEXT: xscvdpqp v2, v2
+; CHECK-NEXT: blr
+  %res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(
+                        double %f1,
+                        metadata !"fpexcept.strict")
+  ret fp128 %res;
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-fpext.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-fpext.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s -check-prefix=PWR9
+
+declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
+
+define <4 x double> @f1(<4 x float> %vf1) {
+; CHECK-LABEL: f1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-NEXT:    xxswapd vs3, v2
+; CHECK-NEXT:    xscvspdpn f2, v2
+; CHECK-NEXT:    xscvspdpn f0, vs0
+; CHECK-NEXT:    xscvspdpn f1, vs1
+; CHECK-NEXT:    xscvspdpn f3, vs3
+; CHECK-NEXT:    xxmrghd v2, vs2, vs0
+; CHECK-NEXT:    xxmrghd v3, vs3, vs1
+; CHECK-NEXT:    blr
+;
+; PWR9-LABEL: f1:
+; PWR9:       # %bb.0:
+; PWR9-NEXT:    xxsldwi vs0, v2, v2, 3
+; PWR9-NEXT:    xxswapd vs1, v2
+; PWR9-NEXT:    xscvspdpn f0, vs0
+; PWR9-NEXT:    xscvspdpn f1, vs1
+; PWR9-NEXT:    xxsldwi vs2, v2, v2, 1
+; PWR9-NEXT:    xscvspdpn f2, vs2
+; PWR9-NEXT:    xxmrghd vs0, vs1, vs0
+; PWR9-NEXT:    xscvspdpn f1, v2
+; PWR9-NEXT:    xxmrghd v3, vs1, vs2
+; PWR9-NEXT:    xxlor v2, vs0, vs0
+; PWR9-NEXT:    blr
+  %res = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
+                        <4 x float> %vf1,
+                        metadata !"fpexcept.strict")
+  ret <4 x double> %res;
+}
+
+define <2 x double> @f2(<2 x float> %vf1) {
+; CHECK-LABEL: f2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-NEXT:    xscvspdpn f1, v2
+; CHECK-NEXT:    xscvspdpn f0, vs0
+; CHECK-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-NEXT:    blr
+;
+; PWR9-LABEL: f2:
+; PWR9:       # %bb.0:
+; PWR9-NEXT:    xxsldwi vs0, v2, v2, 3
+; PWR9-NEXT:    xxswapd vs1, v2
+; PWR9-NEXT:    xscvspdpn f0, vs0
+; PWR9-NEXT:    xscvspdpn f1, vs1
+; PWR9-NEXT:    xxmrghd v2, vs1, vs0
+; PWR9-NEXT:    blr
+  %res = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
+                        <2 x float> %vf1,
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res;
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-fpround-f128.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-fpround-f128.ll
@@ -0,0 +1,27 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 -enable-ppc-quad-precision=true | FileCheck %s
+
+declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata)
+declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata)
+
+define double @f1(fp128 %f1) {
+; CHECK-LABEL: f1:
+; CHECK: xscvqpdp v2, v2
+; CHECK-NEXT: xscpsgndp f1, v2, v2
+; CHECK-NEXT: blr
+  %res = call double @llvm.experimental.constrained.fptrunc.f64.f128(
+                        fp128 %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res;
+}
+
+define float @f2(fp128 %f1) {
+; CHECK-LABEL: f2:
+; CHECK: xscvqpdpo v2, v2
+; CHECK-NEXT: xsrsp f1, v2
+  %res = call float @llvm.experimental.constrained.fptrunc.f32.f128(
+                        fp128 %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res;
+}
@llvm.experimental.constrained.fptrunc.f32.f128( + fp128 %f1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res; +} Index: llvm/test/CodeGen/PowerPC/fp-strict-fpround.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/fp-strict-fpround.ll @@ -0,0 +1,74 @@ +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s -check-prefix=PWR9 +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX + + +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata) +declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) + +define float @f(double %f1) { +; CHECK-LABEL: f: +; CHECK: xsrsp f1, f1 +; CHECK-NEXT: blr + +; NOVSX-LABEL: f: +; NOVSX: frsp f1, f1 +; NOVSX-NEXT: blr + %res = call float @llvm.experimental.constrained.fptrunc.f32.f64( + double %f1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res; +} + +define <4 x float> @f1(<4 x double> %vf1) { +; CHECK-LABEL: f1: +; CHECK: xxmrgld vs0, v2, v3 +; CHECK-NEXT: xxmrghd vs1, v2, v3 +; CHECK-NEXT: xvcvdpsp v2, vs0 +; CHECK-NEXT: xvcvdpsp v3, vs1 +; CHECK-NEXT: vmrgew v2, v3, v2 +; CHECK-NEXT: blr + +; PWR9-LABEL: f1: +; PWR9: xxmrgld vs0, v3, v2 +; PWR9-NEXT: xvcvdpsp v4, vs0 +; PWR9-NEXT: xxmrghd vs0, v3, v2 +; PWR9-NEXT: xvcvdpsp v2, vs0 +; PWR9-NEXT: vmrgew v2, v2, v4 +; PWR9-NEXT: blr + %res = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64( + <4 x double> %vf1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %res; +} + +define <2 x float> @f2(<2 x double> %vf1) { +; CHECK-LABEL: f2: +; CHECK: xxswapd vs0, v2 +; CHECK-NEXT: xsrsp f1, v2 +; CHECK-NEXT: xsrsp f0, f0 +; CHECK-NEXT: xscvdpspn v2, f1 +; CHECK-NEXT: xscvdpspn v3, f0 +; CHECK-NEXT: vmrghw v2, v2, v3 +; CHECK-NEXT: blr + +; PWR9-LABEL: f2: +; PWR9: xsrsp f0, v2 +; PWR9-NEXT: xscvdpspn vs0, f0 +; PWR9-NEXT: xxsldwi v3, vs0, vs0, 1 +; PWR9-NEXT: xxswapd vs0, v2 +; PWR9-NEXT: xsrsp f0, f0 +; PWR9-NEXT: xscvdpspn vs0, f0 +; PWR9-NEXT: xxsldwi v2, vs0, vs0, 1 +; PWR9-NEXT: vmrglw v2, v3, v2 + %res = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64( + <2 x double> %vf1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x float> %res; +} + + Index: llvm/test/CodeGen/PowerPC/fp-strict-frint-f128.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/fp-strict-frint-f128.ll @@ -0,0 +1,16 @@ +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 -enable-ppc-quad-precision=true | FileCheck %s + + + +declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata) + +define fp128 @f1(fp128 %f1) { +; CHECK-LABEL: f1: +; CHECK: xsrqpix 0, v2, v2, 3 +; CHECK-NEXT: blr + %res = call fp128 @llvm.experimental.constrained.rint.f128( + fp128 %f1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret fp128 %res; +} Index: 
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-fround-f128.ll
@@ -0,0 +1,16 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 -enable-ppc-quad-precision=true | FileCheck %s
+
+
+
+declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadata)
+
+define fp128 @f1(fp128 %f1) {
+; CHECK-LABEL: f1:
+; CHECK: xsrqpi 0, v2, v2, 0
+; CHECK-NEXT: blr
+  %res = call fp128 @llvm.experimental.constrained.round.f128(
+                        fp128 %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret fp128 %res;
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-fround.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-fround.ll
@@ -0,0 +1,62 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX
+
+
+declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata)
+declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
+
+define float @f1(float %f1) {
+; CHECK-LABEL: f1:
+; CHECK: frin f1, f1
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f1:
+; NOVSX: frin f1, f1
+; NOVSX-NEXT: blr
+  %res = call float @llvm.experimental.constrained.round.f32(
+                        float %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res;
+}
+
+define double @f2(double %f1) {
+; CHECK-LABEL: f2:
+; CHECK: xsrdpi f1, f1
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f2:
+; NOVSX: frin f1, f1
+; NOVSX-NEXT: blr
+  %res = call double @llvm.experimental.constrained.round.f64(
+                        double %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res;
+}
+
+define <4 x float> @f3(<4 x float> %vf1) {
+; CHECK-LABEL: f3:
+; CHECK: xvrspi v2, v2
+; CHECK-NEXT: blr
+  %res = call <4 x float> @llvm.experimental.constrained.round.v4f32(
+                        <4 x float> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res;
+}
+
+define <2 x double> @f4(<2 x double> %vf1) {
+; CHECK-LABEL: f4:
+; CHECK: xvrdpi v2, v2
+; CHECK-NEXT: blr
+  %res = call <2 x double> @llvm.experimental.constrained.round.v2f64(
+                        <2 x double> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res;
+}
+
Index: llvm/test/CodeGen/PowerPC/fp-strict-ftrunc-f128.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-ftrunc-f128.ll
@@ -0,0 +1,16 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 -enable-ppc-quad-precision=true | FileCheck %s
+
+
+
+declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata)
+
+define fp128 @f1(fp128 %f1) {
+; CHECK-LABEL: f1:
+; CHECK: xsrqpi 1, v2, v2, 1
+; CHECK-NEXT: blr
+  %res = call fp128 @llvm.experimental.constrained.trunc.f128(
+                        fp128 %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret fp128 %res;
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-ftrunc.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-ftrunc.ll
@@ -0,0 +1,62 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX
+
+
+declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata)
+declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
+
+define float @f1(float %f1) {
+; CHECK-LABEL: f1:
+; CHECK: friz f1, f1
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f1:
+; NOVSX: friz f1, f1
+; NOVSX-NEXT: blr
+  %res = call float @llvm.experimental.constrained.trunc.f32(
+                        float %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res;
+}
+
+define double @f2(double %f1) {
+; CHECK-LABEL: f2:
+; CHECK: xsrdpiz f1, f1
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f2:
+; NOVSX: friz f1, f1
+; NOVSX-NEXT: blr
+  %res = call double @llvm.experimental.constrained.trunc.f64(
+                        double %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res;
+}
+
+define <4 x float> @f3(<4 x float> %vf1) {
+; CHECK-LABEL: f3:
+; CHECK: xvrspiz v2, v2
+; CHECK-NEXT: blr
+  %res = call <4 x float> @llvm.experimental.constrained.trunc.v4f32(
+                        <4 x float> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res;
+}
+
+define <2 x double> @f4(<2 x double> %vf1) {
+; CHECK-LABEL: f4:
+; CHECK: xvrdpiz v2, v2
+; CHECK-NEXT: blr
+  %res = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
+                        <2 x double> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res;
+}
+
Index: llvm/test/CodeGen/PowerPC/fp-strict-sqrt-f128.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-sqrt-f128.ll
@@ -0,0 +1,16 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 -enable-ppc-quad-precision=true | FileCheck %s
+
+
+
+declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata)
+
+define fp128 @f1(fp128 %f1) {
+; CHECK-LABEL: f1:
+; CHECK: xssqrtqp v2, v2
+; CHECK-NEXT: blr
+  %res = call fp128 @llvm.experimental.constrained.sqrt.f128(
+                        fp128 %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret fp128 %res;
+}
Index: llvm/test/CodeGen/PowerPC/fp-strict-sqrt.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fp-strict-sqrt.ll
@@ -0,0 +1,62 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX
+
+
+declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
+declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
+
+define float @f1(float %f1) {
+; CHECK-LABEL: f1:
+; CHECK: xssqrtsp f1, f1
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f1:
+; NOVSX: fsqrts f1, f1
+; NOVSX-NEXT: blr
+  %res = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res;
+}
+
+define double @f2(double %f1) {
+; CHECK-LABEL: f2:
+; CHECK: xssqrtdp f1, f1
+; CHECK-NEXT: blr
+
+; NOVSX-LABEL: f2:
+; NOVSX: fsqrt f1, f1
+; NOVSX-NEXT: blr
+  %res = call double @llvm.experimental.constrained.sqrt.f64(
+                        double %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res;
+}
+
+define <4 x float> @f3(<4 x float> %vf1) {
+; CHECK-LABEL: f3:
+; CHECK: xvsqrtsp v2, v2
+; CHECK-NEXT: blr
+  %res = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
+                        <4 x float> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res;
+}
+
+define <2 x double> @f4(<2 x double> %vf1) {
+; CHECK-LABEL: f4:
+; CHECK: xvsqrtdp v2, v2
+; CHECK-NEXT: blr
+  %res = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
+                        <2 x double> %vf1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res;
+}
+
Index: llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -1479,8 +1479,8 @@
 ; PC64LE-NEXT:    lxvd2x 1, 0, 4
 ; PC64LE-NEXT:    xxswapd 0, 0
 ; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvsqrtdp 34, 0
-; PC64LE-NEXT:    xvsqrtdp 35, 1
+; PC64LE-NEXT:    xvsqrtdp 35, 0
+; PC64LE-NEXT:    xvsqrtdp 34, 1
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sqrt_v4f64:
@@ -1490,9 +1490,9 @@
 ; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI29_1@toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI29_1@toc@l
-; PC64LE9-NEXT:    xvsqrtdp 34, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    xvsqrtdp 35, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvsqrtdp 34, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
@@ -5566,51 +5566,31 @@
 define <3 x double> @constrained_vector_nearby_v3f64() {
 ; PC64LE-LABEL: constrained_vector_nearby_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI83_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI83_0@toc@l(3)
-; PC64LE-NEXT:    bl nearbyint
-; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addis 3, 2, .LCPI83_1@toc@ha
-; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI83_1@toc@l
 ; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI83_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI83_0@toc@l(3)
 ; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xsrdpic 3, 1
 ; PC64LE-NEXT:    xvrdpic 2, 0
-; PC64LE-NEXT:    xxswapd 0, 2
+; PC64LE-NEXT:    xxswapd 1, 2
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_nearby_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI83_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI83_0@toc@l(3)
-; PC64LE9-NEXT:    bl nearbyint
-; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lfd 0, .LCPI83_0@toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI83_1@toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI83_1@toc@l
+; PC64LE9-NEXT:    xsrdpic 3, 0
 ; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    xvrdpic 2, 0
-; PC64LE9-NEXT:    fmr 3, 1
 ; PC64LE9-NEXT:    xxswapd 1, 2
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 entry:
   %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
@@ -5631,8 +5611,8 @@
 ; PC64LE-NEXT:    lxvd2x 1, 0, 3
 ; PC64LE-NEXT:    xxswapd 0, 0
 ; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvrdpic 34, 0
-; PC64LE-NEXT:    xvrdpic 35, 1
+; PC64LE-NEXT:    xvrdpic 35, 0
+; PC64LE-NEXT:    xvrdpic 34, 1
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64:
@@ -5642,9 +5622,9 @@
 ; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI84_1@toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI84_1@toc@l
-; PC64LE9-NEXT:    xvrdpic 34, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
 ; PC64LE9-NEXT:    xvrdpic 35, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpic 34, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
@@ -6288,7 +6268,7 @@
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    addis 3, 2, .LCPI95_0@toc@ha
 ; PC64LE-NEXT:    lfd 0, .LCPI95_0@toc@l(3)
-; PC64LE-NEXT:    frsp 0, 0
+; PC64LE-NEXT:    xsrsp 0, 0
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
 ; PC64LE-NEXT:    blr
@@ -6297,7 +6277,7 @@
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI95_0@toc@ha
 ; PC64LE9-NEXT:    lfd 0, .LCPI95_0@toc@l(3)
-; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xsrsp 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
 ; PC64LE9-NEXT:    blr
@@ -6316,8 +6296,8 @@
 ; PC64LE-NEXT:    addis 4, 2, .LCPI96_1@toc@ha
 ; PC64LE-NEXT:    lfd 0, .LCPI96_0@toc@l(3)
 ; PC64LE-NEXT:    lfd 1, .LCPI96_1@toc@l(4)
-; PC64LE-NEXT:    frsp 0, 0
-; PC64LE-NEXT:    frsp 1, 1
+; PC64LE-NEXT:    xsrsp 0, 0
+; PC64LE-NEXT:    xsrsp 1, 1
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
@@ -6330,11 +6310,11 @@
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI96_0@toc@ha
 ; PC64LE9-NEXT:    lfd 0, .LCPI96_0@toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI96_1@toc@ha
-; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xsrsp 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
 ; PC64LE9-NEXT:    lfd 0, .LCPI96_1@toc@l(3)
-; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xsrsp 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
 ; PC64LE9-NEXT:    vmrglw 2, 3, 2
@@ -6355,12 +6335,12 @@
 ; PC64LE-NEXT:    lfd 0, .LCPI97_0@toc@l(3)
 ; PC64LE-NEXT:    lfd 1, .LCPI97_1@toc@l(4)
 ; PC64LE-NEXT:    addis 3, 2, .LCPI97_3@toc@ha
-; PC64LE-NEXT:    frsp 0, 0
+; PC64LE-NEXT:    xsrsp 0, 0
 ; PC64LE-NEXT:    lfd 2, .LCPI97_3@toc@l(3)
 ; PC64LE-NEXT:    addis 3, 2, .LCPI97_2@toc@ha
-; PC64LE-NEXT:    frsp 1, 1
+; PC64LE-NEXT:    xsrsp 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI97_2@toc@l
-; PC64LE-NEXT:    frsp 2, 2
+; PC64LE-NEXT:    xsrsp 2, 2
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
@@ -6377,20 +6357,20 @@
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
 ; PC64LE9-NEXT:    lfd 0, .LCPI97_0@toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI97_1@toc@ha
-; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xsrsp 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
 ; PC64LE9-NEXT:    lfd 0, .LCPI97_1@toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI97_2@toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI97_2@toc@l
-; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xsrsp 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
 ; PC64LE9-NEXT:    vmrglw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI97_3@toc@ha
 ; PC64LE9-NEXT:    lfd 0, .LCPI97_3@toc@l(3)
-; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xsrsp 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
@@ -6409,12 +6389,16 @@
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    addis 3, 2, .LCPI98_0@toc@ha
 ; PC64LE-NEXT:    addis 4, 2, .LCPI98_1@toc@ha
-; PC64LE-NEXT:    addis 5, 2, .LCPI98_2@toc@ha
-; PC64LE-NEXT:    addis 6, 2, .LCPI98_3@toc@ha
 ; PC64LE-NEXT:    lfd 0, .LCPI98_0@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI98_2@toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI98_1@toc@l(4)
-; PC64LE-NEXT:    lfd 2, .LCPI98_2@toc@l(5)
-; PC64LE-NEXT:    lfd 3, .LCPI98_3@toc@l(6)
+; PC64LE-NEXT:    addis 4, 2, .LCPI98_3@toc@ha
+; PC64LE-NEXT:    lfd 2, .LCPI98_2@toc@l(3)
+; PC64LE-NEXT:    lfd 3, .LCPI98_3@toc@l(4)
+; PC64LE-NEXT:    xsrsp 0, 0
+; PC64LE-NEXT:    xsrsp 1, 1
+; PC64LE-NEXT:    xsrsp 2, 2
+; PC64LE-NEXT:    xsrsp 3, 3
 ; PC64LE-NEXT:    xxmrghd 0, 1, 0
 ; PC64LE-NEXT:    xxmrghd 1, 3, 2
 ; PC64LE-NEXT:    xvcvdpsp 34, 0
@@ -6429,11 +6413,15 @@
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI98_1@toc@ha
 ; PC64LE9-NEXT:    lfd 1, .LCPI98_1@toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI98_2@toc@ha
+; PC64LE9-NEXT:    xsrsp 0, 0
+; PC64LE9-NEXT:    xsrsp 1, 1
 ; PC64LE9-NEXT:    xxmrghd 0, 1, 0
 ; PC64LE9-NEXT:    xvcvdpsp 34, 0
 ; PC64LE9-NEXT:    lfd 0, .LCPI98_2@toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI98_3@toc@ha
 ; PC64LE9-NEXT:    lfd 1, .LCPI98_3@toc@l(3)
+; PC64LE9-NEXT:    xsrsp 0, 0
+; PC64LE9-NEXT:    xsrsp 1, 1
 ; PC64LE9-NEXT:    xxmrghd 0, 1, 0
 ; PC64LE9-NEXT:    xvcvdpsp 35, 0
 ; PC64LE9-NEXT:    vmrgew 2, 3, 2