diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -292,18 +292,33 @@ setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal); setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal); + if (Subtarget.hasVSX()) + setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Legal); if (Subtarget.hasFSQRT()) { setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal); } + if (Subtarget.hasFPRND()) { + setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal); + + setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal); + } + // We don't support sin/cos/sqrt/fmod/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); @@ -945,6 +960,11 @@ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); @@ -954,6 +974,11 @@ setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal); addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); } @@ -1019,6 +1044,15 @@ setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal); setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal); + setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal); + setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal); + setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal); + setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal); + setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal); } setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); setOperationAction(ISD::BSWAP, MVT::v8i16, Legal); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -2539,7 +2539,7 @@ def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB), "ftsqrt $crD, $fB", IIC_FPCompare>; -let Uses = [RM] in { +let Uses = [RM], mayRaiseFPException = 1 in { let hasSideEffects = 0 in { defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB), "fctiw", "$frD, $frB", IIC_FPGeneral, @@ -2553,39 +2553,39 @@ defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB), "frsp", "$frD, $frB", IIC_FPGeneral, - [(set f32:$frD, (fpround f64:$frB))]>; + [(set f32:$frD, (any_fpround f64:$frB))]>; let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB), "frin", "$frD, $frB", IIC_FPGeneral, - [(set f64:$frD, (fround f64:$frB))]>; + [(set f64:$frD, (any_fround f64:$frB))]>; defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB), "frin", "$frD, $frB", IIC_FPGeneral, - [(set f32:$frD, (fround f32:$frB))]>; + [(set f32:$frD, (any_fround f32:$frB))]>; } let hasSideEffects = 0 in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB), "frip", "$frD, $frB", IIC_FPGeneral, - [(set f64:$frD, (fceil f64:$frB))]>; + [(set f64:$frD, (any_fceil f64:$frB))]>; defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB), "frip", "$frD, $frB", IIC_FPGeneral, - [(set f32:$frD, (fceil f32:$frB))]>; + [(set f32:$frD, (any_fceil f32:$frB))]>; let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB), "friz", "$frD, $frB", IIC_FPGeneral, - [(set f64:$frD, (ftrunc f64:$frB))]>; + [(set f64:$frD, (any_ftrunc f64:$frB))]>; defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB), "friz", "$frD, $frB", IIC_FPGeneral, - [(set f32:$frD, (ftrunc f32:$frB))]>; + [(set f32:$frD, (any_ftrunc f32:$frB))]>; let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB), "frim", "$frD, $frB", IIC_FPGeneral, - [(set f64:$frD, (ffloor f64:$frB))]>; + [(set f64:$frD, (any_ffloor f64:$frB))]>; defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB), "frim", "$frD, $frB", IIC_FPGeneral, - [(set f32:$frD, (ffloor f32:$frB))]>; + [(set f32:$frD, (any_ffloor f32:$frB))]>; defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB), "fsqrt", "$frD, $frB", IIC_FPSqrtD, diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -882,65 +882,65 @@ def XSRDPI : XX2Form<60, 73, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpi $XT, $XB", IIC_VecFP, - [(set f64:$XT, (fround f64:$XB))]>; + [(set f64:$XT, (any_fround f64:$XB))]>; def XSRDPIC : XX2Form<60, 107, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpic $XT, $XB", IIC_VecFP, - [(set f64:$XT, (fnearbyint f64:$XB))]>; + [(set f64:$XT, (any_fnearbyint f64:$XB))]>; def XSRDPIM : XX2Form<60, 121, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpim $XT, $XB", IIC_VecFP, - [(set f64:$XT, (ffloor f64:$XB))]>; + [(set f64:$XT, (any_ffloor f64:$XB))]>; def XSRDPIP : XX2Form<60, 105, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpip $XT, $XB", IIC_VecFP, - [(set f64:$XT, (fceil f64:$XB))]>; + [(set f64:$XT, (any_fceil f64:$XB))]>; def XSRDPIZ : XX2Form<60, 89, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpiz $XT, $XB", IIC_VecFP, - [(set f64:$XT, (ftrunc f64:$XB))]>; + [(set f64:$XT, (any_ftrunc f64:$XB))]>; def XVRDPI : XX2Form<60, 201, (outs vsrc:$XT), (ins vsrc:$XB), "xvrdpi $XT, $XB", IIC_VecFP, - [(set v2f64:$XT, (fround v2f64:$XB))]>; + [(set v2f64:$XT, (any_fround v2f64:$XB))]>; def XVRDPIC : XX2Form<60, 235, (outs vsrc:$XT), (ins vsrc:$XB), "xvrdpic $XT, $XB", IIC_VecFP, - [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>; + [(set v2f64:$XT, (any_fnearbyint v2f64:$XB))]>; def XVRDPIM : XX2Form<60, 249, (outs vsrc:$XT), (ins vsrc:$XB), "xvrdpim $XT, $XB", IIC_VecFP, - [(set v2f64:$XT, (ffloor v2f64:$XB))]>; + [(set v2f64:$XT, (any_ffloor v2f64:$XB))]>; def XVRDPIP : XX2Form<60, 233, (outs vsrc:$XT), (ins vsrc:$XB), "xvrdpip $XT, $XB", IIC_VecFP, - [(set v2f64:$XT, (fceil v2f64:$XB))]>; + [(set v2f64:$XT, (any_fceil v2f64:$XB))]>; def XVRDPIZ : XX2Form<60, 217, (outs vsrc:$XT), (ins vsrc:$XB), "xvrdpiz $XT, $XB", IIC_VecFP, - [(set v2f64:$XT, (ftrunc v2f64:$XB))]>; + [(set v2f64:$XT, (any_ftrunc v2f64:$XB))]>; def XVRSPI : XX2Form<60, 137, (outs vsrc:$XT), (ins vsrc:$XB), "xvrspi $XT, $XB", IIC_VecFP, - [(set v4f32:$XT, (fround v4f32:$XB))]>; + [(set v4f32:$XT, (any_fround v4f32:$XB))]>; def XVRSPIC : XX2Form<60, 171, (outs vsrc:$XT), (ins vsrc:$XB), "xvrspic $XT, $XB", IIC_VecFP, - [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>; + [(set v4f32:$XT, (any_fnearbyint v4f32:$XB))]>; def XVRSPIM : XX2Form<60, 185, (outs vsrc:$XT), (ins vsrc:$XB), "xvrspim $XT, $XB", IIC_VecFP, - [(set v4f32:$XT, (ffloor v4f32:$XB))]>; + [(set v4f32:$XT, (any_ffloor v4f32:$XB))]>; def XVRSPIP : XX2Form<60, 169, (outs vsrc:$XT), (ins vsrc:$XB), "xvrspip $XT, $XB", IIC_VecFP, - [(set v4f32:$XT, (fceil v4f32:$XB))]>; + [(set v4f32:$XT, (any_fceil v4f32:$XB))]>; def XVRSPIZ : XX2Form<60, 153, (outs vsrc:$XT), (ins vsrc:$XB), "xvrspiz $XT, $XB", IIC_VecFP, - [(set v4f32:$XT, (ftrunc v4f32:$XB))]>; + [(set v4f32:$XT, (any_ftrunc v4f32:$XB))]>; // Max/Min Instructions let isCommutable = 1 in { @@ -1163,10 +1163,11 @@ "xsresp $XT, $XB", IIC_VecFP, [(set f32:$XT, (PPCfre f32:$XB))]>; // FIXME: Setting the hasSideEffects flag here to match current behaviour. - let hasSideEffects = 1 in + let hasSideEffects = 1, mayRaiseFPException = 1 in def XSRSP : XX2Form<60, 281, (outs vssrc:$XT), (ins vsfrc:$XB), - "xsrsp $XT, $XB", IIC_VecFP, []>; + "xsrsp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (any_fpround f64:$XB))]>; def XSSQRTSP : XX2Form<60, 11, (outs vssrc:$XT), (ins vssrc:$XB), "xssqrtsp $XT, $XB", IIC_FPSqrtS, @@ -1455,16 +1456,18 @@ //===--------------------------------------------------------------------===// // Quad-Precision Floating-Point Conversion Instructions: - // Convert DP -> QP - def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc, - [(set f128:$vT, (fpextend f64:$vB))]>; - - // Round & Convert QP -> DP (dword[1] is set to zero) - def XSCVQPDP : X_VT5_XO5_VB5_VSFR<63, 20, 836, "xscvqpdp" , []>; - def XSCVQPDPO : X_VT5_XO5_VB5_VSFR_Ro<63, 20, 836, "xscvqpdpo", - [(set f64:$vT, - (int_ppc_truncf128_round_to_odd - f128:$vB))]>; + let mayRaiseFPException = 1 in { + // Convert DP -> QP + def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc, + [(set f128:$vT, (any_fpextend f64:$vB))]>; + + // Round & Convert QP -> DP (dword[1] is set to zero) + def XSCVQPDP : X_VT5_XO5_VB5_VSFR<63, 20, 836, "xscvqpdp" , []>; + def XSCVQPDPO : X_VT5_XO5_VB5_VSFR_Ro<63, 20, 836, "xscvqpdpo", + [(set f64:$vT, + (int_ppc_truncf128_round_to_odd + f128:$vB))]>; + } // FIXME: Setting the hasSideEffects flag here to match current behaviour. // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero) @@ -1497,9 +1500,11 @@ [(set v4f32:$XT, (int_ppc_vsx_xvcvsphp v4f32:$XB))]>; - // Round to Quad-Precision Integer [with Inexact] - def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>; - def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>; + let mayRaiseFPException = 1 in { + // Round to Quad-Precision Integer [with Inexact] + def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>; + def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>; + } // Round Quad-Precision to Double-Extended Precision (fp80) // FIXME: Setting the hasSideEffects flag here to match current behaviour. @@ -2174,10 +2179,10 @@ } def DblToFlt { - dag A0 = (f32 (fpround (f64 (extractelt v2f64:$A, 0)))); - dag A1 = (f32 (fpround (f64 (extractelt v2f64:$A, 1)))); - dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0)))); - dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1)))); + dag A0 = (f32 (any_fpround (f64 (extractelt v2f64:$A, 0)))); + dag A1 = (f32 (any_fpround (f64 (extractelt v2f64:$A, 1)))); + dag B0 = (f32 (any_fpround (f64 (extractelt v2f64:$B, 0)))); + dag B1 = (f32 (any_fpround (f64 (extractelt v2f64:$B, 1)))); } def ExtDbl { @@ -2664,22 +2669,22 @@ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; // Rounding for single precision. -def : Pat<(f32 (fround f32:$S)), +def : Pat<(f32 (any_fround f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(f32 (fnearbyint f32:$S)), +def : Pat<(f32 (any_fnearbyint f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPIC (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(f32 (ffloor f32:$S)), +def : Pat<(f32 (any_ffloor f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPIM (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(f32 (fceil f32:$S)), +def : Pat<(f32 (any_fceil f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPIP (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(f32 (ftrunc f32:$S)), +def : Pat<(f32 (any_ftrunc f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPIZ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(f32 (frint f32:$S)), +def : Pat<(f32 (any_frint f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPIC (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>; @@ -2985,7 +2990,7 @@ (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>; def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))), (f32 (XFLOADf32 xoaddr:$src))>; -def : Pat<(f64 (fpextend f32:$src)), +def : Pat<(f64 (any_fpextend f32:$src)), (COPY_TO_REGCLASS $src, VSFRC)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), @@ -3560,17 +3565,17 @@ (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>; // Use current rounding mode -def : Pat<(f128 (fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>; +def : Pat<(f128 (any_fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>; // Round to nearest, ties away from zero -def : Pat<(f128 (fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>; +def : Pat<(f128 (any_fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>; // Round towards Zero -def : Pat<(f128 (ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>; +def : Pat<(f128 (any_ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>; // Round towards +Inf -def : Pat<(f128 (fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>; +def : Pat<(f128 (any_fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>; // Round towards -Inf -def : Pat<(f128 (ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>; +def : Pat<(f128 (any_ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>; // Use current rounding mode, [with Inexact] -def : Pat<(f128 (frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>; +def : Pat<(f128 (any_frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>; def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)), (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>; @@ -3784,11 +3789,11 @@ (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; // Round & Convert QP -> DP/SP -def : Pat<(f64 (fpround f128:$src)), (f64 (XSCVQPDP $src))>; -def : Pat<(f32 (fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>; +def : Pat<(f64 (any_fpround f128:$src)), (f64 (XSCVQPDP $src))>; +def : Pat<(f32 (any_fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>; // Convert SP -> QP -def : Pat<(f128 (fpextend f32:$src)), +def : Pat<(f128 (any_fpextend f32:$src)), (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>; def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)), diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll @@ -0,0 +1,474 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr | FileCheck \ +; RUN: --check-prefix=P8 %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr | FileCheck \ +; RUN: --check-prefix=P9 %s + +; FIXME: Constrained fpext would fail if VSX feature disabled. Add no-vsx + +declare float @llvm.experimental.constrained.ceil.f32(float, metadata) +declare double @llvm.experimental.constrained.ceil.f64(double, metadata) +declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata) +declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata) + +declare float @llvm.experimental.constrained.floor.f32(float, metadata) +declare double @llvm.experimental.constrained.floor.f64(double, metadata) +declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata) +declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata) + +declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata) + +declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata) +declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) + +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata) +declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) + +declare float @llvm.experimental.constrained.round.f32(float, metadata) +declare double @llvm.experimental.constrained.round.f64(double, metadata) +declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, metadata) +declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata) + +declare float @llvm.experimental.constrained.trunc.f32(float, metadata) +declare double @llvm.experimental.constrained.trunc.f64(double, metadata) +declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata) +declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata) + +define float @ceil_f32(float %f1) { +; P8-LABEL: ceil_f32: +; P8: # %bb.0: +; P8-NEXT: xsrdpip f1, f1 +; P8-NEXT: blr +; +; P9-LABEL: ceil_f32: +; P9: # %bb.0: +; P9-NEXT: xsrdpip f1, f1 +; P9-NEXT: blr + %res = call float @llvm.experimental.constrained.ceil.f32( + float %f1, + metadata !"fpexcept.strict") + ret float %res +} + +define double @ceil_f64(double %f1) { +; P8-LABEL: ceil_f64: +; P8: # %bb.0: +; P8-NEXT: xsrdpip f1, f1 +; P8-NEXT: blr +; +; P9-LABEL: ceil_f64: +; P9: # %bb.0: +; P9-NEXT: xsrdpip f1, f1 +; P9-NEXT: blr + %res = call double @llvm.experimental.constrained.ceil.f64( + double %f1, + metadata !"fpexcept.strict") + ret double %res +} + +define <4 x float> @ceil_v4f32(<4 x float> %vf1) { +; P8-LABEL: ceil_v4f32: +; P8: # %bb.0: +; P8-NEXT: xvrspip v2, v2 +; P8-NEXT: blr +; +; P9-LABEL: ceil_v4f32: +; P9: # %bb.0: +; P9-NEXT: xvrspip v2, v2 +; P9-NEXT: blr + %res = call <4 x float> @llvm.experimental.constrained.ceil.v4f32( + <4 x float> %vf1, + metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <2 x double> @ceil_v2f64(<2 x double> %vf1) { +; P8-LABEL: ceil_v2f64: +; P8: # %bb.0: +; P8-NEXT: xvrdpip v2, v2 +; P8-NEXT: blr +; +; P9-LABEL: ceil_v2f64: +; P9: # %bb.0: +; P9-NEXT: xvrdpip v2, v2 +; P9-NEXT: blr + %res = call <2 x double> @llvm.experimental.constrained.ceil.v2f64( + <2 x double> %vf1, + metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define float @floor_f32(float %f1) { +; P8-LABEL: floor_f32: +; P8: # %bb.0: +; P8-NEXT: xsrdpim f1, f1 +; P8-NEXT: blr +; +; P9-LABEL: floor_f32: +; P9: # %bb.0: +; P9-NEXT: xsrdpim f1, f1 +; P9-NEXT: blr + %res = call float @llvm.experimental.constrained.floor.f32( + float %f1, + metadata !"fpexcept.strict") + ret float %res +} + +define double @floor_f64(double %f1) { +; P8-LABEL: floor_f64: +; P8: # %bb.0: +; P8-NEXT: xsrdpim f1, f1 +; P8-NEXT: blr +; +; P9-LABEL: floor_f64: +; P9: # %bb.0: +; P9-NEXT: xsrdpim f1, f1 +; P9-NEXT: blr + %res = call double @llvm.experimental.constrained.floor.f64( + double %f1, + metadata !"fpexcept.strict") + ret double %res; +} + +define <4 x float> @floor_v4f32(<4 x float> %vf1) { +; P8-LABEL: floor_v4f32: +; P8: # %bb.0: +; P8-NEXT: xvrspim v2, v2 +; P8-NEXT: blr +; +; P9-LABEL: floor_v4f32: +; P9: # %bb.0: +; P9-NEXT: xvrspim v2, v2 +; P9-NEXT: blr + %res = call <4 x float> @llvm.experimental.constrained.floor.v4f32( + <4 x float> %vf1, + metadata !"fpexcept.strict") + ret <4 x float> %res; +} + +define <2 x double> @floor_v2f64(<2 x double> %vf1) { +; P8-LABEL: floor_v2f64: +; P8: # %bb.0: +; P8-NEXT: xvrdpim v2, v2 +; P8-NEXT: blr +; +; P9-LABEL: floor_v2f64: +; P9: # %bb.0: +; P9-NEXT: xvrdpim v2, v2 +; P9-NEXT: blr + %res = call <2 x double> @llvm.experimental.constrained.floor.v2f64( + <2 x double> %vf1, + metadata !"fpexcept.strict") + ret <2 x double> %res; +} + +define double @nearbyint_f64(double %f1, double %f2) { +; P8-LABEL: nearbyint_f64: +; P8: # %bb.0: +; P8-NEXT: xsrdpic f1, f1 +; P8-NEXT: blr +; +; P9-LABEL: nearbyint_f64: +; P9: # %bb.0: +; P9-NEXT: xsrdpic f1, f1 +; P9-NEXT: blr + %res = call double @llvm.experimental.constrained.nearbyint.f64( + double %f1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) { +; P8-LABEL: nearbyint_v4f32: +; P8: # %bb.0: +; P8-NEXT: xvrspic v2, v2 +; P8-NEXT: blr +; +; P9-LABEL: nearbyint_v4f32: +; P9: # %bb.0: +; P9-NEXT: xvrspic v2, v2 +; P9-NEXT: blr + %res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32( + <4 x float> %vf1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) { +; P8-LABEL: nearbyint_v2f64: +; P8: # %bb.0: +; P8-NEXT: xvrdpic v2, v2 +; P8-NEXT: blr +; +; P9-LABEL: nearbyint_v2f64: +; P9: # %bb.0: +; P9-NEXT: xvrdpic v2, v2 +; P9-NEXT: blr + %res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( + <2 x double> %vf1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define <4 x double> @fpext_v4f64_v4f32(<4 x float> %vf1) { +; P8-LABEL: fpext_v4f64_v4f32: +; P8: # %bb.0: +; P8-NEXT: xxsldwi vs0, v2, v2, 1 +; P8-NEXT: xxsldwi vs1, v2, v2, 3 +; P8-NEXT: xxswapd vs3, v2 +; P8-NEXT: xscvspdpn f2, v2 +; P8-NEXT: xscvspdpn f0, vs0 +; P8-NEXT: xscvspdpn f1, vs1 +; P8-NEXT: xscvspdpn f3, vs3 +; P8-NEXT: xxmrghd v2, vs2, vs0 +; P8-NEXT: xxmrghd v3, vs3, vs1 +; P8-NEXT: blr +; +; P9-LABEL: fpext_v4f64_v4f32: +; P9: # %bb.0: +; P9-NEXT: xxsldwi vs0, v2, v2, 3 +; P9-NEXT: xxswapd vs1, v2 +; P9-NEXT: xscvspdpn f0, vs0 +; P9-NEXT: xscvspdpn f1, vs1 +; P9-NEXT: xxsldwi vs2, v2, v2, 1 +; P9-NEXT: xscvspdpn f2, vs2 +; P9-NEXT: xxmrghd vs0, vs1, vs0 +; P9-NEXT: xscvspdpn f1, v2 +; P9-NEXT: xxmrghd v3, vs1, vs2 +; P9-NEXT: xxlor v2, vs0, vs0 +; P9-NEXT: blr + %res = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32( + <4 x float> %vf1, + metadata !"fpexcept.strict") + ret <4 x double> %res +} + +define <2 x double> @fpext_v2f64_v2f32(<2 x float> %vf1) { +; P8-LABEL: fpext_v2f64_v2f32: +; P8: # %bb.0: +; P8-NEXT: xxsldwi vs0, v2, v2, 1 +; P8-NEXT: xscvspdpn f1, v2 +; P8-NEXT: xscvspdpn f0, vs0 +; P8-NEXT: xxmrghd v2, vs1, vs0 +; P8-NEXT: blr +; +; P9-LABEL: fpext_v2f64_v2f32: +; P9: # %bb.0: +; P9-NEXT: xxsldwi vs0, v2, v2, 3 +; P9-NEXT: xxswapd vs1, v2 +; P9-NEXT: xscvspdpn f0, vs0 +; P9-NEXT: xscvspdpn f1, vs1 +; P9-NEXT: xxmrghd v2, vs1, vs0 +; P9-NEXT: blr + %res = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32( + <2 x float> %vf1, + metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define float @fptrunc_f32_f64(double %f1) { +; P8-LABEL: fptrunc_f32_f64: +; P8: # %bb.0: +; P8-NEXT: xsrsp f1, f1 +; P8-NEXT: blr +; +; P9-LABEL: fptrunc_f32_f64: +; P9: # %bb.0: +; P9-NEXT: xsrsp f1, f1 +; P9-NEXT: blr + %res = call float @llvm.experimental.constrained.fptrunc.f32.f64( + double %f1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res; +} + +define <4 x float> @fptrunc_v4f32_v4f64(<4 x double> %vf1) { +; P8-LABEL: fptrunc_v4f32_v4f64: +; P8: # %bb.0: +; P8-NEXT: xxmrgld vs0, v2, v3 +; P8-NEXT: xxmrghd vs1, v2, v3 +; P8-NEXT: xvcvdpsp v2, vs0 +; P8-NEXT: xvcvdpsp v3, vs1 +; P8-NEXT: vmrgew v2, v3, v2 +; P8-NEXT: blr +; +; P9-LABEL: fptrunc_v4f32_v4f64: +; P9: # %bb.0: +; P9-NEXT: xxmrgld vs0, v3, v2 +; P9-NEXT: xvcvdpsp v4, vs0 +; P9-NEXT: xxmrghd vs0, v3, v2 +; P9-NEXT: xvcvdpsp v2, vs0 +; P9-NEXT: vmrgew v2, v2, v4 +; P9-NEXT: blr + %res = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64( + <4 x double> %vf1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <2 x float> @fptrunc_v2f32_v2f64(<2 x double> %vf1) { +; P8-LABEL: fptrunc_v2f32_v2f64: +; P8: # %bb.0: +; P8-NEXT: xxswapd vs0, v2 +; P8-NEXT: xsrsp f1, v2 +; P8-NEXT: xsrsp f0, f0 +; P8-NEXT: xscvdpspn v2, f1 +; P8-NEXT: xscvdpspn v3, f0 +; P8-NEXT: vmrghw v2, v2, v3 +; P8-NEXT: blr +; +; P9-LABEL: fptrunc_v2f32_v2f64: +; P9: # %bb.0: +; P9-NEXT: xsrsp f0, v2 +; P9-NEXT: xscvdpspn vs0, f0 +; P9-NEXT: xxsldwi v3, vs0, vs0, 1 +; P9-NEXT: xxswapd vs0, v2 +; P9-NEXT: xsrsp f0, f0 +; P9-NEXT: xscvdpspn vs0, f0 +; P9-NEXT: xxsldwi v2, vs0, vs0, 1 +; P9-NEXT: vmrglw v2, v3, v2 +; P9-NEXT: blr + %res = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64( + <2 x double> %vf1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x float> %res +} + +define float @round_f32(float %f1) { +; P8-LABEL: round_f32: +; P8: # %bb.0: +; P8-NEXT: xsrdpi f1, f1 +; P8-NEXT: blr +; +; P9-LABEL: round_f32: +; P9: # %bb.0: +; P9-NEXT: xsrdpi f1, f1 +; P9-NEXT: blr + %res = call float @llvm.experimental.constrained.round.f32( + float %f1, + metadata !"fpexcept.strict") + ret float %res +} + +define double @round_f64(double %f1) { +; P8-LABEL: round_f64: +; P8: # %bb.0: +; P8-NEXT: xsrdpi f1, f1 +; P8-NEXT: blr +; +; P9-LABEL: round_f64: +; P9: # %bb.0: +; P9-NEXT: xsrdpi f1, f1 +; P9-NEXT: blr + %res = call double @llvm.experimental.constrained.round.f64( + double %f1, + metadata !"fpexcept.strict") + ret double %res +} + +define <4 x float> @round_v4f32(<4 x float> %vf1) { +; P8-LABEL: round_v4f32: +; P8: # %bb.0: +; P8-NEXT: xvrspi v2, v2 +; P8-NEXT: blr +; +; P9-LABEL: round_v4f32: +; P9: # %bb.0: +; P9-NEXT: xvrspi v2, v2 +; P9-NEXT: blr + %res = call <4 x float> @llvm.experimental.constrained.round.v4f32( + <4 x float> %vf1, + metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <2 x double> @round_v2f64(<2 x double> %vf1) { +; P8-LABEL: round_v2f64: +; P8: # %bb.0: +; P8-NEXT: xvrdpi v2, v2 +; P8-NEXT: blr +; +; P9-LABEL: round_v2f64: +; P9: # %bb.0: +; P9-NEXT: xvrdpi v2, v2 +; P9-NEXT: blr + %res = call <2 x double> @llvm.experimental.constrained.round.v2f64( + <2 x double> %vf1, + metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define float @trunc_f32(float %f1) { +; P8-LABEL: trunc_f32: +; P8: # %bb.0: +; P8-NEXT: xsrdpiz f1, f1 +; P8-NEXT: blr +; +; P9-LABEL: trunc_f32: +; P9: # %bb.0: +; P9-NEXT: xsrdpiz f1, f1 +; P9-NEXT: blr + %res = call float @llvm.experimental.constrained.trunc.f32( + float %f1, + metadata !"fpexcept.strict") + ret float %res +} + +define double @trunc_f64(double %f1) { +; P8-LABEL: trunc_f64: +; P8: # %bb.0: +; P8-NEXT: xsrdpiz f1, f1 +; P8-NEXT: blr +; +; P9-LABEL: trunc_f64: +; P9: # %bb.0: +; P9-NEXT: xsrdpiz f1, f1 +; P9-NEXT: blr + %res = call double @llvm.experimental.constrained.trunc.f64( + double %f1, + metadata !"fpexcept.strict") + ret double %res +} + +define <4 x float> @trunc_v4f32(<4 x float> %vf1) { +; P8-LABEL: trunc_v4f32: +; P8: # %bb.0: +; P8-NEXT: xvrspiz v2, v2 +; P8-NEXT: blr +; +; P9-LABEL: trunc_v4f32: +; P9: # %bb.0: +; P9-NEXT: xvrspiz v2, v2 +; P9-NEXT: blr + %res = call <4 x float> @llvm.experimental.constrained.trunc.v4f32( + <4 x float> %vf1, + metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <2 x double> @trunc_v2f64(<2 x double> %vf1) { +; P8-LABEL: trunc_v2f64: +; P8: # %bb.0: +; P8-NEXT: xvrdpiz v2, v2 +; P8-NEXT: blr +; +; P9-LABEL: trunc_v2f64: +; P9: # %bb.0: +; P9-NEXT: xvrdpiz v2, v2 +; P9-NEXT: blr + %res = call <2 x double> @llvm.experimental.constrained.trunc.v2f64( + <2 x double> %vf1, + metadata !"fpexcept.strict") + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/PowerPC/kernel-fp-round.ll b/llvm/test/CodeGen/PowerPC/kernel-fp-round.ll --- a/llvm/test/CodeGen/PowerPC/kernel-fp-round.ll +++ b/llvm/test/CodeGen/PowerPC/kernel-fp-round.ll @@ -10,7 +10,7 @@ define float @test(float %a) { ; CHECK: stack: ; CHECK-NEXT: - { id: 0, size: 4, alignment: 4 } -; CHECK: %2:f8rc = FCTIWZ killed %1, implicit $rm +; CHECK: %2:f8rc = nofpexcept FCTIWZ killed %1, implicit $rm ; CHECK: STFIWX killed %2, $zero8, %3 ; CHECK-NEXT: %4:f8rc = LFIWAX $zero8, %3 :: (load 4 from %stack.0) ; CHECK-NEXT: %5:f4rc = FCFIDS killed %4, implicit $rm @@ -19,21 +19,21 @@ ; CHECK-P6: stack: ; CHECK-P6-NEXT: - { id: 0, size: 4, alignment: 4 } -; CHECK-P6: %2:f8rc = FCTIWZ killed %1, implicit $rm +; CHECK-P6: %2:f8rc = nofpexcept FCTIWZ killed %1, implicit $rm ; CHECK-P6: STFIWX killed %2, $zero, %3 ; CHECK-P6-NEXT: %4:f8rc = LFIWAX $zero, %3 :: (load 4 from %stack.0) ; CHECK-P6-NEXT: %5:f8rc = FCFID killed %4, implicit $rm -; CHECK-P6-NEXT: %6:f4rc = FRSP killed %5, implicit $rm +; CHECK-P6-NEXT: %6:f4rc = nofpexcept FRSP killed %5, implicit $rm ; CHECK-P6-NEXT: $f1 = COPY %6 ; CHECK-P6-NEXT: BLR implicit $lr, implicit $rm, implicit $f1 ; CHECK-P6-64: stack: ; CHECK-P6-64-NEXT: - { id: 0, size: 4, alignment: 4 } -; CHECK-P6-64: %2:f8rc = FCTIWZ killed %1, implicit $rm +; CHECK-P6-64: %2:f8rc = nofpexcept FCTIWZ killed %1, implicit $rm ; CHECK-P6-64: STFIWX killed %2, $zero8, %3 ; CHECK-P6-64-NEXT: %4:f8rc = LFIWAX $zero8, %3 :: (load 4 from %stack.0) ; CHECK-P6-64-NEXT: %5:f8rc = FCFID killed %4, implicit $rm -; CHECK-P6-64-NEXT: %6:f4rc = FRSP killed %5, implicit $rm +; CHECK-P6-64-NEXT: %6:f4rc = nofpexcept FRSP killed %5, implicit $rm ; CHECK-P6-64-NEXT: $f1 = COPY %6 ; CHECK-P6-64-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $f1 diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -1070,12 +1070,12 @@ define float @test_fptrunc_ppc_fp128_f32(ppc_fp128 %first) #0 { ; PC64LE-LABEL: test_fptrunc_ppc_fp128_f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: frsp 1, 1 +; PC64LE-NEXT: xsrsp 1, 1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: test_fptrunc_ppc_fp128_f32: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: frsp 1, 1 +; PC64LE9-NEXT: xsrsp 1, 1 ; PC64LE9-NEXT: blr ; ; PC64-LABEL: test_fptrunc_ppc_fp128_f32: @@ -1368,7 +1368,7 @@ ; PC64LE-NEXT: stfd 30, 32(30) ; PC64LE-NEXT: bl __powitf2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: frsp 0, 1 +; PC64LE-NEXT: xsrsp 0, 1 ; PC64LE-NEXT: stfsx 0, 0, 29 ; PC64LE-NEXT: stfd 1, -16(30) ; PC64LE-NEXT: stfd 2, -8(30) @@ -1406,8 +1406,8 @@ ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: fmr 4, 2 -; PC64LE9-NEXT: fmr 30, 2 -; PC64LE9-NEXT: fmr 29, 1 +; PC64LE9-NEXT: fmr 30, 1 +; PC64LE9-NEXT: fmr 29, 2 ; PC64LE9-NEXT: stfd 2, 24(30) ; PC64LE9-NEXT: stfd 1, 16(30) ; PC64LE9-NEXT: bl __gcc_qmul @@ -1415,11 +1415,11 @@ ; PC64LE9-NEXT: fmr 1, 31 ; PC64LE9-NEXT: xxlxor 2, 2, 2 ; PC64LE9-NEXT: li 5, 2 -; PC64LE9-NEXT: stfd 30, 40(30) -; PC64LE9-NEXT: stfd 29, 32(30) +; PC64LE9-NEXT: stfd 29, 40(30) +; PC64LE9-NEXT: stfd 30, 32(30) ; PC64LE9-NEXT: bl __powitf2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: frsp 0, 1 +; PC64LE9-NEXT: xsrsp 0, 1 ; PC64LE9-NEXT: stfs 0, 0(29) ; PC64LE9-NEXT: stfd 1, -16(30) ; PC64LE9-NEXT: stfd 2, -8(30) diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll --- a/llvm/test/CodeGen/PowerPC/recipest.ll +++ b/llvm/test/CodeGen/PowerPC/recipest.ll @@ -226,7 +226,7 @@ ; CHECK-P8-NEXT: xsmaddadp 4, 2, 0 ; CHECK-P8-NEXT: xsmuldp 0, 0, 5 ; CHECK-P8-NEXT: xsmuldp 0, 0, 4 -; CHECK-P8-NEXT: frsp 0, 0 +; CHECK-P8-NEXT: xsrsp 0, 0 ; CHECK-P8-NEXT: xsmulsp 1, 1, 0 ; CHECK-P8-NEXT: blr ; @@ -246,7 +246,7 @@ ; CHECK-P9-NEXT: xsmaddadp 4, 2, 0 ; CHECK-P9-NEXT: xsmuldp 0, 0, 3 ; CHECK-P9-NEXT: xsmuldp 0, 0, 4 -; CHECK-P9-NEXT: frsp 0, 0 +; CHECK-P9-NEXT: xsrsp 0, 0 ; CHECK-P9-NEXT: xsmulsp 1, 1, 0 ; CHECK-P9-NEXT: blr %x = call reassoc arcp double @llvm.sqrt.f64(double %b) @@ -266,14 +266,14 @@ ; CHECK-P8-LABEL: food_safe: ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: xssqrtdp 0, 2 -; CHECK-P8-NEXT: frsp 0, 0 +; CHECK-P8-NEXT: xsrsp 0, 0 ; CHECK-P8-NEXT: xsdivsp 1, 1, 0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: food_safe: ; CHECK-P9: # %bb.0: ; CHECK-P9-NEXT: xssqrtdp 0, 2 -; CHECK-P9-NEXT: frsp 0, 0 +; CHECK-P9-NEXT: xsrsp 0, 0 ; CHECK-P9-NEXT: xsdivsp 1, 1, 0 ; CHECK-P9-NEXT: blr %x = call double @llvm.sqrt.f64(double %b) diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -4899,50 +4899,19 @@ define <2 x double> @constrained_vector_nearbyint_v2f64() #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -64(1) ; PC64LE-NEXT: addis 3, 2, .LCPI81_0@toc@ha -; PC64LE-NEXT: lfd 1, .LCPI81_0@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI81_1@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI81_1@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 1, 0 -; PC64LE-NEXT: addi 1, 1, 64 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: addi 3, 3, .LCPI81_0@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xvrdpic 34, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI81_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI81_0@toc@l(3) -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI81_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI81_1@toc@l(3) -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 1, 0 -; PC64LE9-NEXT: addi 1, 1, 48 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: addi 3, 3, .LCPI81_0@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: xvrdpic 34, 0 ; PC64LE9-NEXT: blr entry: %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( @@ -5041,72 +5010,31 @@ define <3 x double> @constrained_vector_nearby_v3f64() #0 { ; PC64LE-LABEL: constrained_vector_nearby_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha +; PC64LE-NEXT: addi 3, 3, .LCPI83_1@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: addis 3, 2, .LCPI83_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI83_0@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI83_1@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI83_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 0, 1 -; PC64LE-NEXT: lfd 1, .LCPI83_2@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: xxlor 2, 63, 63 -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xsrdpic 3, 1 +; PC64LE-NEXT: xvrdpic 2, 0 +; PC64LE-NEXT: xxswapd 1, 2 +; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearby_v3f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI83_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI83_0@toc@l(3) -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lfd 0, .LCPI83_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI83_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI83_1@toc@l(3) -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addis 3, 2, .LCPI83_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 0, 1 -; PC64LE9-NEXT: lfd 1, .LCPI83_2@toc@l(3) -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xxswapd 1, 63 -; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addi 3, 3, .LCPI83_1@toc@l +; PC64LE9-NEXT: xsrdpic 3, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: xvrdpic 2, 0 +; PC64LE9-NEXT: xxswapd 1, 2 ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 64 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 ; PC64LE9-NEXT: blr entry: %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64( @@ -5119,86 +5047,28 @@ define <4 x double> @constrained_vector_nearbyint_v4f64() #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI84_0@toc@ha -; PC64LE-NEXT: lfd 1, .LCPI84_0@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI84_1@toc@ha -; PC64LE-NEXT: lfd 1, .LCPI84_1@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI84_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 -; PC64LE-NEXT: lfd 1, .LCPI84_2@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI84_3@toc@ha -; PC64LE-NEXT: lfd 1, .LCPI84_3@toc@l(3) -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: vmr 2, 31 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 35, 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: addis 4, 2, .LCPI84_1@toc@ha +; PC64LE-NEXT: addi 3, 3, .LCPI84_0@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: addi 3, 4, .LCPI84_1@toc@l +; PC64LE-NEXT: lxvd2x 1, 0, 3 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xxswapd 1, 1 +; PC64LE-NEXT: xvrdpic 35, 0 +; PC64LE-NEXT: xvrdpic 34, 1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI84_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI84_0@toc@l(3) -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 3, 3, .LCPI84_0@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI84_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfd 1, .LCPI84_1@toc@l(3) -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addis 3, 2, .LCPI84_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 -; PC64LE9-NEXT: lfd 1, .LCPI84_2@toc@l(3) -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI84_3@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfd 1, .LCPI84_3@toc@l(3) -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: vmr 2, 31 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 35, 1, 0 -; PC64LE9-NEXT: addi 1, 1, 64 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: addi 3, 3, .LCPI84_1@toc@l +; PC64LE9-NEXT: xvrdpic 35, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: xvrdpic 34, 0 ; PC64LE9-NEXT: blr entry: %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64( @@ -5788,14 +5658,14 @@ ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI95_0@toc@ha ; PC64LE-NEXT: lfd 0, .LCPI95_0@toc@l(3) -; PC64LE-NEXT: frsp 1, 0 +; PC64LE-NEXT: xsrsp 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fptrunc_v1f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI95_0@toc@ha ; PC64LE9-NEXT: lfd 0, .LCPI95_0@toc@l(3) -; PC64LE9-NEXT: frsp 1, 0 +; PC64LE9-NEXT: xsrsp 1, 0 ; PC64LE9-NEXT: blr entry: %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64( @@ -5812,8 +5682,8 @@ ; PC64LE-NEXT: addis 4, 2, .LCPI96_1@toc@ha ; PC64LE-NEXT: lfd 0, .LCPI96_0@toc@l(3) ; PC64LE-NEXT: lfd 1, .LCPI96_1@toc@l(4) -; PC64LE-NEXT: frsp 0, 0 -; PC64LE-NEXT: frsp 1, 1 +; PC64LE-NEXT: xsrsp 0, 0 +; PC64LE-NEXT: xsrsp 1, 1 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 @@ -5826,11 +5696,11 @@ ; PC64LE9-NEXT: addis 3, 2, .LCPI96_0@toc@ha ; PC64LE9-NEXT: lfd 0, .LCPI96_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI96_1@toc@ha -; PC64LE9-NEXT: frsp 0, 0 +; PC64LE9-NEXT: xsrsp 0, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: lfd 0, .LCPI96_1@toc@l(3) -; PC64LE9-NEXT: frsp 0, 0 +; PC64LE9-NEXT: xsrsp 0, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 ; PC64LE9-NEXT: vmrglw 2, 3, 2 @@ -5851,12 +5721,12 @@ ; PC64LE-NEXT: lfd 0, .LCPI97_0@toc@l(3) ; PC64LE-NEXT: lfd 1, .LCPI97_1@toc@l(4) ; PC64LE-NEXT: addis 3, 2, .LCPI97_3@toc@ha -; PC64LE-NEXT: frsp 0, 0 +; PC64LE-NEXT: xsrsp 0, 0 ; PC64LE-NEXT: lfd 2, .LCPI97_3@toc@l(3) ; PC64LE-NEXT: addis 3, 2, .LCPI97_2@toc@ha -; PC64LE-NEXT: frsp 1, 1 +; PC64LE-NEXT: xsrsp 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI97_2@toc@l -; PC64LE-NEXT: frsp 2, 2 +; PC64LE-NEXT: xsrsp 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 @@ -5873,20 +5743,20 @@ ; PC64LE9-NEXT: addis 3, 2, .LCPI97_0@toc@ha ; PC64LE9-NEXT: lfd 0, .LCPI97_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI97_1@toc@ha -; PC64LE9-NEXT: frsp 0, 0 +; PC64LE9-NEXT: xsrsp 0, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: lfd 0, .LCPI97_1@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI97_2@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI97_2@toc@l -; PC64LE9-NEXT: frsp 0, 0 +; PC64LE9-NEXT: xsrsp 0, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 ; PC64LE9-NEXT: vmrglw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI97_3@toc@ha ; PC64LE9-NEXT: lfd 0, .LCPI97_3@toc@l(3) -; PC64LE9-NEXT: frsp 0, 0 +; PC64LE9-NEXT: xsrsp 0, 0 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 @@ -6055,13 +5925,19 @@ ; PC64LE-LABEL: constrained_vector_ceil_v1f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI103_0@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI103_0@toc@l(3) +; PC64LE-NEXT: lfs 0, .LCPI103_0@toc@l(3) +; PC64LE-NEXT: addis 3, 2, .LCPI103_1@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI103_1@toc@l(3) +; PC64LE-NEXT: xsrdpip 0, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_ceil_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI103_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI103_0@toc@l(3) +; PC64LE9-NEXT: lfs 0, .LCPI103_0@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI103_1@toc@ha +; PC64LE9-NEXT: lfs 1, .LCPI103_1@toc@l(3) +; PC64LE9-NEXT: xsrdpip 0, 0 ; PC64LE9-NEXT: blr entry: %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32( @@ -6074,16 +5950,24 @@ ; PC64LE-LABEL: constrained_vector_ceil_v2f64: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI104_0@toc@ha +; PC64LE-NEXT: addis 4, 2, .LCPI104_1@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI104_0@toc@l ; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: xxswapd 34, 0 +; PC64LE-NEXT: addi 3, 4, .LCPI104_1@toc@l +; PC64LE-NEXT: lxvd2x 1, 0, 3 +; PC64LE-NEXT: xvrdpip 0, 0 +; PC64LE-NEXT: xxswapd 34, 1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_ceil_v2f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI104_0@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI104_0@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: addis 3, 2, .LCPI104_1@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI104_1@toc@l ; PC64LE9-NEXT: lxvx 34, 0, 3 +; PC64LE9-NEXT: xvrdpip 0, 0 ; PC64LE9-NEXT: blr entry: %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64( @@ -6096,15 +5980,33 @@ ; PC64LE-LABEL: constrained_vector_ceil_v3f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI105_0@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI105_0@toc@l +; PC64LE-NEXT: addis 4, 2, .LCPI105_1@toc@ha +; PC64LE-NEXT: lfs 0, .LCPI105_0@toc@l(3) +; PC64LE-NEXT: addis 3, 2, .LCPI105_2@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI105_1@toc@l(4) +; PC64LE-NEXT: lfs 2, .LCPI105_2@toc@l(3) +; PC64LE-NEXT: addis 3, 2, .LCPI105_3@toc@ha +; PC64LE-NEXT: addi 3, 3, .LCPI105_3@toc@l +; PC64LE-NEXT: xsrdpip 0, 0 ; PC64LE-NEXT: lvx 2, 0, 3 +; PC64LE-NEXT: xsrdpip 0, 1 +; PC64LE-NEXT: xsrdpip 0, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_ceil_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI105_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI105_0@toc@l +; PC64LE9-NEXT: lfs 0, .LCPI105_0@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI105_1@toc@ha +; PC64LE9-NEXT: xsrdpip 0, 0 +; PC64LE9-NEXT: lfs 0, .LCPI105_1@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI105_2@toc@ha +; PC64LE9-NEXT: xsrdpip 0, 0 +; PC64LE9-NEXT: lfs 0, .LCPI105_2@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI105_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI105_3@toc@l ; PC64LE9-NEXT: lxvx 34, 0, 3 +; PC64LE9-NEXT: xsrdpip 0, 0 ; PC64LE9-NEXT: blr entry: %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32( @@ -6117,7 +6019,14 @@ ; PC64LE-LABEL: constrained_vector_ceil_v3f64: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI106_0@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI106_0@toc@l(3) +; PC64LE-NEXT: addis 4, 2, .LCPI106_1@toc@ha +; PC64LE-NEXT: lfs 0, .LCPI106_0@toc@l(3) +; PC64LE-NEXT: addi 3, 4, .LCPI106_1@toc@l +; PC64LE-NEXT: lxvd2x 1, 0, 3 +; PC64LE-NEXT: addis 3, 2, .LCPI106_2@toc@ha +; PC64LE-NEXT: xsrdpip 0, 0 +; PC64LE-NEXT: xvrdpip 0, 1 +; PC64LE-NEXT: lfs 1, .LCPI106_2@toc@l(3) ; PC64LE-NEXT: fmr 2, 1 ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: blr @@ -6125,7 +6034,14 @@ ; PC64LE9-LABEL: constrained_vector_ceil_v3f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI106_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI106_0@toc@l(3) +; PC64LE9-NEXT: lfs 0, .LCPI106_0@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI106_1@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI106_1@toc@l +; PC64LE9-NEXT: xsrdpip 0, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: addis 3, 2, .LCPI106_2@toc@ha +; PC64LE9-NEXT: lfs 1, .LCPI106_2@toc@l(3) +; PC64LE9-NEXT: xvrdpip 0, 0 ; PC64LE9-NEXT: fmr 2, 1 ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: blr @@ -6140,13 +6056,19 @@ ; PC64LE-LABEL: constrained_vector_floor_v1f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI107_0@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI107_0@toc@l(3) +; PC64LE-NEXT: lfs 0, .LCPI107_0@toc@l(3) +; PC64LE-NEXT: addis 3, 2, .LCPI107_1@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI107_1@toc@l(3) +; PC64LE-NEXT: xsrdpim 0, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_floor_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI107_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI107_0@toc@l(3) +; PC64LE9-NEXT: lfs 0, .LCPI107_0@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI107_1@toc@ha +; PC64LE9-NEXT: lfs 1, .LCPI107_1@toc@l(3) +; PC64LE9-NEXT: xsrdpim 0, 0 ; PC64LE9-NEXT: blr entry: %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32( @@ -6160,16 +6082,24 @@ ; PC64LE-LABEL: constrained_vector_floor_v2f64: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI108_0@toc@ha +; PC64LE-NEXT: addis 4, 2, .LCPI108_1@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI108_0@toc@l ; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: xxswapd 34, 0 +; PC64LE-NEXT: addi 3, 4, .LCPI108_1@toc@l +; PC64LE-NEXT: lxvd2x 1, 0, 3 +; PC64LE-NEXT: xvrdpim 0, 0 +; PC64LE-NEXT: xxswapd 34, 1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_floor_v2f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI108_0@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI108_0@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: addis 3, 2, .LCPI108_1@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI108_1@toc@l ; PC64LE9-NEXT: lxvx 34, 0, 3 +; PC64LE9-NEXT: xvrdpim 0, 0 ; PC64LE9-NEXT: blr entry: %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64( @@ -6182,15 +6112,33 @@ ; PC64LE-LABEL: constrained_vector_floor_v3f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI109_0@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI109_0@toc@l +; PC64LE-NEXT: addis 4, 2, .LCPI109_1@toc@ha +; PC64LE-NEXT: lfs 0, .LCPI109_0@toc@l(3) +; PC64LE-NEXT: addis 3, 2, .LCPI109_2@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI109_1@toc@l(4) +; PC64LE-NEXT: lfs 2, .LCPI109_2@toc@l(3) +; PC64LE-NEXT: addis 3, 2, .LCPI109_3@toc@ha +; PC64LE-NEXT: addi 3, 3, .LCPI109_3@toc@l +; PC64LE-NEXT: xsrdpim 0, 0 ; PC64LE-NEXT: lvx 2, 0, 3 +; PC64LE-NEXT: xsrdpim 0, 1 +; PC64LE-NEXT: xsrdpim 0, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_floor_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI109_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI109_0@toc@l +; PC64LE9-NEXT: lfs 0, .LCPI109_0@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI109_1@toc@ha +; PC64LE9-NEXT: xsrdpim 0, 0 +; PC64LE9-NEXT: lfs 0, .LCPI109_1@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI109_2@toc@ha +; PC64LE9-NEXT: xsrdpim 0, 0 +; PC64LE9-NEXT: lfs 0, .LCPI109_2@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI109_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI109_3@toc@l ; PC64LE9-NEXT: lxvx 34, 0, 3 +; PC64LE9-NEXT: xsrdpim 0, 0 ; PC64LE9-NEXT: blr entry: %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32( @@ -6203,7 +6151,14 @@ ; PC64LE-LABEL: constrained_vector_floor_v3f64: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI110_0@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI110_0@toc@l(3) +; PC64LE-NEXT: addis 4, 2, .LCPI110_1@toc@ha +; PC64LE-NEXT: lfs 0, .LCPI110_0@toc@l(3) +; PC64LE-NEXT: addi 3, 4, .LCPI110_1@toc@l +; PC64LE-NEXT: lxvd2x 1, 0, 3 +; PC64LE-NEXT: addis 3, 2, .LCPI110_2@toc@ha +; PC64LE-NEXT: xsrdpim 0, 0 +; PC64LE-NEXT: xvrdpim 0, 1 +; PC64LE-NEXT: lfs 1, .LCPI110_2@toc@l(3) ; PC64LE-NEXT: fmr 2, 1 ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: blr @@ -6211,7 +6166,14 @@ ; PC64LE9-LABEL: constrained_vector_floor_v3f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI110_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI110_0@toc@l(3) +; PC64LE9-NEXT: lfs 0, .LCPI110_0@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI110_1@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI110_1@toc@l +; PC64LE9-NEXT: xsrdpim 0, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: addis 3, 2, .LCPI110_2@toc@ha +; PC64LE9-NEXT: lfs 1, .LCPI110_2@toc@l(3) +; PC64LE9-NEXT: xvrdpim 0, 0 ; PC64LE9-NEXT: fmr 2, 1 ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: blr @@ -6226,13 +6188,19 @@ ; PC64LE-LABEL: constrained_vector_round_v1f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI111_0@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI111_0@toc@l(3) +; PC64LE-NEXT: lfs 0, .LCPI111_0@toc@l(3) +; PC64LE-NEXT: addis 3, 2, .LCPI111_1@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI111_1@toc@l(3) +; PC64LE-NEXT: xsrdpi 0, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_round_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI111_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI111_0@toc@l(3) +; PC64LE9-NEXT: lfs 0, .LCPI111_0@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI111_1@toc@ha +; PC64LE9-NEXT: lfs 1, .LCPI111_1@toc@l(3) +; PC64LE9-NEXT: xsrdpi 0, 0 ; PC64LE9-NEXT: blr entry: %round = call <1 x float> @llvm.experimental.constrained.round.v1f32( @@ -6245,16 +6213,24 @@ ; PC64LE-LABEL: constrained_vector_round_v2f64: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI112_0@toc@ha +; PC64LE-NEXT: addis 4, 2, .LCPI112_1@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI112_0@toc@l ; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: xxswapd 34, 0 +; PC64LE-NEXT: addi 3, 4, .LCPI112_1@toc@l +; PC64LE-NEXT: lxvd2x 1, 0, 3 +; PC64LE-NEXT: xvrdpi 0, 0 +; PC64LE-NEXT: xxswapd 34, 1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_round_v2f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI112_0@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI112_0@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: addis 3, 2, .LCPI112_1@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI112_1@toc@l ; PC64LE9-NEXT: lxvx 34, 0, 3 +; PC64LE9-NEXT: xvrdpi 0, 0 ; PC64LE9-NEXT: blr entry: %round = call <2 x double> @llvm.experimental.constrained.round.v2f64( @@ -6267,15 +6243,33 @@ ; PC64LE-LABEL: constrained_vector_round_v3f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI113_0@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI113_0@toc@l +; PC64LE-NEXT: addis 4, 2, .LCPI113_1@toc@ha +; PC64LE-NEXT: lfs 0, .LCPI113_0@toc@l(3) +; PC64LE-NEXT: addis 3, 2, .LCPI113_2@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI113_1@toc@l(4) +; PC64LE-NEXT: lfs 2, .LCPI113_2@toc@l(3) +; PC64LE-NEXT: addis 3, 2, .LCPI113_3@toc@ha +; PC64LE-NEXT: addi 3, 3, .LCPI113_3@toc@l +; PC64LE-NEXT: xsrdpi 0, 0 ; PC64LE-NEXT: lvx 2, 0, 3 +; PC64LE-NEXT: xsrdpi 0, 1 +; PC64LE-NEXT: xsrdpi 0, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_round_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI113_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI113_0@toc@l +; PC64LE9-NEXT: lfs 0, .LCPI113_0@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI113_1@toc@ha +; PC64LE9-NEXT: xsrdpi 0, 0 +; PC64LE9-NEXT: lfs 0, .LCPI113_1@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI113_2@toc@ha +; PC64LE9-NEXT: xsrdpi 0, 0 +; PC64LE9-NEXT: lfs 0, .LCPI113_2@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI113_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI113_3@toc@l ; PC64LE9-NEXT: lxvx 34, 0, 3 +; PC64LE9-NEXT: xsrdpi 0, 0 ; PC64LE9-NEXT: blr entry: %round = call <3 x float> @llvm.experimental.constrained.round.v3f32( @@ -6290,17 +6284,31 @@ ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 4, 2, .LCPI114_1@toc@ha ; PC64LE-NEXT: addis 3, 2, .LCPI114_0@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI114_1@toc@l(4) -; PC64LE-NEXT: lfs 1, .LCPI114_0@toc@l(3) +; PC64LE-NEXT: addi 4, 4, .LCPI114_1@toc@l +; PC64LE-NEXT: lxvd2x 1, 0, 4 +; PC64LE-NEXT: addis 4, 2, .LCPI114_3@toc@ha +; PC64LE-NEXT: lfs 0, .LCPI114_0@toc@l(3) +; PC64LE-NEXT: addis 3, 2, .LCPI114_2@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI114_3@toc@l(4) +; PC64LE-NEXT: xsrdpi 0, 0 +; PC64LE-NEXT: xvrdpi 0, 1 +; PC64LE-NEXT: lfs 1, .LCPI114_2@toc@l(3) ; PC64LE-NEXT: fmr 3, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_round_v3f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI114_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI114_0@toc@l(3) +; PC64LE9-NEXT: lfs 0, .LCPI114_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI114_1@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI114_1@toc@l(3) +; PC64LE9-NEXT: addi 3, 3, .LCPI114_1@toc@l +; PC64LE9-NEXT: xsrdpi 0, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: addis 3, 2, .LCPI114_2@toc@ha +; PC64LE9-NEXT: lfs 1, .LCPI114_2@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI114_3@toc@ha +; PC64LE9-NEXT: lfs 2, .LCPI114_3@toc@l(3) +; PC64LE9-NEXT: xvrdpi 0, 0 ; PC64LE9-NEXT: fmr 3, 2 ; PC64LE9-NEXT: blr entry: @@ -6314,13 +6322,19 @@ ; PC64LE-LABEL: constrained_vector_trunc_v1f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI115_0@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI115_0@toc@l(3) +; PC64LE-NEXT: lfs 0, .LCPI115_0@toc@l(3) +; PC64LE-NEXT: addis 3, 2, .LCPI115_1@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI115_1@toc@l(3) +; PC64LE-NEXT: xsrdpiz 0, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_trunc_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI115_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI115_0@toc@l(3) +; PC64LE9-NEXT: lfs 0, .LCPI115_0@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI115_1@toc@ha +; PC64LE9-NEXT: lfs 1, .LCPI115_1@toc@l(3) +; PC64LE9-NEXT: xsrdpiz 0, 0 ; PC64LE9-NEXT: blr entry: %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32( @@ -6333,16 +6347,24 @@ ; PC64LE-LABEL: constrained_vector_trunc_v2f64: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI116_0@toc@ha +; PC64LE-NEXT: addis 4, 2, .LCPI116_1@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI116_0@toc@l ; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: xxswapd 34, 0 +; PC64LE-NEXT: addi 3, 4, .LCPI116_1@toc@l +; PC64LE-NEXT: lxvd2x 1, 0, 3 +; PC64LE-NEXT: xvrdpiz 0, 0 +; PC64LE-NEXT: xxswapd 34, 1 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_trunc_v2f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI116_0@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI116_0@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: addis 3, 2, .LCPI116_1@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI116_1@toc@l ; PC64LE9-NEXT: lxvx 34, 0, 3 +; PC64LE9-NEXT: xvrdpiz 0, 0 ; PC64LE9-NEXT: blr entry: %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64( @@ -6355,15 +6377,33 @@ ; PC64LE-LABEL: constrained_vector_trunc_v3f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI117_0@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI117_0@toc@l +; PC64LE-NEXT: addis 4, 2, .LCPI117_1@toc@ha +; PC64LE-NEXT: lfs 0, .LCPI117_0@toc@l(3) +; PC64LE-NEXT: addis 3, 2, .LCPI117_2@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI117_1@toc@l(4) +; PC64LE-NEXT: lfs 2, .LCPI117_2@toc@l(3) +; PC64LE-NEXT: addis 3, 2, .LCPI117_3@toc@ha +; PC64LE-NEXT: addi 3, 3, .LCPI117_3@toc@l +; PC64LE-NEXT: xsrdpiz 0, 0 ; PC64LE-NEXT: lvx 2, 0, 3 +; PC64LE-NEXT: xsrdpiz 0, 1 +; PC64LE-NEXT: xsrdpiz 0, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_trunc_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI117_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI117_0@toc@l +; PC64LE9-NEXT: lfs 0, .LCPI117_0@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI117_1@toc@ha +; PC64LE9-NEXT: xsrdpiz 0, 0 +; PC64LE9-NEXT: lfs 0, .LCPI117_1@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI117_2@toc@ha +; PC64LE9-NEXT: xsrdpiz 0, 0 +; PC64LE9-NEXT: lfs 0, .LCPI117_2@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI117_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI117_3@toc@l ; PC64LE9-NEXT: lxvx 34, 0, 3 +; PC64LE9-NEXT: xsrdpiz 0, 0 ; PC64LE9-NEXT: blr entry: %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32( @@ -6376,7 +6416,14 @@ ; PC64LE-LABEL: constrained_vector_trunc_v3f64: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI118_0@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI118_0@toc@l(3) +; PC64LE-NEXT: addis 4, 2, .LCPI118_1@toc@ha +; PC64LE-NEXT: lfs 0, .LCPI118_0@toc@l(3) +; PC64LE-NEXT: addi 3, 4, .LCPI118_1@toc@l +; PC64LE-NEXT: lxvd2x 1, 0, 3 +; PC64LE-NEXT: addis 3, 2, .LCPI118_2@toc@ha +; PC64LE-NEXT: xsrdpiz 0, 0 +; PC64LE-NEXT: xvrdpiz 0, 1 +; PC64LE-NEXT: lfs 1, .LCPI118_2@toc@l(3) ; PC64LE-NEXT: fmr 2, 1 ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: blr @@ -6384,7 +6431,14 @@ ; PC64LE9-LABEL: constrained_vector_trunc_v3f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI118_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI118_0@toc@l(3) +; PC64LE9-NEXT: lfs 0, .LCPI118_0@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI118_1@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI118_1@toc@l +; PC64LE9-NEXT: xsrdpiz 0, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: addis 3, 2, .LCPI118_2@toc@ha +; PC64LE9-NEXT: lfs 1, .LCPI118_2@toc@l(3) +; PC64LE9-NEXT: xvrdpiz 0, 0 ; PC64LE9-NEXT: fmr 2, 1 ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: blr