diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -269,6 +269,18 @@ setOperationAction(ISD::UDIVREM, MVT::i64, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + // Handle constrained floating-point operations of scalar. + // TODO: Handle SPE specific operation. + setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal); + + setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); + // We don't support sin/cos/sqrt/fmod/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); @@ -894,6 +906,19 @@ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); + // Handle constrained floating-point operations of vector. + // The predicate is `hasVSX` because Altivec instructions raise + // no exceptions but VSX vector instructions do. 
+ setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); + + setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); + addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); } @@ -950,6 +975,12 @@ setOperationAction(ISD::FPOW, MVT::f128, Expand); setOperationAction(ISD::FPOWI, MVT::f128, Expand); setOperationAction(ISD::FREM, MVT::f128, Expand); + + // Handle constrained floating-point operations of fp128 + setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal); } setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); setOperationAction(ISD::BSWAP, MVT::v8i16, Legal); @@ -1123,6 +1154,8 @@ setOperationAction(ISD::FDIV, MVT::v4f32, Expand); setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); } + + // TODO: Handle constrained floating-point operations of v4f64 } if (Subtarget.has64BitSupport()) diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -3039,43 +3039,43 @@ (outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB), "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>; -let Uses = [RM] in { +let Uses = [RM], mayRaiseFPException = 1 in { let isCommutable = 1 in { defm FADD : AForm_2r<63, 21, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "fadd", "$FRT, $FRA, $FRB", IIC_FPAddSub, - [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>; + [(set f64:$FRT, (any_fadd 
f64:$FRA, f64:$FRB))]>; defm FADDS : AForm_2r<59, 21, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), "fadds", "$FRT, $FRA, $FRB", IIC_FPGeneral, - [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>; + [(set f32:$FRT, (any_fadd f32:$FRA, f32:$FRB))]>; } // isCommutable defm FDIV : AForm_2r<63, 18, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "fdiv", "$FRT, $FRA, $FRB", IIC_FPDivD, - [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>; + [(set f64:$FRT, (any_fdiv f64:$FRA, f64:$FRB))]>; defm FDIVS : AForm_2r<59, 18, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), "fdivs", "$FRT, $FRA, $FRB", IIC_FPDivS, - [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>; + [(set f32:$FRT, (any_fdiv f32:$FRA, f32:$FRB))]>; let isCommutable = 1 in { defm FMUL : AForm_3r<63, 25, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC), "fmul", "$FRT, $FRA, $FRC", IIC_FPFused, - [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>; + [(set f64:$FRT, (any_fmul f64:$FRA, f64:$FRC))]>; defm FMULS : AForm_3r<59, 25, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC), "fmuls", "$FRT, $FRA, $FRC", IIC_FPGeneral, - [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>; + [(set f32:$FRT, (any_fmul f32:$FRA, f32:$FRC))]>; } // isCommutable defm FSUB : AForm_2r<63, 20, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "fsub", "$FRT, $FRA, $FRB", IIC_FPAddSub, - [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>; + [(set f64:$FRT, (any_fsub f64:$FRA, f64:$FRB))]>; defm FSUBS : AForm_2r<59, 20, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), "fsubs", "$FRT, $FRA, $FRB", IIC_FPGeneral, - [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>; + [(set f32:$FRT, (any_fsub f32:$FRA, f32:$FRB))]>; } } diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -213,53 +213,53 @@ } } // mayStore - let Uses = [RM] in { + let Uses = [RM], mayRaiseFPException = 1 in { // Add/Mul Instructions let isCommutable = 1 in { def XSADDDP : XX3Form<60, 32, (outs 
vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xsadddp $XT, $XA, $XB", IIC_VecFP, - [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>; + [(set f64:$XT, (any_fadd f64:$XA, f64:$XB))]>; def XSMULDP : XX3Form<60, 48, (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xsmuldp $XT, $XA, $XB", IIC_VecFP, - [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>; + [(set f64:$XT, (any_fmul f64:$XA, f64:$XB))]>; def XVADDDP : XX3Form<60, 96, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvadddp $XT, $XA, $XB", IIC_VecFP, - [(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>; + [(set v2f64:$XT, (any_fadd v2f64:$XA, v2f64:$XB))]>; def XVADDSP : XX3Form<60, 64, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvaddsp $XT, $XA, $XB", IIC_VecFP, - [(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>; + [(set v4f32:$XT, (any_fadd v4f32:$XA, v4f32:$XB))]>; def XVMULDP : XX3Form<60, 112, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvmuldp $XT, $XA, $XB", IIC_VecFP, - [(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>; + [(set v2f64:$XT, (any_fmul v2f64:$XA, v2f64:$XB))]>; def XVMULSP : XX3Form<60, 80, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvmulsp $XT, $XA, $XB", IIC_VecFP, - [(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>; + [(set v4f32:$XT, (any_fmul v4f32:$XA, v4f32:$XB))]>; } // Subtract Instructions def XSSUBDP : XX3Form<60, 40, (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xssubdp $XT, $XA, $XB", IIC_VecFP, - [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>; + [(set f64:$XT, (any_fsub f64:$XA, f64:$XB))]>; def XVSUBDP : XX3Form<60, 104, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvsubdp $XT, $XA, $XB", IIC_VecFP, - [(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>; + [(set v2f64:$XT, (any_fsub v2f64:$XA, v2f64:$XB))]>; def XVSUBSP : XX3Form<60, 72, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvsubsp $XT, $XA, $XB", IIC_VecFP, - [(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>; + [(set v4f32:$XT, (any_fsub v4f32:$XA, v4f32:$XB))]>; // FMA Instructions let BaseName = "XSMADDADP" in { @@ -458,7 +458,7 @@ def XSDIVDP : XX3Form<60, 
56, (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xsdivdp $XT, $XA, $XB", IIC_FPDivD, - [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>; + [(set f64:$XT, (any_fdiv f64:$XA, f64:$XB))]>; def XSSQRTDP : XX2Form<60, 75, (outs vsfrc:$XT), (ins vsfrc:$XB), "xssqrtdp $XT, $XB", IIC_FPSqrtD, @@ -483,11 +483,11 @@ def XVDIVDP : XX3Form<60, 120, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvdivdp $XT, $XA, $XB", IIC_FPDivD, - [(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>; + [(set v2f64:$XT, (any_fdiv v2f64:$XA, v2f64:$XB))]>; def XVDIVSP : XX3Form<60, 88, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvdivsp $XT, $XA, $XB", IIC_FPDivS, - [(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>; + [(set v4f32:$XT, (any_fdiv v4f32:$XA, v4f32:$XB))]>; def XVSQRTDP : XX2Form<60, 203, (outs vsrc:$XT), (ins vsrc:$XB), @@ -835,7 +835,7 @@ [(set vsrc:$XT, (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>; } // isCommutable -} // Uses = [RM] +} // Uses = [RM], mayRaiseFPException // Logical Instructions let isCommutable = 1 in @@ -1451,24 +1451,26 @@ (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; // VSX Elementary Scalar FP arithmetic (SP) + let mayRaiseFPException = 1 in { let isCommutable = 1 in { def XSADDSP : XX3Form<60, 0, (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), "xsaddsp $XT, $XA, $XB", IIC_VecFP, - [(set f32:$XT, (fadd f32:$XA, f32:$XB))]>; + [(set f32:$XT, (any_fadd f32:$XA, f32:$XB))]>; def XSMULSP : XX3Form<60, 16, (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), "xsmulsp $XT, $XA, $XB", IIC_VecFP, - [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>; + [(set f32:$XT, (any_fmul f32:$XA, f32:$XB))]>; } // isCommutable def XSSUBSP : XX3Form<60, 8, (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), "xssubsp $XT, $XA, $XB", IIC_VecFP, - [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>; + [(set f32:$XT, (any_fsub f32:$XA, f32:$XB))]>; def XSDIVSP : XX3Form<60, 24, (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), "xsdivsp $XT, $XA, $XB", IIC_FPDivS, - [(set f32:$XT, (fdiv f32:$XA, f32:$XB))]>; + [(set f32:$XT, 
(any_fdiv f32:$XA, f32:$XB))]>; + } // mayRaiseFPException def XSRESP : XX2Form<60, 26, (outs vssrc:$XT), (ins vssrc:$XB), "xsresp $XT, $XB", IIC_VecFP, @@ -2670,16 +2672,18 @@ // Quad-Precision Scalar Floating-Point Arithmetic Instructions: // Add/Divide/Multiply/Subtract + let mayRaiseFPException = 1 in { let isCommutable = 1 in { def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp", - [(set f128:$vT, (fadd f128:$vA, f128:$vB))]>; + [(set f128:$vT, (any_fadd f128:$vA, f128:$vB))]>; def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp", - [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>; + [(set f128:$vT, (any_fmul f128:$vA, f128:$vB))]>; } def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" , - [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>; + [(set f128:$vT, (any_fsub f128:$vA, f128:$vB))]>; def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp", - [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>; + [(set f128:$vT, (any_fdiv f128:$vA, f128:$vB))]>; + } // mayRaiseFPException // Square-Root def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp", [(set f128:$vT, (fsqrt f128:$vB))]>; diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 -enable-ppc-quad-precision=true | FileCheck %s + +declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.fsub.f128(fp128, fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata) + +define fp128 @fadd_f128(fp128 %f1, fp128 %f2) { +; CHECK-LABEL: fadd_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: 
xsaddqp v2, v2, v3 +; CHECK-NEXT: blr + %res = call fp128 @llvm.experimental.constrained.fadd.f128( + fp128 %f1, fp128 %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret fp128 %res +} + +define fp128 @fsub_f128(fp128 %f1, fp128 %f2) { +; CHECK-LABEL: fsub_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: xssubqp v2, v2, v3 +; CHECK-NEXT: blr + %res = call fp128 @llvm.experimental.constrained.fsub.f128( + fp128 %f1, fp128 %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret fp128 %res +} + +define fp128 @fmul_f128(fp128 %f1, fp128 %f2) { +; CHECK-LABEL: fmul_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: xsmulqp v2, v2, v3 +; CHECK-NEXT: blr + %res = call fp128 @llvm.experimental.constrained.fmul.f128( + fp128 %f1, fp128 %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret fp128 %res +} + +define fp128 @fdiv_f128(fp128 %f1, fp128 %f2) { +; CHECK-LABEL: fdiv_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: xsdivqp v2, v2, v3 +; CHECK-NEXT: blr + %res = call fp128 @llvm.experimental.constrained.fdiv.f128( + fp128 %f1, fp128 %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret fp128 %res +} diff --git a/llvm/test/CodeGen/PowerPC/fp-strict.ll b/llvm/test/CodeGen/PowerPC/fp-strict.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp-strict.ll @@ -0,0 +1,413 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX + +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) +declare double 
@llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) + +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) +declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) + +declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) +declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) + +declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) +declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata) + +define float @fadd_f32(float %f1, float %f2) { +; CHECK-LABEL: fadd_f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xsaddsp f1, f1, f2 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fadd_f32: +; NOVSX: # %bb.0: +; NOVSX-NEXT: fadds f1, f1, f2 +; NOVSX-NEXT: blr + %res = call float @llvm.experimental.constrained.fadd.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res +} + +define double @fadd_f64(double %f1, double %f2) { +; CHECK-LABEL: fadd_f64: 
+; CHECK: # %bb.0: +; CHECK-NEXT: xsadddp f1, f1, f2 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fadd_f64: +; NOVSX: # %bb.0: +; NOVSX-NEXT: fadd f1, f1, f2 +; NOVSX-NEXT: blr + %res = call double @llvm.experimental.constrained.fadd.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res +} + +define <4 x float> @fadd_v4f32(<4 x float> %vf1, <4 x float> %vf2) { +; CHECK-LABEL: fadd_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvaddsp v2, v2, v3 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fadd_v4f32: +; NOVSX: # %bb.0: +; NOVSX-NEXT: addi r3, r1, -32 +; NOVSX-NEXT: addi r4, r1, -48 +; NOVSX-NEXT: stvx v3, 0, r3 +; NOVSX-NEXT: stvx v2, 0, r4 +; NOVSX-NEXT: addi r3, r1, -16 +; NOVSX-NEXT: lfs f0, -20(r1) +; NOVSX-NEXT: lfs f1, -36(r1) +; NOVSX-NEXT: fadds f0, f1, f0 +; NOVSX-NEXT: lfs f1, -40(r1) +; NOVSX-NEXT: stfs f0, -4(r1) +; NOVSX-NEXT: lfs f0, -24(r1) +; NOVSX-NEXT: fadds f0, f1, f0 +; NOVSX-NEXT: lfs f1, -44(r1) +; NOVSX-NEXT: stfs f0, -8(r1) +; NOVSX-NEXT: lfs f0, -28(r1) +; NOVSX-NEXT: fadds f0, f1, f0 +; NOVSX-NEXT: lfs f1, -48(r1) +; NOVSX-NEXT: stfs f0, -12(r1) +; NOVSX-NEXT: lfs f0, -32(r1) +; NOVSX-NEXT: fadds f0, f1, f0 +; NOVSX-NEXT: stfs f0, -16(r1) +; NOVSX-NEXT: lvx v2, 0, r3 +; NOVSX-NEXT: blr + %res = call <4 x float> @llvm.experimental.constrained.fadd.v4f32( + <4 x float> %vf1, <4 x float> %vf2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <2 x double> @fadd_v2f64(<2 x double> %vf1, <2 x double> %vf2) { +; CHECK-LABEL: fadd_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvadddp v2, v2, v3 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fadd_v2f64: +; NOVSX: # %bb.0: +; NOVSX-NEXT: fadd f2, f2, f4 +; NOVSX-NEXT: fadd f1, f1, f3 +; NOVSX-NEXT: blr + %res = call <2 x double> @llvm.experimental.constrained.fadd.v2f64( + <2 x double> %vf1, <2 x double> %vf2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define float @fsub_f32(float %f1, 
float %f2) { +; CHECK-LABEL: fsub_f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xssubsp f1, f1, f2 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fsub_f32: +; NOVSX: # %bb.0: +; NOVSX-NEXT: fsubs f1, f1, f2 +; NOVSX-NEXT: blr + + %res = call float @llvm.experimental.constrained.fsub.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res; +} + +define double @fsub_f64(double %f1, double %f2) { +; CHECK-LABEL: fsub_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xssubdp f1, f1, f2 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fsub_f64: +; NOVSX: # %bb.0: +; NOVSX-NEXT: fsub f1, f1, f2 +; NOVSX-NEXT: blr + + %res = call double @llvm.experimental.constrained.fsub.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res; +} + +define <4 x float> @fsub_v4f32(<4 x float> %vf1, <4 x float> %vf2) { +; CHECK-LABEL: fsub_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvsubsp v2, v2, v3 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fsub_v4f32: +; NOVSX: # %bb.0: +; NOVSX-NEXT: addi r3, r1, -32 +; NOVSX-NEXT: addi r4, r1, -48 +; NOVSX-NEXT: stvx v3, 0, r3 +; NOVSX-NEXT: stvx v2, 0, r4 +; NOVSX-NEXT: addi r3, r1, -16 +; NOVSX-NEXT: lfs f0, -20(r1) +; NOVSX-NEXT: lfs f1, -36(r1) +; NOVSX-NEXT: fsubs f0, f1, f0 +; NOVSX-NEXT: lfs f1, -40(r1) +; NOVSX-NEXT: stfs f0, -4(r1) +; NOVSX-NEXT: lfs f0, -24(r1) +; NOVSX-NEXT: fsubs f0, f1, f0 +; NOVSX-NEXT: lfs f1, -44(r1) +; NOVSX-NEXT: stfs f0, -8(r1) +; NOVSX-NEXT: lfs f0, -28(r1) +; NOVSX-NEXT: fsubs f0, f1, f0 +; NOVSX-NEXT: lfs f1, -48(r1) +; NOVSX-NEXT: stfs f0, -12(r1) +; NOVSX-NEXT: lfs f0, -32(r1) +; NOVSX-NEXT: fsubs f0, f1, f0 +; NOVSX-NEXT: stfs f0, -16(r1) +; NOVSX-NEXT: lvx v2, 0, r3 +; NOVSX-NEXT: blr + %res = call <4 x float> @llvm.experimental.constrained.fsub.v4f32( + <4 x float> %vf1, <4 x float> %vf2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %res; +} + +define <2 x double> @fsub_v2f64(<2 x double> %vf1, <2 x double> %vf2) { +; 
CHECK-LABEL: fsub_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvsubdp v2, v2, v3 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fsub_v2f64: +; NOVSX: # %bb.0: +; NOVSX-NEXT: fsub f2, f2, f4 +; NOVSX-NEXT: fsub f1, f1, f3 +; NOVSX-NEXT: blr + %res = call <2 x double> @llvm.experimental.constrained.fsub.v2f64( + <2 x double> %vf1, <2 x double> %vf2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %res; +} + +define float @fmul_f32(float %f1, float %f2) { +; CHECK-LABEL: fmul_f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xsmulsp f1, f1, f2 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fmul_f32: +; NOVSX: # %bb.0: +; NOVSX-NEXT: fmuls f1, f1, f2 +; NOVSX-NEXT: blr + + %res = call float @llvm.experimental.constrained.fmul.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res; +} + +define double @fmul_f64(double %f1, double %f2) { +; CHECK-LABEL: fmul_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xsmuldp f1, f1, f2 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fmul_f64: +; NOVSX: # %bb.0: +; NOVSX-NEXT: fmul f1, f1, f2 +; NOVSX-NEXT: blr + + %res = call double @llvm.experimental.constrained.fmul.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res; +} + +define <4 x float> @fmul_v4f32(<4 x float> %vf1, <4 x float> %vf2) { +; CHECK-LABEL: fmul_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvmulsp v2, v2, v3 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fmul_v4f32: +; NOVSX: # %bb.0: +; NOVSX-NEXT: addi r3, r1, -32 +; NOVSX-NEXT: addi r4, r1, -48 +; NOVSX-NEXT: stvx v3, 0, r3 +; NOVSX-NEXT: stvx v2, 0, r4 +; NOVSX-NEXT: addi r3, r1, -16 +; NOVSX-NEXT: lfs f0, -20(r1) +; NOVSX-NEXT: lfs f1, -36(r1) +; NOVSX-NEXT: fmuls f0, f1, f0 +; NOVSX-NEXT: lfs f1, -40(r1) +; NOVSX-NEXT: stfs f0, -4(r1) +; NOVSX-NEXT: lfs f0, -24(r1) +; NOVSX-NEXT: fmuls f0, f1, f0 +; NOVSX-NEXT: lfs f1, -44(r1) +; NOVSX-NEXT: stfs f0, -8(r1) +; NOVSX-NEXT: lfs f0, -28(r1) +; NOVSX-NEXT: fmuls f0, f1, f0 +; NOVSX-NEXT: 
lfs f1, -48(r1) +; NOVSX-NEXT: stfs f0, -12(r1) +; NOVSX-NEXT: lfs f0, -32(r1) +; NOVSX-NEXT: fmuls f0, f1, f0 +; NOVSX-NEXT: stfs f0, -16(r1) +; NOVSX-NEXT: lvx v2, 0, r3 +; NOVSX-NEXT: blr + %res = call <4 x float> @llvm.experimental.constrained.fmul.v4f32( + <4 x float> %vf1, <4 x float> %vf2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %res; +} + +define <2 x double> @fmul_v2f64(<2 x double> %vf1, <2 x double> %vf2) { +; CHECK-LABEL: fmul_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvmuldp v2, v2, v3 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fmul_v2f64: +; NOVSX: # %bb.0: +; NOVSX-NEXT: fmul f2, f2, f4 +; NOVSX-NEXT: fmul f1, f1, f3 +; NOVSX-NEXT: blr + %res = call <2 x double> @llvm.experimental.constrained.fmul.v2f64( + <2 x double> %vf1, <2 x double> %vf2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %res; +} + +define float @fdiv_f32(float %f1, float %f2) { +; CHECK-LABEL: fdiv_f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xsdivsp f1, f1, f2 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fdiv_f32: +; NOVSX: # %bb.0: +; NOVSX-NEXT: fdivs f1, f1, f2 +; NOVSX-NEXT: blr + + %res = call float @llvm.experimental.constrained.fdiv.f32( + float %f1, float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %res; +} + +define double @fdiv_f64(double %f1, double %f2) { +; CHECK-LABEL: fdiv_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xsdivdp f1, f1, f2 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fdiv_f64: +; NOVSX: # %bb.0: +; NOVSX-NEXT: fdiv f1, f1, f2 +; NOVSX-NEXT: blr + + %res = call double @llvm.experimental.constrained.fdiv.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %res; +} + +define <4 x float> @fdiv_v4f32(<4 x float> %vf1, <4 x float> %vf2) { +; CHECK-LABEL: fdiv_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvdivsp v2, v2, v3 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fdiv_v4f32: +; NOVSX: # %bb.0: +; NOVSX-NEXT: addi r3, r1, -32 +; NOVSX-NEXT: addi 
r4, r1, -48 +; NOVSX-NEXT: stvx v3, 0, r3 +; NOVSX-NEXT: stvx v2, 0, r4 +; NOVSX-NEXT: addi r3, r1, -16 +; NOVSX-NEXT: lfs f0, -20(r1) +; NOVSX-NEXT: lfs f1, -36(r1) +; NOVSX-NEXT: fdivs f0, f1, f0 +; NOVSX-NEXT: lfs f1, -40(r1) +; NOVSX-NEXT: stfs f0, -4(r1) +; NOVSX-NEXT: lfs f0, -24(r1) +; NOVSX-NEXT: fdivs f0, f1, f0 +; NOVSX-NEXT: lfs f1, -44(r1) +; NOVSX-NEXT: stfs f0, -8(r1) +; NOVSX-NEXT: lfs f0, -28(r1) +; NOVSX-NEXT: fdivs f0, f1, f0 +; NOVSX-NEXT: lfs f1, -48(r1) +; NOVSX-NEXT: stfs f0, -12(r1) +; NOVSX-NEXT: lfs f0, -32(r1) +; NOVSX-NEXT: fdivs f0, f1, f0 +; NOVSX-NEXT: stfs f0, -16(r1) +; NOVSX-NEXT: lvx v2, 0, r3 +; NOVSX-NEXT: blr + %res = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32( + <4 x float> %vf1, <4 x float> %vf2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <2 x double> @fdiv_v2f64(<2 x double> %vf1, <2 x double> %vf2) { +; CHECK-LABEL: fdiv_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvdivdp v2, v2, v3 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: fdiv_v2f64: +; NOVSX: # %bb.0: +; NOVSX-NEXT: fdiv f2, f2, f4 +; NOVSX-NEXT: fdiv f1, f1, f3 +; NOVSX-NEXT: blr + %res = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64( + <2 x double> %vf1, <2 x double> %vf2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define double @no_fma_fold(double %f1, double %f2, double %f3) { +; CHECK-LABEL: no_fma_fold: +; CHECK: # %bb.0: +; CHECK-NEXT: xsmuldp f0, f1, f2 +; CHECK-NEXT: xsadddp f1, f0, f3 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: no_fma_fold: +; NOVSX: # %bb.0: +; NOVSX-NEXT: fmul f0, f1, f2 +; NOVSX-NEXT: fadd f1, f0, f3 +; NOVSX-NEXT: blr + %mul = call double @llvm.experimental.constrained.fmul.f64( + double %f1, double %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %add = call double @llvm.experimental.constrained.fadd.f64( + double %mul, double %f3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %add 
+} diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -191,8 +191,8 @@ ; PC64LE-NEXT: xxswapd 0, 0 ; PC64LE-NEXT: xxswapd 1, 1 ; PC64LE-NEXT: xxswapd 2, 2 -; PC64LE-NEXT: xvdivdp 34, 1, 0 -; PC64LE-NEXT: xvdivdp 35, 2, 0 +; PC64LE-NEXT: xvdivdp 35, 1, 0 +; PC64LE-NEXT: xvdivdp 34, 2, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fdiv_v4f64: @@ -205,9 +205,9 @@ ; PC64LE9-NEXT: lxvx 1, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI4_2@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI4_2@toc@l -; PC64LE9-NEXT: xvdivdp 34, 1, 0 -; PC64LE9-NEXT: lxvx 1, 0, 3 ; PC64LE9-NEXT: xvdivdp 35, 1, 0 +; PC64LE9-NEXT: lxvx 1, 0, 3 +; PC64LE9-NEXT: xvdivdp 34, 1, 0 ; PC64LE9-NEXT: blr entry: %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64( @@ -829,8 +829,8 @@ ; PC64LE-NEXT: xxswapd 0, 0 ; PC64LE-NEXT: xxswapd 1, 1 ; PC64LE-NEXT: xxswapd 2, 2 -; PC64LE-NEXT: xvmuldp 34, 1, 0 -; PC64LE-NEXT: xvmuldp 35, 1, 2 +; PC64LE-NEXT: xvmuldp 35, 1, 0 +; PC64LE-NEXT: xvmuldp 34, 1, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fmul_v4f64: @@ -843,9 +843,9 @@ ; PC64LE9-NEXT: lxvx 1, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI14_2@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI14_2@toc@l -; PC64LE9-NEXT: xvmuldp 34, 1, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: xvmuldp 35, 1, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: xvmuldp 34, 1, 0 ; PC64LE9-NEXT: blr entry: %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64( @@ -1045,8 +1045,8 @@ ; PC64LE-NEXT: xxswapd 0, 0 ; PC64LE-NEXT: xxswapd 1, 1 ; PC64LE-NEXT: xxswapd 2, 2 -; PC64LE-NEXT: xvadddp 34, 1, 0 -; PC64LE-NEXT: xvadddp 35, 1, 2 +; PC64LE-NEXT: xvadddp 35, 1, 0 +; PC64LE-NEXT: xvadddp 34, 1, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fadd_v4f64: @@ -1059,9 +1059,9 
@@ ; PC64LE9-NEXT: lxvx 1, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI19_2@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI19_2@toc@l -; PC64LE9-NEXT: xvadddp 34, 1, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: xvadddp 35, 1, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: xvadddp 34, 1, 0 ; PC64LE9-NEXT: blr entry: %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64( @@ -1261,8 +1261,8 @@ ; PC64LE-NEXT: xxswapd 0, 0 ; PC64LE-NEXT: xxswapd 1, 1 ; PC64LE-NEXT: xxswapd 2, 2 -; PC64LE-NEXT: xvsubdp 34, 1, 0 -; PC64LE-NEXT: xvsubdp 35, 1, 2 +; PC64LE-NEXT: xvsubdp 35, 1, 0 +; PC64LE-NEXT: xvsubdp 34, 1, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fsub_v4f64: @@ -1275,9 +1275,9 @@ ; PC64LE9-NEXT: lxvx 1, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI24_2@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI24_2@toc@l -; PC64LE9-NEXT: xvsubdp 34, 1, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: xvsubdp 35, 1, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: xvsubdp 34, 1, 0 ; PC64LE9-NEXT: blr entry: %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(