diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -15193,6 +15193,7 @@
 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                            const CallExpr *E) {
+  // __builtin_ppc_fnmsub, __builtin_ppc_fnmsubs
   SmallVector<Value *, 4> Ops;
 
   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
@@ -15778,6 +15779,8 @@
     return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
   }
   // FMA variations
+  case PPC::BI__builtin_ppc_fnmsub:
+  case PPC::BI__builtin_ppc_fnmsubs:
   case PPC::BI__builtin_vsx_xvmaddadp:
   case PPC::BI__builtin_vsx_xvmaddasp:
   case PPC::BI__builtin_vsx_xvnmaddadp:
@@ -15816,6 +15819,8 @@
           F, {X, Y, Builder.CreateFNeg(Z, "neg")});
     else
       return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
+  case PPC::BI__builtin_ppc_fnmsub:
+  case PPC::BI__builtin_ppc_fnmsubs:
   case PPC::BI__builtin_vsx_xvnmsubadp:
   case PPC::BI__builtin_vsx_xvnmsubasp:
     if (Builder.getIsFPConstrained())
@@ -15824,10 +15829,9 @@
               F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
           "neg");
     else
-      return Builder.CreateFNeg(
-          Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
-          "neg");
-  }
+      return Builder.CreateCall(
+          CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
+  }
   llvm_unreachable("Unknown FMA operation");
   return nullptr; // Suppress no-return warning
 }
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-fma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-fma.c
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-fma.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-fma.c
@@ -32,12 +32,8 @@
   // CHECK: <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT]])
 
   vf = __builtin_vsx_xvnmsubasp(vf, vf, vf);
-  // CHECK: [[RESULT:%[^ ]+]] = fneg <4 x float> %{{.*}}
-  // CHECK: [[RESULT2:%[^ ]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT]])
-  // CHECK: fneg <4 x float> [[RESULT2]]
+  // CHECK: call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
 
   vd = __builtin_vsx_xvnmsubadp(vd, vd, vd);
-  // CHECK: [[RESULT:%[^ ]+]] = fneg <2 x double> %{{.*}}
-  // CHECK: [[RESULT2:%[^ ]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT]])
-  // CHECK: fneg <2 x double> [[RESULT2]]
+  // CHECK: call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
 }
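Note: in the non-strict-FP path, the front end now emits a single overloaded llvm.ppc.fnmsub call instead of composing fneg + llvm.fma + fneg, as the updated CHECK lines above show. A minimal before/after sketch in LLVM IR (the operand names %a, %b, %c are placeholders, not taken from the tests):

    ; before this patch: fnmsub built out of generic operations
    %negc = fneg <4 x float> %c
    %fma  = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %negc)
    %res  = fneg <4 x float> %fma

    ; after this patch: one target intrinsic, expanded during instruction selection
    %res2 = call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)

Because the intrinsic is overloaded on ResultType, the scalar builtins and the vector VSX builtins all funnel through the same code path.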
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c b/clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c
@@ -142,9 +142,7 @@
   vf = __builtin_vsx_xvnmsubasp(vf, vf, vf);
   // CHECK-LABEL: try-xvnmsubasp
-  // CHECK-UNCONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <4 x float> %{{.*}}
-  // CHECK-UNCONSTRAINED: [[RESULT1:%[^ ]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT0]])
-  // CHECK-UNCONSTRAINED: fneg <4 x float> [[RESULT1]]
+  // CHECK-UNCONSTRAINED: call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
   // CHECK-CONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <4 x float> %{{.*}}
   // CHECK-CONSTRAINED: [[RESULT1:%[^ ]+]] = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT0]], metadata !"round.tonearest", metadata !"fpexcept.strict")
   // CHECK-CONSTRAINED: fneg <4 x float> [[RESULT1]]
@@ -152,9 +150,7 @@
   vd = __builtin_vsx_xvnmsubadp(vd, vd, vd);
   // CHECK-LABEL: try-xvnmsubadp
-  // CHECK-UNCONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <2 x double> %{{.*}}
-  // CHECK-UNCONSTRAINED: [[RESULT1:%[^ ]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT0]])
-  // CHECK-UNCONSTRAINED: fneg <2 x double> [[RESULT1]]
+  // CHECK-UNCONSTRAINED: call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
   // CHECK-CONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <2 x double> %{{.*}}
   // CHECK-CONSTRAINED: [[RESULT1:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT0]], metadata !"round.tonearest", metadata !"fpexcept.strict")
   // CHECK-CONSTRAINED: fneg <2 x double> [[RESULT1]]
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
@@ -894,20 +894,12 @@
 // CHECK-LE-NEXT: fneg <2 x double> %[[FM]]
 
   res_vf = vec_nmsub(vf, vf, vf);
-// CHECK: fneg <4 x float> %{{[0-9]+}}
-// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
-// CHECK: fneg <4 x float> %{{[0-9]+}}
-// CHECK-LE: fneg <4 x float> %{{[0-9]+}}
-// CHECK-LE-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
-// CHECK-LE: fneg <4 x float> %{{[0-9]+}}
+// CHECK: call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
+// CHECK-LE: call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
 
   res_vd = vec_nmsub(vd, vd, vd);
-// CHECK: fneg <2 x double> %{{[0-9]+}}
-// CHECK-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
-// CHECK-NEXT: fneg <2 x double> %[[FM]]
-// CHECK-LE: fneg <2 x double> %{{[0-9]+}}
-// CHECK-LE-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
-// CHECK-LE-NEXT: fneg <2 x double> %[[FM]]
+// CHECK: [[FM:[0-9]+]] = call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
+// CHECK-LE: [[FM:[0-9]+]] = call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
 
 /* vec_nor */
   res_vsll = vec_nor(vsll, vsll);
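Note: only the unconstrained path changes. In strict (constrained FP) mode the front end still emits fneg + llvm.experimental.constrained.fma + fneg, which is why the CHECK-CONSTRAINED lines above are untouched. For reference, the unchanged constrained shape looks like this (a sketch with placeholder operand names):

    %negc = fneg <2 x double> %c
    %fma  = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
              <2 x double> %a, <2 x double> %b, <2 x double> %negc,
              metadata !"round.tonearest", metadata !"fpexcept.strict")
    %res  = fneg <2 x double> %fma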
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c
@@ -95,10 +95,10 @@
 // CHECK-LABEL: @fnmsub(
 // CHECK:         [[D_ADDR:%.*]] = alloca double, align 8
 // CHECK-NEXT:    store double [[D:%.*]], double* [[D_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load double, double* [[D_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load double, double* [[D_ADDR]], align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = load double, double* [[D_ADDR]], align 8
-// CHECK-NEXT:    [[TMP3:%.*]] = call double @llvm.ppc.fnmsub(double [[TMP0]], double [[TMP1]], double [[TMP2]])
+// CHECK-NEXT:    [[TMP3:%.*]] = call double @llvm.ppc.fnmsub.f64(double [[TMP0]], double [[TMP1]], double [[TMP2]])
 // CHECK-NEXT:    ret double [[TMP3]]
 //
 double fnmsub (double d) {
@@ -108,10 +108,10 @@
 // CHECK-LABEL: @fnmsubs(
 // CHECK:         [[F_ADDR:%.*]] = alloca float, align 4
 // CHECK-NEXT:    store float [[F:%.*]], float* [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[F_ADDR]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[F_ADDR]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = call float @llvm.ppc.fnmsubs(float [[TMP0]], float [[TMP1]], float [[TMP2]])
+// CHECK-NEXT:    [[TMP3:%.*]] = call float @llvm.ppc.fnmsub.f32(float [[TMP0]], float [[TMP1]], float [[TMP2]])
 // CHECK-NEXT:    ret float [[TMP3]]
 //
 float fnmsubs (float f) {
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1722,15 +1722,9 @@
                  [llvm_float_ty, llvm_float_ty, llvm_float_ty],
                  [IntrNoMem]>;
   def int_ppc_fnmsub
-      : GCCBuiltin<"__builtin_ppc_fnmsub">,
-        Intrinsic <[llvm_double_ty],
-                   [llvm_double_ty, llvm_double_ty, llvm_double_ty],
-                   [IntrNoMem]>;
-  def int_ppc_fnmsubs
-      : GCCBuiltin<"__builtin_ppc_fnmsubs">,
-        Intrinsic <[llvm_float_ty],
-                   [llvm_float_ty, llvm_float_ty, llvm_float_ty],
-                   [IntrNoMem]>;
+      : Intrinsic<[llvm_anyfloat_ty],
+                  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+                  [IntrNoMem]>;
   def int_ppc_fre
       : GCCBuiltin<"__builtin_ppc_fre">,
         Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -627,6 +627,8 @@
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
 
   // To handle counter-based loop conditions.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
@@ -10549,6 +10551,16 @@
                        DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
         0);
   }
+  case Intrinsic::ppc_fnmsub: {
+    EVT VT = Op.getOperand(1).getValueType();
+    if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
+      return DAG.getNode(
+          ISD::FNEG, dl, VT,
+          DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
+                      DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
+    return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
+                       Op.getOperand(2), Op.getOperand(3));
+  }
   case Intrinsic::ppc_convert_f128_to_ppcf128:
   case Intrinsic::ppc_convert_ppcf128_to_f128: {
     RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
@@ -11220,6 +11232,7 @@
     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                   N->getOperand(2), N->getOperand(1)));
     break;
+  case Intrinsic::ppc_fnmsub:
   case Intrinsic::ppc_convert_f128_to_ppcf128:
     Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
     break;
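LowerINTRINSIC_WO_CHAIN now selects PPCISD::FNMSUB directly when VSX is available (and the type is not an f128 without hardware support); otherwise it rebuilds the negated-FMA form on the DAG. The fallback is equivalent to the following IR-level sketch (placeholder names %a, %b, %c; the actual expansion is built from SelectionDAG nodes, not emitted as IR):

    %negc = fneg double %c
    %fma  = call double @llvm.fma.f64(double %a, double %b, double %negc)
    %res  = fneg double %fma    ; i.e. -(a * b - c)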
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3728,8 +3728,6 @@
 // XL Compat intrinsics.
 def : Pat<(int_ppc_fmsub f64:$A, f64:$B, f64:$C), (FMSUB $A, $B, $C)>;
 def : Pat<(int_ppc_fmsubs f32:$A, f32:$B, f32:$C), (FMSUBS $A, $B, $C)>;
-def : Pat<(int_ppc_fnmsub f64:$A, f64:$B, f64:$C), (FNMSUB $A, $B, $C)>;
-def : Pat<(int_ppc_fnmsubs f32:$A, f32:$B, f32:$C), (FNMSUBS $A, $B, $C)>;
 def : Pat<(int_ppc_fnmadd f64:$A, f64:$B, f64:$C), (FNMADD $A, $B, $C)>;
 def : Pat<(int_ppc_fnmadds f32:$A, f32:$B, f32:$C), (FNMADDS $A, $B, $C)>;
 def : Pat<(int_ppc_fre f64:$A), (FRE $A)>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2897,7 +2897,6 @@
 // XL Compat builtins.
 def : Pat<(int_ppc_fmsub f64:$A, f64:$B, f64:$C), (XSMSUBMDP $A, $B, $C)>;
-def : Pat<(int_ppc_fnmsub f64:$A, f64:$B, f64:$C), (XSNMSUBMDP $A, $B, $C)>;
 def : Pat<(int_ppc_fnmadd f64:$A, f64:$B, f64:$C), (XSNMADDMDP $A, $B, $C)>;
 def : Pat<(int_ppc_fre f64:$A), (XSREDP $A)>;
 def : Pat<(int_ppc_frsqrte vsfrc:$XB), (XSRSQRTEDP $XB)>;
@@ -3311,7 +3310,6 @@
 // XL Compat builtins.
 def : Pat<(int_ppc_fmsubs f32:$A, f32:$B, f32:$C), (XSMSUBMSP $A, $B, $C)>;
-def : Pat<(int_ppc_fnmsubs f32:$A, f32:$B, f32:$C), (XSNMSUBMSP $A, $B, $C)>;
 def : Pat<(int_ppc_fnmadds f32:$A, f32:$B, f32:$C), (XSNMADDMSP $A, $B, $C)>;
 def : Pat<(int_ppc_fres f32:$A), (XSRESP $A)>;
 def : Pat<(i32 (int_ppc_extract_exp f64:$A)),
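Because ppc_fnmsub is now custom-lowered in PPCISelLowering.cpp, the per-type selection patterns removed above become dead. The overloaded intrinsic mangles its type parameter into the name, so each concrete instantiation gets its own declaration; the four forms exercised by the updated test below are:

    declare float @llvm.ppc.fnmsub.f32(float, float, float)
    declare double @llvm.ppc.fnmsub.f64(double, double, double)
    declare <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float>, <4 x float>, <4 x float>)
    declare <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double>, <2 x double>, <2 x double>)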
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll
@@ -98,49 +98,104 @@
 declare float @llvm.ppc.fnmadds(float, float, float)
 
-define dso_local double @fnmsub_t0(double %d, double %d2, double %d3) {
-; CHECK-PWR8-LABEL: fnmsub_t0:
+define dso_local float @fnmsub_f32(float %f, float %f2, float %f3) {
+; CHECK-PWR8-LABEL: fnmsub_f32:
 ; CHECK-PWR8:       # %bb.0: # %entry
-; CHECK-PWR8-NEXT:    xsnmsubmdp 1, 2, 3
+; CHECK-PWR8-NEXT:    xsnmsubasp 3, 1, 2
+; CHECK-PWR8-NEXT:    fmr 1, 3
 ; CHECK-PWR8-NEXT:    blr
 ;
-; CHECK-NOVSX-LABEL: fnmsub_t0:
+; CHECK-NOVSX-LABEL: fnmsub_f32:
+; CHECK-NOVSX:       # %bb.0: # %entry
+; CHECK-NOVSX-NEXT:    fnmsubs 1, 1, 2, 3
+; CHECK-NOVSX-NEXT:    blr
+;
+; CHECK-PWR7-LABEL: fnmsub_f32:
+; CHECK-PWR7:       # %bb.0: # %entry
+; CHECK-PWR7-NEXT:    fnmsubs 1, 1, 2, 3
+; CHECK-PWR7-NEXT:    blr
+entry:
+  %0 = tail call float @llvm.ppc.fnmsub.f32(float %f, float %f2, float %f3)
+  ret float %0
+}
+
+declare float @llvm.ppc.fnmsub.f32(float, float, float)
+
+define dso_local double @fnmsub_f64(double %f, double %f2, double %f3) {
+; CHECK-PWR8-LABEL: fnmsub_f64:
+; CHECK-PWR8:       # %bb.0: # %entry
+; CHECK-PWR8-NEXT:    xsnmsubadp 3, 1, 2
+; CHECK-PWR8-NEXT:    fmr 1, 3
+; CHECK-PWR8-NEXT:    blr
+;
+; CHECK-NOVSX-LABEL: fnmsub_f64:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    fnmsub 1, 1, 2, 3
 ; CHECK-NOVSX-NEXT:    blr
 ;
-; CHECK-PWR7-LABEL: fnmsub_t0:
-; CHECK-PWR7:       # %bb.0: # %entry
-; CHECK-PWR7-NEXT:    xsnmsubmdp 1, 2, 3
+; CHECK-PWR7-LABEL: fnmsub_f64:
+; CHECK-PWR7:       # %bb.0: # %entry
+; CHECK-PWR7-NEXT:    xsnmsubadp 3, 1, 2
+; CHECK-PWR7-NEXT:    fmr 1, 3
 ; CHECK-PWR7-NEXT:    blr
 entry:
-  %0 = tail call double @llvm.ppc.fnmsub(double %d, double %d2, double %d3)
+  %0 = tail call double @llvm.ppc.fnmsub.f64(double %f, double %f2, double %f3)
   ret double %0
 }
 
-declare double @llvm.ppc.fnmsub(double, double, double)
+declare double @llvm.ppc.fnmsub.f64(double, double, double)
 
-define dso_local float @fnmsubs_t0(float %f, float %f2, float %f3) {
-; CHECK-PWR8-LABEL: fnmsubs_t0:
+define dso_local <4 x float> @fnmsub_v4f32(<4 x float> %f, <4 x float> %f2, <4 x float> %f3) {
+; CHECK-PWR8-LABEL: fnmsub_v4f32:
 ; CHECK-PWR8:       # %bb.0: # %entry
-; CHECK-PWR8-NEXT:    xsnmsubmsp 1, 2, 3
+; CHECK-PWR8-NEXT:    xvnmsubasp 36, 34, 35
+; CHECK-PWR8-NEXT:    vmr 2, 4
 ; CHECK-PWR8-NEXT:    blr
 ;
-; CHECK-NOVSX-LABEL: fnmsubs_t0:
-; CHECK-NOVSX:       # %bb.0: # %entry
-; CHECK-NOVSX-NEXT:    fnmsubs 1, 1, 2, 3
+; CHECK-NOVSX-LABEL: fnmsub_v4f32:
+; CHECK-NOVSX:       # %bb.0: # %entry
+; CHECK-NOVSX-NEXT:    fnmsubs 1, 1, 5, 9
+; CHECK-NOVSX-NEXT:    fnmsubs 2, 2, 6, 10
+; CHECK-NOVSX-NEXT:    fnmsubs 3, 3, 7, 11
+; CHECK-NOVSX-NEXT:    fnmsubs 4, 4, 8, 12
 ; CHECK-NOVSX-NEXT:    blr
 ;
-; CHECK-PWR7-LABEL: fnmsubs_t0:
-; CHECK-PWR7:       # %bb.0: # %entry
-; CHECK-PWR7-NEXT:    fnmsubs 1, 1, 2, 3
+; CHECK-PWR7-LABEL: fnmsub_v4f32:
+; CHECK-PWR7:       # %bb.0: # %entry
+; CHECK-PWR7-NEXT:    xvnmsubasp 36, 34, 35
+; CHECK-PWR7-NEXT:    vmr 2, 4
 ; CHECK-PWR7-NEXT:    blr
 entry:
-  %0 = tail call float @llvm.ppc.fnmsubs(float %f, float %f2, float %f3)
-  ret float %0
+  %0 = tail call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %f, <4 x float> %f2, <4 x float> %f3)
+  ret <4 x float> %0
+}
+
+declare <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float>, <4 x float>, <4 x float>)
+
+define dso_local <2 x double> @fnmsub_v2f64(<2 x double> %f, <2 x double> %f2, <2 x double> %f3) {
+; CHECK-PWR8-LABEL: fnmsub_v2f64:
+; CHECK-PWR8:       # %bb.0: # %entry
+; CHECK-PWR8-NEXT:    xvnmsubadp 36, 34, 35
+; CHECK-PWR8-NEXT:    vmr 2, 4
+; CHECK-PWR8-NEXT:    blr
+;
+; CHECK-NOVSX-LABEL: fnmsub_v2f64:
+; CHECK-NOVSX:       # %bb.0: # %entry
+; CHECK-NOVSX-NEXT:    fnmsub 1, 1, 3, 5
+; CHECK-NOVSX-NEXT:    fnmsub 2, 2, 4, 6
+; CHECK-NOVSX-NEXT:    blr
+;
+; CHECK-PWR7-LABEL: fnmsub_v2f64:
+; CHECK-PWR7:       # %bb.0: # %entry
+; CHECK-PWR7-NEXT:    xvnmsubadp 36, 34, 35
+; CHECK-PWR7-NEXT:    vmr 2, 4
+; CHECK-PWR7-NEXT:    blr
+entry:
+  %0 = tail call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %f, <2 x double> %f2, <2 x double> %f3)
+  ret <2 x double> %0
 }
 
-declare float @llvm.ppc.fnmsubs(float, float, float)
+declare <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double>, <2 x double>, <2 x double>)
 
 define dso_local double @fre(double %d) {
 ; CHECK-PWR8-LABEL: fre: