diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -15875,10 +15875,9 @@ F, {X, Y, Builder.CreateFNeg(Z, "neg")}), "neg"); else - return Builder.CreateFNeg( - Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}), - "neg"); - } + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::ppc_nmsub, ResultType), {X, Y, Z}); + } llvm_unreachable("Unknown FMA operation"); return nullptr; // Suppress no-return warning } diff --git a/clang/test/CodeGen/builtins-ppc-fma.c b/clang/test/CodeGen/builtins-ppc-fma.c --- a/clang/test/CodeGen/builtins-ppc-fma.c +++ b/clang/test/CodeGen/builtins-ppc-fma.c @@ -32,12 +32,8 @@ // CHECK: <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT]]) vf = __builtin_vsx_xvnmsubasp(vf, vf, vf); - // CHECK: [[RESULT:%[^ ]+]] = fneg <4 x float> %{{.*}} - // CHECK: [[RESULT2:%[^ ]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT]]) - // CHECK: fneg <4 x float> [[RESULT2]] + // CHECK: call <4 x float> @llvm.ppc.nmsub.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) vd = __builtin_vsx_xvnmsubadp(vd, vd, vd); - // CHECK: [[RESULT:%[^ ]+]] = fneg <2 x double> %{{.*}} - // CHECK: [[RESULT2:%[^ ]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT]]) - // CHECK: fneg <2 x double> [[RESULT2]] + // CHECK: call <2 x double> @llvm.ppc.nmsub.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) } diff --git a/clang/test/CodeGen/builtins-ppc-fpconstrained.c b/clang/test/CodeGen/builtins-ppc-fpconstrained.c --- a/clang/test/CodeGen/builtins-ppc-fpconstrained.c +++ b/clang/test/CodeGen/builtins-ppc-fpconstrained.c @@ -142,9 +142,7 @@ vf = __builtin_vsx_xvnmsubasp(vf, vf, vf); // CHECK-LABEL: try-xvnmsubasp - // CHECK-UNCONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <4 x float> %{{.*}} - // CHECK-UNCONSTRAINED: [[RESULT1:%[^ ]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT0]]) - // CHECK-UNCONSTRAINED: fneg <4 x float> [[RESULT1]] + // CHECK-UNCONSTRAINED: call <4 x float> @llvm.ppc.nmsub.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // CHECK-CONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <4 x float> %{{.*}} // CHECK-CONSTRAINED: [[RESULT1:%[^ ]+]] = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT0]], metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-CONSTRAINED: fneg <4 x float> [[RESULT1]] @@ -152,9 +150,7 @@ vd = __builtin_vsx_xvnmsubadp(vd, vd, vd); // CHECK-LABEL: try-xvnmsubadp - // CHECK-UNCONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <2 x double> %{{.*}} - // CHECK-UNCONSTRAINED: [[RESULT1:%[^ ]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT0]]) - // CHECK-UNCONSTRAINED: fneg <2 x double> [[RESULT1]] + // CHECK-UNCONSTRAINED: call <2 x double> @llvm.ppc.nmsub.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) // CHECK-CONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <2 x double> %{{.*}} // CHECK-CONSTRAINED: [[RESULT1:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT0]], metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-CONSTRAINED: fneg <2 x double> [[RESULT1]] diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c --- a/clang/test/CodeGen/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/builtins-ppc-vsx.c @@ -894,20 +894,12 @@ // CHECK-LE-NEXT: fneg <2 x double> %[[FM]] res_vf = vec_nmsub(vf, vf, vf); -// CHECK: fneg <4 x float> %{{[0-9]+}} -// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> -// CHECK: fneg <4 x float> %{{[0-9]+}} -// CHECK-LE: fneg <4 x float> %{{[0-9]+}} -// CHECK-LE-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> -// CHECK-LE: fneg <4 x float> %{{[0-9]+}} +// CHECK: call <4 x float> @llvm.ppc.nmsub.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> +// CHECK-LE: call <4 x float> @llvm.ppc.nmsub.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> res_vd = vec_nmsub(vd, vd, vd); -// CHECK: fneg <2 x double> %{{[0-9]+}} -// CHECK-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> -// CHECK-NEXT: fneg <2 x double> %[[FM]] -// CHECK-LE: fneg <2 x double> %{{[0-9]+}} -// CHECK-LE-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> -// CHECK-LE-NEXT: fneg <2 x double> %[[FM]] +// CHECK: [[FM:[0-9]+]] = call <2 x double> @llvm.ppc.nmsub.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> +// CHECK-LE: [[FM:[0-9]+]] = call <2 x double> @llvm.ppc.nmsub.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> /* vec_nor */ res_vsll = vec_nor(vsll, vsll); diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1731,6 +1731,10 @@ Intrinsic <[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_ppc_nmsub + : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; def int_ppc_fre : GCCBuiltin<"__builtin_ppc_fre">, Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -603,6 +603,8 @@ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom); // To handle counter-based loop conditions. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); @@ -10509,6 +10511,16 @@ DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}), 0); } + case Intrinsic::ppc_nmsub: { + EVT VT = Op.getOperand(1).getValueType(); + if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128)) + return DAG.getNode( + ISD::FNEG, dl, VT, + DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2), + DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3)))); + return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + } case Intrinsic::ppc_convert_f128_to_ppcf128: case Intrinsic::ppc_convert_ppcf128_to_f128: { RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128 @@ -11154,6 +11166,7 @@ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, N->getOperand(2), N->getOperand(1))); break; + case Intrinsic::ppc_nmsub: case Intrinsic::ppc_convert_f128_to_ppcf128: Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG)); break; diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll --- a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll @@ -142,6 +142,105 @@ declare float @llvm.ppc.fnmsubs(float, float, float) +define dso_local float @nmsub_f32(float %f, float %f2, float %f3) { +; CHECK-PWR8-LABEL: nmsub_f32: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: xsnmsubasp 3, 1, 2 +; CHECK-PWR8-NEXT: fmr 1, 3 +; CHECK-PWR8-NEXT: blr +; +; CHECK-NOVSX-LABEL: nmsub_f32: +; CHECK-NOVSX: # %bb.0: # %entry +; CHECK-NOVSX-NEXT: fnmsubs 1, 1, 2, 3 +; CHECK-NOVSX-NEXT: blr +; +; CHECK-PWR7-LABEL: nmsub_f32: +; CHECK-PWR7: # %bb.0: # %entry +; CHECK-PWR7-NEXT: fnmsubs 1, 1, 2, 3 +; CHECK-PWR7-NEXT: blr +entry: + %0 = tail call float @llvm.ppc.nmsub.f32(float %f, float %f2, float %f3) + ret float %0 +} + +declare float @llvm.ppc.nmsub.f32(float, float, float) + +define dso_local double @nmsub_f64(double %f, double %f2, double %f3) { +; CHECK-PWR8-LABEL: nmsub_f64: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: xsnmsubadp 3, 1, 2 +; CHECK-PWR8-NEXT: fmr 1, 3 +; CHECK-PWR8-NEXT: blr +; +; CHECK-NOVSX-LABEL: nmsub_f64: +; CHECK-NOVSX: # %bb.0: # %entry +; CHECK-NOVSX-NEXT: fnmsub 1, 1, 2, 3 +; CHECK-NOVSX-NEXT: blr +; +; CHECK-PWR7-LABEL: nmsub_f64: +; CHECK-PWR7: # %bb.0: # %entry +; CHECK-PWR7-NEXT: xsnmsubadp 3, 1, 2 +; CHECK-PWR7-NEXT: fmr 1, 3 +; CHECK-PWR7-NEXT: blr +entry: + %0 = tail call double @llvm.ppc.nmsub.f64(double %f, double %f2, double %f3) + ret double %0 +} + +declare double @llvm.ppc.nmsub.f64(double, double, double) + +define dso_local <4 x float> @nmsub_v4f32(<4 x float> %f, <4 x float> %f2, <4 x float> %f3) { +; CHECK-PWR8-LABEL: nmsub_v4f32: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: xvnmsubasp 36, 34, 35 +; CHECK-PWR8-NEXT: vmr 2, 4 +; CHECK-PWR8-NEXT: blr +; +; CHECK-NOVSX-LABEL: nmsub_v4f32: +; CHECK-NOVSX: # %bb.0: # %entry +; CHECK-NOVSX-NEXT: fnmsubs 1, 1, 5, 9 +; CHECK-NOVSX-NEXT: fnmsubs 2, 2, 6, 10 +; CHECK-NOVSX-NEXT: fnmsubs 3, 3, 7, 11 +; CHECK-NOVSX-NEXT: fnmsubs 4, 4, 8, 12 +; CHECK-NOVSX-NEXT: blr +; +; CHECK-PWR7-LABEL: nmsub_v4f32: +; CHECK-PWR7: # %bb.0: # %entry +; CHECK-PWR7-NEXT: xvnmsubasp 36, 34, 35 +; CHECK-PWR7-NEXT: vmr 2, 4 +; CHECK-PWR7-NEXT: blr +entry: + %0 = tail call <4 x float> @llvm.ppc.nmsub.v4f32(<4 x float> %f, <4 x float> %f2, <4 x float> %f3) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.ppc.nmsub.v4f32(<4 x float>, <4 x float>, <4 x float>) + +define dso_local <2 x double> @nmsub_v2f64(<2 x double> %f, <2 x double> %f2, <2 x double> %f3) { +; CHECK-PWR8-LABEL: nmsub_v2f64: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: xvnmsubadp 36, 34, 35 +; CHECK-PWR8-NEXT: vmr 2, 4 +; CHECK-PWR8-NEXT: blr +; +; CHECK-NOVSX-LABEL: nmsub_v2f64: +; CHECK-NOVSX: # %bb.0: # %entry +; CHECK-NOVSX-NEXT: fnmsub 1, 1, 3, 5 +; CHECK-NOVSX-NEXT: fnmsub 2, 2, 4, 6 +; CHECK-NOVSX-NEXT: blr +; +; CHECK-PWR7-LABEL: nmsub_v2f64: +; CHECK-PWR7: # %bb.0: # %entry +; CHECK-PWR7-NEXT: xvnmsubadp 36, 34, 35 +; CHECK-PWR7-NEXT: vmr 2, 4 +; CHECK-PWR7-NEXT: blr +entry: + %0 = tail call <2 x double> @llvm.ppc.nmsub.v2f64(<2 x double> %f, <2 x double> %f2, <2 x double> %f3) + ret <2 x double> %0 +} + +declare <2 x double> @llvm.ppc.nmsub.v2f64(<2 x double>, <2 x double>, <2 x double>) + define dso_local double @fre(double %d) { ; CHECK-PWR8-LABEL: fre: ; CHECK-PWR8: # %bb.0: # %entry