diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -464,6 +464,7 @@
     SDValue visitFREEZE(SDNode *N);
     SDValue visitBUILD_PAIR(SDNode *N);
     SDValue visitFADD(SDNode *N);
+    SDValue visitSTRICT_FADD(SDNode *N);
     SDValue visitFSUB(SDNode *N);
     SDValue visitFMUL(SDNode *N);
     SDValue visitFMA(SDNode *N);
@@ -1650,6 +1651,7 @@
   case ISD::BITCAST:            return visitBITCAST(N);
   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
   case ISD::FADD:               return visitFADD(N);
+  case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
   case ISD::FSUB:               return visitFSUB(N);
   case ISD::FMUL:               return visitFMUL(N);
   case ISD::FMA:                return visitFMA(N);
@@ -12833,6 +12835,33 @@
   return SDValue();
 }
 
+SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
+  SDValue Chain = N->getOperand(0);
+  SDValue N0 = N->getOperand(1);
+  SDValue N1 = N->getOperand(2);
+  EVT VT = N->getValueType(0);
+  EVT ChainVT = N->getValueType(1);
+  SDLoc DL(N);
+  const SDNodeFlags Flags = N->getFlags();
+
+  // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
+  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
+    if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
+            N1, DAG, LegalOperations, ForCodeSize)) {
+      return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
+                         {Chain, N0, NegN1}, Flags);
+    }
+
+  // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
+  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
+    if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
+            N0, DAG, LegalOperations, ForCodeSize)) {
+      return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
+                         {Chain, N1, NegN0}, Flags);
+    }
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitFSUB(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
@@ -112,7 +112,7 @@
 ; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_add_f32_e64 v0, -|v0|, v1
+; GCN-NEXT:    v_sub_f32_e64 v0, v1, |v0|
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %fabs.x = call float @llvm.fabs.f32(float %x)
   %neg.fabs.x = fneg float %fabs.x
diff --git a/llvm/test/CodeGen/X86/strict-fadd-combines.ll b/llvm/test/CodeGen/X86/strict-fadd-combines.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/strict-fadd-combines.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+define float @fneg_strict_fadd_to_strict_fsub(float %x, float %y) {
+  ; CHECK: subss %{{.*}}, %{{.*}}
+  ; CHECK-NEXT: retq
+  %neg = fneg float %y
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret float %add
+}
+
+define float @fneg_strict_fadd_to_strict_fsub_2(float %x, float %y) {
+  ; CHECK: subss %{{.*}}, %{{.*}}
+  ; CHECK-NEXT: retq
+  %neg = fneg float %y
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %neg, float %x, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret float %add
+}
+
+define double @fneg_strict_fadd_to_strict_fsub_d(double %x, double %y) {
+  ; CHECK: subsd %{{.*}}, %{{.*}}
+  ; CHECK-NEXT: retq
+  %neg = fneg double %y
+  %add = call double @llvm.experimental.constrained.fadd.f64(double %x, double %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret double %add
+}
+
+define double @fneg_strict_fadd_to_strict_fsub_2d(double %x, double %y) {
+  ; CHECK: subsd %{{.*}}, %{{.*}}
+  ; CHECK-NEXT: retq
+  %neg = fneg double %y
+  %add = call double @llvm.experimental.constrained.fadd.f64(double %neg, double %x, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret double %add
+}
+
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
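
Note: the combine threads the chain result (ChainVT) through unchanged, so
the constrained rounding/exception semantics of the node are preserved, and
it only fires when TLI.getCheaperNegatedExpression can produce a cheaper
negated form of the operand, e.g. when the operand is literally an fneg or an
fneg of an fabs, as in the tests above. As an illustration of the first fold
(hand-written SelectionDAG notation, not actual llc -debug output):

    t3: f32 = fneg t2
    t5: f32,ch = strict_fadd t0, t1, t3   ; before the combine
    t5: f32,ch = strict_fsub t0, t1, t2   ; after the combine

To run just the new X86 test locally (assuming a built llc and FileCheck on
PATH, invoked as in the test's RUN line):

    llc < llvm/test/CodeGen/X86/strict-fadd-combines.ll \
        -mtriple=x86_64-unknown-unknown \
      | FileCheck llvm/test/CodeGen/X86/strict-fadd-combines.ll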