Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -46129,14 +46129,23 @@ if (!TLI.isTypeLegal(VT)) return SDValue(); - EVT ScalarVT = VT.getScalarType(); - if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA()) - return SDValue(); - SDValue A = N->getOperand(IsStrict ? 1 : 0); SDValue B = N->getOperand(IsStrict ? 2 : 1); SDValue C = N->getOperand(IsStrict ? 3 : 2); + // If the operation allows fast-math and the target does not support FMA, + // split this into mul+add to avoid a libcall. + SDNodeFlags Flags = N->getFlags(); + if (!IsStrict && Flags.hasAllowReassociation() && + TLI.isOperationExpand(ISD::FMA, VT)) { + SDValue Fmul = DAG.getNode(ISD::FMUL, dl, VT, A, B, Flags); + return DAG.getNode(ISD::FADD, dl, VT, Fmul, C, Flags); + } + + EVT ScalarVT = VT.getScalarType(); + if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA()) + return SDValue(); + auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) { bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); bool LegalOperations = !DCI.isBeforeLegalizeOps(); Index: llvm/test/CodeGen/X86/fma.ll =================================================================== --- llvm/test/CodeGen/X86/fma.ll +++ llvm/test/CodeGen/X86/fma.ll @@ -73,9 +73,15 @@ ; ; FMACALL32-LABEL: test_f32_reassoc: ; FMACALL32: ## %bb.0: -; FMACALL32-NEXT: jmp _fmaf ## TAILCALL -; FMACALL32-NEXT: ## encoding: [0xeb,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-1, kind: FK_PCRel_1 +; FMACALL32-NEXT: pushl %eax ## encoding: [0x50] +; FMACALL32-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] +; FMACALL32-NEXT: ## xmm0 = mem[0],zero,zero,zero +; FMACALL32-NEXT: vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0x44,0x24,0x0c] +; FMACALL32-NEXT: vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0x44,0x24,0x10] +; FMACALL32-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] +; FMACALL32-NEXT: flds (%esp) ## encoding: [0xd9,0x04,0x24] +; FMACALL32-NEXT: popl %eax ## encoding: [0x58] +; FMACALL32-NEXT: retl ## encoding: [0xc3] ; ; FMA64-LABEL: test_f32_reassoc: ; FMA64: ## %bb.0: @@ -85,9 +91,9 @@ ; ; FMACALL64-LABEL: test_f32_reassoc: ; FMACALL64: ## %bb.0: -; FMACALL64-NEXT: jmp _fmaf ## TAILCALL -; FMACALL64-NEXT: ## encoding: [0xeb,A] -; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-1, kind: FK_PCRel_1 +; FMACALL64-NEXT: mulss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x59,0xc1] +; FMACALL64-NEXT: addss %xmm2, %xmm0 ## encoding: [0xf3,0x0f,0x58,0xc2] +; FMACALL64-NEXT: retq ## encoding: [0xc3] ; ; AVX512-LABEL: test_f32_reassoc: ; AVX512: ## %bb.0: