Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -2700,6 +2700,14 @@ A->claim(); } + // If -ffp-contract=off has been specified on the command line, then we must + // suppress the emission of -ffast-math and -menable-unsafe-fp-math to cc1. + bool FPContractDisabled = false; + if (!FPContract.empty()) { + CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + FPContract)); + FPContractDisabled = FPContract.equals("off"); + } + if (!HonorINFs) CmdArgs.push_back("-menable-no-infs"); @@ -2710,7 +2718,7 @@ CmdArgs.push_back("-fmath-errno"); if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros && - !TrappingMath) + !TrappingMath && !FPContractDisabled) CmdArgs.push_back("-menable-unsafe-fp-math"); if (!SignedZeros) @@ -2753,7 +2761,8 @@ // that's consistent with gcc's behaviour. if (!HonorINFs && !HonorNaNs && !MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros && !TrappingMath && !RoundingFPMath) { - CmdArgs.push_back("-ffast-math"); + if (!FPContractDisabled) + CmdArgs.push_back("-ffast-math"); if (FPModel.equals("fast")) { if (FPContract.equals("fast")) // All set, do nothing. Index: clang/test/Driver/fast-math.c =================================================================== --- clang/test/Driver/fast-math.c +++ clang/test/Driver/fast-math.c @@ -180,6 +180,13 @@ // CHECK-FAST-MATH: "-ffast-math" // CHECK-FAST-MATH: "-ffinite-math-only" // +// -ffp-contract=off must disable the fast-math umbrella, and the unsafe-fp-math +// umbrella. +// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s +// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s +// // RUN: %clang -### -ffast-math -fno-fast-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s // RUN: %clang -### -ffast-math -fno-finite-math-only -c %s 2>&1 \ Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11506,7 +11506,7 @@ static bool isContractable(SDNode *N) { SDNodeFlags F = N->getFlags(); - return F.hasAllowContract() || F.hasAllowReassociation(); + return F.hasAllowContract(); } /// Try to perform FMA combining on a given FADD node. Index: llvm/test/CodeGen/PowerPC/fmf-propagation.ll =================================================================== --- llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -58,17 +58,19 @@ ret float %add } -; Reassociation implies that FMA contraction is allowed. +; On the FMF test, reassociation alone does _not_ imply that FMA contraction is +; allowed (in particular, we need to be able to disable FMA even when +; reassociation is enabled). ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:' -; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG: fadd reassoc {{t[0-9]+}}, {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:' define float @fmul_fadd_reassoc1(float %x, float %y, float %z) { ; FMF-LABEL: fmul_fadd_reassoc1: ; FMF: # %bb.0: -; FMF-NEXT: xsmaddasp 3, 1, 2 -; FMF-NEXT: fmr 1, 3 +; FMF-NEXT: xsmulsp 0, 1, 2 +; FMF-NEXT: xsaddsp 1, 0, 3 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fadd_reassoc1: @@ -84,14 +86,14 @@ ; This shouldn't change anything - the intermediate fmul result is now also flagged. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:' -; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG: fadd reassoc {{t[0-9]+}}, {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:' define float @fmul_fadd_reassoc2(float %x, float %y, float %z) { ; FMF-LABEL: fmul_fadd_reassoc2: ; FMF: # %bb.0: -; FMF-NEXT: xsmaddasp 3, 1, 2 -; FMF-NEXT: fmr 1, 3 +; FMF-NEXT: xsmulsp 0, 1, 2 +; FMF-NEXT: xsaddsp 1, 0, 3 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fadd_reassoc2: @@ -104,6 +106,52 @@ ret float %add } +; Reassociation applied with contract enables FMA contraction (of course). + +; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract_reassoc1:' +; FMFDEBUG: fma contract reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract_reassoc1:' + +define float @fmul_fadd_contract_reassoc1(float %x, float %y, float %z) { +; FMF-LABEL: fmul_fadd_contract_reassoc1: +; FMF: # %bb.0: +; FMF-NEXT: xsmaddasp 3, 1, 2 +; FMF-NEXT: fmr 1, 3 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: fmul_fadd_contract_reassoc1: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: xsmaddasp 3, 1, 2 +; GLOBAL-NEXT: fmr 1, 3 +; GLOBAL-NEXT: blr + %mul = fmul contract float %x, %y + %add = fadd contract reassoc float %mul, %z + ret float %add +} + +; This shouldn't change anything - the intermediate fmul result is now also flagged. + +; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract_reassoc2:' +; FMFDEBUG: fma contract reassoc {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract_reassoc2:' + +define float @fmul_fadd_contract_reassoc2(float %x, float %y, float %z) { +; FMF-LABEL: fmul_fadd_contract_reassoc2: +; FMF: # %bb.0: +; FMF-NEXT: xsmaddasp 3, 1, 2 +; FMF-NEXT: fmr 1, 3 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: fmul_fadd_contract_reassoc2: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: xsmaddasp 3, 1, 2 +; GLOBAL-NEXT: fmr 1, 3 +; GLOBAL-NEXT: blr + %mul = fmul contract reassoc float %x, %y + %add = fadd contract reassoc float %mul, %z + ret float %add +} + ; The fadd is now fully 'fast'. This implies that contraction is allowed. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:' @@ -122,7 +170,7 @@ ; GLOBAL-NEXT: xsmaddasp 3, 1, 2 ; GLOBAL-NEXT: fmr 1, 3 ; GLOBAL-NEXT: blr - %mul = fmul fast float %x, %y + %mul = fmul float %x, %y %add = fadd fast float %mul, %z ret float %add } @@ -151,10 +199,69 @@ } ; fma(X, 7.0, X * 42.0) --> X * 49.0 -; This is the minimum FMF needed for this transform - the FMA allows reassociation. +; This is the minimum FMF needed for this transform - the 'contract' allows the needed reassociation. + +; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract1:' +; FMFDEBUG: fmul contract {{t[0-9]+}}, +; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract1:' + +; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract1:' +; GLOBALDEBUG: fmul contract {{t[0-9]+}} +; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract1:' + +define float @fmul_fma_contract1(float %x) { +; FMF-LABEL: fmul_fma_contract1: +; FMF: # %bb.0: +; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI8_0@toc@l(3) +; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: fmul_fma_contract1: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI8_0@toc@l(3) +; GLOBAL-NEXT: xsmulsp 1, 1, 0 +; GLOBAL-NEXT: blr + %mul = fmul float %x, 42.0 + %fma = call contract float @llvm.fma.f32(float %x, float 7.0, float %mul) + ret float %fma +} + +; This shouldn't change anything - the intermediate fmul result is now also flagged. + +; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract2:' +; FMFDEBUG: fmul contract {{t[0-9]+}}, +; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract2:' + +; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract2:' +; GLOBALDEBUG: fmul contract {{t[0-9]+}} +; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract2:' + +define float @fmul_fma_contract2(float %x) { +; FMF-LABEL: fmul_fma_contract2: +; FMF: # %bb.0: +; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI9_0@toc@l(3) +; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: fmul_fma_contract2: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI9_0@toc@l(3) +; GLOBAL-NEXT: xsmulsp 1, 1, 0 +; GLOBAL-NEXT: blr + %mul = fmul contract float %x, 42.0 + %fma = call contract float @llvm.fma.f32(float %x, float 7.0, float %mul) + ret float %fma +} + +; On the FMF test, reassociation alone does _not_ imply that FMA contraction is allowed. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:' -; FMFDEBUG: fmul reassoc {{t[0-9]+}}, +; FMFDEBUG: fmul {{t[0-9]+}}, +; FMFDEBUG: fma reassoc {{t[0-9]+}}, ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:' @@ -164,15 +271,19 @@ define float @fmul_fma_reassoc1(float %x) { ; FMF-LABEL: fmul_fma_reassoc1: ; FMF: # %bb.0: -; FMF-NEXT: addis 3, 2, .LCPI6_0@toc@ha -; FMF-NEXT: lfs 0, .LCPI6_0@toc@l(3) -; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI10_0@toc@l(3) +; FMF-NEXT: addis 3, 2, .LCPI10_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI10_1@toc@l(3) +; FMF-NEXT: xsmulsp 0, 1, 0 +; FMF-NEXT: xsmaddasp 0, 1, 2 +; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fma_reassoc1: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI6_0@toc@ha -; GLOBAL-NEXT: lfs 0, .LCPI6_0@toc@l(3) +; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI10_0@toc@l(3) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr %mul = fmul float %x, 42.0 @@ -184,6 +295,7 @@ ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:' ; FMFDEBUG: fmul reassoc {{t[0-9]+}} +; FMFDEBUG: fma reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:' @@ -193,15 +305,19 @@ define float @fmul_fma_reassoc2(float %x) { ; FMF-LABEL: fmul_fma_reassoc2: ; FMF: # %bb.0: -; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; FMF-NEXT: lfs 0, .LCPI7_0@toc@l(3) -; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI11_0@toc@l(3) +; FMF-NEXT: addis 3, 2, .LCPI11_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI11_1@toc@l(3) +; FMF-NEXT: xsmulsp 0, 1, 0 +; FMF-NEXT: xsmaddasp 0, 1, 2 +; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fma_reassoc2: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; GLOBAL-NEXT: lfs 0, .LCPI7_0@toc@l(3) +; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI11_0@toc@l(3) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr %mul = fmul reassoc float %x, 42.0 @@ -209,6 +325,64 @@ ret float %fma } +; Reassociation applied with contract enables FMA contraction (of course). + +; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract_reassoc1:' +; FMFDEBUG: fmul contract reassoc {{t[0-9]+}}, +; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract_reassoc1:' + +; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract_reassoc1:' +; GLOBALDEBUG: fmul contract reassoc {{t[0-9]+}} +; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract_reassoc1:' + +define float @fmul_fma_contract_reassoc1(float %x) { +; FMF-LABEL: fmul_fma_contract_reassoc1: +; FMF: # %bb.0: +; FMF-NEXT: addis 3, 2, .LCPI12_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI12_0@toc@l(3) +; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: fmul_fma_contract_reassoc1: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: addis 3, 2, .LCPI12_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI12_0@toc@l(3) +; GLOBAL-NEXT: xsmulsp 1, 1, 0 +; GLOBAL-NEXT: blr + %mul = fmul float %x, 42.0 + %fma = call contract reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul) + ret float %fma +} + +; This shouldn't change anything - the intermediate fmul result is now also flagged. + +; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract_reassoc2:' +; FMFDEBUG: fmul contract reassoc {{t[0-9]+}} +; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract_reassoc2:' + +; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract_reassoc2:' +; GLOBALDEBUG: fmul contract reassoc {{t[0-9]+}} +; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract_reassoc2:' + +define float @fmul_fma_contract_reassoc2(float %x) { +; FMF-LABEL: fmul_fma_contract_reassoc2: +; FMF: # %bb.0: +; FMF-NEXT: addis 3, 2, .LCPI13_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI13_0@toc@l(3) +; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: fmul_fma_contract_reassoc2: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI13_0@toc@l(3) +; GLOBAL-NEXT: xsmulsp 1, 1, 0 +; GLOBAL-NEXT: blr + %mul = fmul contract reassoc float %x, 42.0 + %fma = call contract reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul) + ret float %fma +} + ; The FMA is now fully 'fast'. This implies that reassociation is allowed. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:' @@ -222,15 +396,15 @@ define float @fmul_fma_fast1(float %x) { ; FMF-LABEL: fmul_fma_fast1: ; FMF: # %bb.0: -; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha -; FMF-NEXT: lfs 0, .LCPI8_0@toc@l(3) +; FMF-NEXT: addis 3, 2, .LCPI14_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI14_0@toc@l(3) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fma_fast1: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha -; GLOBAL-NEXT: lfs 0, .LCPI8_0@toc@l(3) +; GLOBAL-NEXT: addis 3, 2, .LCPI14_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI14_0@toc@l(3) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr %mul = fmul float %x, 42.0 @@ -251,15 +425,15 @@ define float @fmul_fma_fast2(float %x) { ; FMF-LABEL: fmul_fma_fast2: ; FMF: # %bb.0: -; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha -; FMF-NEXT: lfs 0, .LCPI9_0@toc@l(3) +; FMF-NEXT: addis 3, 2, .LCPI15_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI15_0@toc@l(3) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fma_fast2: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha -; GLOBAL-NEXT: lfs 0, .LCPI9_0@toc@l(3) +; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI15_0@toc@l(3) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr %mul = fmul fast float %x, 42.0 @@ -282,19 +456,19 @@ ; FMF: # %bb.0: ; FMF-NEXT: xxlxor 0, 0, 0 ; FMF-NEXT: fcmpu 0, 1, 0 -; FMF-NEXT: beq 0, .LBB10_2 +; FMF-NEXT: beq 0, .LBB16_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: xsrsqrtesp 0, 1 -; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha -; FMF-NEXT: addis 4, 2, .LCPI10_1@toc@ha -; FMF-NEXT: lfs 2, .LCPI10_0@toc@l(3) -; FMF-NEXT: lfs 3, .LCPI10_1@toc@l(4) +; FMF-NEXT: addis 3, 2, .LCPI16_0@toc@ha +; FMF-NEXT: addis 4, 2, .LCPI16_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI16_0@toc@l(3) +; FMF-NEXT: lfs 3, .LCPI16_1@toc@l(4) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: xsmulsp 0, 1, 0 ; FMF-NEXT: xsmulsp 1, 1, 2 ; FMF-NEXT: xsaddsp 0, 0, 3 ; FMF-NEXT: xsmulsp 0, 1, 0 -; FMF-NEXT: .LBB10_2: +; FMF-NEXT: .LBB16_2: ; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr ; @@ -302,18 +476,18 @@ ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: xxlxor 0, 0, 0 ; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB10_2 +; GLOBAL-NEXT: beq 0, .LBB16_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 -; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha -; GLOBAL-NEXT: addis 4, 2, .LCPI10_1@toc@ha -; GLOBAL-NEXT: lfs 2, .LCPI10_0@toc@l(3) -; GLOBAL-NEXT: lfs 3, .LCPI10_1@toc@l(4) +; GLOBAL-NEXT: addis 3, 2, .LCPI16_0@toc@ha +; GLOBAL-NEXT: addis 4, 2, .LCPI16_1@toc@ha +; GLOBAL-NEXT: lfs 2, .LCPI16_0@toc@l(3) +; GLOBAL-NEXT: lfs 3, .LCPI16_1@toc@l(4) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0 ; GLOBAL-NEXT: xsmulsp 0, 1, 3 ; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB10_2: +; GLOBAL-NEXT: .LBB16_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr %rt = call afn float @llvm.sqrt.f32(float %x) @@ -335,18 +509,18 @@ ; FMF: # %bb.0: ; FMF-NEXT: xxlxor 0, 0, 0 ; FMF-NEXT: fcmpu 0, 1, 0 -; FMF-NEXT: beq 0, .LBB11_2 +; FMF-NEXT: beq 0, .LBB17_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: xsrsqrtesp 0, 1 -; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; FMF-NEXT: addis 4, 2, .LCPI11_1@toc@ha -; FMF-NEXT: lfs 2, .LCPI11_0@toc@l(3) -; FMF-NEXT: lfs 3, .LCPI11_1@toc@l(4) +; FMF-NEXT: addis 3, 2, .LCPI17_0@toc@ha +; FMF-NEXT: addis 4, 2, .LCPI17_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI17_0@toc@l(3) +; FMF-NEXT: lfs 3, .LCPI17_1@toc@l(4) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: xsmaddasp 2, 1, 0 ; FMF-NEXT: xsmulsp 0, 1, 3 ; FMF-NEXT: xsmulsp 0, 0, 2 -; FMF-NEXT: .LBB11_2: +; FMF-NEXT: .LBB17_2: ; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr ; @@ -354,18 +528,18 @@ ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: xxlxor 0, 0, 0 ; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB11_2 +; GLOBAL-NEXT: beq 0, .LBB17_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 -; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; GLOBAL-NEXT: addis 4, 2, .LCPI11_1@toc@ha -; GLOBAL-NEXT: lfs 2, .LCPI11_0@toc@l(3) -; GLOBAL-NEXT: lfs 3, .LCPI11_1@toc@l(4) +; GLOBAL-NEXT: addis 3, 2, .LCPI17_0@toc@ha +; GLOBAL-NEXT: addis 4, 2, .LCPI17_1@toc@ha +; GLOBAL-NEXT: lfs 2, .LCPI17_0@toc@l(3) +; GLOBAL-NEXT: lfs 3, .LCPI17_1@toc@l(4) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0 ; GLOBAL-NEXT: xsmulsp 0, 1, 3 ; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB11_2: +; GLOBAL-NEXT: .LBB17_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr %rt = call fast float @llvm.sqrt.f32(float %x) @@ -387,10 +561,10 @@ ; FMF: # %bb.0: ; FMF-NEXT: xxlxor 0, 0, 0 ; FMF-NEXT: xscmpudp 0, 1, 0 -; FMF-NEXT: blt 0, .LBB12_2 +; FMF-NEXT: blt 0, .LBB18_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: fmr 3, 2 -; FMF-NEXT: .LBB12_2: +; FMF-NEXT: .LBB18_2: ; FMF-NEXT: fmr 1, 3 ; FMF-NEXT: blr ; @@ -398,10 +572,10 @@ ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: xxlxor 0, 0, 0 ; GLOBAL-NEXT: xscmpudp 0, 1, 0 -; GLOBAL-NEXT: blt 0, .LBB12_2 +; GLOBAL-NEXT: blt 0, .LBB18_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: fmr 3, 2 -; GLOBAL-NEXT: .LBB12_2: +; GLOBAL-NEXT: .LBB18_2: ; GLOBAL-NEXT: fmr 1, 3 ; GLOBAL-NEXT: blr %cmp = fcmp nnan ult double %a, 0.0 Index: llvm/test/CodeGen/X86/fp-contract.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/fp-contract.ll @@ -0,0 +1,204 @@ +; Tests for -ffp-contract/-ffast-math interaction. +; Specifically, -ffp-contract=off must suppress the use of FMA. + +; RUN: llc < %s -mcpu=haswell | FileCheck %s --check-prefix=FMA + +; Scalar versions: + +define float @MulAddPlain(float %a, float %b, float %c) { +; FMA-LABEL: MulAddPlain: +; FMA: vmulss +; FMA-NEXT: vaddss +; FMA-NEXT: ret + %mul = fmul float %a, %b + %add = fadd float %mul, %c + ret float %add +} + +define float @MulAddFast(float %a, float %b, float %c) { +; FMA-LABEL: MulAddFast: +; FMA: vfmadd213ss +; FMA-NEXT: ret + %mul = fmul fast float %a, %b + %add = fadd fast float %mul, %c + ret float %add +} + +define float @MulAddContract(float %a, float %b, float %c) { +; FMA-LABEL: MulAddContract: +; FMA: vfmadd213ss +; FMA-NEXT: ret + %mul = fmul contract float %a, %b + %add = fadd contract float %mul, %c + ret float %add +} + +; Enabling all the fast-math-flags except 'contract' does not enable fused operations. +define float @MulAddFastNoContract(float %a, float %b, float %c) { +; FMA-LABEL: MulAddFastNoContract: +; FMA: vmulss +; FMA-NEXT: vaddss +; FMA-NEXT: ret + %mul = fmul nnan ninf nsz arcp afn reassoc float %a, %b + %add = fadd nnan ninf nsz arcp afn reassoc float %mul, %c + ret float %add +} + +define float @MulAddReassoc(float %a, float %b, float %c) { +; FMA-LABEL: MulAddReassoc: +; FMA: vmulss +; FMA-NEXT: vaddss +; FMA-NEXT: ret + %mul = fmul reassoc float %a, %b + %add = fadd reassoc float %mul, %c + ret float %add +} + +define float @MulSubPlain(float %a, float %b, float %c) { +; FMA-LABEL: MulSubPlain: +; FMA: vmulss +; FMA-NEXT: vsubss +; FMA-NEXT: ret + %mul = fmul float %a, %b + %sub = fsub float %mul, %c + ret float %sub +} + +define float @MulSubFast(float %a, float %b, float %c) { +; FMA-LABEL: MulSubFast: +; FMA: vfmsub213ss +; FMA-NEXT: ret + %mul = fmul fast float %a, %b + %sub = fsub fast float %mul, %c + ret float %sub +} + +define float @MulSubContract(float %a, float %b, float %c) { +; FMA-LABEL: MulSubContract: +; FMA: vfmsub213ss +; FMA-NEXT: ret + %mul = fmul contract float %a, %b + %sub = fsub contract float %mul, %c + ret float %sub +} + +; Enabling all the fast-math-flags except 'contract' does not enable fused operations. +define float @MulSubFastNoContract(float %a, float %b, float %c) { +; FMA-LABEL: MulSubFastNoContract: +; FMA: vmulss +; FMA-NEXT: vsubss +; FMA-NEXT: ret + %mul = fmul nnan ninf nsz arcp afn reassoc float %a, %b + %sub = fsub nnan ninf nsz arcp afn reassoc float %mul, %c + ret float %sub +} + +define float @MulSubReassoc(float %a, float %b, float %c) { +; FMA-LABEL: MulSubReassoc: +; FMA: vmulss +; FMA-NEXT: vsubss +; FMA-NEXT: ret + %mul = fmul reassoc float %a, %b + %sub = fsub reassoc float %mul, %c + ret float %sub +} + +; Vector versions: + +define <4 x float> @VecMulAddPlain(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulAddPlain: +; FMA: vmulps +; FMA-NEXT: vaddps +; FMA-NEXT: ret + %mul = fmul <4 x float> %a, %b + %add = fadd <4 x float> %mul, %c + ret <4 x float> %add +} + +define <4 x float> @VecMulAddFast(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulAddFast: +; FMA: vfmadd213ps +; FMA-NEXT: ret + %mul = fmul fast <4 x float> %a, %b + %add = fadd fast <4 x float> %mul, %c + ret <4 x float> %add +} + +define <4 x float> @VecMulAddContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulAddContract: +; FMA: vfmadd213ps +; FMA-NEXT: ret + %mul = fmul contract <4 x float> %a, %b + %add = fadd contract <4 x float> %mul, %c + ret <4 x float> %add +} + +; Enabling all the fast-math-flags except 'contract' does not enable fused operations. +define <4 x float> @VecMulAddFastNoContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulAddFastNoContract: +; FMA: vmulps +; FMA-NEXT: vaddps +; FMA-NEXT: ret + %mul = fmul nnan ninf nsz arcp afn reassoc <4 x float> %a, %b + %add = fadd nnan ninf nsz arcp afn reassoc <4 x float> %mul, %c + ret <4 x float> %add +} + +define <4 x float> @VecMulAddReassoc(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulAddReassoc: +; FMA: vmulps +; FMA-NEXT: vaddps +; FMA-NEXT: ret + %mul = fmul reassoc <4 x float> %a, %b + %add = fadd reassoc <4 x float> %mul, %c + ret <4 x float> %add +} + +define <4 x float> @VecMulSubPlain(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulSubPlain: +; FMA: vmulps +; FMA-NEXT: vsubps +; FMA-NEXT: ret + %mul = fmul <4 x float> %a, %b + %sub = fsub <4 x float> %mul, %c + ret <4 x float> %sub +} + +define <4 x float> @VecMulSubFast(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulSubFast: +; FMA: vfmsub213ps +; FMA-NEXT: ret + %mul = fmul fast <4 x float> %a, %b + %sub = fsub fast <4 x float> %mul, %c + ret <4 x float> %sub +} + +define <4 x float> @VecMulSubContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulSubContract: +; FMA: vfmsub213ps +; FMA-NEXT: ret + %mul = fmul contract <4 x float> %a, %b + %sub = fsub contract <4 x float> %mul, %c + ret <4 x float> %sub +} + +; Enabling all the fast-math-flags except 'contract' does not enable fused operations. +define <4 x float> @VecMulSubFastNoContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulSubFastNoContract: +; FMA: vmulps +; FMA-NEXT: vsubps +; FMA-NEXT: ret + %mul = fmul nnan ninf nsz arcp afn reassoc <4 x float> %a, %b + %sub = fsub nnan ninf nsz arcp afn reassoc <4 x float> %mul, %c + ret <4 x float> %sub +} + +define <4 x float> @VecMulSubReassoc(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulSubReassoc: +; FMA: vmulps +; FMA-NEXT: vsubps +; FMA-NEXT: ret + %mul = fmul reassoc <4 x float> %a, %b + %sub = fsub reassoc <4 x float> %mul, %c + ret <4 x float> %sub +}