Diff 150433

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 9,621 Lines • ▼ Show 20 Lines	bool HasFMA =
TLI.isFMAFasterThanFMulAndFAdd(VT) &&		TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT));		(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT));

// No valid opcode, do not combine.		// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)		if (!HasFMAD && !HasFMA)
return SDValue();		return SDValue();

SDNodeFlags Flags = N->getFlags();		SDNodeFlags Flags = N->getFlags();
		bool CanFuse = Options.UnsafeFPMath \|\| isContractable(N);
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast \|\|		bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast \|\|
Options.UnsafeFPMath \|\| HasFMAD);		CanFuse \|\| HasFMAD);
// If the addition is not contractable, do not combine.		// If the addition is not contractable, do not combine.
if (!AllowFusionGlobally && !isContractable(N))		if (!AllowFusionGlobally && !isContractable(N))
return SDValue();		return SDValue();

const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();		const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
if (STI && STI->generateFMAsInMachineCombiner(OptLevel))		if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();		return SDValue();

▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines	if (isContractableFMUL(N10) &&
DAG.getNode(ISD::FP_EXTEND, SL, VT,		DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(1)), N0, Flags);		N10.getOperand(1)), N0, Flags);
}		}
}		}

// More folding opportunities when target permits.		// More folding opportunities when target permits.
if (Aggressive) {		if (Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))		// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF		if (CanFuse &&
// are currently only supported on binary nodes.
if (Options.UnsafeFPMath &&
N0.getOpcode() == PreferredFusedOpcode &&		N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL &&		N0.getOperand(2).getOpcode() == ISD::FMUL &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {		N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),		N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,		DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),		N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),		N0.getOperand(2).getOperand(1),
N1, Flags), Flags);		N1, Flags), Flags);
}		}

// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))		// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF		if (CanFuse &&
// are currently only supported on binary nodes.
if (Options.UnsafeFPMath &&
N1->getOpcode() == PreferredFusedOpcode &&		N1->getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL &&		N1.getOperand(2).getOpcode() == ISD::FMUL &&
N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {		N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1),		N1.getOperand(0), N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,		DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(2).getOperand(0),		N1.getOperand(2).getOperand(0),
N1.getOperand(2).getOperand(1),		N1.getOperand(2).getOperand(1),
▲ Show 20 Lines • Show All 107 Lines • ▼ Show 20 Lines	bool HasFMA =
TLI.isFMAFasterThanFMulAndFAdd(VT) &&		TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT));		(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT));

// No valid opcode, do not combine.		// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)		if (!HasFMAD && !HasFMA)
return SDValue();		return SDValue();

const SDNodeFlags Flags = N->getFlags();		const SDNodeFlags Flags = N->getFlags();
		bool CanFuse = Options.UnsafeFPMath \|\| isContractable(N);
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast \|\|		bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast \|\|
Options.UnsafeFPMath \|\| HasFMAD);		CanFuse \|\| HasFMAD);

// If the subtraction is not contractable, do not combine.		// If the subtraction is not contractable, do not combine.
if (!AllowFusionGlobally && !isContractable(N))		if (!AllowFusionGlobally && !isContractable(N))
return SDValue();		return SDValue();

const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();		const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
if (STI && STI->generateFMAsInMachineCombiner(OptLevel))		if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();		return SDValue();
Show All 14 Lines	SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N0) && (Aggressive \|\| N0->hasOneUse())) {		if (isContractableFMUL(N0) && (Aggressive \|\| N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),		N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);		DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
}		}

// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)		// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.		// Note: Commutes FSUB operands.
if (isContractableFMUL(N1) && (Aggressive \|\| N1->hasOneUse()))		if (isContractableFMUL(N1) && (Aggressive \|\| N1->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,		DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),		N1.getOperand(0)),
N1.getOperand(1), N0, Flags);		N1.getOperand(1), N0, Flags);
		}

// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))		// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&		if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
(Aggressive \|\| (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {		(Aggressive \|\| (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);		SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);		SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,		DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines	if (N00.getOpcode() == ISD::FP_EXTEND) {
}		}
}		}
}		}

// More folding opportunities when target permits.		// More folding opportunities when target permits.
if (Aggressive) {		if (Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)		// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))		// -> (fma x, y (fma u, v, (fneg z)))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF		if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
// are currently only supported on binary nodes.
if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&		isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
N0.getOperand(2)->hasOneUse()) {		N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),		N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,		DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),		N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),		N0.getOperand(2).getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT,		DAG.getNode(ISD::FNEG, SL, VT,
N1), Flags), Flags);		N1), Flags), Flags);
}		}

// fold (fsub x, (fma y, z, (fmul u, v)))		// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))		// -> (fma (fneg y), z, (fma (fneg u), v, x))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF		if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
// are currently only supported on binary nodes.
if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
isContractableFMUL(N1.getOperand(2))) {		isContractableFMUL(N1.getOperand(2))) {
SDValue N20 = N1.getOperand(2).getOperand(0);		SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);		SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,		DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),		N1.getOperand(0)),
N1.getOperand(1),		N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,		DAG.getNode(PreferredFusedOpcode, SL, VT,
▲ Show 20 Lines • Show All 603 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::visitFMA(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);		ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);		ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
SDLoc DL(N);		SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;		const TargetOptions &Options = DAG.getTarget().Options;

// FMA nodes have flags that propagate to the created nodes.		// FMA nodes have flags that propagate to the created nodes.
const SDNodeFlags Flags = N->getFlags();		const SDNodeFlags Flags = N->getFlags();
		bool UnsafeFPMath = Options.UnsafeFPMath \|\| isContractable(N);

// Constant fold FMA.		// Constant fold FMA.
if (isa<ConstantFPSDNode>(N0) &&		if (isa<ConstantFPSDNode>(N0) &&
isa<ConstantFPSDNode>(N1) &&		isa<ConstantFPSDNode>(N1) &&
isa<ConstantFPSDNode>(N2)) {		isa<ConstantFPSDNode>(N2)) {
return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);		return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}		}

if (Options.UnsafeFPMath) {		if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())		if (N0CFP && N0CFP->isZero())
return N2;		return N2;
if (N1CFP && N1CFP->isZero())		if (N1CFP && N1CFP->isZero())
return N2;		return N2;
}		}
// TODO: The FMA node should have flags that propagate to these nodes.		// TODO: The FMA node should have flags that propagate to these nodes.
if (N0CFP && N0CFP->isExactlyValue(1.0))		if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);		return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))		if (N1CFP && N1CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);		return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

// Canonicalize (fma c, x, y) -> (fma x, c, y)		// Canonicalize (fma c, x, y) -> (fma x, c, y)
if (isConstantFPBuildVectorOrConstantFP(N0) &&		if (isConstantFPBuildVectorOrConstantFP(N0) &&
!isConstantFPBuildVectorOrConstantFP(N1))		!isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);		return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

if (Options.UnsafeFPMath) {		if (UnsafeFPMath) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)		// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&		if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
isConstantFPBuildVectorOrConstantFP(N1) &&		isConstantFPBuildVectorOrConstantFP(N1) &&
isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {		isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,		return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),		DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
Flags), Flags);		Flags), Flags);
}		}
Show All 29 Lines	if (N1CFP) {
if (N0.getOpcode() == ISD::FNEG &&		if (N0.getOpcode() == ISD::FNEG &&
(TLI.isOperationLegal(ISD::ConstantFP, VT) \|\|		(TLI.isOperationLegal(ISD::ConstantFP, VT) \|\|
(N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {		(N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),		return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);		DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
}		}
}		}

if (Options.UnsafeFPMath) {		if (UnsafeFPMath) {
// (fma x, c, x) -> (fmul x, (c+1))		// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {		if (N1CFP && N0 == N2) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,		return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getNode(ISD::FADD, DL, VT, N1,		DAG.getNode(ISD::FADD, DL, VT, N1,
DAG.getConstantFP(1.0, DL, VT), Flags),		DAG.getConstantFP(1.0, DL, VT), Flags),
Flags);		Flags);
}		}

▲ Show 20 Lines • Show All 7,436 Lines • Show Last 20 Lines

test/CodeGen/AArch64/fma-aggressive.ll

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=thunderx2t99 < %s \| FileCheck %s

				; (x * y + u * v) + z with 'fast' on every operation. With aggressive FMA
				; fusion the DAG combiner is expected to fold
				;   (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
				; so two chained fmadd instructions should be emitted.
				; The checks use the plain CHECK prefix because the RUN line passes no
				; --check-prefix; a CHECK-EVEN directive would be silently ignored.
				define float @test1(float %u , float %v , float %x, float %y, float %z) {
				; CHECK-LABEL: test1:
				; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
				; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
				%mul.1 = fmul fast float %u, %v
				%mul.2 = fmul fast float %x, %y
				%fma = fadd fast float %mul.2, %mul.1
				%res = fadd fast float %fma, %z
				ret float %res
				}

				; x + (u * v + y * z) with 'fast' on every operation: the commuted form of
				; the aggressive fold,
				;   (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
				; so two chained fmadd instructions should be emitted.
				; The checks use the plain CHECK prefix because the RUN line passes no
				; --check-prefix; a CHECK-EVEN directive would be silently ignored.
				define float @test2(float %u , float %v , float %x, float %y, float %z) {
				; CHECK-LABEL: test2:
				; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
				; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
				%mul.1 = fmul fast float %y, %z
				%mul.2 = fmul fast float %u, %v
				%fma = fadd fast float %mul.2, %mul.1
				%res = fadd fast float %x, %fma
				ret float %res
				}

				; x * y - z with 'fast' flags: the fmul/fsub pair is expected to fuse into
				; a single fnmsub. %u and %v are unused; they keep the signature aligned
				; with the other tests in this file.
				; The check uses the plain CHECK prefix because the RUN line passes no
				; --check-prefix; a CHECK-EVEN directive would be silently ignored.
				define float @test3(float %u , float %v , float %x, float %y, float %z) {
				; CHECK-LABEL: test3:
				; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
				%mul.1 = fmul fast float %x, %y
				%res = fsub fast float %mul.1, %z
				ret float %res
				}

				; -(x * y) - z with 'fast' flags, where the negation is written as
				; (fsub -0.0, mul). The combiner fold
				;   (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
				; is expected to select a single fnmadd. %u and %v are unused.
				; The check uses the plain CHECK prefix because the RUN line passes no
				; --check-prefix; a CHECK-EVEN directive would be silently ignored.
				define float @test4(float %u , float %v , float %x, float %y, float %z) {
				; CHECK-LABEL: test4:
				; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
				%mul.1 = fmul fast float %x, %y
				%neg = fsub fast float -0.0, %mul.1
				%res = fsub fast float %neg, %z
				ret float %res
				}

test/CodeGen/AArch64/neon-fma-FMF.ll

	; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon \| FileCheck %s			; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon \| FileCheck %s

	define <2 x float> @fma(<2 x float> %A, <2 x float> %B, <2 x float> %C) {			define <2 x float> @fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
	; CHECK-LABEL: fma:			; CHECK-LABEL: fma_1:
	; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s			; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
	%tmp1 = fmul contract <2 x float> %A, %B;			%tmp1 = fmul contract <2 x float> %A, %B;
	%tmp2 = fadd contract <2 x float> %C, %tmp1;			%tmp2 = fadd contract <2 x float> %C, %tmp1;
	ret <2 x float> %tmp2			ret <2 x float> %tmp2
	}			}

				; Only the fadd carries the 'contract' flag; the fmul is unflagged.
				; This is expected to fuse into fmla: the fold was previously reachable only
				; with global unsafe FP math and is now also enabled by 'contract' on the
				; fadd alone.
				define <2 x float> @fma_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
				; CHECK-LABEL: fma_2:
				; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
				%tmp1 = fmul <2 x float> %A, %B;
				%tmp2 = fadd contract <2 x float> %C, %tmp1;
				ret <2 x float> %tmp2
				}

	define <2 x float> @no_fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {			define <2 x float> @no_fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
	; CHECK-LABEL: no_fma_1:			; CHECK-LABEL: no_fma_1:
	; CHECK: fmul			; CHECK: fmul
	; CHECK: fadd			; CHECK: fadd
	%tmp1 = fmul contract <2 x float> %A, %B;			%tmp1 = fmul contract <2 x float> %A, %B;
	%tmp2 = fadd <2 x float> %C, %tmp1;			%tmp2 = fadd <2 x float> %C, %tmp1;
	ret <2 x float> %tmp2			ret <2 x float> %tmp2
	}			}

	define <2 x float> @no_fma_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {			define <2 x float> @fma_sub_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
	; CHECK-LABEL: no_fma_2:			; CHECK-LABEL: fma_sub_1:
	; CHECK: fmul			; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
	; CHECK: fadd			%tmp1 = fmul contract <2 x float> %A, %B;
	%tmp1 = fmul <2 x float> %A, %B;			%tmp2 = fsub contract <2 x float> %C, %tmp1;
	%tmp2 = fadd contract <2 x float> %C, %tmp1;
	ret <2 x float> %tmp2			ret <2 x float> %tmp2
	}			}

	define <2 x float> @fma_sub(<2 x float> %A, <2 x float> %B, <2 x float> %C) {			; This case will fold as it was only available through unsafe before, now available from
	; CHECK-LABEL: fma_sub:			; the contract on the fsub
				define <2 x float> @fma_sub_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
				; CHECK-LABEL: fma_sub_2:
	; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s			; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
	%tmp1 = fmul contract <2 x float> %A, %B;			%tmp1 = fmul <2 x float> %A, %B;
	%tmp2 = fsub contract <2 x float> %C, %tmp1;			%tmp2 = fsub contract <2 x float> %C, %tmp1;
	ret <2 x float> %tmp2			ret <2 x float> %tmp2
	}			}

	define <2 x float> @no_fma_sub_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {			define <2 x float> @no_fma_sub_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
	; CHECK-LABEL: no_fma_sub_1:			; CHECK-LABEL: no_fma_sub_1:
	; CHECK: fmul			; CHECK: fmul
	; CHECK: fsub			; CHECK: fsub
	%tmp1 = fmul contract <2 x float> %A, %B;			%tmp1 = fmul contract <2 x float> %A, %B;
	%tmp2 = fsub <2 x float> %C, %tmp1;			%tmp2 = fsub <2 x float> %C, %tmp1;
	ret <2 x float> %tmp2			ret <2 x float> %tmp2
	}			}

	define <2 x float> @no_fma_sub_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
	; CHECK-LABEL: no_fma_sub_2:
	; CHECK: fmul
	; CHECK: fsub
	%tmp1 = fmul <2 x float> %A, %B;
	%tmp2 = fsub contract <2 x float> %C, %tmp1;
	ret <2 x float> %tmp2
	}

test/CodeGen/PowerPC/fma-aggr-FMF.ll

	Show All 16 Lines
	}			}

	; There is no contract on the mul with no extra use so we can't fuse that.			; There is no contract on the mul with no extra use so we can't fuse that.
	; Since we are fusing with the mul with an extra use, the fmul needs to stick			; Since we are fusing with the mul with an extra use, the fmul needs to stick
	; around beside the fma.			; around beside the fma.
	define float @no_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) {			define float @no_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) {
	; CHECK-LABEL: no_fma_with_fewer_uses:			; CHECK-LABEL: no_fma_with_fewer_uses:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: xsmulsp 0, 3, 4			; CHECK-NEXT: xsmulsp 0, 1, 2
	; CHECK-NEXT: xsmulsp 13, 1, 2			; CHECK-NEXT: fmr 1, 0
	; CHECK-NEXT: xsmaddasp 0, 1, 2			; CHECK-NEXT: xsmaddasp 1, 3, 4
	; CHECK-NEXT: xsdivsp 1, 13, 0			; CHECK-NEXT: xsdivsp 1, 0, 1
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
	%mul1 = fmul contract float %f1, %f2			%mul1 = fmul contract float %f1, %f2
	%mul2 = fmul float %f3, %f4			%mul2 = fmul float %f3, %f4
	%add = fadd contract float %mul1, %mul2			%add = fadd contract float %mul1, %mul2
	%second_use_of_mul1 = fdiv float %mul1, %add			%second_use_of_mul1 = fdiv float %mul1, %add
	ret float %second_use_of_mul1			ret float %second_use_of_mul1
	}			}

test/CodeGen/PowerPC/fmf-propagation.ll

Show All 9 Lines
; The run with the global unsafe param tests the pre-FMF behavior using regular instructions/nodes.		; The run with the global unsafe param tests the pre-FMF behavior using regular instructions/nodes.

declare float @llvm.fma.f32(float, float, float)		declare float @llvm.fma.f32(float, float, float)
declare float @llvm.sqrt.f32(float)		declare float @llvm.sqrt.f32(float)

; X * Y + Z --> fma(X, Y, Z)		; X * Y + Z --> fma(X, Y, Z)

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'		; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
; FMFDEBUG: fmul {{t[0-9]+}}, {{t[0-9]+}}		; FMFDEBUG: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG: fadd contract {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'		; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'

define float @fmul_fadd_contract1(float %x, float %y, float %z) {		define float @fmul_fadd_contract1(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_contract1:		; FMF-LABEL: fmul_fadd_contract1:
; FMF: # %bb.0:		; FMF: # %bb.0:
; FMF-NEXT: xsmulsp 0, 1, 2		; FMF-NEXT: xsmaddasp 3, 1, 2
; FMF-NEXT: xsaddsp 1, 0, 3		; FMF-NEXT: fmr 1, 3
; FMF-NEXT: blr		; FMF-NEXT: blr
;		;
; GLOBAL-LABEL: fmul_fadd_contract1:		; GLOBAL-LABEL: fmul_fadd_contract1:
; GLOBAL: # %bb.0:		; GLOBAL: # %bb.0:
; GLOBAL-NEXT: xsmaddasp 3, 1, 2		; GLOBAL-NEXT: xsmaddasp 3, 1, 2
; GLOBAL-NEXT: fmr 1, 3		; GLOBAL-NEXT: fmr 1, 3
; GLOBAL-NEXT: blr		; GLOBAL-NEXT: blr
%mul = fmul float %x, %y		%mul = fmul float %x, %y
Show All 22 Lines	; GLOBAL-NEXT: blr
%mul = fmul contract float %x, %y		%mul = fmul contract float %x, %y
%add = fadd contract float %mul, %z		%add = fadd contract float %mul, %z
ret float %add		ret float %add
}		}

; Reassociation implies that FMA contraction is allowed.		; Reassociation implies that FMA contraction is allowed.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'		; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
; FMFDEBUG: fmul {{t[0-9]+}}, {{t[0-9]+}}		; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG: fadd reassoc {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'		; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'

define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {		define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_reassoc1:		; FMF-LABEL: fmul_fadd_reassoc1:
; FMF: # %bb.0:		; FMF: # %bb.0:
; FMF-NEXT: xsmulsp 0, 1, 2		; FMF-NEXT: xsmaddasp 3, 1, 2
; FMF-NEXT: xsaddsp 1, 0, 3		; FMF-NEXT: fmr 1, 3
; FMF-NEXT: blr		; FMF-NEXT: blr
;		;
; GLOBAL-LABEL: fmul_fadd_reassoc1:		; GLOBAL-LABEL: fmul_fadd_reassoc1:
; GLOBAL: # %bb.0:		; GLOBAL: # %bb.0:
; GLOBAL-NEXT: xsmaddasp 3, 1, 2		; GLOBAL-NEXT: xsmaddasp 3, 1, 2
; GLOBAL-NEXT: fmr 1, 3		; GLOBAL-NEXT: fmr 1, 3
; GLOBAL-NEXT: blr		; GLOBAL-NEXT: blr
%mul = fmul float %x, %y		%mul = fmul float %x, %y
▲ Show 20 Lines • Show All 69 Lines • ▼ Show 20 Lines	; GLOBAL-NEXT: blr
%add = fadd fast float %mul, %z		%add = fadd fast float %mul, %z
ret float %add		ret float %add
}		}

; fma(X, 7.0, X * 42.0) --> X * 49.0		; fma(X, 7.0, X * 42.0) --> X * 49.0
; This is the minimum FMF needed for this transform - the FMA allows reassociation.		; This is the minimum FMF needed for this transform - the FMA allows reassociation.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'		; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
; FMFDEBUG: fma reassoc {{t[0-9]+}}		; FMFDEBUG: fmul reassoc {{t[0-9]+}},
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'		; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'		; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}		; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'		; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'

define float @fmul_fma_reassoc1(float %x) {		define float @fmul_fma_reassoc1(float %x) {
; FMF-LABEL: fmul_fma_reassoc1:		; FMF-LABEL: fmul_fma_reassoc1:
; FMF: # %bb.0:		; FMF: # %bb.0:
; FMF-NEXT: addis 3, 2, .LCPI6_0@toc@ha		; FMF-NEXT: addis 3, 2, .LCPI6_0@toc@ha
; FMF-NEXT: addi 3, 3, .LCPI6_0@toc@l		; FMF-NEXT: addi 3, 3, .LCPI6_0@toc@l
; FMF-NEXT: lfsx 0, 0, 3		; FMF-NEXT: lfsx 0, 0, 3
; FMF-NEXT: addis 3, 2, .LCPI6_1@toc@ha		; FMF-NEXT: xsmulsp 1, 1, 0
; FMF-NEXT: addi 3, 3, .LCPI6_1@toc@l
; FMF-NEXT: lfsx 2, 0, 3
; FMF-NEXT: xsmulsp 0, 1, 0
; FMF-NEXT: xsmaddasp 0, 1, 2
; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr		; FMF-NEXT: blr
;		;
; GLOBAL-LABEL: fmul_fma_reassoc1:		; GLOBAL-LABEL: fmul_fma_reassoc1:
; GLOBAL: # %bb.0:		; GLOBAL: # %bb.0:
; GLOBAL-NEXT: addis 3, 2, .LCPI6_0@toc@ha		; GLOBAL-NEXT: addis 3, 2, .LCPI6_0@toc@ha
; GLOBAL-NEXT: addi 3, 3, .LCPI6_0@toc@l		; GLOBAL-NEXT: addi 3, 3, .LCPI6_0@toc@l
; GLOBAL-NEXT: lfsx 0, 0, 3		; GLOBAL-NEXT: lfsx 0, 0, 3
; GLOBAL-NEXT: xsmulsp 1, 1, 0		; GLOBAL-NEXT: xsmulsp 1, 1, 0
; GLOBAL-NEXT: blr		; GLOBAL-NEXT: blr
%mul = fmul float %x, 42.0		%mul = fmul float %x, 42.0
%fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)		%fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
ret float %fma		ret float %fma
}		}

; This shouldn't change anything - the intermediate fmul result is now also flagged.		; This shouldn't change anything - the intermediate fmul result is now also flagged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'		; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
; FMFDEBUG: fmul reassoc {{t[0-9]+}}		; FMFDEBUG: fmul reassoc {{t[0-9]+}}
; FMFDEBUG: fma reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'		; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'		; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}		; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'		; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'

define float @fmul_fma_reassoc2(float %x) {		define float @fmul_fma_reassoc2(float %x) {
; FMF-LABEL: fmul_fma_reassoc2:		; FMF-LABEL: fmul_fma_reassoc2:
; FMF: # %bb.0:		; FMF: # %bb.0:
; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha		; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha
; FMF-NEXT: addi 3, 3, .LCPI7_0@toc@l		; FMF-NEXT: addi 3, 3, .LCPI7_0@toc@l
; FMF-NEXT: lfsx 0, 0, 3		; FMF-NEXT: lfsx 0, 0, 3
; FMF-NEXT: addis 3, 2, .LCPI7_1@toc@ha		; FMF-NEXT: xsmulsp 1, 1, 0
; FMF-NEXT: addi 3, 3, .LCPI7_1@toc@l
; FMF-NEXT: lfsx 2, 0, 3
; FMF-NEXT: xsmulsp 0, 1, 0
; FMF-NEXT: xsmaddasp 0, 1, 2
; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr		; FMF-NEXT: blr
;		;
; GLOBAL-LABEL: fmul_fma_reassoc2:		; GLOBAL-LABEL: fmul_fma_reassoc2:
; GLOBAL: # %bb.0:		; GLOBAL: # %bb.0:
; GLOBAL-NEXT: addis 3, 2, .LCPI7_0@toc@ha		; GLOBAL-NEXT: addis 3, 2, .LCPI7_0@toc@ha
; GLOBAL-NEXT: addi 3, 3, .LCPI7_0@toc@l		; GLOBAL-NEXT: addi 3, 3, .LCPI7_0@toc@l
; GLOBAL-NEXT: lfsx 0, 0, 3		; GLOBAL-NEXT: lfsx 0, 0, 3
; GLOBAL-NEXT: xsmulsp 1, 1, 0		; GLOBAL-NEXT: xsmulsp 1, 1, 0
; GLOBAL-NEXT: blr		; GLOBAL-NEXT: blr
%mul = fmul reassoc float %x, 42.0		%mul = fmul reassoc float %x, 42.0
%fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)		%fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
ret float %fma		ret float %fma
}		}

; The FMA is now fully 'fast'. This implies that reassociation is allowed.		; The FMA is now fully 'fast'. This implies that reassociation is allowed.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'		; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}		; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'		; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'		; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}		; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'		; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'

define float @fmul_fma_fast1(float %x) {		define float @fmul_fma_fast1(float %x) {
; FMF-LABEL: fmul_fma_fast1:		; FMF-LABEL: fmul_fma_fast1:
; FMF: # %bb.0:		; FMF: # %bb.0:
; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha		; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha
; FMF-NEXT: addi 3, 3, .LCPI8_0@toc@l		; FMF-NEXT: addi 3, 3, .LCPI8_0@toc@l
; FMF-NEXT: lfsx 0, 0, 3		; FMF-NEXT: lfsx 0, 0, 3
; FMF-NEXT: addis 3, 2, .LCPI8_1@toc@ha		; FMF-NEXT: xsmulsp 1, 1, 0
; FMF-NEXT: addi 3, 3, .LCPI8_1@toc@l
; FMF-NEXT: lfsx 2, 0, 3
; FMF-NEXT: xsmulsp 0, 1, 0
; FMF-NEXT: xsmaddasp 0, 1, 2
; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr		; FMF-NEXT: blr
;		;
; GLOBAL-LABEL: fmul_fma_fast1:		; GLOBAL-LABEL: fmul_fma_fast1:
; GLOBAL: # %bb.0:		; GLOBAL: # %bb.0:
; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha		; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha
; GLOBAL-NEXT: addi 3, 3, .LCPI8_0@toc@l		; GLOBAL-NEXT: addi 3, 3, .LCPI8_0@toc@l
; GLOBAL-NEXT: lfsx 0, 0, 3		; GLOBAL-NEXT: lfsx 0, 0, 3
; GLOBAL-NEXT: xsmulsp 1, 1, 0		; GLOBAL-NEXT: xsmulsp 1, 1, 0
; GLOBAL-NEXT: blr		; GLOBAL-NEXT: blr
%mul = fmul float %x, 42.0		%mul = fmul float %x, 42.0
%fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)		%fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)
ret float %fma		ret float %fma
}		}

; This shouldn't change anything - the intermediate fmul result is now also flagged.		; This shouldn't change anything - the intermediate fmul result is now also flagged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'		; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}		; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'		; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'		; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}		; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'		; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'

define float @fmul_fma_fast2(float %x) {		define float @fmul_fma_fast2(float %x) {
; FMF-LABEL: fmul_fma_fast2:		; FMF-LABEL: fmul_fma_fast2:
; FMF: # %bb.0:		; FMF: # %bb.0:
; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha		; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha
; FMF-NEXT: addi 3, 3, .LCPI9_0@toc@l		; FMF-NEXT: addi 3, 3, .LCPI9_0@toc@l
; FMF-NEXT: lfsx 0, 0, 3		; FMF-NEXT: lfsx 0, 0, 3
; FMF-NEXT: addis 3, 2, .LCPI9_1@toc@ha		; FMF-NEXT: xsmulsp 1, 1, 0
; FMF-NEXT: addi 3, 3, .LCPI9_1@toc@l
; FMF-NEXT: lfsx 2, 0, 3
; FMF-NEXT: xsmulsp 0, 1, 0
; FMF-NEXT: xsmaddasp 0, 1, 2
; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr		; FMF-NEXT: blr
;		;
; GLOBAL-LABEL: fmul_fma_fast2:		; GLOBAL-LABEL: fmul_fma_fast2:
; GLOBAL: # %bb.0:		; GLOBAL: # %bb.0:
; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha		; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha
; GLOBAL-NEXT: addi 3, 3, .LCPI9_0@toc@l		; GLOBAL-NEXT: addi 3, 3, .LCPI9_0@toc@l
; GLOBAL-NEXT: lfsx 0, 0, 3		; GLOBAL-NEXT: lfsx 0, 0, 3
; GLOBAL-NEXT: xsmulsp 1, 1, 0		; GLOBAL-NEXT: xsmulsp 1, 1, 0
Show All 28 Lines
; FMF-NEXT: xsmulsp 4, 3, 3		; FMF-NEXT: xsmulsp 4, 3, 3
; FMF-NEXT: xssubsp 2, 2, 1		; FMF-NEXT: xssubsp 2, 2, 1
; FMF-NEXT: xsmulsp 2, 2, 4		; FMF-NEXT: xsmulsp 2, 2, 4
; FMF-NEXT: xssubsp 0, 0, 2		; FMF-NEXT: xssubsp 0, 0, 2
; FMF-NEXT: xsmulsp 0, 3, 0		; FMF-NEXT: xsmulsp 0, 3, 0
; FMF-NEXT: xsmulsp 0, 0, 1		; FMF-NEXT: xsmulsp 0, 0, 1
; FMF-NEXT: .LBB10_2:		; FMF-NEXT: .LBB10_2:
; FMF-NEXT: fmr 1, 0		; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr		; FMF-NEXT: blr
		mcberg2017 (author): No worries here; this is cross-patching cruft, and the next update will have this synced up.
;		;
; GLOBAL-LABEL: sqrt_afn:		; GLOBAL-LABEL: sqrt_afn:
; GLOBAL: # %bb.0:		; GLOBAL: # %bb.0:
; GLOBAL-NEXT: xxlxor 0, 0, 0		; GLOBAL-NEXT: xxlxor 0, 0, 0
; GLOBAL-NEXT: fcmpu 0, 1, 0		; GLOBAL-NEXT: fcmpu 0, 1, 0
; GLOBAL-NEXT: beq 0, .LBB10_2		; GLOBAL-NEXT: beq 0, .LBB10_2
; GLOBAL-NEXT: # %bb.1:		; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 2, 1		; GLOBAL-NEXT: xsrsqrtesp 2, 1
▲ Show 20 Lines • Show All 178 Lines • Show Last 20 Lines

test/CodeGen/X86/fmf-flags_fma.ll

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s -mattr=+avx2,+fma -mtriple=x86_64-apple-macosx10.8.0 \| FileCheck %s -check-prefix=X64
				spatel: avx2 shouldn't be necessary?

				declare float @llvm.fma.f32(float %a, float %b, float %c);

				define float @fast_fmuladd_rep1(float %a , float %b , float %c) {
				; X64-LABEL: fast_fmuladd_rep1:
				; X64: # %bb.0:
				; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
				%mul.1 = fmul fast float %a, %b
				%res = fadd fast float %mul.1, %c
				spatel: This case is already folded independently of this patch, right? Please check in all new tests with current codegen as a preliminary step. The case where this patch will make a difference is something like this?
				    define float @fadd_contract_with_strict_fmul(float %a , float %b , float %c) {
				      %m = fmul float %a, %b
				      %a = fadd contract float %m, %c
				      ret float %a
				    }
				ret float %res
				}

				define float @fast_fmuladd_rep2(float %a , float %b , float %c) {
				; X64-LABEL: fast_fmuladd_rep2:
				; X64: # %bb.0:
				; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
				%mul.1 = fmul fast float %a, %b
				%res = fadd fast float %c, %mul.1
				ret float %res
				}

				define float @fast_fmuladd_rep3(half %a , half %b , float %c) {
				; X64-LABEL: fast_fmuladd_rep3:
				; X64: # %bb.0:
				; X64: vfmadd213ss {{[0-9]+}}(%rsp), %xmm1, %xmm0
				%mul.1 = fmul fast half %a, %b
				%ext = fpext half %mul.1 to float
				%res = fadd fast float %ext, %c
				ret float %res
				}

				define float @fast_fmuladd_rep4(half %a , half %b , float %c) {
				; X64-LABEL: fast_fmuladd_rep4:
				; X64: # %bb.0:
				; X64: vfmadd213ss {{[0-9]+}}(%rsp), %xmm1, %xmm0
				%mul.1 = fmul fast half %a, %b
				%ext = fpext half %mul.1 to float
				%res = fadd fast float %c, %ext
				ret float %res
				}

This is an archive of the discontinued LLVM Phabricator instance.

Utilize new SDNode flag functionality to expand current support for fma
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 150433

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

test/CodeGen/AArch64/fma-aggressive.ll

test/CodeGen/AArch64/neon-fma-FMF.ll

test/CodeGen/PowerPC/fma-aggr-FMF.ll

test/CodeGen/PowerPC/fmf-propagation.ll

test/CodeGen/X86/fmf-flags_fma.ll

This is an archive of the discontinued LLVM Phabricator instance.

Utilize new SDNode flag functionality to expand current support for fma
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 150433

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

test/CodeGen/AArch64/fma-aggressive.ll

test/CodeGen/AArch64/neon-fma-FMF.ll

test/CodeGen/PowerPC/fma-aggr-FMF.ll

test/CodeGen/PowerPC/fmf-propagation.ll

test/CodeGen/X86/fmf-flags_fma.ll

Utilize new SDNode flag functionality to expand current support for fma
ClosedPublic