Diff 150875

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 9,668 Lines • ▼ Show 20 Lines	bool HasFMA =
TLI.isFMAFasterThanFMulAndFAdd(VT) &&		TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT));		(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT));

// No valid opcode, do not combine.		// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)		if (!HasFMAD && !HasFMA)
return SDValue();		return SDValue();

SDNodeFlags Flags = N->getFlags();		SDNodeFlags Flags = N->getFlags();
		bool CanFuse = Options.UnsafeFPMath \|\| isContractable(N);
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast \|\|		bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast \|\|
Options.UnsafeFPMath \|\| HasFMAD);		CanFuse \|\| HasFMAD);
// If the addition is not contractable, do not combine.		// If the addition is not contractable, do not combine.
if (!AllowFusionGlobally && !isContractable(N))		if (!AllowFusionGlobally && !isContractable(N))
return SDValue();		return SDValue();

const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();		const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
if (STI && STI->generateFMAsInMachineCombiner(OptLevel))		if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();		return SDValue();

▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines	if (isContractableFMUL(N10) &&
DAG.getNode(ISD::FP_EXTEND, SL, VT,		DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(1)), N0, Flags);		N10.getOperand(1)), N0, Flags);
}		}
}		}

// More folding opportunities when target permits.		// More folding opportunities when target permits.
if (Aggressive) {		if (Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))		// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF		if (CanFuse &&
// are currently only supported on binary nodes.
if (Options.UnsafeFPMath &&
N0.getOpcode() == PreferredFusedOpcode &&		N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL &&		N0.getOperand(2).getOpcode() == ISD::FMUL &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {		N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),		N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,		DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),		N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),		N0.getOperand(2).getOperand(1),
N1, Flags), Flags);		N1, Flags), Flags);
}		}

// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))		// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF		if (CanFuse &&
// are currently only supported on binary nodes.
if (Options.UnsafeFPMath &&
N1->getOpcode() == PreferredFusedOpcode &&		N1->getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL &&		N1.getOperand(2).getOpcode() == ISD::FMUL &&
N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {		N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1),		N1.getOperand(0), N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,		DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(2).getOperand(0),		N1.getOperand(2).getOperand(0),
N1.getOperand(2).getOperand(1),		N1.getOperand(2).getOperand(1),
▲ Show 20 Lines • Show All 107 Lines • ▼ Show 20 Lines	bool HasFMA =
TLI.isFMAFasterThanFMulAndFAdd(VT) &&		TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT));		(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT));

// No valid opcode, do not combine.		// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)		if (!HasFMAD && !HasFMA)
return SDValue();		return SDValue();

const SDNodeFlags Flags = N->getFlags();		const SDNodeFlags Flags = N->getFlags();
		bool CanFuse = Options.UnsafeFPMath \|\| isContractable(N);
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast \|\|		bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast \|\|
Options.UnsafeFPMath \|\| HasFMAD);		CanFuse \|\| HasFMAD);

// If the subtraction is not contractable, do not combine.		// If the subtraction is not contractable, do not combine.
if (!AllowFusionGlobally && !isContractable(N))		if (!AllowFusionGlobally && !isContractable(N))
return SDValue();		return SDValue();

const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();		const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
if (STI && STI->generateFMAsInMachineCombiner(OptLevel))		if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();		return SDValue();
Show All 14 Lines	SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N0) && (Aggressive \|\| N0->hasOneUse())) {		if (isContractableFMUL(N0) && (Aggressive \|\| N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),		N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);		DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
}		}

// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)		// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.		// Note: Commutes FSUB operands.
if (isContractableFMUL(N1) && (Aggressive \|\| N1->hasOneUse()))		if (isContractableFMUL(N1) && (Aggressive \|\| N1->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,		DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),		N1.getOperand(0)),
N1.getOperand(1), N0, Flags);		N1.getOperand(1), N0, Flags);
		}

// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))		// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&		if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
(Aggressive \|\| (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {		(Aggressive \|\| (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);		SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);		SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,		DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines	if (N00.getOpcode() == ISD::FP_EXTEND) {
}		}
}		}
}		}

// More folding opportunities when target permits.		// More folding opportunities when target permits.
if (Aggressive) {		if (Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)		// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))		// -> (fma x, y (fma u, v, (fneg z)))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF		if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
// are currently only supported on binary nodes.
if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&		isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
N0.getOperand(2)->hasOneUse()) {		N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),		N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,		DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),		N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),		N0.getOperand(2).getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT,		DAG.getNode(ISD::FNEG, SL, VT,
N1), Flags), Flags);		N1), Flags), Flags);
}		}

// fold (fsub x, (fma y, z, (fmul u, v)))		// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))		// -> (fma (fneg y), z, (fma (fneg u), v, x))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF		if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
// are currently only supported on binary nodes.
if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
isContractableFMUL(N1.getOperand(2))) {		isContractableFMUL(N1.getOperand(2))) {
SDValue N20 = N1.getOperand(2).getOperand(0);		SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);		SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,		DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),		N1.getOperand(0)),
N1.getOperand(1),		N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,		DAG.getNode(PreferredFusedOpcode, SL, VT,
▲ Show 20 Lines • Show All 605 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::visitFMA(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);		ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);		ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
SDLoc DL(N);		SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;		const TargetOptions &Options = DAG.getTarget().Options;

// FMA nodes have flags that propagate to the created nodes.		// FMA nodes have flags that propagate to the created nodes.
const SDNodeFlags Flags = N->getFlags();		const SDNodeFlags Flags = N->getFlags();
		bool UnsafeFPMath = Options.UnsafeFPMath \|\| isContractable(N);

// Constant fold FMA.		// Constant fold FMA.
if (isa<ConstantFPSDNode>(N0) &&		if (isa<ConstantFPSDNode>(N0) &&
isa<ConstantFPSDNode>(N1) &&		isa<ConstantFPSDNode>(N1) &&
isa<ConstantFPSDNode>(N2)) {		isa<ConstantFPSDNode>(N2)) {
return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);		return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}		}

if (Options.UnsafeFPMath) {		if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())		if (N0CFP && N0CFP->isZero())
return N2;		return N2;
if (N1CFP && N1CFP->isZero())		if (N1CFP && N1CFP->isZero())
return N2;		return N2;
}		}
// TODO: The FMA node should have flags that propagate to these nodes.		// TODO: The FMA node should have flags that propagate to these nodes.
if (N0CFP && N0CFP->isExactlyValue(1.0))		if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);		return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))		if (N1CFP && N1CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);		return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

// Canonicalize (fma c, x, y) -> (fma x, c, y)		// Canonicalize (fma c, x, y) -> (fma x, c, y)
if (isConstantFPBuildVectorOrConstantFP(N0) &&		if (isConstantFPBuildVectorOrConstantFP(N0) &&
!isConstantFPBuildVectorOrConstantFP(N1))		!isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);		return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

if (Options.UnsafeFPMath) {		if (UnsafeFPMath) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)		// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&		if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
isConstantFPBuildVectorOrConstantFP(N1) &&		isConstantFPBuildVectorOrConstantFP(N1) &&
isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {		isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,		return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),		DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
Flags), Flags);		Flags), Flags);
}		}
Show All 29 Lines	if (N1CFP) {
if (N0.getOpcode() == ISD::FNEG &&		if (N0.getOpcode() == ISD::FNEG &&
(TLI.isOperationLegal(ISD::ConstantFP, VT) \|\|		(TLI.isOperationLegal(ISD::ConstantFP, VT) \|\|
(N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {		(N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),		return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);		DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
}		}
}		}

if (Options.UnsafeFPMath) {		if (UnsafeFPMath) {
// (fma x, c, x) -> (fmul x, (c+1))		// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {		if (N1CFP && N0 == N2) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,		return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getNode(ISD::FADD, DL, VT, N1,		DAG.getNode(ISD::FADD, DL, VT, N1,
DAG.getConstantFP(1.0, DL, VT), Flags),		DAG.getConstantFP(1.0, DL, VT), Flags),
Flags);		Flags);
}		}

▲ Show 20 Lines • Show All 7,445 Lines • Show Last 20 Lines

test/CodeGen/AArch64/neon-fma-FMF.ll

; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon \| FileCheck %s		; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon \| FileCheck %s

define <2 x float> @fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {		define <2 x float> @fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fma_1:		; CHECK-LABEL: fma_1:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s		; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp1 = fmul contract <2 x float> %A, %B;		%tmp1 = fmul contract <2 x float> %A, %B;
%tmp2 = fadd contract <2 x float> %C, %tmp1;		%tmp2 = fadd contract <2 x float> %C, %tmp1;
ret <2 x float> %tmp2		ret <2 x float> %tmp2
}		}

; This case will fold as it was only available through unsafe before, now available from		; This case will fold as it was only available through unsafe before, now available from
; the contract on the fadd		; the contract on the fadd
define <2 x float> @fma_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {		define <2 x float> @fma_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fma_2:		; CHECK-LABEL: fma_2:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s		; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp1 = fmul <2 x float> %A, %B;		%tmp1 = fmul <2 x float> %A, %B;
%tmp2 = fadd contract <2 x float> %C, %tmp1;		%tmp2 = fadd contract <2 x float> %C, %tmp1;
ret <2 x float> %tmp2		ret <2 x float> %tmp2
}		}

define <2 x float> @no_fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {		define <2 x float> @no_fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: no_fma_1:		; CHECK-LABEL: no_fma_1:
; CHECK: fmul		; CHECK: fmul
Show All 10 Lines	; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp2 = fsub contract <2 x float> %C, %tmp1;		%tmp2 = fsub contract <2 x float> %C, %tmp1;
ret <2 x float> %tmp2		ret <2 x float> %tmp2
}		}

; This case will fold as it was only available through unsafe before, now available from		; This case will fold as it was only available through unsafe before, now available from
; the contract on the fsub		; the contract on the fsub
define <2 x float> @fma_sub_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {		define <2 x float> @fma_sub_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fma_sub_2:		; CHECK-LABEL: fma_sub_2:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s		; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp1 = fmul <2 x float> %A, %B;		%tmp1 = fmul <2 x float> %A, %B;
%tmp2 = fsub contract <2 x float> %C, %tmp1;		%tmp2 = fsub contract <2 x float> %C, %tmp1;
ret <2 x float> %tmp2		ret <2 x float> %tmp2
}		}

define <2 x float> @no_fma_sub_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {		define <2 x float> @no_fma_sub_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: no_fma_sub_1:		; CHECK-LABEL: no_fma_sub_1:
; CHECK: fmul		; CHECK: fmul
; CHECK: fsub		; CHECK: fsub
%tmp1 = fmul contract <2 x float> %A, %B;		%tmp1 = fmul contract <2 x float> %A, %B;
%tmp2 = fsub <2 x float> %C, %tmp1;		%tmp2 = fsub <2 x float> %C, %tmp1;
ret <2 x float> %tmp2		ret <2 x float> %tmp2
}		}

test/CodeGen/PowerPC/fma-aggr-FMF.ll

	Show All 16 Lines
	}			}

	; There is no contract on the mul with no extra use so we can't fuse that.			; There is no contract on the mul with no extra use so we can't fuse that.
	; Since we are fusing with the mul with an extra use, the fmul needs to stick			; Since we are fusing with the mul with an extra use, the fmul needs to stick
	; around beside the fma.			; around beside the fma.
	define float @no_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) {			define float @no_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) {
	; CHECK-LABEL: no_fma_with_fewer_uses:			; CHECK-LABEL: no_fma_with_fewer_uses:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: xsmulsp 0, 3, 4			; CHECK-NEXT: xsmulsp 0, 1, 2
	; CHECK-NEXT: xsmulsp 13, 1, 2			; CHECK-NEXT: fmr 1, 0
	; CHECK-NEXT: xsmaddasp 0, 1, 2			; CHECK-NEXT: xsmaddasp 1, 3, 4
	; CHECK-NEXT: xsdivsp 1, 13, 0			; CHECK-NEXT: xsdivsp 1, 0, 1
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
	%mul1 = fmul contract float %f1, %f2			%mul1 = fmul contract float %f1, %f2
	%mul2 = fmul float %f3, %f4			%mul2 = fmul float %f3, %f4
	%add = fadd contract float %mul1, %mul2			%add = fadd contract float %mul1, %mul2
	%second_use_of_mul1 = fdiv float %mul1, %add			%second_use_of_mul1 = fdiv float %mul1, %add
	ret float %second_use_of_mul1			ret float %second_use_of_mul1
	}			}

test/CodeGen/PowerPC/fmf-propagation.ll

Show All 9 Lines
; The run with the global unsafe param tests the pre-FMF behavior using regular instructions/nodes.		; The run with the global unsafe param tests the pre-FMF behavior using regular instructions/nodes.

declare float @llvm.fma.f32(float, float, float)		declare float @llvm.fma.f32(float, float, float)
declare float @llvm.sqrt.f32(float)		declare float @llvm.sqrt.f32(float)

; X * Y + Z --> fma(X, Y, Z)		; X * Y + Z --> fma(X, Y, Z)

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'		; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
; FMFDEBUG: fmul {{t[0-9]+}}, {{t[0-9]+}}		; FMFDEBUG: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG: fadd contract {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'		; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'

define float @fmul_fadd_contract1(float %x, float %y, float %z) {		define float @fmul_fadd_contract1(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_contract1:		; FMF-LABEL: fmul_fadd_contract1:
; FMF: # %bb.0:		; FMF: # %bb.0:
; FMF-NEXT: xsmulsp 0, 1, 2		; FMF-NEXT: xsmaddasp 3, 1, 2
; FMF-NEXT: xsaddsp 1, 0, 3		; FMF-NEXT: fmr 1, 3
; FMF-NEXT: blr		; FMF-NEXT: blr
;		;
; GLOBAL-LABEL: fmul_fadd_contract1:		; GLOBAL-LABEL: fmul_fadd_contract1:
; GLOBAL: # %bb.0:		; GLOBAL: # %bb.0:
; GLOBAL-NEXT: xsmaddasp 3, 1, 2		; GLOBAL-NEXT: xsmaddasp 3, 1, 2
; GLOBAL-NEXT: fmr 1, 3		; GLOBAL-NEXT: fmr 1, 3
; GLOBAL-NEXT: blr		; GLOBAL-NEXT: blr
%mul = fmul float %x, %y		%mul = fmul float %x, %y
Show All 22 Lines	; GLOBAL-NEXT: blr
%mul = fmul contract float %x, %y		%mul = fmul contract float %x, %y
%add = fadd contract float %mul, %z		%add = fadd contract float %mul, %z
ret float %add		ret float %add
}		}

; Reassociation implies that FMA contraction is allowed.		; Reassociation implies that FMA contraction is allowed.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'		; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
; FMFDEBUG: fmul {{t[0-9]+}}, {{t[0-9]+}}		; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG: fadd reassoc {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'		; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'

define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {		define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_reassoc1:		; FMF-LABEL: fmul_fadd_reassoc1:
; FMF: # %bb.0:		; FMF: # %bb.0:
; FMF-NEXT: xsmulsp 0, 1, 2		; FMF-NEXT: xsmaddasp 3, 1, 2
; FMF-NEXT: xsaddsp 1, 0, 3		; FMF-NEXT: fmr 1, 3
; FMF-NEXT: blr		; FMF-NEXT: blr
;		;
; GLOBAL-LABEL: fmul_fadd_reassoc1:		; GLOBAL-LABEL: fmul_fadd_reassoc1:
; GLOBAL: # %bb.0:		; GLOBAL: # %bb.0:
; GLOBAL-NEXT: xsmaddasp 3, 1, 2		; GLOBAL-NEXT: xsmaddasp 3, 1, 2
; GLOBAL-NEXT: fmr 1, 3		; GLOBAL-NEXT: fmr 1, 3
; GLOBAL-NEXT: blr		; GLOBAL-NEXT: blr
%mul = fmul float %x, %y		%mul = fmul float %x, %y
▲ Show 20 Lines • Show All 69 Lines • ▼ Show 20 Lines	; GLOBAL-NEXT: blr
%add = fadd fast float %mul, %z		%add = fadd fast float %mul, %z
ret float %add		ret float %add
}		}

; fma(X, 7.0, X * 42.0) --> X * 49.0		; fma(X, 7.0, X * 42.0) --> X * 49.0
; This is the minimum FMF needed for this transform - the FMA allows reassociation.		; This is the minimum FMF needed for this transform - the FMA allows reassociation.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'		; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
; FMFDEBUG: fma reassoc {{t[0-9]+}}		; FMFDEBUG: fmul reassoc {{t[0-9]+}},
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'		; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'		; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}		; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'		; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'

define float @fmul_fma_reassoc1(float %x) {		define float @fmul_fma_reassoc1(float %x) {
; FMF-LABEL: fmul_fma_reassoc1:		; FMF-LABEL: fmul_fma_reassoc1:
; FMF: # %bb.0:		; FMF: # %bb.0:
; FMF-NEXT: addis 3, 2, .LCPI6_0@toc@ha		; FMF-NEXT: addis 3, 2, .LCPI6_0@toc@ha
; FMF-NEXT: addi 3, 3, .LCPI6_0@toc@l		; FMF-NEXT: addi 3, 3, .LCPI6_0@toc@l
; FMF-NEXT: lfsx 0, 0, 3		; FMF-NEXT: lfsx 0, 0, 3
; FMF-NEXT: addis 3, 2, .LCPI6_1@toc@ha		; FMF-NEXT: xsmulsp 1, 1, 0
; FMF-NEXT: addi 3, 3, .LCPI6_1@toc@l
; FMF-NEXT: lfsx 2, 0, 3
; FMF-NEXT: xsmulsp 0, 1, 0
; FMF-NEXT: xsmaddasp 0, 1, 2
; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr		; FMF-NEXT: blr
;		;
; GLOBAL-LABEL: fmul_fma_reassoc1:		; GLOBAL-LABEL: fmul_fma_reassoc1:
; GLOBAL: # %bb.0:		; GLOBAL: # %bb.0:
; GLOBAL-NEXT: addis 3, 2, .LCPI6_0@toc@ha		; GLOBAL-NEXT: addis 3, 2, .LCPI6_0@toc@ha
; GLOBAL-NEXT: addi 3, 3, .LCPI6_0@toc@l		; GLOBAL-NEXT: addi 3, 3, .LCPI6_0@toc@l
; GLOBAL-NEXT: lfsx 0, 0, 3		; GLOBAL-NEXT: lfsx 0, 0, 3
; GLOBAL-NEXT: xsmulsp 1, 1, 0		; GLOBAL-NEXT: xsmulsp 1, 1, 0
; GLOBAL-NEXT: blr		; GLOBAL-NEXT: blr
%mul = fmul float %x, 42.0		%mul = fmul float %x, 42.0
%fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)		%fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
ret float %fma		ret float %fma
}		}

; This shouldn't change anything - the intermediate fmul result is now also flagged.		; This shouldn't change anything - the intermediate fmul result is now also flagged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'		; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
; FMFDEBUG: fmul reassoc {{t[0-9]+}}		; FMFDEBUG: fmul reassoc {{t[0-9]+}}
; FMFDEBUG: fma reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'		; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'		; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}		; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'		; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'

define float @fmul_fma_reassoc2(float %x) {		define float @fmul_fma_reassoc2(float %x) {
; FMF-LABEL: fmul_fma_reassoc2:		; FMF-LABEL: fmul_fma_reassoc2:
; FMF: # %bb.0:		; FMF: # %bb.0:
; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha		; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha
; FMF-NEXT: addi 3, 3, .LCPI7_0@toc@l		; FMF-NEXT: addi 3, 3, .LCPI7_0@toc@l
; FMF-NEXT: lfsx 0, 0, 3		; FMF-NEXT: lfsx 0, 0, 3
; FMF-NEXT: addis 3, 2, .LCPI7_1@toc@ha		; FMF-NEXT: xsmulsp 1, 1, 0
; FMF-NEXT: addi 3, 3, .LCPI7_1@toc@l
; FMF-NEXT: lfsx 2, 0, 3
; FMF-NEXT: xsmulsp 0, 1, 0
; FMF-NEXT: xsmaddasp 0, 1, 2
; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr		; FMF-NEXT: blr
;		;
; GLOBAL-LABEL: fmul_fma_reassoc2:		; GLOBAL-LABEL: fmul_fma_reassoc2:
; GLOBAL: # %bb.0:		; GLOBAL: # %bb.0:
; GLOBAL-NEXT: addis 3, 2, .LCPI7_0@toc@ha		; GLOBAL-NEXT: addis 3, 2, .LCPI7_0@toc@ha
; GLOBAL-NEXT: addi 3, 3, .LCPI7_0@toc@l		; GLOBAL-NEXT: addi 3, 3, .LCPI7_0@toc@l
; GLOBAL-NEXT: lfsx 0, 0, 3		; GLOBAL-NEXT: lfsx 0, 0, 3
; GLOBAL-NEXT: xsmulsp 1, 1, 0		; GLOBAL-NEXT: xsmulsp 1, 1, 0
; GLOBAL-NEXT: blr		; GLOBAL-NEXT: blr
%mul = fmul reassoc float %x, 42.0		%mul = fmul reassoc float %x, 42.0
%fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)		%fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
ret float %fma		ret float %fma
}		}

; The FMA is now fully 'fast'. This implies that reassociation is allowed.		; The FMA is now fully 'fast'. This implies that reassociation is allowed.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'		; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}		; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'		; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'		; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}		; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'		; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'

define float @fmul_fma_fast1(float %x) {		define float @fmul_fma_fast1(float %x) {
; FMF-LABEL: fmul_fma_fast1:		; FMF-LABEL: fmul_fma_fast1:
; FMF: # %bb.0:		; FMF: # %bb.0:
; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha		; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha
; FMF-NEXT: addi 3, 3, .LCPI8_0@toc@l		; FMF-NEXT: addi 3, 3, .LCPI8_0@toc@l
; FMF-NEXT: lfsx 0, 0, 3		; FMF-NEXT: lfsx 0, 0, 3
; FMF-NEXT: addis 3, 2, .LCPI8_1@toc@ha		; FMF-NEXT: xsmulsp 1, 1, 0
; FMF-NEXT: addi 3, 3, .LCPI8_1@toc@l
; FMF-NEXT: lfsx 2, 0, 3
; FMF-NEXT: xsmulsp 0, 1, 0
; FMF-NEXT: xsmaddasp 0, 1, 2
; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr		; FMF-NEXT: blr
;		;
; GLOBAL-LABEL: fmul_fma_fast1:		; GLOBAL-LABEL: fmul_fma_fast1:
; GLOBAL: # %bb.0:		; GLOBAL: # %bb.0:
; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha		; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha
; GLOBAL-NEXT: addi 3, 3, .LCPI8_0@toc@l		; GLOBAL-NEXT: addi 3, 3, .LCPI8_0@toc@l
; GLOBAL-NEXT: lfsx 0, 0, 3		; GLOBAL-NEXT: lfsx 0, 0, 3
; GLOBAL-NEXT: xsmulsp 1, 1, 0		; GLOBAL-NEXT: xsmulsp 1, 1, 0
; GLOBAL-NEXT: blr		; GLOBAL-NEXT: blr
%mul = fmul float %x, 42.0		%mul = fmul float %x, 42.0
%fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)		%fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)
ret float %fma		ret float %fma
}		}

; This shouldn't change anything - the intermediate fmul result is now also flagged.		; This shouldn't change anything - the intermediate fmul result is now also flagged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'		; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}		; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'		; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'		; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}		; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'		; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'

define float @fmul_fma_fast2(float %x) {		define float @fmul_fma_fast2(float %x) {
; FMF-LABEL: fmul_fma_fast2:		; FMF-LABEL: fmul_fma_fast2:
; FMF: # %bb.0:		; FMF: # %bb.0:
; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha		; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha
; FMF-NEXT: addi 3, 3, .LCPI9_0@toc@l		; FMF-NEXT: addi 3, 3, .LCPI9_0@toc@l
; FMF-NEXT: lfsx 0, 0, 3		; FMF-NEXT: lfsx 0, 0, 3
; FMF-NEXT: addis 3, 2, .LCPI9_1@toc@ha		; FMF-NEXT: xsmulsp 1, 1, 0
; FMF-NEXT: addi 3, 3, .LCPI9_1@toc@l
; FMF-NEXT: lfsx 2, 0, 3
; FMF-NEXT: xsmulsp 0, 1, 0
; FMF-NEXT: xsmaddasp 0, 1, 2
; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr		; FMF-NEXT: blr
;		;
; GLOBAL-LABEL: fmul_fma_fast2:		; GLOBAL-LABEL: fmul_fma_fast2:
; GLOBAL: # %bb.0:		; GLOBAL: # %bb.0:
; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha		; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha
; GLOBAL-NEXT: addi 3, 3, .LCPI9_0@toc@l		; GLOBAL-NEXT: addi 3, 3, .LCPI9_0@toc@l
; GLOBAL-NEXT: lfsx 0, 0, 3		; GLOBAL-NEXT: lfsx 0, 0, 3
; GLOBAL-NEXT: xsmulsp 1, 1, 0		; GLOBAL-NEXT: xsmulsp 1, 1, 0
Show All 28 Lines
; FMF-NEXT: xsmulsp 4, 3, 3		; FMF-NEXT: xsmulsp 4, 3, 3
; FMF-NEXT: xssubsp 2, 2, 1		; FMF-NEXT: xssubsp 2, 2, 1
; FMF-NEXT: xsmulsp 2, 2, 4		; FMF-NEXT: xsmulsp 2, 2, 4
; FMF-NEXT: xssubsp 0, 0, 2		; FMF-NEXT: xssubsp 0, 0, 2
; FMF-NEXT: xsmulsp 0, 3, 0		; FMF-NEXT: xsmulsp 0, 3, 0
; FMF-NEXT: xsmulsp 0, 0, 1		; FMF-NEXT: xsmulsp 0, 0, 1
; FMF-NEXT: .LBB10_2:		; FMF-NEXT: .LBB10_2:
; FMF-NEXT: fmr 1, 0		; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr		; FMF-NEXT: blr
		mcberg2017 (Author, inline comment): No worries here, this is cross-patching cruft; the next update will have this synced up.
;		;
; GLOBAL-LABEL: sqrt_afn:		; GLOBAL-LABEL: sqrt_afn:
; GLOBAL: # %bb.0:		; GLOBAL: # %bb.0:
; GLOBAL-NEXT: xxlxor 0, 0, 0		; GLOBAL-NEXT: xxlxor 0, 0, 0
; GLOBAL-NEXT: fcmpu 0, 1, 0		; GLOBAL-NEXT: fcmpu 0, 1, 0
; GLOBAL-NEXT: beq 0, .LBB10_2		; GLOBAL-NEXT: beq 0, .LBB10_2
; GLOBAL-NEXT: # %bb.1:		; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 2, 1		; GLOBAL-NEXT: xsrsqrtesp 2, 1
▲ Show 20 Lines • Show All 178 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

Utilize new SDNode flag functionality to expand current support for fma
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 150875

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

test/CodeGen/AArch64/neon-fma-FMF.ll

test/CodeGen/PowerPC/fma-aggr-FMF.ll

test/CodeGen/PowerPC/fmf-propagation.ll

This is an archive of the discontinued LLVM Phabricator instance.

Utilize new SDNode flag functionality to expand current support for fma
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 150875

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

test/CodeGen/AArch64/neon-fma-FMF.ll

test/CodeGen/PowerPC/fma-aggr-FMF.ll

test/CodeGen/PowerPC/fmf-propagation.ll

Utilize new SDNode flag functionality to expand current support for fma
ClosedPublic