Diff 369022

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 13,045 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {

if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))		if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
return SDValue();		return SDValue();

// Always prefer FMAD to FMA for precision.		// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;		unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);		bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

		auto isFusedOp = [&](SDValue N) {
		Lint: Pre-merge checks Inline Actions clang-tidy: warning: invalid case style for variable 'isFusedOp' [readability-identifier-naming] not useful Lint: Pre-merge checks: clang-tidy: warning: invalid case style for variable 'isFusedOp' [readability-identifier…
		unsigned Opcode = N.getOpcode();
		return Opcode == ISD::FMA \|\| Opcode == ISD::FMAD;
		foadUnsubmitted Done Reply Inline Actions Why do you need the HasFMA and HasFMAD checks here? I can see that you don't want to create new FMA instructions if FMA is not legal, but I can't see why you wouldn't want to combine existing FMAs. In other words, can this just be `return Opcode == ISD::FMA || Opcode == ISD::FMAD;`? foad: Why do you need the HasFMA and HasFMAD checks here? I can see that you don't want to create…
		critsonAuthorUnsubmitted Done Reply Inline Actions Sure, this yields some test changes (instruction count reductions). critson: Sure, this yields some test changes (instruction count reductions).
		};

// Is the node an FMUL and contractable either due to global flags or		// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.		// SDNodeFlags.
auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {		auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
if (N.getOpcode() != ISD::FMUL)		if (N.getOpcode() != ISD::FMUL)
return false;		return false;
return AllowFusionGlobally \|\| N->getFlags().hasAllowContract();		return AllowFusionGlobally \|\| N->getFlags().hasAllowContract();
};		};
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),		// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
Show All 15 Lines	if (isContractableFMUL(N1) && (Aggressive \|\| N1->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),		return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
N1.getOperand(1), N0);		N1.getOperand(1), N0);
}		}

// fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)		// fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
// fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)		// fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
// This requires reassociation because it changes the order of operations.		// This requires reassociation because it changes the order of operations.
SDValue FMA, E;		SDValue FMA, E;
if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&		if (CanReassociate && isFusedOp(N0) &&
N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&		N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
N0.getOperand(2).hasOneUse()) {		N0.getOperand(2).hasOneUse()) {
FMA = N0;		FMA = N0;
E = N1;		E = N1;
} else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&		} else if (CanReassociate && isFusedOp(N1) &&
N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&		N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
N1.getOperand(2).hasOneUse()) {		N1.getOperand(2).hasOneUse()) {
FMA = N1;		FMA = N1;
E = N0;		E = N0;
}		}
if (FMA && E) {		if (FMA && E) {
SDValue A = FMA.getOperand(0);		SDValue A = FMA.getOperand(0);
SDValue B = FMA.getOperand(1);		SDValue B = FMA.getOperand(1);
SDValue C = FMA.getOperand(2).getOperand(0);		SDValue C = FMA.getOperand(2).getOperand(0);
SDValue D = FMA.getOperand(2).getOperand(1);		SDValue D = FMA.getOperand(2).getOperand(1);
SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);		SDValue CDE = DAG.getNode(FMA.getOpcode(), SL, VT, C, D, E);
return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);		return DAG.getNode(FMA.getOpcode(), SL, VT, A, B, CDE);
		foadUnsubmitted Done Reply Inline Actions Could use `FMA.getOpcode()` instead of `PreferredFusedOpcode` here, to try to preserve the original opcode of the `A*B+something` part of the expression. But that's getting pretty subtle and I'm not sure if it will make any practical difference. The same goes for any other parts of combines which preserve an fma(d) instead of creating a new one from scratch. foad: Could use `FMA.getOpcode()` instead of `PreferredFusedOpcode` here, to try to preserve the…
		foadUnsubmitted Done Reply Inline Actions On second thoughts, perhaps there's no point trying to preserve the original opcode for the `A*B+something` part, because the `something` is different, so it's impossible to retain the (fused vs unfused) rounding behaviour of the original opcode. So perhaps your original approach of using `DAG.getNode(PreferredFusedOpcode, ...)` everywhere is the best we can do. Sorry for the noise. foad: On second thoughts, perhaps there's no point trying to preserve the original opcode for the…
		foadUnsubmitted Done Reply Inline Actions What I meant was, use `FMA.getOpcode()` for `A*B+something` because it's preserving an existing fma(d) from the input, but still use `PreferredFusedOpcode` for `C*D+something` because it is a new fma(d) that we are creating by fusing an fmul and an fadd from the input. I realise this is pretty subtle and I'm not sure it will make any practical difference. foad: What I meant was, use `FMA.getOpcode()` for `A*B+something` because it's preserving an existing…
		critsonAuthorUnsubmitted Done Reply Inline Actions No worries, I have spent a while thinking about it too. I tested both versions so far on image output tests and could not detect any meaningful difference. I also implemented the mixed preservation intended with your comment. However, let's just go with the original. critson: No worries, I have spent a while thinking about it too. I tested both versions so far on image…
}		}

// Look through FP_EXTEND nodes to do more combining.		// Look through FP_EXTEND nodes to do more combining.

// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)		// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
if (N0.getOpcode() == ISD::FP_EXTEND) {		if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);		SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&		if (isContractableFMUL(N00) &&
Show All 19 Lines	if (isContractableFMUL(N10) &&
N0);		N0);
}		}
}		}

// More folding opportunities when target permits.		// More folding opportunities when target permits.
if (Aggressive) {		if (Aggressive) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)		// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))		// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,		auto FoldFAddFMAFPExtFMul = [&](unsigned FusedOpcode, SDValue X, SDValue Y,
SDValue Z) {		SDValue U, SDValue V, SDValue Z) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,		return DAG.getNode(FusedOpcode, SL, VT, X, Y,
DAG.getNode(PreferredFusedOpcode, SL, VT,		DAG.getNode(FusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),		DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),		DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
Z));		Z));
};		};
if (N0.getOpcode() == PreferredFusedOpcode) {		if (isFusedOp(N0)) {
		unsigned FusedOpcode = N0.getOpcode();
SDValue N02 = N0.getOperand(2);		SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {		if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);		SDValue N020 = N02.getOperand(0);
if (isContractableFMUL(N020) &&		if (isContractableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,		TLI.isFPExtFoldable(DAG, FusedOpcode, VT, N020.getValueType())) {
N020.getValueType())) {		return FoldFAddFMAFPExtFMul(FusedOpcode, N0.getOperand(0),
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),		N0.getOperand(1), N020.getOperand(0),
N020.getOperand(0), N020.getOperand(1),		N020.getOperand(1), N1);
N1);
}		}
}		}
}		}

// fold (fadd (fpext (fma x, y, (fmul u, v))), z)		// fold (fadd (fpext (fma x, y, (fmul u, v))), z)
// -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))		// -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
// FIXME: This turns two single-precision and one double-precision		// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be		// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.		// interesting for all targets, especially GPUs.
auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,		auto FoldFAddFPExtFMAFMul = [&](unsigned FusedOpcode, SDValue X, SDValue Y,
SDValue Z) {		SDValue U, SDValue V, SDValue Z) {
return DAG.getNode(		return DAG.getNode(
PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),		FusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),		DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
DAG.getNode(PreferredFusedOpcode, SL, VT,		DAG.getNode(FusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),		DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));		DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
};		};
if (N0.getOpcode() == ISD::FP_EXTEND) {		if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);		SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {		if (isFusedOp(N00)) {
		unsigned FusedOpcode = N00.getOpcode();
SDValue N002 = N00.getOperand(2);		SDValue N002 = N00.getOperand(2);
if (isContractableFMUL(N002) &&		if (isContractableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,		TLI.isFPExtFoldable(DAG, FusedOpcode, VT, N00.getValueType())) {
N00.getValueType())) {		return FoldFAddFPExtFMAFMul(FusedOpcode, N00.getOperand(0),
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),		N00.getOperand(1), N002.getOperand(0),
N002.getOperand(0), N002.getOperand(1),		N002.getOperand(1), N1);
N1);
}		}
}		}
}		}

// fold (fadd x, (fma y, z, (fpext (fmul u, v)))		// fold (fadd x, (fma y, z, (fpext (fmul u, v)))
// -> (fma y, z, (fma (fpext u), (fpext v), x))		// -> (fma y, z, (fma (fpext u), (fpext v), x))
if (N1.getOpcode() == PreferredFusedOpcode) {		if (isFusedOp(N1)) {
		unsigned FusedOpcode = N1.getOpcode();
SDValue N12 = N1.getOperand(2);		SDValue N12 = N1.getOperand(2);
if (N12.getOpcode() == ISD::FP_EXTEND) {		if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);		SDValue N120 = N12.getOperand(0);
if (isContractableFMUL(N120) &&		if (isContractableFMUL(N120) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,		TLI.isFPExtFoldable(DAG, FusedOpcode, VT, N120.getValueType())) {
N120.getValueType())) {		return FoldFAddFMAFPExtFMul(FusedOpcode, N1.getOperand(0),
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),		N1.getOperand(1), N120.getOperand(0),
N120.getOperand(0), N120.getOperand(1),		N120.getOperand(1), N0);
N0);
}		}
}		}
}		}

// fold (fadd x, (fpext (fma y, z, (fmul u, v)))		// fold (fadd x, (fpext (fma y, z, (fmul u, v)))
// -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))		// -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
// FIXME: This turns two single-precision and one double-precision		// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be		// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.		// interesting for all targets, especially GPUs.
if (N1.getOpcode() == ISD::FP_EXTEND) {		if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);		SDValue N10 = N1.getOperand(0);
if (N10.getOpcode() == PreferredFusedOpcode) {		if (isFusedOp(N10)) {
		unsigned FusedOpcode = N10.getOpcode();
SDValue N102 = N10.getOperand(2);		SDValue N102 = N10.getOperand(2);
if (isContractableFMUL(N102) &&		if (isContractableFMUL(N102) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,		TLI.isFPExtFoldable(DAG, FusedOpcode, VT, N10.getValueType())) {
N10.getValueType())) {		return FoldFAddFPExtFMAFMul(FusedOpcode, N10.getOperand(0),
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),		N10.getOperand(1), N102.getOperand(0),
N102.getOperand(0), N102.getOperand(1),		N102.getOperand(1), N0);
N0);
}		}
}		}
}		}
}		}

return SDValue();		return SDValue();
}		}

▲ Show 20 Lines • Show All 172 Lines • ▼ Show 20 Lines	auto isReassociable = [Options](SDNode *N) {
return Options.UnsafeFPMath \|\| N->getFlags().hasAllowReassociation();		return Options.UnsafeFPMath \|\| N->getFlags().hasAllowReassociation();
};		};

auto isContractableAndReassociableFMUL = [isContractableFMUL,		auto isContractableAndReassociableFMUL = [isContractableFMUL,
isReassociable](SDValue N) {		isReassociable](SDValue N) {
return isContractableFMUL(N) && isReassociable(N.getNode());		return isContractableFMUL(N) && isReassociable(N.getNode());
};		};

		auto isFusedOp = [&](SDValue N) {
		Lint: Pre-merge checks Inline Actions clang-tidy: warning: invalid case style for variable 'isFusedOp' [readability-identifier-naming] not useful Lint: Pre-merge checks: clang-tidy: warning: invalid case style for variable 'isFusedOp' [readability-identifier…
		unsigned Opcode = N.getOpcode();
		return Opcode == ISD::FMA \|\| Opcode == ISD::FMAD;
		};

// More folding opportunities when target permits.		// More folding opportunities when target permits.
if (Aggressive && isReassociable(N)) {		if (Aggressive && isReassociable(N)) {
bool CanFuse = Options.UnsafeFPMath \|\| N->getFlags().hasAllowContract();		bool CanFuse = Options.UnsafeFPMath \|\| N->getFlags().hasAllowContract();
// fold (fsub (fma x, y, (fmul u, v)), z)		// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))		// -> (fma x, y (fma u, v, (fneg z)))
if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&		if (CanFuse && isFusedOp(N0) &&
isContractableAndReassociableFMUL(N0.getOperand(2)) &&		isContractableAndReassociableFMUL(N0.getOperand(2)) &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {		N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),		unsigned FusedOpcode = N0.getOpcode();
N0.getOperand(1),		return DAG.getNode(
DAG.getNode(PreferredFusedOpcode, SL, VT,		FusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
N0.getOperand(2).getOperand(0),		DAG.getNode(FusedOpcode, SL, VT, N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),		N0.getOperand(2).getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT, N1)));		DAG.getNode(ISD::FNEG, SL, VT, N1)));
}		}

// fold (fsub x, (fma y, z, (fmul u, v)))		// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))		// -> (fma (fneg y), z, (fma (fneg u), v, x))
if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&		if (CanFuse && isFusedOp(N1) &&
isContractableAndReassociableFMUL(N1.getOperand(2)) &&		isContractableAndReassociableFMUL(N1.getOperand(2)) &&
N1->hasOneUse() && NoSignedZero) {		N1->hasOneUse() && NoSignedZero) {
		unsigned FusedOpcode = N1.getOpcode();
SDValue N20 = N1.getOperand(2).getOperand(0);		SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);		SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(		return DAG.getNode(
PreferredFusedOpcode, SL, VT,		FusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),		N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,		DAG.getNode(FusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N20),
DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));		N21, N0));
}		}

// fold (fsub (fma x, y, (fpext (fmul u, v))), z)		// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))		// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode &&		if (isFusedOp(N0) && N0->hasOneUse()) {
N0->hasOneUse()) {		unsigned FusedOpcode = N0.getOpcode();
SDValue N02 = N0.getOperand(2);		SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {		if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);		SDValue N020 = N02.getOperand(0);
if (isContractableAndReassociableFMUL(N020) &&		if (isContractableAndReassociableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,		TLI.isFPExtFoldable(DAG, FusedOpcode, VT, N020.getValueType())) {
N020.getValueType())) {
return DAG.getNode(		return DAG.getNode(
PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),		FusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
DAG.getNode(		DAG.getNode(
PreferredFusedOpcode, SL, VT,		FusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),		DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),		DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT, N1)));		DAG.getNode(ISD::FNEG, SL, VT, N1)));
}		}
}		}
}		}

// fold (fsub (fpext (fma x, y, (fmul u, v))), z)		// fold (fsub (fpext (fma x, y, (fmul u, v))), z)
// -> (fma (fpext x), (fpext y),		// -> (fma (fpext x), (fpext y),
// (fma (fpext u), (fpext v), (fneg z)))		// (fma (fpext u), (fpext v), (fneg z)))
// FIXME: This turns two single-precision and one double-precision		// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be		// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.		// interesting for all targets, especially GPUs.
if (N0.getOpcode() == ISD::FP_EXTEND) {		if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);		SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {		if (isFusedOp(N00)) {
		unsigned FusedOpcode = N00.getOpcode();
SDValue N002 = N00.getOperand(2);		SDValue N002 = N00.getOperand(2);
if (isContractableAndReassociableFMUL(N002) &&		if (isContractableAndReassociableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,		TLI.isFPExtFoldable(DAG, FusedOpcode, VT, N00.getValueType())) {
N00.getValueType())) {
return DAG.getNode(		return DAG.getNode(
PreferredFusedOpcode, SL, VT,		FusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),		DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),		DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
DAG.getNode(		DAG.getNode(
PreferredFusedOpcode, SL, VT,		FusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),		DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),		DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT, N1)));		DAG.getNode(ISD::FNEG, SL, VT, N1)));
}		}
}		}
}		}

// fold (fsub x, (fma y, z, (fpext (fmul u, v))))		// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))		// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
if (N1.getOpcode() == PreferredFusedOpcode &&		if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
N1->hasOneUse()) {		N1->hasOneUse()) {
		unsigned FusedOpcode = N1.getOpcode();
SDValue N120 = N1.getOperand(2).getOperand(0);		SDValue N120 = N1.getOperand(2).getOperand(0);
if (isContractableAndReassociableFMUL(N120) &&		if (isContractableAndReassociableFMUL(N120) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,		TLI.isFPExtFoldable(DAG, FusedOpcode, VT, N120.getValueType())) {
N120.getValueType())) {
SDValue N1200 = N120.getOperand(0);		SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);		SDValue N1201 = N120.getOperand(1);
return DAG.getNode(		return DAG.getNode(
PreferredFusedOpcode, SL, VT,		FusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),		DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,		DAG.getNode(FusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,		DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),		DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));		DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
}		}
}		}

// fold (fsub x, (fpext (fma y, z, (fmul u, v))))		// fold (fsub x, (fpext (fma y, z, (fmul u, v))))
// -> (fma (fneg (fpext y)), (fpext z),		// -> (fma (fneg (fpext y)), (fpext z),
// (fma (fneg (fpext u)), (fpext v), x))		// (fma (fneg (fpext u)), (fpext v), x))
// FIXME: This turns two single-precision and one double-precision		// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be		// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.		// interesting for all targets, especially GPUs.
if (N1.getOpcode() == ISD::FP_EXTEND &&		if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
SDValue CvtSrc = N1.getOperand(0);		SDValue CvtSrc = N1.getOperand(0);
SDValue N100 = CvtSrc.getOperand(0);		SDValue N100 = CvtSrc.getOperand(0);
SDValue N101 = CvtSrc.getOperand(1);		SDValue N101 = CvtSrc.getOperand(1);
SDValue N102 = CvtSrc.getOperand(2);		SDValue N102 = CvtSrc.getOperand(2);
		unsigned FusedOpcode = CvtSrc.getOpcode();
if (isContractableAndReassociableFMUL(N102) &&		if (isContractableAndReassociableFMUL(N102) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,		TLI.isFPExtFoldable(DAG, FusedOpcode, VT, CvtSrc.getValueType())) {
CvtSrc.getValueType())) {
SDValue N1020 = N102.getOperand(0);		SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);		SDValue N1021 = N102.getOperand(1);
return DAG.getNode(		return DAG.getNode(
PreferredFusedOpcode, SL, VT,		FusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,		DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),		DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),		DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
DAG.getNode(PreferredFusedOpcode, SL, VT,		DAG.getNode(FusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,		DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),		DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));		DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
}		}
}		}
}		}

return SDValue();		return SDValue();
▲ Show 20 Lines • Show All 10,040 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s \| FileCheck -enable-var-scope -check-prefix=GCN %s			; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s \| FileCheck -enable-var-scope -check-prefix=GCN %s

	define amdgpu_ps float @_amdgpu_ps_main() #0 {			define amdgpu_ps float @_amdgpu_ps_main() #0 {
	; GCN-LABEL: _amdgpu_ps_main:			; GCN-LABEL: _amdgpu_ps_main:
				foadUnsubmitted Done Reply Inline Actions Could use `GCN-COUNT-13: v_fma`. But maybe it would be better to generate the checks for this file? It's not immediately obvious (to me) why 13 is the optimal number of fmas. There are more than 13 fmuls and fadds and fsubs in the IR. foad: Could use `GCN-COUNT-13: v_fma`. But maybe it would be better to generate the checks for this…
				critsonAuthorUnsubmitted Done Reply Inline Actions I'll generate the checks for this file and precommit. This example was specifically reduced to take the code paths changed by this patch. With the patch the number of instructions falls, number of FMAs goes from 14 to 13. critson: I'll generate the checks for this file and precommit. This example was specifically reduced to…
	; GCN: ; %bb.0: ; %.entry			; GCN: ; %bb.0: ; %.entry
	; GCN-NEXT: s_mov_b32 s0, 0			; GCN-NEXT: s_mov_b32 s0, 0
	; GCN-NEXT: s_mov_b32 s1, s0			; GCN-NEXT: s_mov_b32 s1, s0
	; GCN-NEXT: s_mov_b32 s2, s0			; GCN-NEXT: s_mov_b32 s2, s0
	; GCN-NEXT: s_mov_b32 s3, s0			; GCN-NEXT: s_mov_b32 s3, s0
	; GCN-NEXT: s_mov_b32 s4, s0			; GCN-NEXT: s_mov_b32 s4, s0
	; GCN-NEXT: s_mov_b32 s5, s0			; GCN-NEXT: s_mov_b32 s5, s0
	; GCN-NEXT: s_mov_b32 s6, s0			; GCN-NEXT: s_mov_b32 s6, s0
	; GCN-NEXT: s_mov_b32 s7, s0			; GCN-NEXT: s_mov_b32 s7, s0
	; GCN-NEXT: image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D			; GCN-NEXT: image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D
	; GCN-NEXT: s_waitcnt vmcnt(0)			; GCN-NEXT: s_waitcnt vmcnt(0)
	; GCN-NEXT: s_clause 0x2			; GCN-NEXT: s_clause 0x2
	; GCN-NEXT: image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D			; GCN-NEXT: image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D
	; GCN-NEXT: image_sample v3, v[0:1], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D			; GCN-NEXT: image_sample v3, v[0:1], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D
	; GCN-NEXT: image_load v4, v[0:1], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D unorm			; GCN-NEXT: image_load v4, v[0:1], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D unorm
	; GCN-NEXT: s_clause 0x3			; GCN-NEXT: s_clause 0x3
	; GCN-NEXT: s_buffer_load_dword s24, s[0:3], 0x5c			; GCN-NEXT: s_buffer_load_dword s24, s[0:3], 0x5c
	; GCN-NEXT: s_buffer_load_dword s28, s[0:3], 0x7c			; GCN-NEXT: s_buffer_load_dword s28, s[0:3], 0x7c
	; GCN-NEXT: s_buffer_load_dword s29, s[0:3], 0xc0			; GCN-NEXT: s_buffer_load_dword s29, s[0:3], 0xc0
	; GCN-NEXT: s_waitcnt_depctr 0xffe3			; GCN-NEXT: s_waitcnt_depctr 0xffe3
	; GCN-NEXT: s_nop 0			; GCN-NEXT: s_nop 0
	; GCN-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x40			; GCN-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x40
	; GCN-NEXT: s_waitcnt lgkmcnt(0)			; GCN-NEXT: s_waitcnt lgkmcnt(0)
	; GCN-NEXT: v_sub_f32_e64 v5, s24, s28			; GCN-NEXT: v_sub_f32_e64 v5, s24, s28
	; GCN-NEXT: v_add_f32_e64 v7, s29, -1.0
	; GCN-NEXT: s_clause 0x1			; GCN-NEXT: s_clause 0x1
	; GCN-NEXT: s_buffer_load_dwordx4 s[4:7], s[0:3], 0x50			; GCN-NEXT: s_buffer_load_dwordx4 s[4:7], s[0:3], 0x50
	; GCN-NEXT: s_nop 0			; GCN-NEXT: s_nop 0
	; GCN-NEXT: s_buffer_load_dword s0, s[0:3], 0x2c			; GCN-NEXT: s_buffer_load_dword s0, s[0:3], 0x2c
	; GCN-NEXT: v_fma_f32 v1, v1, v5, s28			; GCN-NEXT: v_fma_f32 v1, v1, v5, s28
				; GCN-NEXT: v_add_f32_e64 v5, s29, -1.0
	; GCN-NEXT: s_waitcnt lgkmcnt(0)			; GCN-NEXT: s_waitcnt lgkmcnt(0)
	; GCN-NEXT: s_clause 0x3			; GCN-NEXT: s_clause 0x4
	; GCN-NEXT: s_buffer_load_dwordx4 s[8:11], s[0:3], 0x60			; GCN-NEXT: s_buffer_load_dwordx4 s[8:11], s[0:3], 0x60
	; GCN-NEXT: s_buffer_load_dwordx4 s[12:15], s[0:3], 0x20			; GCN-NEXT: s_buffer_load_dwordx4 s[12:15], s[0:3], 0x20
	; GCN-NEXT: s_buffer_load_dwordx4 s[16:19], s[0:3], 0x0			; GCN-NEXT: s_buffer_load_dwordx4 s[16:19], s[0:3], 0x0
	; GCN-NEXT: s_buffer_load_dwordx4 s[20:23], s[0:3], 0x70			; GCN-NEXT: s_buffer_load_dwordx4 s[20:23], s[0:3], 0x70
	; GCN-NEXT: v_max_f32_e64 v6, s0, s0 clamp
	; GCN-NEXT: s_buffer_load_dwordx4 s[24:27], s[0:3], 0x10			; GCN-NEXT: s_buffer_load_dwordx4 s[24:27], s[0:3], 0x10
	; GCN-NEXT: v_sub_f32_e32 v9, s0, v1			; GCN-NEXT: v_max_f32_e64 v6, s0, s0 clamp
				; GCN-NEXT: v_sub_f32_e32 v8, s0, v1
	; GCN-NEXT: s_mov_b32 s0, 0x3c23d70a			; GCN-NEXT: s_mov_b32 s0, 0x3c23d70a
	; GCN-NEXT: v_mul_f32_e32 v5, s2, v6			; GCN-NEXT: v_fma_f32 v7, -s2, v6, s6
	; GCN-NEXT: v_fma_f32 v8, -s2, v6, s6			; GCN-NEXT: v_fmac_f32_e32 v1, v6, v8
	; GCN-NEXT: v_fmac_f32_e32 v1, v6, v9			; GCN-NEXT: v_fma_f32 v5, v6, v5, 1.0
	; GCN-NEXT: v_fma_f32 v7, v6, v7, 1.0
	; GCN-NEXT: v_fmac_f32_e32 v5, v8, v6
	; GCN-NEXT: s_waitcnt lgkmcnt(0)			; GCN-NEXT: s_waitcnt lgkmcnt(0)
	; GCN-NEXT: v_mul_f32_e32 v8, s10, v0			; GCN-NEXT: v_mul_f32_e32 v9, s10, v0
	; GCN-NEXT: v_fma_f32 v0, -v0, s10, s14			; GCN-NEXT: v_fma_f32 v0, -v0, s10, s14
	; GCN-NEXT: v_fmac_f32_e32 v8, v0, v6			; GCN-NEXT: v_fmac_f32_e32 v9, v0, v6
	; GCN-NEXT: v_sub_f32_e32 v0, v1, v7			; GCN-NEXT: v_sub_f32_e32 v0, v1, v5
	; GCN-NEXT: v_fmac_f32_e32 v7, v0, v6			; GCN-NEXT: v_fmac_f32_e32 v5, v0, v6
	; GCN-NEXT: s_waitcnt vmcnt(2)			; GCN-NEXT: s_waitcnt vmcnt(2)
	; GCN-NEXT: v_mul_f32_e32 v9, s18, v2			; GCN-NEXT: v_fma_f32 v10, s2, v6, v2
				; GCN-NEXT: v_mul_f32_e32 v8, s18, v2
	; GCN-NEXT: s_waitcnt vmcnt(1)			; GCN-NEXT: s_waitcnt vmcnt(1)
	; GCN-NEXT: v_mul_f32_e32 v3, s22, v3			; GCN-NEXT: v_mul_f32_e32 v3, s22, v3
	; GCN-NEXT: v_add_f32_e32 v5, v2, v5			; GCN-NEXT: v_fmac_f32_e32 v10, v7, v6
	; GCN-NEXT: v_mul_f32_e32 v1, v9, v6			; GCN-NEXT: v_mul_f32_e32 v1, v8, v6
	; GCN-NEXT: v_mul_f32_e32 v9, v6, v3			; GCN-NEXT: v_mul_f32_e32 v7, v6, v3
	; GCN-NEXT: v_fmac_f32_e64 v8, -v6, v3			; GCN-NEXT: v_fmac_f32_e64 v9, -v6, v3
	; GCN-NEXT: s_waitcnt vmcnt(0)			; GCN-NEXT: s_waitcnt vmcnt(0)
	; GCN-NEXT: v_add_f32_e32 v4, v4, v5			; GCN-NEXT: v_add_f32_e32 v3, v4, v10
	; GCN-NEXT: v_fma_f32 v0, v2, s26, -v1			; GCN-NEXT: v_fma_f32 v0, v2, s26, -v1
	; GCN-NEXT: v_fmac_f32_e32 v9, v8, v6			; GCN-NEXT: v_fma_f32 v4, v5, s0, 0x3ca3d70a
	; GCN-NEXT: v_mul_f32_e32 v3, v4, v6			; GCN-NEXT: v_fmac_f32_e32 v7, v9, v6
	; GCN-NEXT: v_fma_f32 v4, v7, s0, 0x3ca3d70a			; GCN-NEXT: v_mul_f32_e32 v3, v3, v6
	; GCN-NEXT: v_fmac_f32_e32 v1, v0, v6			; GCN-NEXT: v_fmac_f32_e32 v1, v0, v6
	; GCN-NEXT: v_mul_f32_e32 v0, v2, v6			; GCN-NEXT: v_mul_f32_e32 v0, v2, v6
	; GCN-NEXT: v_mul_f32_e32 v2, v9, v4			; GCN-NEXT: v_mul_f32_e32 v2, v7, v4
	; GCN-NEXT: v_mul_f32_e32 v1, v3, v1			; GCN-NEXT: v_mul_f32_e32 v1, v3, v1
	; GCN-NEXT: v_fmac_f32_e32 v1, v2, v0			; GCN-NEXT: v_fmac_f32_e32 v1, v2, v0
	; GCN-NEXT: v_max_f32_e32 v0, 0, v1			; GCN-NEXT: v_max_f32_e32 v0, 0, v1
	; GCN-NEXT: ; return to shader part epilog			; GCN-NEXT: ; return to shader part epilog
	.entry:			.entry:
	%0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)			%0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
	%.i2243 = extractelement <3 x float> %0, i32 2			%.i2243 = extractelement <3 x float> %0, i32 2
	%1 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 0, i32 0)			%1 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 0, i32 0)
	▲ Show 20 Lines • Show All 125 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/mad-combine.ll

	Show First 20 Lines • Show All 394 Lines • ▼ Show 20 Lines

	; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_0_f32:			; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_0_f32:
	; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 glc{{$}}			; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 glc{{$}}
	; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 glc{{$}}			; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 glc{{$}}
	; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 glc{{$}}			; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 glc{{$}}
	; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12 glc{{$}}			; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12 glc{{$}}
	; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16 glc{{$}}			; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16 glc{{$}}

	; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]			; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
	; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]			; SI-STD-SAFE: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
	; SI-STD: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]]			; SI-STD-SAFE: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]]

				; SI-STD-UNSAFE: v_fma_f32 [[TMP0:v[0-9]+]], [[D]], [[E]], -[[C]]
				; SI-STD-UNSAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP0]]

	; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]			; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
	; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]			; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
	; SI-DENORM: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]]			; SI-DENORM: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]]

	; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}			; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
	define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {			define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
	%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0			%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
	▲ Show 20 Lines • Show All 168 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[DAGCombine] Allow FMA combine with both FMA and FMAD
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 369022

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll

llvm/test/CodeGen/AMDGPU/mad-combine.ll

This is an archive of the discontinued LLVM Phabricator instance.

[DAGCombine] Allow FMA combine with both FMA and FMADClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 369022

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll

llvm/test/CodeGen/AMDGPU/mad-combine.ll

[DAGCombine] Allow FMA combine with both FMA and FMAD
ClosedPublic