Diff 21160

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 6,935 Lines • ▼ Show 20 Lines	ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
}		}

return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);		return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}		}

// Attempt different variants of (fadd (fmul a, b), c) -> fma or fmad		// Attempt different variants of (fadd (fmul a, b), c) -> fma or fmad
static SDValue performFaddFmulCombines(unsigned FusedOpcode,		static SDValue performFaddFmulCombines(unsigned FusedOpcode,
bool Aggressive,		bool Aggressive,
		bool ArrangeFPExt,
		hfinkelUnsubmitted Not Done Reply Inline Actions Also, I don't find 'ArrangeFPExt' an informative name. Why not just name it LookThroughFPExt? hfinkel: Also, I don't find 'ArrangeFPExt' an informative name. Why not just name it LookThroughFPExt?
SDNode *N,		SDNode *N,
const TargetLowering &TLI,		const TargetLowering &TLI,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);

		SDLoc SL(N);

// fold (fadd (fmul x, y), z) -> (fma x, y, z)		// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (N0.getOpcode() == ISD::FMUL &&		if (N0.getOpcode() == ISD::FMUL &&
(Aggressive \|\| N0->hasOneUse())) {		(Aggressive \|\| N0->hasOneUse())) {
return DAG.getNode(FusedOpcode, SDLoc(N), VT,		return DAG.getNode(FusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1), N1);		N0.getOperand(0), N0.getOperand(1), N1);
}		}

// fold (fadd x, (fmul y, z)) -> (fma y, z, x)		// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.		// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FMUL &&		if (N1.getOpcode() == ISD::FMUL &&
(Aggressive \|\| N1->hasOneUse())) {		(Aggressive \|\| N1->hasOneUse())) {
return DAG.getNode(FusedOpcode, SDLoc(N), VT,		return DAG.getNode(FusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1), N0);		N1.getOperand(0), N1.getOperand(1), N0);
}		}

		// When possible, arrange FP_EXTEND nodes to do more combining.
		if (ArrangeFPExt) {
		// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
		if (N0.getOpcode() == ISD::FP_EXTEND) {
		SDValue N00 = N0.getOperand(0);
		if (N00.getOpcode() == ISD::FMUL)
		return DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N00.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N00.getOperand(1)), N1);
		}

		// fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x)
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions // fold (fadd x, (fpext (fmul y, z)), z) -> isn't there an extra , z) mehdi_amini: // fold (fadd x, (fpext (fmul y, z)), z) -> isn't there an extra , z)
		// Note: Commutes FADD operands.
		if (N1.getOpcode() == ISD::FP_EXTEND) {
		SDValue N10 = N1.getOperand(0);
		if (N10.getOpcode() == ISD::FMUL)
		return DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N10.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N10.getOperand(1)), N0);
		}
		}

// More folding opportunities when target permits.		// More folding opportunities when target permits.
if (Aggressive) {		if (Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))		// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
if (N0.getOpcode() == ISD::FMA &&		if (N0.getOpcode() == ISD::FMA &&
N0.getOperand(2).getOpcode() == ISD::FMUL) {		N0.getOperand(2).getOpcode() == ISD::FMUL) {
return DAG.getNode(FusedOpcode, SDLoc(N), VT,		return DAG.getNode(FusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),		N0.getOperand(0), N0.getOperand(1),
DAG.getNode(FusedOpcode, SDLoc(N), VT,		DAG.getNode(FusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),		N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),		N0.getOperand(2).getOperand(1),
N1));		N1));
}		}

// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))		// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
if (N1->getOpcode() == ISD::FMA &&		if (N1->getOpcode() == ISD::FMA &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {		N1.getOperand(2).getOpcode() == ISD::FMUL) {
return DAG.getNode(FusedOpcode, SDLoc(N), VT,		return DAG.getNode(FusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1),		N1.getOperand(0), N1.getOperand(1),
DAG.getNode(FusedOpcode, SDLoc(N), VT,		DAG.getNode(FusedOpcode, SL, VT,
N1.getOperand(2).getOperand(0),		N1.getOperand(2).getOperand(0),
N1.getOperand(2).getOperand(1),		N1.getOperand(2).getOperand(1),
N0));		N0));
}		}

		if (ArrangeFPExt) {
		// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
		// -> (fma x, y, (fma (fpext u), (fpext v), z))
		if (N0.getOpcode() == ISD::FMA) {
		SDValue N02 = N0.getOperand(2);
		if (N02.getOpcode() == ISD::FP_EXTEND) {
		SDValue N020 = N02.getOperand(0);
		if (N020.getOpcode() == ISD::FMUL)
		return DAG.getNode(ISD::FMA, SL, VT,
		N0.getOperand(0),
		N0.getOperand(1),
		DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N020.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N020.getOperand(1)),
		N1));
		}
		}

		// fold (fadd (fpext (fma x, y, (fmul u, v))), z)
		// -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions This turns two "low precision" and one "high precision" operations into two "high precision" operations. I'm not saying it is necessarily bad, but I'm not convinced it is always beneficial. The other FMA combines don't have the same behavior. mehdi_amini: This turns two "low precision" and one "high precision" operations into two "high precision"…
		ohsallenAuthorUnsubmitted Not Done Reply Inline Actions In principle you're right, that might not always be beneficial. But in general, it should be, because even when "high precision" operations are twice as expensive as "low precision" ones, the transformation does not worsen things. Right now this is only enabled for PPC, for which low and high precision operations have the same cost. Tell me if this is not acceptable. ohsallen: In principle you're right, that might not always be beneficial. But in general, it should be…
		hfinkelUnsubmitted Not Done Reply Inline Actions I agree, I think this is okay as-is. However, please note this explicitly in a comment above the transform. hfinkel: I agree, I think this is okay as-is. However, please note this explicitly in a comment above…
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions Well you can imagine having more than twice the throughput in f16 than f32 on some targets, and you can also imagine that 2 x f16 operations consume less power than one f32. I'd rather have Owen's opinion on this. mehdi_amini: Well you can imagine having more than twice the throughput in f16 than f32 on some targets, and…
		hfinkelUnsubmitted Not Done Reply Inline Actions Sure, but I also don't want to have an unnecessary proliferation of target hooks. It is reasonable to believe that this will be generally beneficial, and if we someday have a target that wishes to enable enableAggressiveFMAFusion but does not want this specific type of transformation, then we can add a new hook at that time. FWIW, this is not just a throughput issue, but also a format-conversion issue (when we have vectors of float vs. vectors of double, the fpext implies register reorganization, and minimizing the number of such reorganizations is likely, I think, to be beneficial). hfinkel: Sure, but I also don't want to have an unnecessary proliferation of target hooks. It is reasonable…
		if (N0.getOpcode() == ISD::FP_EXTEND) {
		SDValue N00 = N0.getOperand(0);
		if (N00.getOpcode() == ISD::FMA) {
		SDValue N002 = N00.getOperand(2);
		if (N002.getOpcode() == ISD::FMUL)
		return DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N00.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N00.getOperand(1)),
		DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N002.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N002.getOperand(1)),
		N1));
		}
		}

		// fold (fadd x, (fma y, z, (fpext (fmul u, v)))
		// -> (fma y, z, (fma (fpext u), (fpext v), x))
		if (N1.getOpcode() == ISD::FMA) {
		SDValue N12 = N1.getOperand(2);
		if (N12.getOpcode() == ISD::FP_EXTEND) {
		SDValue N120 = N12.getOperand(0);
		if (N120.getOpcode() == ISD::FMUL)
		return DAG.getNode(ISD::FMA, SL, VT,
		N1.getOperand(0),
		N1.getOperand(1),
		DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N120.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N120.getOperand(1)),
		N0));
		}
		}

		// fold (fadd x, (fpext (fma y, z, (fmul u, v)))
		// -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions The only change between this one and the one two levels above is that the operands of the outer fadd are reversed. Is there an elegant way of not duplicating the code? A lambda maybe? mehdi_amini: The only change between this one and the one two levels above is that the operands of the outer…
		hfinkelUnsubmitted Not Done Reply Inline Actions There is indeed a lot of very similar code here. If we could share this better in between patterns that would be much better. hfinkel: There is indeed a lot of very similar code here. If we could share this better in between…
		if (N1.getOpcode() == ISD::FP_EXTEND) {
		SDValue N10 = N1.getOperand(0);
		if (N10.getOpcode() == ISD::FMA) {
		SDValue N102 = N10.getOperand(2);
		if (N102.getOpcode() == ISD::FMUL)
		return DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N10.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N10.getOperand(1)),
		DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N102.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N102.getOperand(1)),
		N0));
		}
		}
		}
}		}

return SDValue();		return SDValue();
}		}

static SDValue performFsubFmulCombines(unsigned FusedOpcode,		static SDValue performFsubFmulCombines(unsigned FusedOpcode,
bool Aggressive,		bool Aggressive,
		bool ArrangeFPExt,
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions The existing function is already not documented, but adding a new argument only exhibits how lacking it is. Can you add a comment on top of this function? I'd rather see the meaning of at least the first three arguments defined. It should also be specified whether this function can be called outside of fast-math and which combinations of parameters are allowed in that case. mehdi_amini: The existing function is already not documented, but adding a new argument only exhibits how…
SDNode *N,		SDNode *N,
const TargetLowering &TLI,		const TargetLowering &TLI,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);

SDLoc SL(N);		SDLoc SL(N);
Show All 21 Lines	if (N0.getOpcode() == ISD::FNEG &&
(Aggressive \|\| (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {		(Aggressive \|\| (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);		SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);		SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(FusedOpcode, SL, VT,		return DAG.getNode(FusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,		DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
DAG.getNode(ISD::FNEG, SL, VT, N1));		DAG.getNode(ISD::FNEG, SL, VT, N1));
}		}

		// When possible, arrange FP_EXTEND nodes to do more combining.
		if (ArrangeFPExt) {
		// fold (fsub (fpext (fmul x, y)), z)
		// -> (fma (fpext x), (fpext y), (fneg z))
		if (N0.getOpcode() == ISD::FP_EXTEND) {
		SDValue N00 = N0.getOperand(0);
		if (N00.getOpcode() == ISD::FMUL)
		return DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N00.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N00.getOperand(1)),
		DAG.getNode(ISD::FNEG, SL, VT, N1));
		}

		// fold (fsub x, (fpext (fmul y, z)))
		// -> (fma (fneg (fpext y)), (fpext z), x)
		// Note: Commutes FSUB operands.
		if (N1.getOpcode() == ISD::FP_EXTEND) {
		SDValue N10 = N1.getOperand(0);
		if (N10.getOpcode() == ISD::FMUL)
		return DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FNEG, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N10.getOperand(0))),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N10.getOperand(1)),
		N0);
		}

		// fold (fsub (fpext (fneg (fmul, x, y))), z)
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions I wonder if this is canonical? Couldn't it be transformed to: fneg (fadd (fpext (fmul, x, y)), z) mehdi_amini: I wonder if this is canonical? Couldn't it be transformed to: ``` fneg (fadd (fpext (fmul, x…
		// -> (fma (fneg (fpext x)), (fpext y), (fneg z))
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions I come back with the canonicalization problem. Wouldn't it be more canonical to have: fneg (fadd (fpext (fmul, x, y)), z) In which case this combine would be useless. mehdi_amini: I come back with the canonicalization problem. Wouldn't it be more canonical to have: fneg…
		ohsallenAuthorUnsubmitted Not Done Reply Inline Actions This code is triggered when TLI.isFPExtFree() returns true. The FPEXT nodes are only generated for correctness, and are expected to be removed later by a MC pass. Thus the code generated here is more canonical in this context. ohsallen: This code is triggered when TLI.isFPExtFree() returns true. The FPEXT nodes are only generated…
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions I'm not sure you understood my comment. I'm talking about the input pattern, not the output pattern. mehdi_amini: I'm not sure you understood my comment. I'm talking about the input pattern, not the output…
		hfinkelUnsubmitted Not Done Reply Inline Actions Sure, but don't miss the point. If the form being matched is non-canonical, but still fires, then the correct fix is to add a canonicalization in DAGCombine, and not add logic for it here. Assuming transforming from one to the other is always correct (even in the face of NaNs, etc.), then I agree that the fneg(fadd(...)) forms should be preferred. hfinkel: Sure, but don't miss the point. If the form being matched is non-canonical, but still fires…
		if (N0.getOpcode() == ISD::FP_EXTEND) {
		SDValue N00 = N0.getOperand(0);
		if (N00.getOpcode() == ISD::FNEG) {
		SDValue N000 = N00.getOperand(0);
		if (N000.getOpcode() == ISD::FMUL) {
		return DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FNEG, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N000.getOperand(0))),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N000.getOperand(1)),
		DAG.getNode(ISD::FNEG, SL, VT, N1));
		}
		}
		}

		// fold (fsub (fneg (fpext (fmul, x, y))), z)
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions Same canonicalization question, seems redundant with the one before. mehdi_amini: Same canonicalization question, seems redundant with the one before.
		// -> (fma (fneg (fpext x)), (fpext y), (fneg z))
		if (N0.getOpcode() == ISD::FNEG) {
		SDValue N00 = N0.getOperand(0);
		if (N00.getOpcode() == ISD::FP_EXTEND) {
		SDValue N000 = N00.getOperand(0);
		if (N000.getOpcode() == ISD::FMUL) {
		return DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FNEG, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N000.getOperand(0))),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N000.getOperand(1)),
		DAG.getNode(ISD::FNEG, SL, VT, N1));
		}
		}
		}
		}

// More folding opportunities when target permits.		// More folding opportunities when target permits.
if (Aggressive) {		if (Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)		// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))		// -> (fma x, y (fma u, v, (fneg z)))
if (N0.getOpcode() == FusedOpcode &&		if (N0.getOpcode() == FusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL) {		N0.getOperand(2).getOpcode() == ISD::FMUL) {
return DAG.getNode(FusedOpcode, SDLoc(N), VT,		return DAG.getNode(FusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),		N0.getOperand(0), N0.getOperand(1),
DAG.getNode(FusedOpcode, SDLoc(N), VT,		DAG.getNode(FusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),		N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),		N0.getOperand(2).getOperand(1),
DAG.getNode(ISD::FNEG, SDLoc(N), VT,		DAG.getNode(ISD::FNEG, SL, VT,
N1)));		N1)));
}		}

// fold (fsub x, (fma y, z, (fmul u, v)))		// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))		// -> (fma (fneg y), z, (fma (fneg u), v, x))
if (N1.getOpcode() == FusedOpcode &&		if (N1.getOpcode() == FusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {		N1.getOperand(2).getOpcode() == ISD::FMUL) {
SDValue N20 = N1.getOperand(2).getOperand(0);		SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);		SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(FusedOpcode, SDLoc(N), VT,		return DAG.getNode(FusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SDLoc(N), VT,		DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),		N1.getOperand(0)),
N1.getOperand(1),		N1.getOperand(1),
DAG.getNode(FusedOpcode, SDLoc(N), VT,		DAG.getNode(FusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SDLoc(N), VT,		DAG.getNode(ISD::FNEG, SL, VT, N20),
N20),
N21, N0));		N21, N0));
}		}

		if (ArrangeFPExt) {
		// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
		// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
		if (N0.getOpcode() == ISD::FMA) {
		SDValue N02 = N0.getOperand(2);
		if (N02.getOpcode() == ISD::FP_EXTEND) {
		SDValue N020 = N02.getOperand(0);
		if (N020.getOpcode() == ISD::FMUL)
		return DAG.getNode(ISD::FMA, SL, VT,
		N0.getOperand(0), N0.getOperand(1),
		DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N020.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N020.getOperand(1)),
		DAG.getNode(ISD::FNEG, SL, VT,
		N1)));
		}
		}

		// fold (fsub (fpext (fma x, y, (fmul u, v))), z)
		// -> (fma (fpext x), (fpext y),
		// (fma (fpext u), (fpext v), (fneg z)))
		if (N0.getOpcode() == ISD::FP_EXTEND) {
		SDValue N00 = N0.getOperand(0);
		if (N00.getOpcode() == ISD::FMA) {
		SDValue N002 = N00.getOperand(2);
		if (N002.getOpcode() == ISD::FMUL)
		return DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N00.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N00.getOperand(1)),
		DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N002.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N002.getOperand(1)),
		DAG.getNode(ISD::FNEG, SL, VT,
		N1)));
		}
		}

		// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
		// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
		if (N1.getOpcode() == ISD::FMA &&
		N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
		SDValue N120 = N1.getOperand(2).getOperand(0);
		if (N120.getOpcode() == ISD::FMUL) {
		SDValue N1200 = N120.getOperand(0);
		SDValue N1201 = N120.getOperand(1);
		return DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
		N1.getOperand(1),
		DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FNEG, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL,
		VT, N1200)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N1201),
		N0));
		}
		}

		// fold (fsub x, (fpext (fma y, z, (fmul u, v))))
		// -> (fma (fneg (fpext y)), (fpext z),
		// (fma (fneg (fpext u)), (fpext v), x))
		if (N1.getOpcode() == ISD::FP_EXTEND &&
		N1.getOperand(0).getOpcode() == ISD::FMA) {
		SDValue N100 = N1.getOperand(0).getOperand(0);
		SDValue N101 = N1.getOperand(0).getOperand(1);
		SDValue N102 = N1.getOperand(0).getOperand(2);
		if (N102.getOpcode() == ISD::FMUL) {
		SDValue N1020 = N102.getOperand(0);
		SDValue N1021 = N102.getOperand(1);
		return DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FNEG, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N100)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
		DAG.getNode(ISD::FMA, SL, VT,
		DAG.getNode(ISD::FNEG, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL,
		VT, N1020)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N1021),
		N0));
		}
		}
		}
}		}

return SDValue();		return SDValue();
}		}

SDValue DAGCombiner::visitFADD(SDNode *N) {		SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);
▲ Show 20 Lines • Show All 131 Lines • ▼ Show 20 Lines	if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,		return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N0.getOperand(0), DAG.getConstantFP(4.0, VT));		N0.getOperand(0), DAG.getConstantFP(4.0, VT));
}		}
} // enable-unsafe-fp-math		} // enable-unsafe-fp-math

if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {		if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
// Assume if there is an fmad instruction that it should be aggressively		// Assume if there is an fmad instruction that it should be aggressively
// used.		// used.
if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N, TLI, DAG))		if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, false, N, TLI, DAG))
		hfinkelUnsubmitted Not Done Reply Inline Actions Please add a comment here explaining that we can't look through FPExt nodes, and thus that parameter is false, because doing so effectively introduces extra precision that would be invalid for FMAD. hfinkel: Please add a comment here explaining that we can't look through FPExt nodes, and thus that…
return Fused;		return Fused;
}		}

// FADD -> FMA combines:		// FADD -> FMA combines:
if ((Options.AllowFPOpFusion == FPOpFusion::Fast \|\| Options.UnsafeFPMath) &&		if ((Options.AllowFPOpFusion == FPOpFusion::Fast \|\| Options.UnsafeFPMath) &&
TLI.isFMAFasterThanFMulAndFAdd(VT) &&		TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {		(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {

if (!TLI.isOperationLegal(ISD::FMAD, VT)) {		if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
// Don't form FMA if we are preferring FMAD.		// Don't form FMA if we are preferring FMAD.
if (SDValue Fused		if (SDValue Fused
= performFaddFmulCombines(ISD::FMA,		= performFaddFmulCombines(ISD::FMA,
TLI.enableAggressiveFMAFusion(VT),		TLI.enableAggressiveFMAFusion(VT),
		TLI.isFPExtFree(VT),
N, TLI, DAG)) {		N, TLI, DAG)) {
return Fused;		return Fused;
}		}
}		}

// When FP_EXTEND nodes are free on the target, and there is an opportunity
// to combine into FMA, arrange such nodes accordingly.
if (TLI.isFPExtFree(VT)) {

// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FMUL)
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N00.getOperand(1)), N1);
}

// fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x)
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (N10.getOpcode() == ISD::FMUL)
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N10.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N10.getOperand(1)), N0);
}
}
}		}

return SDValue();		return SDValue();
}		}

SDValue DAGCombiner::visitFSUB(SDNode *N) {		SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines	if (N1.getOpcode() == ISD::FADD) {
if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))		if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
return GetNegatedExpression(N10, DAG, LegalOperations);		return GetNegatedExpression(N10, DAG, LegalOperations);
}		}
}		}

if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {		if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
// Assume if there is an fmad instruction that it should be aggressively		// Assume if there is an fmad instruction that it should be aggressively
// used.		// used.
if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N, TLI, DAG))		if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, false, N, TLI, DAG))
return Fused;		return Fused;
}		}

// FSUB -> FMA combines:		// FSUB -> FMA combines:
if ((Options.AllowFPOpFusion == FPOpFusion::Fast \|\| Options.UnsafeFPMath) &&		if ((Options.AllowFPOpFusion == FPOpFusion::Fast \|\| Options.UnsafeFPMath) &&
TLI.isFMAFasterThanFMulAndFAdd(VT) &&		TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {		(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {

if (!TLI.isOperationLegal(ISD::FMAD, VT)) {		if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
// Don't form FMA if we are preferring FMAD.		// Don't form FMA if we are preferring FMAD.

if (SDValue Fused		if (SDValue Fused
= performFsubFmulCombines(ISD::FMA,		= performFsubFmulCombines(ISD::FMA,
TLI.enableAggressiveFMAFusion(VT),		TLI.enableAggressiveFMAFusion(VT),
		TLI.isFPExtFree(VT),
N, TLI, DAG)) {		N, TLI, DAG)) {
return Fused;		return Fused;
}		}
}		}

// When FP_EXTEND nodes are free on the target, and there is an opportunity
// to combine into FMA, arrange such nodes accordingly.
if (TLI.isFPExtFree(VT)) {
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FMUL)
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N00.getOperand(1)),
DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
}

// fold (fsub x, (fpext (fmul y, z)))
// -> (fma (fneg (fpext y)), (fpext z), x)
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (N10.getOpcode() == ISD::FMUL)
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
DAG.getNode(ISD::FNEG, SDLoc(N), VT,
DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
VT, N10.getOperand(0))),
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N10.getOperand(1)),
N0);
}

// fold (fsub (fpext (fneg (fmul, x, y))), z)
// -> (fma (fneg (fpext x)), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FNEG) {
SDValue N000 = N00.getOperand(0);
if (N000.getOpcode() == ISD::FMUL) {
return DAG.getNode(ISD::FMA, dl, VT,
DAG.getNode(ISD::FNEG, dl, VT,
DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
VT, N000.getOperand(0))),
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N000.getOperand(1)),
DAG.getNode(ISD::FNEG, dl, VT, N1));
}
}
}

// fold (fsub (fneg (fpext (fmul, x, y))), z)
// -> (fma (fneg (fpext x)), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FNEG) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FP_EXTEND) {
SDValue N000 = N00.getOperand(0);
if (N000.getOpcode() == ISD::FMUL) {
return DAG.getNode(ISD::FMA, dl, VT,
DAG.getNode(ISD::FNEG, dl, VT,
DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
VT, N000.getOperand(0))),
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N000.getOperand(1)),
DAG.getNode(ISD::FNEG, dl, VT, N1));
}
}
}
}
}		}

return SDValue();		return SDValue();
}		}

SDValue DAGCombiner::visitFMUL(SDNode *N) {		SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);
▲ Show 20 Lines • Show All 5,738 Lines • Show Last 20 Lines

test/CodeGen/PowerPC/fma-assoc.ll

	Show First 20 Lines • Show All 71 Lines • ▼ Show 20 Lines

	; CHECK-VSX-LABEL: test_FMSUB_ASSOC2:			; CHECK-VSX-LABEL: test_FMSUB_ASSOC2:
	; CHECK-VSX: xsnmsubmdp			; CHECK-VSX: xsnmsubmdp
	; CHECK-VSX-NEXT: xsnmsubadp			; CHECK-VSX-NEXT: xsnmsubadp
	; CHECK-VSX-NEXT: fmr			; CHECK-VSX-NEXT: fmr
	; CHECK-VSX-NEXT: blr			; CHECK-VSX-NEXT: blr
	}			}

				define double @test_FMADD_ASSOC_EXT1(float %A, float %B, double %C,
				double %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fpext float %F to double ; <double> [#uses=1]
				%H = fmul double %C, %D ; <double> [#uses=1]
				%I = fadd double %H, %G ; <double> [#uses=1]
				%J = fadd double %I, %E ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMADD_ASSOC_EXT1:
				; CHECK: fmadd
				; CHECK-NEXT: fmadd
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT1:
				; CHECK-VSX: xsmaddmdp
				; CHECK-VSX-NEXT: xsmaddadp
				; CHECK-VSX-NEXT: blr
				}

				define double @test_FMADD_ASSOC_EXT2(float %A, float %B, float %C,
				float %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fmul float %C, %D ; <float> [#uses=1]
				%H = fadd float %F, %G ; <float> [#uses=1]
				%I = fpext float %H to double ; <double> [#uses=1]
				%J = fadd double %I, %E ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMADD_ASSOC_EXT2:
				; CHECK: fmadd
				; CHECK-NEXT: fmadd
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT2:
				; CHECK-VSX: xsmaddmdp
				; CHECK-VSX-NEXT: xsmaddadp
				; CHECK-VSX-NEXT: fmr
				; CHECK-VSX-NEXT: blr
				}

				define double @test_FMADD_ASSOC_EXT3(float %A, float %B, double %C,
				double %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fpext float %F to double ; <double> [#uses=1]
				%H = fmul double %C, %D ; <double> [#uses=1]
				%I = fadd double %H, %G ; <double> [#uses=1]
				%J = fadd double %E, %I ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMADD_ASSOC_EXT3:
				; CHECK: fmadd
				; CHECK-NEXT: fmadd
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT3:
				; CHECK-VSX: xsmaddmdp
				; CHECK-VSX-NEXT: xsmaddadp
				; CHECK-VSX-NEXT: blr
				}

				define double @test_FMADD_ASSOC_EXT4(float %A, float %B, float %C,
				float %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fmul float %C, %D ; <float> [#uses=1]
				%H = fadd float %F, %G ; <float> [#uses=1]
				%I = fpext float %H to double ; <double> [#uses=1]
				%J = fadd double %E, %I ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMADD_ASSOC_EXT4:
				; CHECK: fmadd
				; CHECK-NEXT: fmadd
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT4:
				; CHECK-VSX: xsmaddmdp
				; CHECK-VSX-NEXT: xsmaddadp
				; CHECK-VSX-NEXT: fmr
				; CHECK-VSX-NEXT: blr
				}

				define double @test_FMSUB_ASSOC_EXT1(float %A, float %B, double %C,
				double %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fpext float %F to double ; <double> [#uses=1]
				%H = fmul double %C, %D ; <double> [#uses=1]
				%I = fadd double %H, %G ; <double> [#uses=1]
				%J = fsub double %I, %E ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMSUB_ASSOC_EXT1:
				; CHECK: fmsub
				; CHECK-NEXT: fmadd
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT1:
				; CHECK-VSX: xsmsubmdp
				; CHECK-VSX-NEXT: xsmaddadp
				; CHECK-VSX-NEXT: blr
				}

				define double @test_FMSUB_ASSOC_EXT2(float %A, float %B, float %C,
				float %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fmul float %C, %D ; <float> [#uses=1]
				%H = fadd float %F, %G ; <float> [#uses=1]
				%I = fpext float %H to double ; <double> [#uses=1]
				%J = fsub double %I, %E ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMSUB_ASSOC_EXT2:
				; CHECK: fmsub
				; CHECK-NEXT: fmadd
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT2:
				; CHECK-VSX: xsmsubmdp
				; CHECK-VSX-NEXT: xsmaddadp
				; CHECK-VSX-NEXT: fmr
				; CHECK-VSX-NEXT: blr
				}

				define double @test_FMSUB_ASSOC_EXT3(float %A, float %B, double %C,
				double %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fpext float %F to double ; <double> [#uses=1]
				%H = fmul double %C, %D ; <double> [#uses=1]
				%I = fadd double %H, %G ; <double> [#uses=1]
				%J = fsub double %E, %I ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMSUB_ASSOC_EXT3:
				; CHECK: fnmsub
				; CHECK-NEXT: fnmsub
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT3:
				; CHECK-VSX: xsnmsubmdp
				; CHECK-VSX-NEXT: xsnmsubadp
				; CHECK-VSX-NEXT: fmr
				; CHECK-VSX-NEXT: blr
				}

				define double @test_FMSUB_ASSOC_EXT4(float %A, float %B, float %C,
				float %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fmul float %C, %D ; <float> [#uses=1]
				%H = fadd float %F, %G ; <float> [#uses=1]
				%I = fpext float %H to double ; <double> [#uses=1]
				%J = fsub double %E, %I ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMSUB_ASSOC_EXT4:
				; CHECK: fnmsub
				; CHECK-NEXT: fnmsub
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT4:
				; CHECK-VSX: xsnmsubmdp
				; CHECK-VSX-NEXT: xsnmsubadp
				; CHECK-VSX-NEXT: fmr
				; CHECK-VSX-NEXT: blr
				}
				No newline at end of file

This is an archive of the discontinued LLVM Phabricator instance.

Refactor and enhance FMA combine
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 21160

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

test/CodeGen/PowerPC/fma-assoc.ll

This is an archive of the discontinued LLVM Phabricator instance.

Refactor and enhance FMA combineClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 21160

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

test/CodeGen/PowerPC/fma-assoc.ll

Refactor and enhance FMA combine
ClosedPublic