Diff 23179

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 303 Lines • ▼ Show 20 Lines	private:
SDValue visitCONCAT_VECTORS(SDNode *N);		SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);		SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);		SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue visitSCALAR_TO_VECTOR(SDNode *N);		SDValue visitSCALAR_TO_VECTOR(SDNode *N);
SDValue visitINSERT_SUBVECTOR(SDNode *N);		SDValue visitINSERT_SUBVECTOR(SDNode *N);
SDValue visitMLOAD(SDNode *N);		SDValue visitMLOAD(SDNode *N);
SDValue visitMSTORE(SDNode *N);		SDValue visitMSTORE(SDNode *N);

		SDValue visitFADDForFMACombine(SDNode *N);
		SDValue visitFSUBForFMACombine(SDNode *N);

SDValue XformToShuffleWithZero(SDNode *N);		SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);		SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);

SDValue visitShiftByConstant(SDNode N, ConstantSDNode Amt);		SDValue visitShiftByConstant(SDNode N, ConstantSDNode Amt);

bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);		bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);		SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);		SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
▲ Show 20 Lines • Show All 6,730 Lines • ▼ Show 20 Lines	for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
// For big endian targets, swap the order of the pieces of each element.		// For big endian targets, swap the order of the pieces of each element.
if (TLI.isBigEndian())		if (TLI.isBigEndian())
std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());		std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
}		}

return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);		return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}		}

// Attempt different variants of (fadd (fmul a, b), c) -> fma or fmad		/// Try to perform FMA combining on a given FADD node.
		hfinkelUnsubmitted Not Done Reply Inline Actions Also, I don't find 'ArrangeFPExt' an informative name. Why not just name it LookThroughFPExt? hfinkel: Also, I don't find 'ArrangeFPExt' an informative name. Why not just name it LookThroughFPExt?
static SDValue performFaddFmulCombines(unsigned FusedOpcode,		SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
bool Aggressive,
SDNode *N,
const TargetLowering &TLI,
SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
		SDLoc SL(N);

		const TargetOptions &Options = DAG.getTarget().Options;
		bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast \|\|
		Options.UnsafeFPMath);

		// Floating-point multiply-add with intermediate rounding.
		bool HasFMAD = (LegalOperations &&
		TLI.isOperationLegal(ISD::FMAD, VT));

		// Floating-point multiply-add without intermediate rounding.
		bool HasFMA = ((!LegalOperations \|\|
		TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
		TLI.isFMAFasterThanFMulAndFAdd(VT) &&
		UnsafeFPMath);

		// No valid opcode, do not combine.
		if (!HasFMAD && !HasFMA)
		return SDValue();

		// Always prefer FMAD to FMA for precision.
		unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
		bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
		bool LookThroughFPExt = TLI.isFPExtFree(VT);

// fold (fadd (fmul x, y), z) -> (fma x, y, z)		// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (N0.getOpcode() == ISD::FMUL &&		if (N0.getOpcode() == ISD::FMUL &&
(Aggressive \|\| N0->hasOneUse())) {		(Aggressive \|\| N0->hasOneUse())) {
return DAG.getNode(FusedOpcode, SDLoc(N), VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1), N1);		N0.getOperand(0), N0.getOperand(1), N1);
}		}

// fold (fadd x, (fmul y, z)) -> (fma y, z, x)		// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.		// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FMUL &&		if (N1.getOpcode() == ISD::FMUL &&
(Aggressive \|\| N1->hasOneUse())) {		(Aggressive \|\| N1->hasOneUse())) {
return DAG.getNode(FusedOpcode, SDLoc(N), VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1), N0);		N1.getOperand(0), N1.getOperand(1), N0);
}		}

		// Look through FP_EXTEND nodes to do more combining.
		if (UnsafeFPMath && LookThroughFPExt) {
		// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
		if (N0.getOpcode() == ISD::FP_EXTEND) {
		SDValue N00 = N0.getOperand(0);
		if (N00.getOpcode() == ISD::FMUL)
		return DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N00.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N00.getOperand(1)), N1);
		}

		// fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions // fold (fadd x, (fpext (fmul y, z)), z) -> isn't there an extra , z) mehdi_amini: // fold (fadd x, (fpext (fmul y, z)), z) -> isn't there an extra , z)
		// Note: Commutes FADD operands.
		if (N1.getOpcode() == ISD::FP_EXTEND) {
		SDValue N10 = N1.getOperand(0);
		if (N10.getOpcode() == ISD::FMUL)
		return DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N10.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N10.getOperand(1)), N0);
		}
		}

// More folding opportunities when target permits.		// More folding opportunities when target permits.
if (Aggressive) {		if (UnsafeFPMath && Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))		// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
if (N0.getOpcode() == ISD::FMA &&		if (N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL) {		N0.getOperand(2).getOpcode() == ISD::FMUL) {
return DAG.getNode(FusedOpcode, SDLoc(N), VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),		N0.getOperand(0), N0.getOperand(1),
DAG.getNode(FusedOpcode, SDLoc(N), VT,		DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),		N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),		N0.getOperand(2).getOperand(1),
N1));		N1));
}		}

// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))		// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
if (N1->getOpcode() == ISD::FMA &&		if (N1->getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {		N1.getOperand(2).getOpcode() == ISD::FMUL) {
return DAG.getNode(FusedOpcode, SDLoc(N), VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1),		N1.getOperand(0), N1.getOperand(1),
DAG.getNode(FusedOpcode, SDLoc(N), VT,		DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(2).getOperand(0),		N1.getOperand(2).getOperand(0),
N1.getOperand(2).getOperand(1),		N1.getOperand(2).getOperand(1),
N0));		N0));
}		}

		if (LookThroughFPExt) {
		// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
		// -> (fma x, y, (fma (fpext u), (fpext v), z))
		auto FoldFAddFMAFPExtFMul = [&] (
		SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
		return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
		DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
		DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
		Z));
		};
		if (N0.getOpcode() == PreferredFusedOpcode) {
		SDValue N02 = N0.getOperand(2);
		if (N02.getOpcode() == ISD::FP_EXTEND) {
		SDValue N020 = N02.getOperand(0);
		if (N020.getOpcode() == ISD::FMUL)
		return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
		N020.getOperand(0), N020.getOperand(1),
		N1);
		}
		}

		mehdi_aminiUnsubmitted Not Done Reply Inline Actions This turns two "low precision" and one "high precision" operations into two "high precision" operations. I'm not saying it is necessarily bad, but I'm not convinced it is always beneficial. The other FMA combines don't have the same behavior. mehdi_amini: This turns two "low precision" and one "high precision" operations into two "high precision"…
		ohsallenAuthorUnsubmitted Not Done Reply Inline Actions In principle you're right, that might not always be beneficial. But in general, it should be, because even when "high precision" operations are twice as expensive as "low precision" ones, the transformation does not worsen things. Right now this is only enabled for PPC, for which low and high precision operations have the same cost. Tell me if this is not acceptable. ohsallen: In principle you're right, that might not always be beneficial. But in general, it should be…
		hfinkelUnsubmitted Not Done Reply Inline Actions I agree, I think this is okay as-is. However, please note this explicitly in a comment above the transform. hfinkel: I agree, I think this is okay as-is. However, please note this explicitly in a comment above…
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions Well you can imagine having more than twice the throughput in f16 than f32 on some targets, and you can also imagine that 2 x f16 operations consume less power than one f32. I'd rather have Owen's opinion on this. mehdi_amini: Well you can imagine having more than twice the throughput in f16 than f32 on some targets, and…
		hfinkelUnsubmitted Not Done Reply Inline Actions Sure, but I also don't want to have an unnecessary proliferation of target hooks. It is reasonable to believe that this will be generally beneficial, and if we someday have a target that wishes to enable enableAggressiveFMAFusion but does not want this specific type of transformation, then we can add a new hook at that time. FWIW, this is not just a throughput issue, but also a format-conversion issue (when we have vectors of float vs. vectors of double, the fpext implies register reorganization, and minimizing the number of such reorganizations is likely, I think, to be beneficial). hfinkel: Sure, but I also don't want to have an unnecessary proliferation of target hooks. It is reasonable…
		// fold (fadd (fpext (fma x, y, (fmul u, v))), z)
		// -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
		// FIXME: This turns two single-precision and one double-precision
		// operation into two double-precision operations, which might not be
		// interesting for all targets, especially GPUs.
		auto FoldFAddFPExtFMAFMul = [&] (
		SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
		return DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
		DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
		DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
		DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
		Z));
		};
		if (N0.getOpcode() == ISD::FP_EXTEND) {
		SDValue N00 = N0.getOperand(0);
		if (N00.getOpcode() == PreferredFusedOpcode) {
		SDValue N002 = N00.getOperand(2);
		if (N002.getOpcode() == ISD::FMUL)
		return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
		N002.getOperand(0), N002.getOperand(1),
		N1);
		}
		}

		// fold (fadd x, (fma y, z, (fpext (fmul u, v)))
		// -> (fma y, z, (fma (fpext u), (fpext v), x))
		if (N1.getOpcode() == PreferredFusedOpcode) {
		SDValue N12 = N1.getOperand(2);
		if (N12.getOpcode() == ISD::FP_EXTEND) {
		SDValue N120 = N12.getOperand(0);
		if (N120.getOpcode() == ISD::FMUL)
		return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
		N120.getOperand(0), N120.getOperand(1),
		N0);
		}
		}

		// fold (fadd x, (fpext (fma y, z, (fmul u, v)))
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions The only change between this one and the one two levels above is that the operands of the outer fadd are reversed. Is there an elegant way of not duplicating the code? A lambda maybe? mehdi_amini: The only change between this one and the one two levels above is that the operands of the outer…
		hfinkelUnsubmitted Not Done Reply Inline Actions There is indeed a lot of very similar code here. If we could share this better in between patterns that would be much better. hfinkel: There is indeed a lot of very similar code here. If we could share this better in between…
		// -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
		// FIXME: This turns two single-precision and one double-precision
		// operation into two double-precision operations, which might not be
		// interesting for all targets, especially GPUs.
		if (N1.getOpcode() == ISD::FP_EXTEND) {
		SDValue N10 = N1.getOperand(0);
		if (N10.getOpcode() == PreferredFusedOpcode) {
		SDValue N102 = N10.getOperand(2);
		if (N102.getOpcode() == ISD::FMUL)
		return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
		N102.getOperand(0), N102.getOperand(1),
		N0);
		}
		}
		}
}		}

return SDValue();		return SDValue();
}		}

static SDValue performFsubFmulCombines(unsigned FusedOpcode,		/// Try to perform FMA combining on a given FSUB node.
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions The existing function is already undocumented, and adding a new argument only highlights how lacking it is. Can you add a comment on top of this function? I'd rather see the meaning of at least the first three arguments defined. It should also be specified whether this function can be called outside of fast-math and which combinations of parameters are allowed in that case. mehdi_amini: The existing function is already undocumented, and adding a new argument only highlights how…
bool Aggressive,		SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDNode *N,
const TargetLowering &TLI,
SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);

SDLoc SL(N);		SDLoc SL(N);

		const TargetOptions &Options = DAG.getTarget().Options;
		bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast \|\|
		Options.UnsafeFPMath);

		// Floating-point multiply-add with intermediate rounding.
		bool HasFMAD = (LegalOperations &&
		TLI.isOperationLegal(ISD::FMAD, VT));

		// Floating-point multiply-add without intermediate rounding.
		bool HasFMA = ((!LegalOperations \|\|
		TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
		TLI.isFMAFasterThanFMulAndFAdd(VT) &&
		UnsafeFPMath);

		// No valid opcode, do not combine.
		if (!HasFMAD && !HasFMA)
		return SDValue();

		// Always prefer FMAD to FMA for precision.
		unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
		bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
		bool LookThroughFPExt = TLI.isFPExtFree(VT);

// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))		// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
if (N0.getOpcode() == ISD::FMUL &&		if (N0.getOpcode() == ISD::FMUL &&
(Aggressive \|\| N0->hasOneUse())) {		(Aggressive \|\| N0->hasOneUse())) {
return DAG.getNode(FusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),		N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT, N1));		DAG.getNode(ISD::FNEG, SL, VT, N1));
}		}

// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)		// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.		// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FMUL &&		if (N1.getOpcode() == ISD::FMUL &&
(Aggressive \|\| N1->hasOneUse()))		(Aggressive \|\| N1->hasOneUse()))
return DAG.getNode(FusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,		DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),		N1.getOperand(0)),
N1.getOperand(1), N0);		N1.getOperand(1), N0);

// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))		// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
if (N0.getOpcode() == ISD::FNEG &&		if (N0.getOpcode() == ISD::FNEG &&
N0.getOperand(0).getOpcode() == ISD::FMUL &&		N0.getOperand(0).getOpcode() == ISD::FMUL &&
(Aggressive \|\| (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {		(Aggressive \|\| (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);		SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);		SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(FusedOpcode, SL, VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,		DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
DAG.getNode(ISD::FNEG, SL, VT, N1));		DAG.getNode(ISD::FNEG, SL, VT, N1));
}		}

		// Look through FP_EXTEND nodes to do more combining.
		if (UnsafeFPMath && LookThroughFPExt) {
		// fold (fsub (fpext (fmul x, y)), z)
		// -> (fma (fpext x), (fpext y), (fneg z))
		if (N0.getOpcode() == ISD::FP_EXTEND) {
		SDValue N00 = N0.getOperand(0);
		if (N00.getOpcode() == ISD::FMUL)
		return DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N00.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N00.getOperand(1)),
		DAG.getNode(ISD::FNEG, SL, VT, N1));
		}

		// fold (fsub x, (fpext (fmul y, z)))
		// -> (fma (fneg (fpext y)), (fpext z), x)
		// Note: Commutes FSUB operands.
		if (N1.getOpcode() == ISD::FP_EXTEND) {
		SDValue N10 = N1.getOperand(0);
		if (N10.getOpcode() == ISD::FMUL)
		return DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FNEG, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N10.getOperand(0))),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N10.getOperand(1)),
		N0);
		}

		// fold (fsub (fpext (fneg (fmul, x, y))), z)
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions I wonder if this is canonical? Couldn't it be transformed to: fneg (fadd (fpext (fmul, x, y)), z) mehdi_amini: I wonder if this is canonical? Couldn't it be transformed to: ``` fneg (fadd (fpext (fmul, x…
		// -> (fneg (fma (fpext x), (fpext y), z))
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions I come back with the canonicalization problem. Wouldn't it be more canonical to have: fneg (fadd (fpext (fmul, x, y)), z) In which case this combine would be useless. mehdi_amini: I come back with the canonicalization problem. Wouldn't it be more canonical to have: fneg…
		ohsallenAuthorUnsubmitted Not Done Reply Inline Actions This code is triggered when TLI.isFPExtFree() returns true. The FPEXT nodes are only generated for correctness, and are expected to be removed later by a MC pass. Thus the code generated here is more canonical in this context. ohsallen: This code is triggered when TLI.isFPExtFree() returns true. The FPEXT nodes are only generated…
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions I'm not sure you understood my comment. I'm talking about the input pattern, not the output pattern. mehdi_amini: I'm not sure you understood my comment. I'm talking about the input pattern, not the output…
		hfinkelUnsubmitted Not Done Reply Inline Actions Sure, but don't miss the point. If the form being matched is non-canonical, but still fires, then the correct fix is to add a canonicalization in DAGCombine, and not add logic for it here. Assuming transforming from one to the other is always correct (even in the face of NaNs, etc.), then I agree that the fneg(fadd(...)) forms should be preferred. hfinkel: Sure, but don't miss the point. If the form being matched is non-canonical, but still fires…
		// Note: This could be removed with appropriate canonicalization of the
		// input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
		// orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
		// from implementing the canonicalization in visitFSUB.
		if (N0.getOpcode() == ISD::FP_EXTEND) {
		SDValue N00 = N0.getOperand(0);
		if (N00.getOpcode() == ISD::FNEG) {
		SDValue N000 = N00.getOperand(0);
		if (N000.getOpcode() == ISD::FMUL) {
		return DAG.getNode(ISD::FNEG, SL, VT,
		DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N000.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N000.getOperand(1)),
		N1));
		}
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions Same canonicalization question, seems redundant with the one before. mehdi_amini: Same canonicalization question, seems redundant with the one before.
		}
		}

		// fold (fsub (fneg (fpext (fmul, x, y))), z)
		// -> (fneg (fma (fpext x)), (fpext y), z)
		// Note: This could be removed with appropriate canonicalization of the
		// input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
		// orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
		// from implementing the canonicalization in visitFSUB.
		if (N0.getOpcode() == ISD::FNEG) {
		SDValue N00 = N0.getOperand(0);
		if (N00.getOpcode() == ISD::FP_EXTEND) {
		SDValue N000 = N00.getOperand(0);
		if (N000.getOpcode() == ISD::FMUL) {
		return DAG.getNode(ISD::FNEG, SL, VT,
		DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N000.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N000.getOperand(1)),
		N1));
		}
		}
		}

		}

// More folding opportunities when target permits.		// More folding opportunities when target permits.
if (Aggressive) {		if (UnsafeFPMath && Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)		// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))		// -> (fma x, y (fma u, v, (fneg z)))
if (N0.getOpcode() == FusedOpcode &&		if (N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL) {		N0.getOperand(2).getOpcode() == ISD::FMUL) {
return DAG.getNode(FusedOpcode, SDLoc(N), VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),		N0.getOperand(0), N0.getOperand(1),
DAG.getNode(FusedOpcode, SDLoc(N), VT,		DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),		N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),		N0.getOperand(2).getOperand(1),
DAG.getNode(ISD::FNEG, SDLoc(N), VT,		DAG.getNode(ISD::FNEG, SL, VT,
N1)));		N1)));
}		}

// fold (fsub x, (fma y, z, (fmul u, v)))		// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))		// -> (fma (fneg y), z, (fma (fneg u), v, x))
if (N1.getOpcode() == FusedOpcode &&		if (N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {		N1.getOperand(2).getOpcode() == ISD::FMUL) {
SDValue N20 = N1.getOperand(2).getOperand(0);		SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);		SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(FusedOpcode, SDLoc(N), VT,		return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SDLoc(N), VT,		DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),		N1.getOperand(0)),
N1.getOperand(1),		N1.getOperand(1),
DAG.getNode(FusedOpcode, SDLoc(N), VT,		DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SDLoc(N), VT,		DAG.getNode(ISD::FNEG, SL, VT, N20),
N20),
N21, N0));		N21, N0));
}		}

		if (LookThroughFPExt) {
		// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
		// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
		if (N0.getOpcode() == PreferredFusedOpcode) {
		SDValue N02 = N0.getOperand(2);
		if (N02.getOpcode() == ISD::FP_EXTEND) {
		SDValue N020 = N02.getOperand(0);
		if (N020.getOpcode() == ISD::FMUL)
		return DAG.getNode(PreferredFusedOpcode, SL, VT,
		N0.getOperand(0), N0.getOperand(1),
		DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N020.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N020.getOperand(1)),
		DAG.getNode(ISD::FNEG, SL, VT,
		N1)));
		}
		}

		// fold (fsub (fpext (fma x, y, (fmul u, v))), z)
		// -> (fma (fpext x), (fpext y),
		// (fma (fpext u), (fpext v), (fneg z)))
		// FIXME: This turns two single-precision and one double-precision
		// operation into two double-precision operations, which might not be
		// interesting for all targets, especially GPUs.
		if (N0.getOpcode() == ISD::FP_EXTEND) {
		SDValue N00 = N0.getOperand(0);
		if (N00.getOpcode() == PreferredFusedOpcode) {
		SDValue N002 = N00.getOperand(2);
		if (N002.getOpcode() == ISD::FMUL)
		return DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N00.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N00.getOperand(1)),
		DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N002.getOperand(0)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N002.getOperand(1)),
		DAG.getNode(ISD::FNEG, SL, VT,
		N1)));
		}
		}

		// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
		// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
		if (N1.getOpcode() == PreferredFusedOpcode &&
		N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
		SDValue N120 = N1.getOperand(2).getOperand(0);
		if (N120.getOpcode() == ISD::FMUL) {
		SDValue N1200 = N120.getOperand(0);
		SDValue N1201 = N120.getOperand(1);
		return DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
		N1.getOperand(1),
		DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FNEG, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL,
		VT, N1200)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N1201),
		N0));
		}
		}

		// fold (fsub x, (fpext (fma y, z, (fmul u, v))))
		// -> (fma (fneg (fpext y)), (fpext z),
		// (fma (fneg (fpext u)), (fpext v), x))
		// FIXME: This turns two single-precision and one double-precision
		// operation into two double-precision operations, which might not be
		// interesting for all targets, especially GPUs.
		if (N1.getOpcode() == ISD::FP_EXTEND &&
		N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
		SDValue N100 = N1.getOperand(0).getOperand(0);
		SDValue N101 = N1.getOperand(0).getOperand(1);
		SDValue N102 = N1.getOperand(0).getOperand(2);
		if (N102.getOpcode() == ISD::FMUL) {
		SDValue N1020 = N102.getOperand(0);
		SDValue N1021 = N102.getOperand(1);
		return DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FNEG, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N100)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
		DAG.getNode(PreferredFusedOpcode, SL, VT,
		DAG.getNode(ISD::FNEG, SL, VT,
		DAG.getNode(ISD::FP_EXTEND, SL,
		VT, N1020)),
		DAG.getNode(ISD::FP_EXTEND, SL, VT,
		N1021),
		N0));
		}
		}
		}
}		}

return SDValue();		return SDValue();
}		}

SDValue DAGCombiner::visitFADD(SDNode *N) {		SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);
▲ Show 20 Lines • Show All 127 Lines • ▼ Show 20 Lines	if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&		N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&		N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&		N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0))		N0.getOperand(0) == N1.getOperand(0))
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,		return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
N0.getOperand(0), DAG.getConstantFP(4.0, VT));		N0.getOperand(0), DAG.getConstantFP(4.0, VT));
}		}
} // enable-unsafe-fp-math		} // enable-unsafe-fp-math

if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
// Assume if there is an fmad instruction that it should be aggressively
// used.
if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N, TLI, DAG))
return Fused;
}

// FADD -> FMA combines:		// FADD -> FMA combines:
		hfinkelUnsubmitted Not Done Reply Inline Actions Please add a comment here explaining that we can't look through FPExt nodes, and thus that parameter is false, because doing so effectively introduces extra precision that would be invalid for FMAD. hfinkel: Please add a comment here explaining that we can't look through FPExt nodes, and thus that…
if ((Options.AllowFPOpFusion == FPOpFusion::Fast \|\| Options.UnsafeFPMath) &&		SDValue Fused = visitFADDForFMACombine(N);
TLI.isFMAFasterThanFMulAndFAdd(VT) &&		if (Fused) {
(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {		AddToWorklist(Fused.getNode());

if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
// Don't form FMA if we are preferring FMAD.
if (SDValue Fused
= performFaddFmulCombines(ISD::FMA,
TLI.enableAggressiveFMAFusion(VT),
N, TLI, DAG)) {
return Fused;		return Fused;
}		}
}

// When FP_EXTEND nodes are free on the target, and there is an opportunity
// to combine into FMA, arrange such nodes accordingly.
if (TLI.isFPExtFree(VT)) {

// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FMUL)
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N00.getOperand(1)), N1);
}

// fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x)
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (N10.getOpcode() == ISD::FMUL)
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N10.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N10.getOperand(1)), N0);
}
}
}

return SDValue();		return SDValue();
}		}

SDValue DAGCombiner::visitFSUB(SDNode *N) {		SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);		ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines	if (N1.getOpcode() == ISD::FADD) {
if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))		if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
return GetNegatedExpression(N11, DAG, LegalOperations);		return GetNegatedExpression(N11, DAG, LegalOperations);

if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))		if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
return GetNegatedExpression(N10, DAG, LegalOperations);		return GetNegatedExpression(N10, DAG, LegalOperations);
}		}
}		}

if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
// Assume if there is an fmad instruction that it should be aggressively
// used.
if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N, TLI, DAG))
return Fused;
}

// FSUB -> FMA combines:		// FSUB -> FMA combines:
if ((Options.AllowFPOpFusion == FPOpFusion::Fast \|\| Options.UnsafeFPMath) &&		SDValue Fused = visitFSUBForFMACombine(N);
TLI.isFMAFasterThanFMulAndFAdd(VT) &&		if (Fused) {
(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {		AddToWorklist(Fused.getNode());

if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
// Don't form FMA if we are preferring FMAD.

if (SDValue Fused
= performFsubFmulCombines(ISD::FMA,
TLI.enableAggressiveFMAFusion(VT),
N, TLI, DAG)) {
return Fused;		return Fused;
}		}
}

// When FP_EXTEND nodes are free on the target, and there is an opportunity
// to combine into FMA, arrange such nodes accordingly.
if (TLI.isFPExtFree(VT)) {
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FMUL)
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N00.getOperand(1)),
DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
}

// fold (fsub x, (fpext (fmul y, z)))
// -> (fma (fneg (fpext y)), (fpext z), x)
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (N10.getOpcode() == ISD::FMUL)
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
DAG.getNode(ISD::FNEG, SDLoc(N), VT,
DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
VT, N10.getOperand(0))),
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N10.getOperand(1)),
N0);
}

// fold (fsub (fpext (fneg (fmul, x, y))), z)
// -> (fma (fneg (fpext x)), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FNEG) {
SDValue N000 = N00.getOperand(0);
if (N000.getOpcode() == ISD::FMUL) {
return DAG.getNode(ISD::FMA, dl, VT,
DAG.getNode(ISD::FNEG, dl, VT,
DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
VT, N000.getOperand(0))),
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N000.getOperand(1)),
DAG.getNode(ISD::FNEG, dl, VT, N1));
}
}
}

// fold (fsub (fneg (fpext (fmul, x, y))), z)
// -> (fma (fneg (fpext x)), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FNEG) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FP_EXTEND) {
SDValue N000 = N00.getOperand(0);
if (N000.getOpcode() == ISD::FMUL) {
return DAG.getNode(ISD::FMA, dl, VT,
DAG.getNode(ISD::FNEG, dl, VT,
DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
VT, N000.getOperand(0))),
DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
N000.getOperand(1)),
DAG.getNode(ISD::FNEG, dl, VT, N1));
}
}
}
}
}

return SDValue();		return SDValue();
}		}

SDValue DAGCombiner::visitFMUL(SDNode *N) {		SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);		ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
▲ Show 20 Lines • Show All 5,842 Lines • Show Last 20 Lines

test/CodeGen/PowerPC/fma-assoc.ll

	Show First 20 Lines • Show All 71 Lines • ▼ Show 20 Lines

	; CHECK-VSX-LABEL: test_FMSUB_ASSOC2:			; CHECK-VSX-LABEL: test_FMSUB_ASSOC2:
	; CHECK-VSX: xsnmsubmdp			; CHECK-VSX: xsnmsubmdp
	; CHECK-VSX-NEXT: xsnmsubadp			; CHECK-VSX-NEXT: xsnmsubadp
	; CHECK-VSX-NEXT: fmr			; CHECK-VSX-NEXT: fmr
	; CHECK-VSX-NEXT: blr			; CHECK-VSX-NEXT: blr
	}			}

				; Pattern under test: (C * D + fpext(A * B)) + E, where A * B is computed in
				; float and extended to double before the additions.
				; Under both the CHECK and CHECK-VSX prefixes the expected output is two
				; consecutive fused multiply-add instructions immediately followed by blr.
				define double @test_FMADD_ASSOC_EXT1(float %A, float %B, double %C,
				double %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fpext float %F to double ; <double> [#uses=1]
				%H = fmul double %C, %D ; <double> [#uses=1]
				%I = fadd double %H, %G ; <double> [#uses=1]
				%J = fadd double %I, %E ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMADD_ASSOC_EXT1:
				; CHECK: fmadd
				; CHECK-NEXT: fmadd
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT1:
				; CHECK-VSX: xsmaddmdp
				; CHECK-VSX-NEXT: xsmaddadp
				; CHECK-VSX-NEXT: blr
				}

				; Pattern under test: fpext(A * B + C * D) + E, where both products and their
				; sum are computed in float and only the sum is extended to double.
				; The CHECK prefix expects two consecutive fmadd instructions then blr; the
				; CHECK-VSX prefix expects xsmaddmdp, xsmaddadp, an fmr, then blr.
				define double @test_FMADD_ASSOC_EXT2(float %A, float %B, float %C,
				float %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fmul float %C, %D ; <float> [#uses=1]
				%H = fadd float %F, %G ; <float> [#uses=1]
				%I = fpext float %H to double ; <double> [#uses=1]
				%J = fadd double %I, %E ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMADD_ASSOC_EXT2:
				; CHECK: fmadd
				; CHECK-NEXT: fmadd
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT2:
				; CHECK-VSX: xsmaddmdp
				; CHECK-VSX-NEXT: xsmaddadp
				; CHECK-VSX-NEXT: fmr
				; CHECK-VSX-NEXT: blr
				}

				; Pattern under test: E + (C * D + fpext(A * B)). The body matches
				; test_FMADD_ASSOC_EXT1 except that the operands of the final fadd are
				; swapped, so %E is the left-hand operand.
				; Under both prefixes the expected output is two consecutive fused
				; multiply-add instructions immediately followed by blr.
				define double @test_FMADD_ASSOC_EXT3(float %A, float %B, double %C,
				double %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fpext float %F to double ; <double> [#uses=1]
				%H = fmul double %C, %D ; <double> [#uses=1]
				%I = fadd double %H, %G ; <double> [#uses=1]
				%J = fadd double %E, %I ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMADD_ASSOC_EXT3:
				; CHECK: fmadd
				; CHECK-NEXT: fmadd
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT3:
				; CHECK-VSX: xsmaddmdp
				; CHECK-VSX-NEXT: xsmaddadp
				; CHECK-VSX-NEXT: blr
				}

				; Pattern under test: E + fpext(A * B + C * D). The body matches
				; test_FMADD_ASSOC_EXT2 except that the operands of the final fadd are
				; swapped, so %E is the left-hand operand.
				; The CHECK prefix expects two consecutive fmadd instructions then blr; the
				; CHECK-VSX prefix expects xsmaddmdp, xsmaddadp, an fmr, then blr.
				define double @test_FMADD_ASSOC_EXT4(float %A, float %B, float %C,
				float %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fmul float %C, %D ; <float> [#uses=1]
				%H = fadd float %F, %G ; <float> [#uses=1]
				%I = fpext float %H to double ; <double> [#uses=1]
				%J = fadd double %E, %I ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMADD_ASSOC_EXT4:
				; CHECK: fmadd
				; CHECK-NEXT: fmadd
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT4:
				; CHECK-VSX: xsmaddmdp
				; CHECK-VSX-NEXT: xsmaddadp
				; CHECK-VSX-NEXT: fmr
				; CHECK-VSX-NEXT: blr
				}

				; Pattern under test: (C * D + fpext(A * B)) - E, where A * B is computed in
				; float and extended to double before being added to the double product.
				; The CHECK prefix expects fmsub then fmadd then blr; the CHECK-VSX prefix
				; expects xsmsubmdp then xsmaddadp then blr.
				define double @test_FMSUB_ASSOC_EXT1(float %A, float %B, double %C,
				double %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fpext float %F to double ; <double> [#uses=1]
				%H = fmul double %C, %D ; <double> [#uses=1]
				%I = fadd double %H, %G ; <double> [#uses=1]
				%J = fsub double %I, %E ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMSUB_ASSOC_EXT1:
				; CHECK: fmsub
				; CHECK-NEXT: fmadd
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT1:
				; CHECK-VSX: xsmsubmdp
				; CHECK-VSX-NEXT: xsmaddadp
				; CHECK-VSX-NEXT: blr
				}

				; Pattern under test: fpext(A * B + C * D) - E, where both products and their
				; sum are computed in float and only the sum is extended to double.
				; The CHECK prefix expects fmsub then fmadd then blr; the CHECK-VSX prefix
				; expects xsmsubmdp, xsmaddadp, an fmr, then blr.
				define double @test_FMSUB_ASSOC_EXT2(float %A, float %B, float %C,
				float %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fmul float %C, %D ; <float> [#uses=1]
				%H = fadd float %F, %G ; <float> [#uses=1]
				%I = fpext float %H to double ; <double> [#uses=1]
				%J = fsub double %I, %E ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMSUB_ASSOC_EXT2:
				; CHECK: fmsub
				; CHECK-NEXT: fmadd
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT2:
				; CHECK-VSX: xsmsubmdp
				; CHECK-VSX-NEXT: xsmaddadp
				; CHECK-VSX-NEXT: fmr
				; CHECK-VSX-NEXT: blr
				}

				; Pattern under test: E - (C * D + fpext(A * B)). The body matches
				; test_FMSUB_ASSOC_EXT1 except that the fsub operands are swapped, so the
				; multiply-add chain is the value being subtracted.
				; The CHECK prefix expects two consecutive fnmsub instructions then blr; the
				; CHECK-VSX prefix expects xsnmsubmdp, xsnmsubadp, an fmr, then blr.
				define double @test_FMSUB_ASSOC_EXT3(float %A, float %B, double %C,
				double %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fpext float %F to double ; <double> [#uses=1]
				%H = fmul double %C, %D ; <double> [#uses=1]
				%I = fadd double %H, %G ; <double> [#uses=1]
				%J = fsub double %E, %I ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMSUB_ASSOC_EXT3:
				; CHECK: fnmsub
				; CHECK-NEXT: fnmsub
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT3:
				; CHECK-VSX: xsnmsubmdp
				; CHECK-VSX-NEXT: xsnmsubadp
				; CHECK-VSX-NEXT: fmr
				; CHECK-VSX-NEXT: blr
				}

				; Pattern under test: E - fpext(A * B + C * D). The body matches
				; test_FMSUB_ASSOC_EXT2 except that the fsub operands are swapped, so the
				; extended float sum is the value being subtracted.
				; The CHECK prefix expects two consecutive fnmsub instructions then blr; the
				; CHECK-VSX prefix expects xsnmsubmdp, xsnmsubadp, an fmr, then blr.
				define double @test_FMSUB_ASSOC_EXT4(float %A, float %B, float %C,
				float %D, double %E) {
				%F = fmul float %A, %B ; <float> [#uses=1]
				%G = fmul float %C, %D ; <float> [#uses=1]
				%H = fadd float %F, %G ; <float> [#uses=1]
				%I = fpext float %H to double ; <double> [#uses=1]
				%J = fsub double %E, %I ; <double> [#uses=1]
				ret double %J
				; CHECK-LABEL: test_FMSUB_ASSOC_EXT4:
				; CHECK: fnmsub
				; CHECK-NEXT: fnmsub
				; CHECK-NEXT: blr

				; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT4:
				; CHECK-VSX: xsnmsubmdp
				; CHECK-VSX-NEXT: xsnmsubadp
				; CHECK-VSX-NEXT: fmr
				; CHECK-VSX-NEXT: blr
				}
				No newline at end of file

test/CodeGen/PowerPC/fma-ext.ll

	Show First 20 Lines • Show All 59 Lines • ▼ Show 20 Lines

	define double @test_FMSUB_EXT3(float %A, float %B, double %C) {			define double @test_FMSUB_EXT3(float %A, float %B, double %C) {
	%D = fmul float %A, %B ; <float> [#uses=1]			%D = fmul float %A, %B ; <float> [#uses=1]
	%E = fsub float -0.000000e+00, %D ; <float> [#uses=1]			%E = fsub float -0.000000e+00, %D ; <float> [#uses=1]
	%F = fpext float %E to double ; <double> [#uses=1]			%F = fpext float %E to double ; <double> [#uses=1]
	%G = fsub double %F, %C ; <double> [#uses=1]			%G = fsub double %F, %C ; <double> [#uses=1]
	ret double %G			ret double %G
	; CHECK-LABEL: test_FMSUB_EXT3:			; CHECK-LABEL: test_FMSUB_EXT3:
	; CHECK: fneg			; CHECK: fnmadd
	; CHECK-NEXT: fmsub
	; CHECK-NEXT: blr			; CHECK-NEXT: blr

	; CHECK-VSX-LABEL: test_FMSUB_EXT3:			; CHECK-VSX-LABEL: test_FMSUB_EXT3:
	; CHECK-VSX: xsnegdp			; CHECK-VSX: xsnmaddmdp
	; CHECK-VSX-NEXT: xsmsubmdp
	; CHECK-VSX-NEXT: blr			; CHECK-VSX-NEXT: blr
	}			}

	define double @test_FMSUB_EXT4(float %A, float %B, double %C) {			define double @test_FMSUB_EXT4(float %A, float %B, double %C) {
	%D = fmul float %A, %B ; <float> [#uses=1]			%D = fmul float %A, %B ; <float> [#uses=1]
	%E = fpext float %D to double ; <double> [#uses=1]			%E = fpext float %D to double ; <double> [#uses=1]
	%F = fsub double -0.000000e+00, %E ; <double> [#uses=1]			%F = fsub double -0.000000e+00, %E ; <double> [#uses=1]
	%G = fsub double %F, %C ; <double> [#uses=1]			%G = fsub double %F, %C ; <double> [#uses=1]
	ret double %G			ret double %G
	; CHECK-LABEL: test_FMSUB_EXT4:			; CHECK-LABEL: test_FMSUB_EXT4:
	; CHECK: fneg			; CHECK: fnmadd
	; CHECK-NEXT: fmsub
	; CHECK-NEXT: blr			; CHECK-NEXT: blr

	; CHECK-VSX-LABEL: test_FMSUB_EXT4:			; CHECK-VSX-LABEL: test_FMSUB_EXT4:
	; CHECK-VSX: xsnegdp			; CHECK-VSX: xsnmaddmdp
	; CHECK-VSX-NEXT: xsmsubmdp
	; CHECK-VSX-NEXT: blr			; CHECK-VSX-NEXT: blr
	}			}
	No newline at end of file

This is an archive of the discontinued LLVM Phabricator instance.

Refactor and enhance FMA combine
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 23179

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

test/CodeGen/PowerPC/fma-assoc.ll

test/CodeGen/PowerPC/fma-ext.ll

This is an archive of the discontinued LLVM Phabricator instance.

Refactor and enhance FMA combine
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 23179

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

test/CodeGen/PowerPC/fma-assoc.ll

test/CodeGen/PowerPC/fma-ext.ll

Refactor and enhance FMA combine
ClosedPublic