Diff 148473

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 668 Lines • ▼ Show 20 Lines	static char isNegatibleForFree(SDValue Op, bool LegalOperations,
const TargetLowering &TLI,		const TargetLowering &TLI,
const TargetOptions *Options,		const TargetOptions *Options,
unsigned Depth = 0) {		unsigned Depth = 0) {
// fneg is removable even if it has multiple uses.		// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return 2;		if (Op.getOpcode() == ISD::FNEG) return 2;

// Don't allow anything with multiple uses unless we know it is free.		// Don't allow anything with multiple uses unless we know it is free.
EVT VT = Op.getValueType();		EVT VT = Op.getValueType();
		const SDNodeFlags Flags = Op->getFlags();
if (!Op.hasOneUse())		if (!Op.hasOneUse())
if (!(Op.getOpcode() == ISD::FP_EXTEND &&		if (!(Op.getOpcode() == ISD::FP_EXTEND &&
TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))		TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
return 0;		return 0;

// Don't recurse exponentially.		// Don't recurse exponentially.
if (Depth > 6) return 0;		if (Depth > 6) return 0;

switch (Op.getOpcode()) {		switch (Op.getOpcode()) {
		arsenm: Are we almost to the point where we could seed the fast math flags from the attributes during initial DAG creation to avoid doing this everywhere, or do too many places still not preserve them?
		mcberg2017 (author): Many places do not preserve the flags as of yet. Even when we finish here there will still be work to be done. Also, because we propagate in these scenarios to expressions we make, that seems less likely as the context will become more divergent over time at the sub-flag level.
default: return false;		default: return false;
case ISD::ConstantFP: {		case ISD::ConstantFP: {
if (!LegalOperations)		if (!LegalOperations)
return 1;		return 1;

// Don't invert constant FP values after legalization unless the target says		// Don't invert constant FP values after legalization unless the target says
// the negated constant is legal.		// the negated constant is legal.
return TLI.isOperationLegal(ISD::ConstantFP, VT) \|\|		return TLI.isOperationLegal(ISD::ConstantFP, VT) \|\|
TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);		TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
}		}
case ISD::FADD:		case ISD::FADD:
// FIXME: determine better conditions for this xform.		if (!Options->UnsafeFPMath &&
if (!Options->UnsafeFPMath) return 0;		!Flags.hasNoNaNs() && !Flags.hasNoInfs()) return 0;

		spatel: This 1st transform shows why I'm not comfortable with patches that are trying to make multiple changes at once. Which test(s) correspond to this code change? I think the existing predicate is bogus, and the new ones are wrong too. Why does NaN/Inf make a difference here? Is this safe other than with -0.0? Test program to check that part:

		#include <stdio.h>
		int main() {
		  float x, y;
		  x = -0.0; y = -0.0;
		  printf("-(%f + %f) = %f\n", x, y, -(x + y));
		  printf("(-%f) - %f = %f\n", x, y, (-x) - y);
		  x = +0.0; y = -0.0;
		  printf("-(%f + %f) = %f\n", x, y, -(x + y));
		  printf("(-%f) - %f = %f\n", x, y, (-x) - y);
		  x = -0.0; y = +0.0;
		  printf("-(%f + %f) = %f\n", x, y, -(x + y));
		  printf("(-%f) - %f = %f\n", x, y, (-x) - y);
		  x = +0.0; y = +0.0;
		  printf("-(%f + %f) = %f\n", x, y, -(x + y));
		  printf("(-%f) - %f = %f\n", x, y, (-x) - y);
		  return 0;
		}
// After operation legalization, it might not be legal to create new FSUBs.		// After operation legalization, it might not be legal to create new FSUBs.
if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))		if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
return 0;		return 0;

// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)		// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,		if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
Options, Depth + 1))		Options, Depth + 1))
return V;		return V;

// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)		// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,		return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
Depth + 1);		Depth + 1);
case ISD::FSUB:		case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.		// We can't turn -(A-B) into B-A when we honor signed zeros.
if (!Options->NoSignedZerosFPMath &&		if (!Options->NoSignedZerosFPMath &&
!Op.getNode()->getFlags().hasNoSignedZeros())		!Flags.hasNoSignedZeros())
return 0;		return 0;

// fold (fneg (fsub A, B)) -> (fsub B, A)		// fold (fneg (fsub A, B)) -> (fsub B, A)
return 1;		return 1;

case ISD::FMUL:		case ISD::FMUL:
case ISD::FDIV:		case ISD::FDIV:
if (Options->HonorSignDependentRoundingFPMath()) return 0;		if (Options->HonorSignDependentRoundingFPMathOption &&
		!Options->UnsafeFPMath && !Flags.hasNoNaNs()) return 0;

// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))		// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,		if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
Options, Depth + 1))		Options, Depth + 1))
return V;		return V;

return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,		return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
Depth + 1);		Depth + 1);
Show All 20 Lines	static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
switch (Op.getOpcode()) {		switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown code");		default: llvm_unreachable("Unknown code");
case ISD::ConstantFP: {		case ISD::ConstantFP: {
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();		APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
V.changeSign();		V.changeSign();
return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());		return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
}		}
case ISD::FADD:		case ISD::FADD:
// FIXME: determine better conditions for this xform.		assert(Options.UnsafeFPMath \|\|
assert(Options.UnsafeFPMath);		(Flags.hasNoNaNs() && Flags.hasNoInfs()));

// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)		// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,		if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
DAG.getTargetLoweringInfo(), &Options, Depth+1))		DAG.getTargetLoweringInfo(), &Options, Depth+1))
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),		return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,		GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),		LegalOperations, Depth+1),
Op.getOperand(1), Flags);		Op.getOperand(1), Flags);
Show All 9 Lines	if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
return Op.getOperand(1);		return Op.getOperand(1);

// fold (fneg (fsub A, B)) -> (fsub B, A)		// fold (fneg (fsub A, B)) -> (fsub B, A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),		return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(0), Flags);		Op.getOperand(1), Op.getOperand(0), Flags);

case ISD::FMUL:		case ISD::FMUL:
case ISD::FDIV:		case ISD::FDIV:
assert(!Options.HonorSignDependentRoundingFPMath());		assert(!Options.HonorSignDependentRoundingFPMath() \|\| Flags.hasNoNaNs());

// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)		// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,		if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
DAG.getTargetLoweringInfo(), &Options, Depth+1))		DAG.getTargetLoweringInfo(), &Options, Depth+1))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),		return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,		GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),		LegalOperations, Depth+1),
Op.getOperand(1), Flags);		Op.getOperand(1), Flags);
▲ Show 20 Lines • Show All 9,405 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::visitFADD(SDNode *N) {
if ((isFMulNegTwo(N0) && N0.hasOneUse()) \|\|		if ((isFMulNegTwo(N0) && N0.hasOneUse()) \|\|
(isFMulNegTwo(N1) && N1.hasOneUse())) {		(isFMulNegTwo(N1) && N1.hasOneUse())) {
bool N1IsFMul = isFMulNegTwo(N1);		bool N1IsFMul = isFMulNegTwo(N1);
SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);		SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);		SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);		return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
}		}

// FIXME: Auto-upgrade the target/function-level option.		ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
if (Options.NoSignedZerosFPMath \|\| N->getFlags().hasNoSignedZeros()) {		if (N1C && N1C->isZero()) {
		if (N1C->isNegative() \|\| Options.UnsafeFPMath \|\|
		Flags.hasNoSignedZeros()) {
// fold (fadd A, 0) -> A		// fold (fadd A, 0) -> A
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
if (N1C->isZero())
return N0;		return N0;
}		}
		}

// If 'unsafe math' is enabled, fold lots of things.		// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {		bool AllowNewConst = (Level < AfterLegalizeDAG);
		if (Options.UnsafeFPMath \|\| Flags.hasNoNaNs()) {
		// If allowed, fold (fadd (fneg x), x) -> 0.0
		if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
		return DAG.getConstantFP(0.0, DL, VT);

		// If allowed, fold (fadd x, (fneg x)) -> 0.0
		if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
		return DAG.getConstantFP(0.0, DL, VT);
		}

		// If 'unsafe math' is enabled, fold lots of things.
		if (Options.UnsafeFPMath \|\| Flags.hasAllowReassociation()) {
// No FP constant should be created after legalization as Instruction		// No FP constant should be created after legalization as Instruction
// Selection pass has a hard time dealing with FP constants.		// Selection pass has a hard time dealing with FP constants.
bool AllowNewConst = (Level < AfterLegalizeDAG);

// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))		// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&		if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))		isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),		return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,		DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
Flags),		Flags),
Flags);		Flags);

// If allowed, fold (fadd (fneg x), x) -> 0.0
if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
return DAG.getConstantFP(0.0, DL, VT);

// If allowed, fold (fadd x, (fneg x)) -> 0.0
if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
return DAG.getConstantFP(0.0, DL, VT);

// We can fold chains of FADD's of the same value into multiplications.		// We can fold chains of FADD's of the same value into multiplications.
// This transform is not safe in general because we are reducing the number		// This transform is not safe in general because we are reducing the number
// of rounding steps.		// of rounding steps.
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {		if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {		if (N0.getOpcode() == ISD::FMUL) {
bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));		bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));		bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

▲ Show 20 Lines • Show All 110 Lines • ▼ Show 20 Lines	if (N0CFP && N0CFP->isZero()) {
return GetNegatedExpression(N1, DAG, LegalOperations);		return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations \|\| TLI.isOperationLegal(ISD::FNEG, VT))		if (!LegalOperations \|\| TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);		return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
}		}
}		}

// If 'unsafe math' is enabled, fold lots of things.		// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {		if (Options.UnsafeFPMath) {
// (fsub A, 0) -> A
if (N1CFP && N1CFP->isZero())
return N0;

// (fsub x, x) -> 0.0		// (fsub x, x) -> 0.0
if (N0 == N1)		if (N0 == N1)
return DAG.getConstantFP(0.0f, DL, VT);		return DAG.getConstantFP(0.0f, DL, VT);

// (fsub x, (fadd x, y)) -> (fneg y)		// (fsub x, (fadd x, y)) -> (fneg y)
// (fsub x, (fadd y, x)) -> (fneg y)		// (fsub x, (fadd y, x)) -> (fneg y)
if (N1.getOpcode() == ISD::FADD) {		if (N1.getOpcode() == ISD::FADD) {
SDValue N10 = N1->getOperand(0);		SDValue N10 = N1->getOperand(0);
SDValue N11 = N1->getOperand(1);		SDValue N11 = N1->getOperand(1);

if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))		if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
return GetNegatedExpression(N11, DAG, LegalOperations);		return GetNegatedExpression(N11, DAG, LegalOperations);

if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))		if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
return GetNegatedExpression(N10, DAG, LegalOperations);		return GetNegatedExpression(N10, DAG, LegalOperations);
}		}
		} else {
		// (fsub x, x) -> 0.0
		if (N0 == N1 && Flags.hasNoNaNs()) {
		return DAG.getConstantFP(0.0f, DL, VT);
		}
		}

		// (fsub A, 0) -> A
		if (N1CFP && N1CFP->isZero()) {
		if (!N1CFP->isNegative() \|\| Options.UnsafeFPMath \|\|
		Flags.hasNoSignedZeros()) {
		return N0;
		}
}		}

// FSUB -> FMA combines:		// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {		if (SDValue Fused = visitFSUBForFMACombine(N)) {
AddToWorklist(Fused.getNode());		AddToWorklist(Fused.getNode());
return Fused;		return Fused;
}		}

Show All 28 Lines	SDValue DAGCombiner::visitFMUL(SDNode *N) {

// fold (fmul A, 1.0) -> A		// fold (fmul A, 1.0) -> A
if (N1CFP && N1CFP->isExactlyValue(1.0))		if (N1CFP && N1CFP->isExactlyValue(1.0))
return N0;		return N0;

if (SDValue NewSel = foldBinOpIntoSelect(N))		if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;		return NewSel;

if (Options.UnsafeFPMath) {		if (Options.UnsafeFPMath \|\|
		(Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
// fold (fmul A, 0) -> 0		// fold (fmul A, 0) -> 0
if (N1CFP && N1CFP->isZero())		if (N1CFP && N1CFP->isZero())
return N1;		return N1;
		}

		if (Options.UnsafeFPMath \|\| Flags.hasAllowReassociation()) {
// fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))		// fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
if (N0.getOpcode() == ISD::FMUL) {		if (N0.getOpcode() == ISD::FMUL) {
// Fold scalars or any vector constants (not just splats).		// Fold scalars or any vector constants (not just splats).
// This fold is done in general by InstCombine, but extra fmul insts		// This fold is done in general by InstCombine, but extra fmul insts
// may have been generated during lowering.		// may have been generated during lowering.
SDValue N00 = N0.getOperand(0);		SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);		SDValue N01 = N0.getOperand(1);
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);		auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
▲ Show 20 Lines • Show All 290 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::visitFDIV(SDNode *N) {

// fold (fdiv c1, c2) -> c1/c2		// fold (fdiv c1, c2) -> c1/c2
if (N0CFP && N1CFP)		if (N0CFP && N1CFP)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);		return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

if (SDValue NewSel = foldBinOpIntoSelect(N))		if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;		return NewSel;

if (Options.UnsafeFPMath) {		if (Options.UnsafeFPMath \|\| Flags.hasAllowReciprocal()) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.		// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
if (N1CFP) {		if (N1CFP) {
// Compute the reciprocal 1.0 / c2.		// Compute the reciprocal 1.0 / c2.
const APFloat &N1APF = N1CFP->getValueAPF();		const APFloat &N1APF = N1CFP->getValueAPF();
APFloat Recip(N1APF.getSemantics(), 1); // 1.0		APFloat Recip(N1APF.getSemantics(), 1); // 1.0
APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);		APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
// Only do the transform if the reciprocal is a legal fp immediate that		// Only do the transform if the reciprocal is a legal fp immediate that
// isn't too nasty (eg NaN, denormal, ...).		// isn't too nasty (eg NaN, denormal, ...).
▲ Show 20 Lines • Show All 368 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
return FoldIntToFPToInt(N, DAG);		return FoldIntToFPToInt(N, DAG);
}		}

SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {		SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);		ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
		SDNodeFlags Flags = N->getFlags();

// fold (fp_round c1fp) -> c1fp		// fold (fp_round c1fp) -> c1fp
if (N0CFP)		if (N0CFP)
return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);		return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

// fold (fp_round (fp_extend x)) -> x		// fold (fp_round (fp_extend x)) -> x
if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())		if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
return N0.getOperand(0);		return N0.getOperand(0);
Show All 13 Lines	if (N0.getOpcode() == ISD::FP_ROUND) {
if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)		if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
return SDValue();		return SDValue();

// If the first fp_round isn't a value preserving truncation, it might		// If the first fp_round isn't a value preserving truncation, it might
// introduce a tie in the second fp_round, that wouldn't occur in the		// introduce a tie in the second fp_round, that wouldn't occur in the
// single-step fp_round we want to fold to.		// single-step fp_round we want to fold to.
// In other words, double rounding isn't the same as rounding.		// In other words, double rounding isn't the same as rounding.
// Also, this is a value preserving truncation iff both fp_round's are.		// Also, this is a value preserving truncation iff both fp_round's are.
if (DAG.getTarget().Options.UnsafeFPMath \|\| N0IsTrunc) {		if (DAG.getTarget().Options.UnsafeFPMath \|\|
		Flags.hasAllowReassociation() \|\| N0IsTrunc) {
SDLoc DL(N);		SDLoc DL(N);
return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),		return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));		DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
}		}
}		}

// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)		// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {		if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
▲ Show 20 Lines • Show All 6,893 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 4,436 Lines • ▼ Show 20 Lines	case ISD::UMAX:
assert(N1.getValueType() == N2.getValueType() &&		assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");		N1.getValueType() == VT && "Binary operator types must match!");
break;		break;
case ISD::FADD:		case ISD::FADD:
case ISD::FSUB:		case ISD::FSUB:
case ISD::FMUL:		case ISD::FMUL:
case ISD::FDIV:		case ISD::FDIV:
case ISD::FREM:		case ISD::FREM:
if (getTarget().Options.UnsafeFPMath) {
if (Opcode == ISD::FADD) {
// x+0 --> x
if (N2CFP && N2CFP->getValueAPF().isZero())
return N1;
} else if (Opcode == ISD::FSUB) {
// x-0 --> x
if (N2CFP && N2CFP->getValueAPF().isZero())
return N1;
} else if (Opcode == ISD::FMUL) {
// x*0 --> 0
if (N2CFP && N2CFP->isZero())
return N2;
// x*1 --> x
if (N2CFP && N2CFP->isExactlyValue(1.0))
return N1;
}
}
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");		assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == N2.getValueType() &&		assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");		N1.getValueType() == VT && "Binary operator types must match!");
break;		break;
case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.		case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
assert(N1.getValueType() == VT &&		assert(N1.getValueType() == VT &&
N1.getValueType().isFloatingPoint() &&		N1.getValueType().isFloatingPoint() &&
N2.getValueType().isFloatingPoint() &&		N2.getValueType().isFloatingPoint() &&
▲ Show 20 Lines • Show All 345 Lines • ▼ Show 20 Lines	if (N2.isUndef()) {
case ISD::SUB:		case ISD::SUB:
case ISD::UDIV:		case ISD::UDIV:
case ISD::SDIV:		case ISD::SDIV:
case ISD::UREM:		case ISD::UREM:
case ISD::SREM:		case ISD::SREM:
case ISD::SRA:		case ISD::SRA:
case ISD::SRL:		case ISD::SRL:
case ISD::SHL:		case ISD::SHL:
return getUNDEF(VT); // fold op(arg1, undef) -> undef		return getUNDEF(VT); // fold op(arg1, undef) -> undef
case ISD::MUL:		case ISD::MUL:
		spatel: This is just wrong. Let me try to clean this up before we compound the problem.
case ISD::AND:		case ISD::AND:
return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0		return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
case ISD::OR:		case ISD::OR:
return getAllOnesConstant(DL, VT);		return getAllOnesConstant(DL, VT);
}		}
}		}

// Memoize this node if possible.		// Memoize this node if possible.
▲ Show 20 Lines • Show All 3,877 Lines • Show Last 20 Lines

test/CodeGen/AArch64/fdiv-combine.ll

	; RUN: llc -mtriple=aarch64-unknown-unknown < %s \| FileCheck %s			; RUN: llc < %s -mtriple=aarch64-unknown-unknown \| FileCheck %s
				; RUN: llc < %s -mtriple=aarch64-unknown-unknown -enable-unsafe-fp-math \| FileCheck %s

	; Following test cases check:			; Following test cases check:
	; a / D; b / D; c / D;			; a / D; b / D; c / D;
	; =>			; =>
	; recip = 1.0 / D; a * recip; b * recip; c * recip;			; recip = 1.0 / D; a * recip; b * recip; c * recip;
	define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {			define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
	; CHECK-LABEL: three_fdiv_float:			; CHECK-LABEL: three_fdiv_float:
	; CHECK: fdiv s			; CHECK: fdiv s
	; CHECK-NOT: fdiv			; CHECK-NOT: fdiv
	; CHECK: fmul			; CHECK: fmul
	; CHECK: fmul			; CHECK: fmul
	; CHECK: fmul			; CHECK: fmul
	%div = fdiv float %a, %D			%div = fdiv fast float %a, %D
	%div1 = fdiv float %b, %D			%div1 = fdiv fast float %b, %D
	%div2 = fdiv float %c, %D			%div2 = fdiv fast float %c, %D
	tail call void @foo_3f(float %div, float %div1, float %div2)			tail call void @foo_3f(float %div, float %div1, float %div2)
	ret void			ret void
	}			}

	define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {			define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
	; CHECK-LABEL: three_fdiv_double:			; CHECK-LABEL: three_fdiv_double:
	; CHECK: fdiv d			; CHECK: fdiv d
	; CHECK-NOT: fdiv			; CHECK-NOT: fdiv
	; CHECK: fmul			; CHECK: fmul
	; CHECK: fmul			; CHECK: fmul
	; CHECK: fmul			; CHECK: fmul
	%div = fdiv double %a, %D			%div = fdiv fast double %a, %D
	%div1 = fdiv double %b, %D			%div1 = fdiv fast double %b, %D
	%div2 = fdiv double %c, %D			%div2 = fdiv fast double %c, %D
	tail call void @foo_3d(double %div, double %div1, double %div2)			tail call void @foo_3d(double %div, double %div1, double %div2)
	ret void			ret void
	}			}

	define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {			define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
	; CHECK-LABEL: three_fdiv_4xfloat:			; CHECK-LABEL: three_fdiv_4xfloat:
	; CHECK: fdiv v			; CHECK: fdiv v
	; CHECK-NOT: fdiv			; CHECK-NOT: fdiv
	; CHECK: fmul			; CHECK: fmul
	; CHECK: fmul			; CHECK: fmul
	; CHECK: fmul			; CHECK: fmul
	%div = fdiv <4 x float> %a, %D			%div = fdiv fast <4 x float> %a, %D
	%div1 = fdiv <4 x float> %b, %D			%div1 = fdiv fast <4 x float> %b, %D
	%div2 = fdiv <4 x float> %c, %D			%div2 = fdiv fast <4 x float> %c, %D
	tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)			tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
	ret void			ret void
	}			}

	define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {			define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
	; CHECK-LABEL: three_fdiv_2xdouble:			; CHECK-LABEL: three_fdiv_2xdouble:
	; CHECK: fdiv v			; CHECK: fdiv v
	; CHECK-NOT: fdiv			; CHECK-NOT: fdiv
	; CHECK: fmul			; CHECK: fmul
	; CHECK: fmul			; CHECK: fmul
	; CHECK: fmul			; CHECK: fmul
	%div = fdiv <2 x double> %a, %D			%div = fdiv fast <2 x double> %a, %D
	%div1 = fdiv <2 x double> %b, %D			%div1 = fdiv fast <2 x double> %b, %D
	%div2 = fdiv <2 x double> %c, %D			%div2 = fdiv fast <2 x double> %c, %D
	tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)			tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
	ret void			ret void
	}			}

	; Following test cases check we never combine two FDIVs if neither of them			; Following test cases check we never combine two FDIVs if neither of them
	; calculates a reciprocal.			; calculates a reciprocal.
	define void @two_fdiv_float(float %D, float %a, float %b) #0 {			define void @two_fdiv_float(float %D, float %a, float %b) #0 {
	; CHECK-LABEL: two_fdiv_float:			; CHECK-LABEL: two_fdiv_float:
	; CHECK: fdiv s			; CHECK: fdiv s
	; CHECK: fdiv s			; CHECK: fdiv s
	; CHECK-NOT: fmul			; CHECK-NOT: fmul
	%div = fdiv float %a, %D			%div = fdiv fast float %a, %D
	%div1 = fdiv float %b, %D			%div1 = fdiv fast float %b, %D
	tail call void @foo_2f(float %div, float %div1)			tail call void @foo_2f(float %div, float %div1)
	ret void			ret void
	}			}

	define void @two_fdiv_double(double %D, double %a, double %b) #0 {			define void @two_fdiv_double(double %D, double %a, double %b) #0 {
	; CHECK-LABEL: two_fdiv_double:			; CHECK-LABEL: two_fdiv_double:
	; CHECK: fdiv d			; CHECK: fdiv d
	; CHECK: fdiv d			; CHECK: fdiv d
	; CHECK-NOT: fmul			; CHECK-NOT: fmul
	%div = fdiv double %a, %D			%div = fdiv fast double %a, %D
	%div1 = fdiv double %b, %D			%div1 = fdiv fast double %b, %D
	tail call void @foo_2d(double %div, double %div1)			tail call void @foo_2d(double %div, double %div1)
	ret void			ret void
	}			}

	declare void @foo_3f(float, float, float)			declare void @foo_3f(float, float, float)
	declare void @foo_3d(double, double, double)			declare void @foo_3d(double, double, double)
	declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)			declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
	declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)			declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
	declare void @foo_2f(float, float)			declare void @foo_2f(float, float)
	declare void @foo_2d(double, double)			declare void @foo_2d(double, double)

	attributes #0 = { "unsafe-fp-math"="true" }

test/CodeGen/AMDGPU/fadd.ll

Show First 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	define amdgpu_kernel void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
%add = fadd <8 x float> %a, %b		%add = fadd <8 x float> %a, %b
store <8 x float> %add, <8 x float> addrspace(1)* %out, align 32		store <8 x float> %add, <8 x float> addrspace(1)* %out, align 32
ret void		ret void
}		}

; FUNC-LABEL: {{^}}fadd_0_nsz_attr_f32:		; FUNC-LABEL: {{^}}fadd_0_nsz_attr_f32:
; SI-NOT: v_add_f32		; SI-NOT: v_add_f32
define amdgpu_kernel void @fadd_0_nsz_attr_f32(float addrspace(1)* %out, float %a) #1 {		define amdgpu_kernel void @fadd_0_nsz_attr_f32(float addrspace(1)* %out, float %a) #1 {
%add = fadd float %a, 0.0		%add = fadd nsz float %a, 0.0
store float %add, float addrspace(1)* %out, align 4		store float %add, float addrspace(1)* %out, align 4
ret void		ret void
}		}

attributes #0 = { nounwind }		attributes #0 = { nounwind }
attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }		attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }

test/CodeGen/AMDGPU/fdiv.f16.ll

	Show First 20 Lines • Show All 212 Lines • ▼ Show 20 Lines
	define amdgpu_kernel void @div_arcp_2_x_pat_f16(half addrspace(1)* %out) #0 {			define amdgpu_kernel void @div_arcp_2_x_pat_f16(half addrspace(1)* %out) #0 {
	%x = load half, half addrspace(1)* undef			%x = load half, half addrspace(1)* undef
	%rcp = fdiv arcp half %x, 2.0			%rcp = fdiv arcp half %x, 2.0
	store half %rcp, half addrspace(1)* %out, align 4			store half %rcp, half addrspace(1)* %out, align 4
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f16:			; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f16:
	; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dcccccd, v{{[0-9]+}}			; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dccc000, v{{[0-9]+}}
				mcberg2017 (author): Note for Matt, the numbers now used reflect 16-bit fp values.

	; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0x2e66, v{{[0-9]+}}			; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0x2e66, v{{[0-9]+}}
	; GFX8_9: buffer_store_short [[MUL]]			; GFX8_9: buffer_store_short [[MUL]]
	define amdgpu_kernel void @div_arcp_k_x_pat_f16(half addrspace(1)* %out) #0 {			define amdgpu_kernel void @div_arcp_k_x_pat_f16(half addrspace(1)* %out) #0 {
	%x = load half, half addrspace(1)* undef			%x = load half, half addrspace(1)* undef
	%rcp = fdiv arcp half %x, 10.0			%rcp = fdiv arcp half %x, 10.0
	store half %rcp, half addrspace(1)* %out, align 4			store half %rcp, half addrspace(1)* %out, align 4
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f16:			; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f16:
	; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdcccccd, v{{[0-9]+}}			; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdccc000, v{{[0-9]+}}

	; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0xae66, v{{[0-9]+}}			; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0xae66, v{{[0-9]+}}
	; GFX8_9: buffer_store_short [[MUL]]			; GFX8_9: buffer_store_short [[MUL]]
	define amdgpu_kernel void @div_arcp_neg_k_x_pat_f16(half addrspace(1)* %out) #0 {			define amdgpu_kernel void @div_arcp_neg_k_x_pat_f16(half addrspace(1)* %out) #0 {
	%x = load half, half addrspace(1)* undef			%x = load half, half addrspace(1)* undef
	%rcp = fdiv arcp half %x, -10.0			%rcp = fdiv arcp half %x, -10.0
	store half %rcp, half addrspace(1)* %out, align 4			store half %rcp, half addrspace(1)* %out, align 4
	ret void			ret void
	Show All 9 Lines

test/CodeGen/PowerPC/fdiv-combine.ll

	; RUN: llc -verify-machineinstrs -mcpu=ppc64 < %s \| FileCheck %s			; RUN: llc -verify-machineinstrs -mcpu=ppc64 < %s \| FileCheck %s
				; RUN: llc -verify-machineinstrs -mcpu=ppc64 -enable-unsafe-fp-math < %s \| FileCheck %s
	target datalayout = "E-m:e-i64:64-n32:64"			target datalayout = "E-m:e-i64:64-n32:64"
	target triple = "powerpc64-unknown-linux-gnu"			target triple = "powerpc64-unknown-linux-gnu"

	; Following test case checks:			; Following test case checks:
	; a / D; b / D; c / D;			; a / D; b / D; c / D;
	; =>			; =>
	; recip = 1.0 / D; a * recip; b * recip; c * recip;			; recip = 1.0 / D; a * recip; b * recip; c * recip;

	define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {			define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
	; CHECK-LABEL: three_fdiv_double:			; CHECK-LABEL: three_fdiv_double:
	; CHECK: fdiv {{[0-9]}}			; CHECK: fdiv {{[0-9]}}
	; CHECK-NOT: fdiv			; CHECK-NOT: fdiv
	; CHECK: fmul			; CHECK: fmul
	; CHECK: fmul			; CHECK: fmul
	; CHECK: fmul			; CHECK: fmul
	%div = fdiv double %a, %D			%div = fdiv fast double %a, %D
	%div1 = fdiv double %b, %D			%div1 = fdiv fast double %b, %D
	%div2 = fdiv double %c, %D			%div2 = fdiv fast double %c, %D
	tail call void @foo_3d(double %div, double %div1, double %div2)			tail call void @foo_3d(double %div, double %div1, double %div2)
	ret void			ret void
	}			}

	define void @two_fdiv_double(double %D, double %a, double %b) #0 {			define void @two_fdiv_double(double %D, double %a, double %b) #0 {
	; CHECK-LABEL: two_fdiv_double:			; CHECK-LABEL: two_fdiv_double:
	; CHECK: fdiv {{[0-9]}}			; CHECK: fdiv {{[0-9]}}
	; CHECK: fdiv {{[0-9]}}			; CHECK: fdiv {{[0-9]}}
	; CHECK-NOT: fmul			; CHECK-NOT: fmul
	%div = fdiv double %a, %D			%div = fdiv fast double %a, %D
	%div1 = fdiv double %b, %D			%div1 = fdiv fast double %b, %D
	tail call void @foo_2d(double %div, double %div1)			tail call void @foo_2d(double %div, double %div1)
	ret void			ret void
	}			}

	declare void @foo_3d(double, double, double)			declare void @foo_3d(double, double, double)
	declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)			declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
	declare void @foo_2d(double, double)			declare void @foo_2d(double, double)

	attributes #0 = { "unsafe-fp-math"="true" }

test/CodeGen/PowerPC/fmf-math.ll

				; RUN: llc -verify-machineinstrs < %s -mattr=-vsx -mtriple=ppc32-- \| grep fmul \| count 1

				define double @foo(double %X) nounwind {
				%tmp1 = fmul fast double %X, 1.23
				%tmp2 = fmul fast double %tmp1, 4.124
				ret double %tmp2
				}

test/CodeGen/X86/change-ir-fp-math.ll

				; Check that we can enable/disable fast IR flag attributes.

				; RUN: llc < %s -mtriple=x86_64-unknown-unknown \
				; RUN: \| FileCheck %s --check-prefix=CHECK

				; The div in these functions should be converted to a mul when unsafe-fp-math
				; is enabled.

				; CHECK-LABEL: fast_fp_math:
				define double @fast_fp_math(double %x) {
				; CHECK: mulsd
				%div = fdiv fast double %x, 2.0
				ret double %div
				}

				; CHECK-LABEL: noflags_fp_math:
				define double @noflags_fp_math(double %x) {
				; CHECK: divsd
				%div = fdiv double %x, 2.0
				ret double %div
				}

test/CodeGen/X86/fadd-combines.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=x86_64-unknown-unknown < %s \| FileCheck %s			; RUN: llc -mtriple=x86_64-unknown-unknown < %s \| FileCheck %s
				; RUN: llc -mtriple=x86_64-unknown-unknown < %s -enable-unsafe-fp-math \| FileCheck %s

	define float @fadd_zero_f32(float %x) #0 {			define float @fadd_zero_f32(float %x) #0 {
	; CHECK-LABEL: fadd_zero_f32:			; CHECK-LABEL: fadd_zero_f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fadd float %x, 0.0			%y = fadd fast float %x, 0.0
	ret float %y			ret float %y
	}			}

	define <4 x float> @fadd_zero_4f32(<4 x float> %x) #0 {			define <4 x float> @fadd_zero_4f32(<4 x float> %x) #0 {
	; CHECK-LABEL: fadd_zero_4f32:			; CHECK-LABEL: fadd_zero_4f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fadd <4 x float> %x, zeroinitializer			%y = fadd fast <4 x float> %x, zeroinitializer
	ret <4 x float> %y			ret <4 x float> %y
	}			}

	; CHECK: float 3			; CHECK: float 3
	define float @fadd_2const_f32(float %x) #0 {			define float @fadd_2const_f32(float %x) #0 {
	; CHECK-LABEL: fadd_2const_f32:			; CHECK-LABEL: fadd_2const_f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: addss {{.*}}(%rip), %xmm0			; CHECK-NEXT: addss {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fadd float %x, 1.0			%y = fadd fast float %x, 1.0
	%z = fadd float %y, 2.0			%z = fadd fast float %y, 2.0
	ret float %z			ret float %z
	}			}

	; CHECK: float 5			; CHECK: float 5
	; CHECK: float 5			; CHECK: float 5
	; CHECK: float 5			; CHECK: float 5
	; CHECK: float 5			; CHECK: float 5
	define <4 x float> @fadd_2const_4f32(<4 x float> %x) #0 {			define <4 x float> @fadd_2const_4f32(<4 x float> %x) #0 {
	; CHECK-LABEL: fadd_2const_4f32:			; CHECK-LABEL: fadd_2const_4f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: addps {{.*}}(%rip), %xmm0			; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>			%y = fadd fast <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
	%z = fadd <4 x float> %y, <float 4.0, float 3.0, float 2.0, float 1.0>			%z = fadd fast <4 x float> %y, <float 4.0, float 3.0, float 2.0, float 1.0>
	ret <4 x float> %z			ret <4 x float> %z
	}			}

	; CHECK: float 3			; CHECK: float 3
	define float @fadd_x_fmul_x_c_f32(float %x) #0 {			define float @fadd_x_fmul_x_c_f32(float %x) #0 {
	; CHECK-LABEL: fadd_x_fmul_x_c_f32:			; CHECK-LABEL: fadd_x_fmul_x_c_f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fmul float %x, 2.0			%y = fmul fast float %x, 2.0
	%z = fadd float %x, %y			%z = fadd fast float %x, %y
	ret float %z			ret float %z
	}			}

	; CHECK: float 2			; CHECK: float 2
	; CHECK: float 3			; CHECK: float 3
	; CHECK: float 4			; CHECK: float 4
	; CHECK: float 5			; CHECK: float 5
	define <4 x float> @fadd_x_fmul_x_c_4f32(<4 x float> %x) #0 {			define <4 x float> @fadd_x_fmul_x_c_4f32(<4 x float> %x) #0 {
	; CHECK-LABEL: fadd_x_fmul_x_c_4f32:			; CHECK-LABEL: fadd_x_fmul_x_c_4f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>			%y = fmul fast <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
	%z = fadd <4 x float> %x, %y			%z = fadd fast <4 x float> %x, %y
	ret <4 x float> %z			ret <4 x float> %z
	}			}

	; CHECK: float 3			; CHECK: float 3
	define float @fadd_fmul_x_c_x_f32(float %x) #0 {			define float @fadd_fmul_x_c_x_f32(float %x) #0 {
	; CHECK-LABEL: fadd_fmul_x_c_x_f32:			; CHECK-LABEL: fadd_fmul_x_c_x_f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fmul float %x, 2.0			%y = fmul fast float %x, 2.0
	%z = fadd float %y, %x			%z = fadd fast float %y, %x
	ret float %z			ret float %z
	}			}

	; CHECK: float 2			; CHECK: float 2
	; CHECK: float 3			; CHECK: float 3
	; CHECK: float 4			; CHECK: float 4
	; CHECK: float 5			; CHECK: float 5
	define <4 x float> @fadd_fmul_x_c_x_4f32(<4 x float> %x) #0 {			define <4 x float> @fadd_fmul_x_c_x_4f32(<4 x float> %x) #0 {
	; CHECK-LABEL: fadd_fmul_x_c_x_4f32:			; CHECK-LABEL: fadd_fmul_x_c_x_4f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>			%y = fmul fast <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
	%z = fadd <4 x float> %y, %x			%z = fadd fast <4 x float> %y, %x
	ret <4 x float> %z			ret <4 x float> %z
	}			}

	; CHECK: float 4			; CHECK: float 4
	define float @fadd_fadd_x_x_fmul_x_c_f32(float %x) #0 {			define float @fadd_fadd_x_x_fmul_x_c_f32(float %x) #0 {
	; CHECK-LABEL: fadd_fadd_x_x_fmul_x_c_f32:			; CHECK-LABEL: fadd_fadd_x_x_fmul_x_c_f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fadd float %x, %x			%y = fadd fast float %x, %x
	%z = fmul float %x, 2.0			%z = fmul fast float %x, 2.0
	%w = fadd float %y, %z			%w = fadd fast float %y, %z
	ret float %w			ret float %w
	}			}

	; CHECK: float 3			; CHECK: float 3
	; CHECK: float 4			; CHECK: float 4
	; CHECK: float 5			; CHECK: float 5
	; CHECK: float 6			; CHECK: float 6
	define <4 x float> @fadd_fadd_x_x_fmul_x_c_4f32(<4 x float> %x) #0 {			define <4 x float> @fadd_fadd_x_x_fmul_x_c_4f32(<4 x float> %x) #0 {
	; CHECK-LABEL: fadd_fadd_x_x_fmul_x_c_4f32:			; CHECK-LABEL: fadd_fadd_x_x_fmul_x_c_4f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fadd <4 x float> %x, %x			%y = fadd fast <4 x float> %x, %x
	%z = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>			%z = fmul fast <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
	%w = fadd <4 x float> %y, %z			%w = fadd fast <4 x float> %y, %z
	ret <4 x float> %w			ret <4 x float> %w
	}			}

	; CHECK: float 4			; CHECK: float 4
	define float @fadd_fmul_x_c_fadd_x_x_f32(float %x) #0 {			define float @fadd_fmul_x_c_fadd_x_x_f32(float %x) #0 {
	; CHECK-LABEL: fadd_fmul_x_c_fadd_x_x_f32:			; CHECK-LABEL: fadd_fmul_x_c_fadd_x_x_f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fadd float %x, %x			%y = fadd fast float %x, %x
	%z = fmul float %x, 2.0			%z = fmul fast float %x, 2.0
	%w = fadd float %z, %y			%w = fadd fast float %z, %y
	ret float %w			ret float %w
	}			}

	; CHECK: float 3			; CHECK: float 3
	; CHECK: float 4			; CHECK: float 4
	; CHECK: float 5			; CHECK: float 5
	; CHECK: float 6			; CHECK: float 6
	define <4 x float> @fadd_fmul_x_c_fadd_x_x_4f32(<4 x float> %x) #0 {			define <4 x float> @fadd_fmul_x_c_fadd_x_x_4f32(<4 x float> %x) #0 {
	; CHECK-LABEL: fadd_fmul_x_c_fadd_x_x_4f32:			; CHECK-LABEL: fadd_fmul_x_c_fadd_x_x_4f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fadd <4 x float> %x, %x			%y = fadd fast <4 x float> %x, %x
	%z = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>			%z = fmul fast <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
	%w = fadd <4 x float> %z, %y			%w = fadd fast <4 x float> %z, %y
	ret <4 x float> %w			ret <4 x float> %w
	}			}

	; CHECK: float 3			; CHECK: float 3
	define float @fadd_x_fadd_x_x_f32(float %x) #0 {			define float @fadd_x_fadd_x_x_f32(float %x) #0 {
	; CHECK-LABEL: fadd_x_fadd_x_x_f32:			; CHECK-LABEL: fadd_x_fadd_x_x_f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fadd float %x, %x			%y = fadd fast float %x, %x
	%z = fadd float %x, %y			%z = fadd fast float %x, %y
	ret float %z			ret float %z
	}			}

	; CHECK: float 3			; CHECK: float 3
	; CHECK: float 3			; CHECK: float 3
	; CHECK: float 3			; CHECK: float 3
	; CHECK: float 3			; CHECK: float 3
	define <4 x float> @fadd_x_fadd_x_x_4f32(<4 x float> %x) #0 {			define <4 x float> @fadd_x_fadd_x_x_4f32(<4 x float> %x) #0 {
	; CHECK-LABEL: fadd_x_fadd_x_x_4f32:			; CHECK-LABEL: fadd_x_fadd_x_x_4f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fadd <4 x float> %x, %x			%y = fadd fast <4 x float> %x, %x
	%z = fadd <4 x float> %x, %y			%z = fadd fast <4 x float> %x, %y
	ret <4 x float> %z			ret <4 x float> %z
	}			}

	; CHECK: float 3			; CHECK: float 3
	define float @fadd_fadd_x_x_x_f32(float %x) #0 {			define float @fadd_fadd_x_x_x_f32(float %x) #0 {
	; CHECK-LABEL: fadd_fadd_x_x_x_f32:			; CHECK-LABEL: fadd_fadd_x_x_x_f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fadd float %x, %x			%y = fadd fast float %x, %x
	%z = fadd float %y, %x			%z = fadd fast float %y, %x
	ret float %z			ret float %z
	}			}

	; CHECK: float 3			; CHECK: float 3
	; CHECK: float 3			; CHECK: float 3
	; CHECK: float 3			; CHECK: float 3
	; CHECK: float 3			; CHECK: float 3
	define <4 x float> @fadd_fadd_x_x_x_4f32(<4 x float> %x) #0 {			define <4 x float> @fadd_fadd_x_x_x_4f32(<4 x float> %x) #0 {
	; CHECK-LABEL: fadd_fadd_x_x_x_4f32:			; CHECK-LABEL: fadd_fadd_x_x_x_4f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fadd <4 x float> %x, %x			%y = fadd fast <4 x float> %x, %x
	%z = fadd <4 x float> %y, %x			%z = fadd fast <4 x float> %y, %x
	ret <4 x float> %z			ret <4 x float> %z
	}			}

	; CHECK: float 4			; CHECK: float 4
	define float @fadd_fadd_x_x_fadd_x_x_f32(float %x) #0 {			define float @fadd_fadd_x_x_fadd_x_x_f32(float %x) #0 {
	; CHECK-LABEL: fadd_fadd_x_x_fadd_x_x_f32:			; CHECK-LABEL: fadd_fadd_x_x_fadd_x_x_f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fadd float %x, %x			%y = fadd fast float %x, %x
	%z = fadd float %y, %y			%z = fadd fast float %y, %y
	ret float %z			ret float %z
	}			}

	; CHECK: float 4			; CHECK: float 4
	; CHECK: float 4			; CHECK: float 4
	; CHECK: float 4			; CHECK: float 4
	; CHECK: float 4			; CHECK: float 4
	define <4 x float> @fadd_fadd_x_x_fadd_x_x_4f32(<4 x float> %x) #0 {			define <4 x float> @fadd_fadd_x_x_fadd_x_x_4f32(<4 x float> %x) #0 {
	; CHECK-LABEL: fadd_fadd_x_x_fadd_x_x_4f32:			; CHECK-LABEL: fadd_fadd_x_x_fadd_x_x_4f32:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%y = fadd <4 x float> %x, %x			%y = fadd fast <4 x float> %x, %x
	%z = fadd <4 x float> %y, %y			%z = fadd fast <4 x float> %y, %y
	ret <4 x float> %z			ret <4 x float> %z
	}			}

	attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "no-signed-zeros-fp-math"="true" }			attributes #0 = { "less-precise-fpmad"="true" }
				mcberg2017AuthorUnsubmitted Not Done Reply Inline Actions I trimmed this down to the essentials required to test both paths. mcberg2017: I trimmed this down to the essentials required to test both paths.

test/CodeGen/X86/fdiv-combine.ll

Show All 13 Lines	; CHECK-NEXT: retq
ret float %div1		ret float %div1
}		}

; All math instructions are 'arcp', so optimize.		; All math instructions are 'arcp', so optimize.

define float @div2_arcp_all(float %x, float %y, float %z) {		define float @div2_arcp_all(float %x, float %y, float %z) {
; CHECK-LABEL: div2_arcp_all:		; CHECK-LABEL: div2_arcp_all:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero		; CHECK-NEXT: movss {{.*}}(%rip), %xmm3
; CHECK-NEXT: divss %xmm2, %xmm3		; CHECK-NEXT: divss %xmm2, %xmm3
; CHECK-NEXT: mulss %xmm3, %xmm0		; CHECK-NEXT: mulss %xmm3, %xmm0
; CHECK-NEXT: mulss %xmm1, %xmm0		; CHECK-NEXT: mulss %xmm1, %xmm0
; CHECK-NEXT: mulss %xmm3, %xmm0		; CHECK-NEXT: mulss %xmm3, %xmm0
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%div1 = fdiv arcp float %x, %z		%div1 = fdiv arcp float %x, %z
%mul = fmul arcp float %div1, %y		%mul = fmul arcp float %div1, %y
%div2 = fdiv arcp float %mul, %z		%div2 = fdiv arcp float %mul, %z
▲ Show 20 Lines • Show All 121 Lines • Show Last 20 Lines

test/CodeGen/X86/fdiv.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s -mtriple=x86_64-unknown-unknown \| FileCheck %s
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -enable-unsafe-fp-math \| FileCheck %s			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -enable-unsafe-fp-math \| FileCheck %s

	define double @exact(double %x) {			define double @exact(double %x) {
	; Exact division by a constant converted to multiplication.			; Exact division by a constant converted to multiplication.
	; CHECK-LABEL: exact:			; CHECK-LABEL: exact:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulsd {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulsd {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%div = fdiv double %x, 2.0			%div = fdiv fast double %x, 2.0
	ret double %div			ret double %div
	}			}

	define double @inexact(double %x) {			define double @inexact(double %x) {
	; Inexact division by a constant converted to multiplication.			; Inexact division by a constant converted to multiplication.
	; CHECK-LABEL: inexact:			; CHECK-LABEL: inexact:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: mulsd {{.*}}(%rip), %xmm0			; CHECK-NEXT: mulsd {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%div = fdiv double %x, 0x41DFFFFFFFC00000			%div = fdiv fast double %x, 0x41DFFFFFFFC00000
	ret double %div			ret double %div
	}			}

	define double @funky(double %x) {			define double @funky(double %x) {
	; No conversion to multiplication if too funky.			; No conversion to multiplication if too funky.
	; CHECK-LABEL: funky:			; CHECK-LABEL: funky:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: xorpd %xmm1, %xmm1			; CHECK-NEXT: xorpd %xmm1, %xmm1
	; CHECK-NEXT: divsd %xmm1, %xmm0			; CHECK-NEXT: divsd %xmm1, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%div = fdiv double %x, 0.0			%div = fdiv fast double %x, 0.0
	ret double %div			ret double %div
	}			}

	define double @denormal1(double %x) {			define double @denormal1(double %x) {
	; Don't generate multiplication by a denormal.			; Don't generate multiplication by a denormal.
	; CHECK-LABEL: denormal1:			; CHECK-LABEL: denormal1:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: divsd {{.*}}(%rip), %xmm0			; CHECK-NEXT: divsd {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%div = fdiv double %x, 0x7FD0000000000001			%div = fdiv fast double %x, 0x7FD0000000000001
	ret double %div			ret double %div
	}			}

	define double @denormal2(double %x) {			define double @denormal2(double %x) {
	; Don't generate multiplication by a denormal.			; Don't generate multiplication by a denormal.
	; CHECK-LABEL: denormal2:			; CHECK-LABEL: denormal2:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: divsd {{.*}}(%rip), %xmm0			; CHECK-NEXT: divsd {{.*}}(%rip), %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%div = fdiv double %x, 0x7FEFFFFFFFFFFFFF			%div = fdiv fast double %x, 0x7FEFFFFFFFFFFFFF
	ret double %div			ret double %div
	}			}

	; Deleting the negates does not require unsafe-fp-math.			; Deleting the negates does not require unsafe-fp-math.

	define float @double_negative(float %x, float %y) #0 {			define float @double_negative(float %x, float %y) #0 {
	; CHECK-LABEL: double_negative:			; CHECK-LABEL: double_negative:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: divss %xmm1, %xmm0			; CHECK-NEXT: divss %xmm1, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%neg1 = fsub float -0.0, %x			%neg1 = fsub fast float -0.0, %x
	%neg2 = fsub float -0.0, %y			%neg2 = fsub fast float -0.0, %y
	%div = fdiv float %neg1, %neg2			%div = fdiv fast float %neg1, %neg2
	ret float %div			ret float %div
	}			}

	attributes #0 = { "unsafe-fp-math"="false" }

test/CodeGen/X86/fmf-flags.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-unknown \| FileCheck %s -check-prefix=X64			; RUN: llc < %s -mtriple=x86_64-unknown \| FileCheck %s -check-prefix=X64
	; RUN: llc < %s -mtriple=i686-unknown \| FileCheck %s -check-prefix=X86			; RUN: llc < %s -mtriple=i686-unknown \| FileCheck %s -check-prefix=X86

	declare float @llvm.sqrt.f32(float %x);			declare float @llvm.sqrt.f32(float %x);

	define float @fast_recip_sqrt(float %x) {			define float @fast_recip_sqrt(float %x) {
	; X64-LABEL: fast_recip_sqrt:			; X64-LABEL: fast_recip_sqrt:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: sqrtss %xmm0, %xmm1			; X64-NEXT: rsqrtss %xmm0, %xmm1
	; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero			; X64-NEXT: mulss %xmm1, %xmm0
	; X64-NEXT: divss %xmm1, %xmm0			; X64-NEXT: mulss %xmm1, %xmm0
				; X64-NEXT: addss {{.*}}(%rip), %xmm0
				; X64-NEXT: mulss {{.*}}(%rip), %xmm1
				; X64-NEXT: mulss %xmm1, %xmm0
	; X64-NEXT: retq			; X64-NEXT: retq
	;			;
	; X86-LABEL: fast_recip_sqrt:			; X86-LABEL: fast_recip_sqrt:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: flds {{[0-9]+}}(%esp)			; X86-NEXT: flds {{[0-9]+}}(%esp)
	; X86-NEXT: fsqrt			; X86-NEXT: fsqrt
	; X86-NEXT: fld1			; X86-NEXT: fld1
	; X86-NEXT: fdivp %st(1)			; X86-NEXT: fdivp %st(1)
	; X86-NEXT: retl			; X86-NEXT: retl
	%y = call fast float @llvm.sqrt.f32(float %x)			%y = call fast float @llvm.sqrt.f32(float %x)
	%z = fdiv fast float 1.0, %y			%z = fdiv fast float 1.0, %y
	ret float %z			ret float %z
	}			}

	declare float @llvm.fmuladd.f32(float %a, float %b, float %c);			declare float @llvm.fmuladd.f32(float %a, float %b, float %c);

	define float @fast_fmuladd_opts(float %a , float %b , float %c) {			define float @fast_fmuladd_opts(float %a , float %b , float %c) {
	; X64-LABEL: fast_fmuladd_opts:			; X64-LABEL: fast_fmuladd_opts:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movaps %xmm0, %xmm1			; X64-NEXT: mulss {{.*}}(%rip), %xmm0
	; X64-NEXT: addss %xmm0, %xmm1
	; X64-NEXT: addss %xmm0, %xmm1
	; X64-NEXT: movaps %xmm1, %xmm0
	; X64-NEXT: retq			; X64-NEXT: retq
	;			;
	; X86-LABEL: fast_fmuladd_opts:			; X86-LABEL: fast_fmuladd_opts:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: flds {{[0-9]+}}(%esp)			; X86-NEXT: flds {{[0-9]+}}(%esp)
	; X86-NEXT: fld %st(0)			; X86-NEXT: fmuls {{.*}}
	; X86-NEXT: fadd %st(1)
	; X86-NEXT: faddp %st(1)
	; X86-NEXT: retl			; X86-NEXT: retl
	%res = call fast float @llvm.fmuladd.f32(float %a, float 2.0, float %a)			%res = call fast float @llvm.fmuladd.f32(float %a, float 2.0, float %a)
	ret float %res			ret float %res
	}			}

	; The multiply is strict.			; The multiply is strict.

	@mul1 = common global double 0.000000e+00, align 4			@mul1 = common global double 0.000000e+00, align 4

	define double @not_so_fast_mul_add(double %x) {			define double @not_so_fast_mul_add(double %x) {
	; X64-LABEL: not_so_fast_mul_add:			; X64-LABEL: not_so_fast_mul_add:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero			; X64-NEXT: movsd {{.*}}(%rip), %xmm1
	; X64-NEXT: mulsd %xmm0, %xmm1			; X64-NEXT: mulsd %xmm0, %xmm1
	; X64-NEXT: addsd %xmm1, %xmm0			; X64-NEXT: mulsd {{.*}}(%rip), %xmm0
	; X64-NEXT: movsd %xmm1, {{.*}}(%rip)			; X64-NEXT: movsd %xmm1, {{.*}}(%rip)
	; X64-NEXT: retq			; X64-NEXT: retq
	;			;
	; X86-LABEL: not_so_fast_mul_add:			; X86-LABEL: not_so_fast_mul_add:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: fldl {{[0-9]+}}(%esp)			; X86-NEXT: fldl {{[0-9]+}}(%esp)
	; X86-NEXT: fld %st(0)			; X86-NEXT: fld %st(0)
	; X86-NEXT: fmull {{\.LCPI.*}}			; X86-NEXT: fmull {{\.LCPI.*}}
	; X86-NEXT: fadd %st(0), %st(1)			; X86-NEXT: fxch %st(1)
				; X86-NEXT: fmull {{\.LCPI.*}}
				; X86-NEXT: fxch %st(1)
	; X86-NEXT: fstpl mul1			; X86-NEXT: fstpl mul1
	; X86-NEXT: retl			; X86-NEXT: retl
	%m = fmul double %x, 4.2			%m = fmul double %x, 4.2
	%a = fadd fast double %m, %x			%a = fadd fast double %m, %x
	store double %m, double* @mul1, align 4			store double %m, double* @mul1, align 4
	ret double %a			ret double %a
	}			}

	; The sqrt is strict.			; The sqrt is strict.

	@sqrt1 = common global float 0.000000e+00, align 4			@sqrt1 = common global float 0.000000e+00, align 4

	define float @not_so_fast_recip_sqrt(float %x) {			define float @not_so_fast_recip_sqrt(float %x) {
	; X64-LABEL: not_so_fast_recip_sqrt:			; X64-LABEL: not_so_fast_recip_sqrt:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: sqrtss %xmm0, %xmm1			; X64-NEXT: rsqrtss %xmm0, %xmm1
	; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero			; X64-NEXT: sqrtss %xmm0, %xmm2
	; X64-NEXT: divss %xmm1, %xmm0			; X64-NEXT: mulss %xmm1, %xmm0
	; X64-NEXT: movss %xmm1, {{.*}}(%rip)			; X64-NEXT: mulss %xmm1, %xmm0
				; X64-NEXT: addss {{.*}}(%rip), %xmm0
				; X64-NEXT: mulss {{.*}}(%rip), %xmm1
				; X64-NEXT: mulss %xmm1, %xmm0
				; X64-NEXT: movss %xmm2, {{.*}}(%rip)
	; X64-NEXT: retq			; X64-NEXT: retq
	;			;
	; X86-LABEL: not_so_fast_recip_sqrt:			; X86-LABEL: not_so_fast_recip_sqrt:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: flds {{[0-9]+}}(%esp)			; X86-NEXT: flds {{[0-9]+}}(%esp)
	; X86-NEXT: fsqrt			; X86-NEXT: fsqrt
	; X86-NEXT: fld1			; X86-NEXT: fld1
	; X86-NEXT: fdiv %st(1)			; X86-NEXT: fdiv %st(1)
	Show All 10 Lines

test/CodeGen/X86/fp-fast.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s \| FileCheck %s
	; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx -enable-unsafe-fp-math < %s \| FileCheck %s			; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx -enable-unsafe-fp-math < %s \| FileCheck %s

	define float @test1(float %a) {			define float @test1(float %a) {
	; CHECK-LABEL: test1:			; CHECK-LABEL: test1:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0			; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%t1 = fadd float %a, %a			%t1 = fadd fast float %a, %a
	%r = fadd float %t1, %t1			%r = fadd fast float %t1, %t1
	ret float %r			ret float %r
	}			}

	define float @test2(float %a) {			define float @test2(float %a) {
	; CHECK-LABEL: test2:			; CHECK-LABEL: test2:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0			; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%t1 = fmul float 4.0, %a			%t1 = fmul fast float 4.0, %a
	%t2 = fadd float %a, %a			%t2 = fadd fast float %a, %a
	%r = fadd float %t1, %t2			%r = fadd fast float %t1, %t2
	ret float %r			ret float %r
	}			}

	define float @test3(float %a) {			define float @test3(float %a) {
	; CHECK-LABEL: test3:			; CHECK-LABEL: test3:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0			; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%t1 = fmul float %a, 4.0			%t1 = fmul fast float %a, 4.0
	%t2 = fadd float %a, %a			%t2 = fadd fast float %a, %a
	%r = fadd float %t1, %t2			%r = fadd fast float %t1, %t2
	ret float %r			ret float %r
	}			}

	define float @test4(float %a) {			define float @test4(float %a) {
	; CHECK-LABEL: test4:			; CHECK-LABEL: test4:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0			; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%t1 = fadd float %a, %a			%t1 = fadd fast float %a, %a
	%t2 = fmul float 4.0, %a			%t2 = fmul fast float 4.0, %a
	%r = fadd float %t1, %t2			%r = fadd fast float %t1, %t2
	ret float %r			ret float %r
	}			}

	define float @test5(float %a) {			define float @test5(float %a) {
	; CHECK-LABEL: test5:			; CHECK-LABEL: test5:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0			; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%t1 = fadd float %a, %a			%t1 = fadd fast float %a, %a
	%t2 = fmul float %a, 4.0			%t2 = fmul fast float %a, 4.0
	%r = fadd float %t1, %t2			%r = fadd fast float %t1, %t2
	ret float %r			ret float %r
	}			}

	define float @test6(float %a) {			define float @test6(float %a) {
	; CHECK-LABEL: test6:			; CHECK-LABEL: test6:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0			; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%t1 = fmul float 2.0, %a			%t1 = fmul fast float 2.0, %a
	%t2 = fadd float %a, %a			%t2 = fadd fast float %a, %a
	%r = fsub float %t1, %t2			%r = fsub fast float %t1, %t2
	ret float %r			ret float %r
	}			}

	define float @test7(float %a) {			define float @test7(float %a) {
	; CHECK-LABEL: test7:			; CHECK-LABEL: test7:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0			; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%t1 = fmul float %a, 2.0			%t1 = fmul fast float %a, 2.0
	%t2 = fadd float %a, %a			%t2 = fadd fast float %a, %a
	%r = fsub float %t1, %t2			%r = fsub fast float %t1, %t2
	ret float %r			ret float %r
	}			}

	define float @test8(float %a) {			define float @test8(float %a) {
	; CHECK-LABEL: test8:			; CHECK-LABEL: test8:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%t1 = fmul float %a, 0.0			%t1 = fmul fast float %a, 0.0
	%t2 = fadd float %a, %t1			%t2 = fadd fast float %a, %t1
	ret float %t2			ret float %t2
	}			}

	define float @test9(float %a) {			define float @test9(float %a) {
	; CHECK-LABEL: test9:			; CHECK-LABEL: test9:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%t1 = fmul float 0.0, %a			%t1 = fmul fast float 0.0, %a
	%t2 = fadd float %t1, %a			%t2 = fadd fast float %t1, %a
	ret float %t2			ret float %t2
	}			}

	define float @test10(float %a) {			define float @test10(float %a) {
	; CHECK-LABEL: test10:			; CHECK-LABEL: test10:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0			; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%t1 = fsub float -0.0, %a			%t1 = fsub fast float -0.0, %a
	%t2 = fadd float %a, %t1			%t2 = fadd fast float %a, %t1
	ret float %t2			ret float %t2
	}			}

	define float @test11(float %a) {			define float @test11(float %a) {
	; CHECK-LABEL: test11:			; CHECK-LABEL: test11:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0			; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%t1 = fsub float -0.0, %a			%t1 = fsub fast float -0.0, %a
	%t2 = fadd float %a, %t1			%t2 = fadd fast float %a, %t1
	ret float %t2			ret float %t2
	}			}

test/CodeGen/X86/fp-fold.ll

	Show All 11 Lines
	; UNSAFE-LABEL: fadd_zero:			; UNSAFE-LABEL: fadd_zero:
	; UNSAFE: # %bb.0:			; UNSAFE: # %bb.0:
	; UNSAFE-NEXT: retq			; UNSAFE-NEXT: retq
	%r = fadd float %x, 0.0			%r = fadd float %x, 0.0
	ret float %r			ret float %r
	}			}

	define float @fadd_negzero(float %x) {			define float @fadd_negzero(float %x) {
	; STRICT-LABEL: fadd_negzero:			; ANY-LABEL: fadd_negzero:
	; STRICT: # %bb.0:			; ANY: # %bb.0:
	; STRICT-NEXT: addss {{.*}}(%rip), %xmm0			; ANY-NEXT: retq
	; STRICT-NEXT: retq
	;
	; UNSAFE-LABEL: fadd_negzero:
	; UNSAFE: # %bb.0:
	; UNSAFE-NEXT: retq
	%r = fadd float %x, -0.0			%r = fadd float %x, -0.0
	ret float %r			ret float %r
	}			}

	define float @fadd_zero_nsz(float %x) {			define float @fadd_zero_nsz(float %x) {
	; ANY-LABEL: fadd_zero_nsz:			; ANY-LABEL: fadd_zero_nsz:
	; ANY: # %bb.0:			; ANY: # %bb.0:
	; ANY-NEXT: retq			; ANY-NEXT: retq
	%r = fadd nsz float %x, 0.0			%r = fadd nsz float %x, 0.0
	ret float %r			ret float %r
	}			}

	define float @fadd_negzero_nsz(float %x) {			define float @fadd_negzero_nsz(float %x) {
	; ANY-LABEL: fadd_negzero_nsz:			; ANY-LABEL: fadd_negzero_nsz:
	; ANY: # %bb.0:			; ANY: # %bb.0:
	; ANY-NEXT: retq			; ANY-NEXT: retq
	%r = fadd nsz float %x, -0.0			%r = fadd nsz float %x, -0.0
	ret float %r			ret float %r
	}			}

	define float @fsub_zero(float %x) {			define float @fsub_zero(float %x) {
	; STRICT-LABEL: fsub_zero:			; ANY-LABEL: fsub_zero:
	; STRICT: # %bb.0:			; ANY: # %bb.0:
	; STRICT-NEXT: addss {{.*}}(%rip), %xmm0			; ANY-NEXT: retq
	; STRICT-NEXT: retq
	;
	; UNSAFE-LABEL: fsub_zero:
	; UNSAFE: # %bb.0:
	; UNSAFE-NEXT: retq
	%r = fsub float %x, 0.0			%r = fsub float %x, 0.0
	ret float %r			ret float %r
	}			}

	define float @fsub_negzero(float %x) {			define float @fsub_negzero(float %x) {
	; STRICT-LABEL: fsub_negzero:			; STRICT-LABEL: fsub_negzero:
	; STRICT: # %bb.0:			; STRICT: # %bb.0:
	; STRICT-NEXT: xorps %xmm1, %xmm1			; STRICT-NEXT: xorps %xmm1, %xmm1
	Show All 20 Lines
	; ANY: # %bb.0:			; ANY: # %bb.0:
	; ANY-NEXT: retq			; ANY-NEXT: retq
	%r = fsub nsz float %x, -0.0			%r = fsub nsz float %x, -0.0
	ret float %r			ret float %r
	}			}

	; TODO: handle x*0 for fast flags the same as unsafe			; TODO: handle x*0 for fast flags the same as unsafe
	define float @fmul_zero(float %x) {			define float @fmul_zero(float %x) {
	; STRICT-LABEL: fmul_zero:			; ANY-LABEL: fmul_zero:
	; STRICT: # %bb.0:			; ANY: # %bb.0:
	; STRICT-NEXT: xorps %xmm1, %xmm1			; ANY-NEXT: xorps %xmm0, %xmm0
	; STRICT-NEXT: mulss %xmm1, %xmm0			; ANY-NEXT: retq
	; STRICT-NEXT: retq
	;
	; UNSAFE-LABEL: fmul_zero:
	; UNSAFE: # %bb.0:
	; UNSAFE-NEXT: xorps %xmm0, %xmm0
	; UNSAFE-NEXT: retq
	%r = fmul nnan nsz float %x, 0.0			%r = fmul nnan nsz float %x, 0.0
	ret float %r			ret float %r
	}			}

	define float @fmul_one(float %x) {			define float @fmul_one(float %x) {
	; ANY-LABEL: fmul_one:			; ANY-LABEL: fmul_one:
	; ANY: # %bb.0:			; ANY: # %bb.0:
	; ANY-NEXT: retq			; ANY-NEXT: retq
	%r = fmul float %x, 1.0			%r = fmul float %x, 1.0
	ret float %r			ret float %r
	}			}

This is an archive of the discontinued LLVM Phabricator instance.

Utilize new SDNode flag functionality to expand current support model
AbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 148473

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

test/CodeGen/AArch64/fdiv-combine.ll

test/CodeGen/AMDGPU/fadd.ll

test/CodeGen/AMDGPU/fdiv.f16.ll

test/CodeGen/PowerPC/fdiv-combine.ll

test/CodeGen/PowerPC/fmf-math.ll

test/CodeGen/X86/change-ir-fp-math.ll

test/CodeGen/X86/fadd-combines.ll

test/CodeGen/X86/fdiv-combine.ll

test/CodeGen/X86/fdiv.ll

test/CodeGen/X86/fmf-flags.ll

test/CodeGen/X86/fp-fast.ll

test/CodeGen/X86/fp-fold.ll

This is an archive of the discontinued LLVM Phabricator instance.

Utilize new SDNode flag functionality to expand current support model
AbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 148473

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

test/CodeGen/AArch64/fdiv-combine.ll

test/CodeGen/AMDGPU/fadd.ll

test/CodeGen/AMDGPU/fdiv.f16.ll

test/CodeGen/PowerPC/fdiv-combine.ll

test/CodeGen/PowerPC/fmf-math.ll

test/CodeGen/X86/change-ir-fp-math.ll

test/CodeGen/X86/fadd-combines.ll

test/CodeGen/X86/fdiv-combine.ll

test/CodeGen/X86/fdiv.ll

test/CodeGen/X86/fmf-flags.ll

test/CodeGen/X86/fp-fast.ll

test/CodeGen/X86/fp-fold.ll

Utilize new SDNode flag functionality to expand current support model
AbandonedPublic