Diff 151402

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 699 Lines • ▼ Show 20 Lines	case ISD::ConstantFP: {

// Don't invert constant FP values after legalization unless the target says		// Don't invert constant FP values after legalization unless the target says
// the negated constant is legal.		// the negated constant is legal.
return TLI.isOperationLegal(ISD::ConstantFP, VT) \|\|		return TLI.isOperationLegal(ISD::ConstantFP, VT) \|\|
TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);		TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
}		}
case ISD::FADD:		case ISD::FADD:
// FIXME: determine better conditions for this xform.		// FIXME: determine better conditions for this xform.
if (!Options->UnsafeFPMath) return 0;		if (!Options->UnsafeFPMath) return 0;

		spatelUnsubmitted Not Done Reply Inline Actions clang-format? spatel: clang-format?
// After operation legalization, it might not be legal to create new FSUBs.		// After operation legalization, it might not be legal to create new FSUBs.
if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))		if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
return 0;		return 0;

// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)		// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,		if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
Options, Depth + 1))		Options, Depth + 1))
return V;		return V;
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
switch (Op.getOpcode()) {		switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown code");		default: llvm_unreachable("Unknown code");
case ISD::ConstantFP: {		case ISD::ConstantFP: {
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();		APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
V.changeSign();		V.changeSign();
return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());		return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
}		}
case ISD::FADD:		case ISD::FADD:
// FIXME: determine better conditions for this xform.		// FIXME: determine better conditions for this xform.
		spatelUnsubmitted Not Done Reply Inline Actions Change FIXME: the FMF version is good now; the global check is wrong. (Might want to comment on the broken global checks in multiple places as an NFC preliminary step.) spatel: Change FIXME: the FMF version is good now; the global check is wrong. (Might want to comment…
assert(Options.UnsafeFPMath);		assert(Options.UnsafeFPMath);

// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)		// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,		if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
DAG.getTargetLoweringInfo(), &Options, Depth+1))		DAG.getTargetLoweringInfo(), &Options, Depth+1))
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),		return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,		GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),		LegalOperations, Depth+1),
▲ Show 20 Lines • Show All 9,554 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::visitFADD(SDNode *N) {
if ((isFMulNegTwo(N0) && N0.hasOneUse()) \|\|		if ((isFMulNegTwo(N0) && N0.hasOneUse()) \|\|
(isFMulNegTwo(N1) && N1.hasOneUse())) {		(isFMulNegTwo(N1) && N1.hasOneUse())) {
bool N1IsFMul = isFMulNegTwo(N1);		bool N1IsFMul = isFMulNegTwo(N1);
SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);		SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);		SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);		return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
}		}

// FIXME: Auto-upgrade the target/function-level option.		ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
if (Options.NoSignedZerosFPMath \|\| N->getFlags().hasNoSignedZeros()) {		if (N1C && N1C->isZero()) {
		if (N1C->isNegative() \|\| Options.UnsafeFPMath \|\|
		Flags.hasNoSignedZeros()) {
// fold (fadd A, 0) -> A		// fold (fadd A, 0) -> A
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
if (N1C->isZero())
return N0;		return N0;
}		}
		}

// If 'unsafe math' is enabled, fold lots of things.		// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
// No FP constant should be created after legalization as Instruction
// Selection pass has a hard time dealing with FP constants.
bool AllowNewConst = (Level < AfterLegalizeDAG);		bool AllowNewConst = (Level < AfterLegalizeDAG);
		if (Options.UnsafeFPMath \|\| Flags.hasNoNaNs()) {
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
if (N1CFP && N0.getOpcode() == ISD::FADD &&
isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
}

// If allowed, fold (fadd (fneg x), x) -> 0.0		// If allowed, fold (fadd (fneg x), x) -> 0.0
if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)		if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
return DAG.getConstantFP(0.0, DL, VT);		return DAG.getConstantFP(0.0, DL, VT);

// If allowed, fold (fadd x, (fneg x)) -> 0.0		// If allowed, fold (fadd x, (fneg x)) -> 0.0
if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)		if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
return DAG.getConstantFP(0.0, DL, VT);		return DAG.getConstantFP(0.0, DL, VT);
		spatelUnsubmitted Not Done Reply Inline Actions I'd prefer that we not perpetuate UnsafeFPMath any more than necessary, so I'd say something like: FP reassociation can have drastic effects on results. For example, inf * -2.0 + inf -> NaN, but reassociation allows that to become inf * -1.0 -> -inf. spatel: I'd prefer that we not perpetuate UnsafeFPMath any more than necessary, so I'd say something…
		mcberg2017AuthorUnsubmitted Not Done Reply Inline Actions I am going to remove the comment; if we leave a special message about reassociation, someone can infer it is different from Unsafe and therefore new behavior, when it is not. mcberg2017: I am going to remove the comment; if we leave a special message about reassociation, someone can…
		}

		// If 'unsafe math' or reassoc and nsz, fold lots of things.
		// TODO: break out portions of the transformations below for which Unsafe is
		// considered and which do not require both nsz and reassoc
		mcberg2017AuthorUnsubmitted Not Done Reply Inline Actions I think I will change "false" to "considered" mcberg2017: I think I will change "false" to "considered"
		if (Options.UnsafeFPMath \|\|
		(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) {
		// No FP constant should be created after legalization as Instruction
		// Selection pass has a hard time dealing with FP constants.
		spatelUnsubmitted Not Done Reply Inline Actions Shouldn't the 'AllowNewConst' check be moved up here rather than repeated with each transform below? Again, I think that particular change is independent of the FMF change, so it should be reviewed/committed as a separate patch (preferably before we make the FMF change, so we're not increasing the chance of hitting the bug). Not sure how/if we can make that difference visible in a regression test with in-tree targets. Remove the existing variable in trunk and see if anything breaks? spatel: Shouldn't the 'AllowNewConst' check be moved up here rather than repeated with each transform…
		mcberg2017AuthorUnsubmitted Not Done Reply Inline Actions The AllowNewConst change is NFC, so I will track it as a simple check-in, then update this review afterwards. mcberg2017: The AllowNewConst change is NFC, so I will track it as a simple check-in, then update this…
		mcberg2017AuthorUnsubmitted Not Done Reply Inline Actions I wound up keeping some of the refactoring so it is not quite NFC, the review should appear shortly. The resultant change here in this review will make the code change quite small. mcberg2017: I wound up keeping some of the refactoring so it is not quite NFC, the review should appear…

		spatelUnsubmitted Not Done Reply Inline Actions This seems broken independently of this patch - we just said it's not safe to create new constants, and every transform under here creates a new constant. But we only check 'AllowNewConst' for some of the folds? Also, I updated the 1st transform under here to not check hasOneUse() in rL334608, so you'll need to rebase. spatel: This seems broken independently of this patch - we just said it's not safe to create new…
		// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
		if (N1CFP && N0.getOpcode() == ISD::FADD && AllowNewConst &&
		isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
		SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
		return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
		}

// We can fold chains of FADD's of the same value into multiplications.		// We can fold chains of FADD's of the same value into multiplications.
// This transform is not safe in general because we are reducing the number		// This transform is not safe in general because we are reducing the number
// of rounding steps.		// of rounding steps.
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {		if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {		if (N0.getOpcode() == ISD::FMUL) {
bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));		bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));		bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

// (fadd (fmul x, c), x) -> (fmul x, c+1)		// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {		if (CFP01 && !CFP00 && N0.getOperand(0) == N1 && AllowNewConst) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),		SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
DAG.getConstantFP(1.0, DL, VT), Flags);		DAG.getConstantFP(1.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);		return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
}		}
		nhaehnleUnsubmitted Not Done Reply Inline Actions This transformation and the similar ones below can change the NaN-ness of the result. For example, consider `x = +inf`, `c = -0.5`. In that case, the LHS is NaN (because of the intermediate `-inf + +inf`), while the RHS is `+inf`. Is `reassoc` supposed to be a strong enough condition to allow this? I don't see a way for a non-NaN result to be transformed into a NaN result, so perhaps this is okay? But this needs to be a conscious decision and it should be mentioned in the comment above that talks about rounding steps. nhaehnle: This transformation and the similar ones below can change the NaN-ness of the result. For…
		spatelUnsubmitted Not Done Reply Inline Actions This is circular reasoning, but LLVM does this fold already in IR as long as we have 'nsz reassoc'. This is also allowed by gcc with the equivalent flags: https://godbolt.org/g/wDh3bM Our 'reassoc' definition currently says: "Allow reassociation transformations for floating-point instructions. This may dramatically change results in floating-point." We could add to that definition if we can make that clearer? Or as you suggest, we can document the potential differences more clearly here by including the INF example. spatel: This is circular reasoning, but LLVM does this fold already in IR as long as we have 'nsz…
		nhaehnleUnsubmitted Not Done Reply Inline Actions Those are good points. It did catch my eye because the documentation of `reassoc` led me down the path of thinking that sure, there's going to be differences based on different rounding. But this particular difference isn't caused by rounding differences. Having slept on it and with your points, I think it's okay to do this transform based on `reassoc` because it doesn't create new NaNs as far as I can tell. If there were similar cases where new NaNs can be created, I think we should tread more carefully. (Of course "mere" rounding differences can create new NaNs, e.g. by turning `inf * tiny number` into `inf * 0`, but those should be expected by floating point practitioners, which makes it okay.) nhaehnle: Those are good points. It did catch my eye because the documentation of `reassoc` led me down…

// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)		// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&		if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && AllowNewConst &&
N1.getOperand(0) == N1.getOperand(1) &&		N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {		N0.getOperand(0) == N1.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),		SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
DAG.getConstantFP(2.0, DL, VT), Flags);		DAG.getConstantFP(2.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);		return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
}		}
}		}

if (N1.getOpcode() == ISD::FMUL) {		if (N1.getOpcode() == ISD::FMUL) {
bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));		bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));		bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

// (fadd x, (fmul x, c)) -> (fmul x, c+1)		// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {		if (CFP11 && !CFP10 && N1.getOperand(0) == N0 && AllowNewConst ) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),		SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
DAG.getConstantFP(1.0, DL, VT), Flags);		DAG.getConstantFP(1.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);		return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
}		}

// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)		// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&		if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && AllowNewConst &&
N0.getOperand(0) == N0.getOperand(1) &&		N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {		N1.getOperand(0) == N0.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),		SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
DAG.getConstantFP(2.0, DL, VT), Flags);		DAG.getConstantFP(2.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);		return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
}		}
}		}

Show All 10 Lines	if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N1.getOpcode() == ISD::FADD && AllowNewConst) {		if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));		bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)		// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&		if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {		N1.getOperand(0) == N0) {
return DAG.getNode(ISD::FMUL, DL, VT,		return DAG.getNode(ISD::FMUL, DL, VT,
N0, DAG.getConstantFP(3.0, DL, VT), Flags);		N0, DAG.getConstantFP(3.0, DL, VT), Flags);
}		}
}

// (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)		// (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
if (AllowNewConst &&		if (N0.getOpcode() == ISD::FADD &&
N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&		N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&		N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {		N0.getOperand(0) == N1.getOperand(0)) {
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),		return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
DAG.getConstantFP(4.0, DL, VT), Flags);		DAG.getConstantFP(4.0, DL, VT), Flags);
}		}
}		}
		}
} // enable-unsafe-fp-math		} // enable-unsafe-fp-math

// FADD -> FMA combines:		// FADD -> FMA combines:
if (SDValue Fused = visitFADDForFMACombine(N)) {		if (SDValue Fused = visitFADDForFMACombine(N)) {
AddToWorklist(Fused.getNode());		AddToWorklist(Fused.getNode());
return Fused;		return Fused;
}		}
return SDValue();		return SDValue();
▲ Show 20 Lines • Show All 7,767 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 4,436 Lines • ▼ Show 20 Lines	case ISD::UMAX:
assert(N1.getValueType() == N2.getValueType() &&		assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");		N1.getValueType() == VT && "Binary operator types must match!");
break;		break;
case ISD::FADD:		case ISD::FADD:
case ISD::FSUB:		case ISD::FSUB:
case ISD::FMUL:		case ISD::FMUL:
case ISD::FDIV:		case ISD::FDIV:
case ISD::FREM:		case ISD::FREM:
if (getTarget().Options.UnsafeFPMath) {
if (Opcode == ISD::FADD) {
// x+0 --> x
if (N2CFP && N2CFP->getValueAPF().isZero())
return N1;
} else if (Opcode == ISD::FSUB) {
// x-0 --> x
if (N2CFP && N2CFP->getValueAPF().isZero())
return N1;
} else if (Opcode == ISD::FMUL) {
// x*0 --> 0
if (N2CFP && N2CFP->isZero())
return N2;
// x*1 --> x
if (N2CFP && N2CFP->isExactlyValue(1.0))
return N1;
}
}
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");		assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == N2.getValueType() &&		assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");		N1.getValueType() == VT && "Binary operator types must match!");
break;		break;
case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.		case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
assert(N1.getValueType() == VT &&		assert(N1.getValueType() == VT &&
N1.getValueType().isFloatingPoint() &&		N1.getValueType().isFloatingPoint() &&
N2.getValueType().isFloatingPoint() &&		N2.getValueType().isFloatingPoint() &&
▲ Show 20 Lines • Show All 4,246 Lines • Show Last 20 Lines

test/CodeGen/AArch64/fadd-combines.ll

Show First 20 Lines • Show All 94 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
call void @use(double %mul)		call void @use(double %mul)
ret double %add1		ret double %add1
}		}

define float @fadd_const_multiuse_fmf(float %x) {		define float @fadd_const_multiuse_fmf(float %x) {
; CHECK-LABEL: fadd_const_multiuse_fmf:		; CHECK-LABEL: fadd_const_multiuse_fmf:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI7_0		; CHECK-NEXT: adrp x8, .LCPI7_0
		; CHECK-NEXT: adrp x9, .LCPI7_1
; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI7_0]		; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI7_0]
; CHECK-NEXT: fadd s0, s0, s1		; CHECK-NEXT: ldr s2, [x9, :lo12:.LCPI7_1]
; CHECK-NEXT: fmov s1, #17.00000000
; CHECK-NEXT: fadd s1, s0, s1		; CHECK-NEXT: fadd s1, s0, s1
; CHECK-NEXT: fadd s0, s0, s1		; CHECK-NEXT: fadd s0, s0, s2
		; CHECK-NEXT: fadd s0, s1, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%a1 = fadd float %x, 42.0		%a1 = fadd float %x, 42.0
%a2 = fadd nsz reassoc float %a1, 17.0		%a2 = fadd nsz reassoc float %a1, 17.0
%a3 = fadd float %a1, %a2		%a3 = fadd float %a1, %a2
ret float %a3		ret float %a3
}		}

; DAGCombiner transforms this into: (x + 59.0) + (x + 17.0).		; DAGCombiner transforms this into: (x + 59.0) + (x + 17.0).
Show All 24 Lines

test/CodeGen/AMDGPU/fadd.ll

Show First 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	define amdgpu_kernel void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
%add = fadd <8 x float> %a, %b		%add = fadd <8 x float> %a, %b
store <8 x float> %add, <8 x float> addrspace(1)* %out, align 32		store <8 x float> %add, <8 x float> addrspace(1)* %out, align 32
ret void		ret void
}		}

; FUNC-LABEL: {{^}}fadd_0_nsz_attr_f32:		; FUNC-LABEL: {{^}}fadd_0_nsz_attr_f32:
; SI-NOT: v_add_f32		; SI-NOT: v_add_f32
define amdgpu_kernel void @fadd_0_nsz_attr_f32(float addrspace(1)* %out, float %a) #1 {		define amdgpu_kernel void @fadd_0_nsz_attr_f32(float addrspace(1)* %out, float %a) #1 {
%add = fadd float %a, 0.0		%add = fadd nsz float %a, 0.0
store float %add, float addrspace(1)* %out, align 4		store float %add, float addrspace(1)* %out, align 4
ret void		ret void
}		}

attributes #0 = { nounwind }		attributes #0 = { nounwind }
attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }		attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }

test/CodeGen/X86/fmf-flags.ll

Show All 32 Lines	; X86-NEXT: retl
ret float %z		ret float %z
}		}

declare float @llvm.fmuladd.f32(float %a, float %b, float %c);		declare float @llvm.fmuladd.f32(float %a, float %b, float %c);

define float @fast_fmuladd_opts(float %a , float %b , float %c) {		define float @fast_fmuladd_opts(float %a , float %b , float %c) {
; X64-LABEL: fast_fmuladd_opts:		; X64-LABEL: fast_fmuladd_opts:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movaps %xmm0, %xmm1		; X64-NEXT: mulss {{.*}}(%rip), %xmm0
; X64-NEXT: addss %xmm0, %xmm1
; X64-NEXT: addss %xmm0, %xmm1
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X86-LABEL: fast_fmuladd_opts:		; X86-LABEL: fast_fmuladd_opts:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: flds {{[0-9]+}}(%esp)		; X86-NEXT: flds {{[0-9]+}}(%esp)
; X86-NEXT: fld %st(0)		; X86-NEXT: fmuls {{.*}}
; X86-NEXT: fadd %st(1)
; X86-NEXT: faddp %st(1)
; X86-NEXT: retl		; X86-NEXT: retl
%res = call fast float @llvm.fmuladd.f32(float %a, float 2.0, float %a)		%res = call fast float @llvm.fmuladd.f32(float %a, float 2.0, float %a)
ret float %res		ret float %res
}		}

; The multiply is strict.		; The multiply is strict.

@mul1 = common global double 0.000000e+00, align 4		@mul1 = common global double 0.000000e+00, align 4

define double @not_so_fast_mul_add(double %x) {		define double @not_so_fast_mul_add(double %x) {
; X64-LABEL: not_so_fast_mul_add:		; X64-LABEL: not_so_fast_mul_add:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero		; X64-NEXT: movsd {{.*}}(%rip), %xmm1
; X64-NEXT: mulsd %xmm0, %xmm1		; X64-NEXT: mulsd %xmm0, %xmm1
; X64-NEXT: addsd %xmm1, %xmm0		; X64-NEXT: mulsd {{.*}}(%rip), %xmm0
; X64-NEXT: movsd %xmm1, {{.*}}(%rip)		; X64-NEXT: movsd %xmm1, {{.*}}(%rip)
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X86-LABEL: not_so_fast_mul_add:		; X86-LABEL: not_so_fast_mul_add:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: fldl {{[0-9]+}}(%esp)		; X86-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NEXT: fld %st(0)		; X86-NEXT: fld %st(0)
; X86-NEXT: fmull {{\.LCPI.*}}		; X86-NEXT: fmull {{\.LCPI.*}}
; X86-NEXT: fadd %st(0), %st(1)		; X86-NEXT: fxch %st(1)
		; X86-NEXT: fmull {{\.LCPI.*}}
		; X86-NEXT: fxch %st(1)
; X86-NEXT: fstpl mul1		; X86-NEXT: fstpl mul1
; X86-NEXT: retl		; X86-NEXT: retl
%m = fmul double %x, 4.2		%m = fmul double %x, 4.2
%a = fadd fast double %m, %x		%a = fadd fast double %m, %x
store double %m, double* @mul1, align 4		store double %m, double* @mul1, align 4
ret double %a		ret double %a
}		}

Show All 29 Lines

test/CodeGen/X86/fp-fold.ll

	Show All 11 Lines
	; UNSAFE-LABEL: fadd_zero:			; UNSAFE-LABEL: fadd_zero:
	; UNSAFE: # %bb.0:			; UNSAFE: # %bb.0:
	; UNSAFE-NEXT: retq			; UNSAFE-NEXT: retq
	%r = fadd float %x, 0.0			%r = fadd float %x, 0.0
	ret float %r			ret float %r
	}			}

	define float @fadd_negzero(float %x) {			define float @fadd_negzero(float %x) {
	; STRICT-LABEL: fadd_negzero:			; ANY-LABEL: fadd_negzero:
	; STRICT: # %bb.0:			; ANY: # %bb.0:
	; STRICT-NEXT: addss {{.*}}(%rip), %xmm0			; ANY-NEXT: retq
	; STRICT-NEXT: retq
	;
	; UNSAFE-LABEL: fadd_negzero:
	; UNSAFE: # %bb.0:
	; UNSAFE-NEXT: retq
	%r = fadd float %x, -0.0			%r = fadd float %x, -0.0
	ret float %r			ret float %r
	}			}

				define float @fadd_produce_zero(float %x) {
				; ANY-LABEL: fadd_produce_zero:
				; ANY: # %bb.0:
				; ANY-NEXT: xorps %xmm0, %xmm0
				; ANY-NEXT: retq
				%neg = fsub nsz float 0.0, %x
				%r = fadd nnan float %neg, %x
				ret float %r
				}

				define float @fadd_reassociate(float %x) {
				; ANY-LABEL: fadd_reassociate:
				; ANY: # %bb.0:
				; ANY-NEXT: addss {{.*}}(%rip), %xmm0
				; ANY-NEXT: retq
				%sum = fadd float %x, 8.0
				%r = fadd reassoc nsz float %sum, 12.0
				ret float %r
				}

	define float @fadd_negzero_nsz(float %x) {			define float @fadd_negzero_nsz(float %x) {
	; ANY-LABEL: fadd_negzero_nsz:			; ANY-LABEL: fadd_negzero_nsz:
	; ANY: # %bb.0:			; ANY: # %bb.0:
	; ANY-NEXT: retq			; ANY-NEXT: retq
	%r = fadd nsz float %x, -0.0			%r = fadd nsz float %x, -0.0
	ret float %r			ret float %r
	}			}

	▲ Show 20 Lines • Show All 90 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

Utilize new SDNode flag functionality to expand current support for fadd
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 151402

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

test/CodeGen/AArch64/fadd-combines.ll

test/CodeGen/AMDGPU/fadd.ll

test/CodeGen/X86/fmf-flags.ll

test/CodeGen/X86/fp-fold.ll

This is an archive of the discontinued LLVM Phabricator instance.

Utilize new SDNode flag functionality to expand current support for fadd
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 151402

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

test/CodeGen/AArch64/fadd-combines.ll

test/CodeGen/AMDGPU/fadd.ll

test/CodeGen/X86/fmf-flags.ll

test/CodeGen/X86/fp-fold.ll

Utilize new SDNode flag functionality to expand current support for fadd
ClosedPublic