This is an archive of the discontinued LLVM Phabricator instance.

Fix interference caused by fmul 2, x -> fadd x, x combine
ClosedPublic

Authored by arsenm on Jul 25 2014, 3:56 PM.

Download Raw Diff

Details

Reviewers

Summary

This solves 2 variants of this problem. First, change the order in which the combines are tried so that fmul (fmul x, c1), c2 -> fmul x, (fmul c1, c2) is attempted before fmul 2, x -> fadd x, x.

Second, add a variant of the fmul constant combine that understands fadd x, x as a multiply by 2. This is necessary because a multiply by 2 that exists originally will be transformed into the fadd by one of the early runs of the DAG combiner, and would then not be folded with new fmuls inserted during lowering.

Diff Detail

Event Timeline

arsenm updated this revision to Diff 11901.Jul 25 2014, 3:56 PM

arsenm retitled this revision from to Fix interference caused by fmul 2, x -> fadd x, x combine .

arsenm updated this object.

arsenm edited the test plan for this revision. (Show Details)

arsenm added a subscriber: Unknown Object (MLST).

Add more tests I forgot to attach

Ping

hfinkel added a subscriber: hfinkel.Sep 2 2014, 9:53 AM

hfinkel added inline comments.

lib/CodeGen/SelectionDAG/DAGCombiner.cpp
691	BV can only truncate integer operands, not FP ones. If you want to check that the types agree, please make it an assert.
6863	As a general note, it looks like we don't currently preserve FastMathFlags at the SDAG level. We've fixed that for NSW/NUW, and we should probably fix that for FP ops too (obviously this is a separate issue from what you're addressing here).
6875	Okay, but what happens next? Wouldn't we get (fmul x, (fadd c, c))?

Thanks Hal!
Committed revision 216913.

Forget my last comment... Posted in the wrong code review :-(

arsenm added inline comments.Sep 2 2014, 11:28 AM

lib/CodeGen/SelectionDAG/DAGCombiner.cpp
691	This was already added in another patch which also used this. I can fix this separately
6875	The fmul 2.0, c will be constant folded into 2c which is the goal

LGTM.

lib/CodeGen/SelectionDAG/DAGCombiner.cpp
691	Okay, thanks! (please do).
6875	Okay, makes sense. ;)

This revision is now accepted and ready to land.Sep 2 2014, 11:31 AM

r216932 and 216928

Revision Contents

Path

Size

lib/

CodeGen/

SelectionDAG/

DAGCombiner.cpp

59 lines

test/

CodeGen/

Mips/

msa/

arithmetic_float.ll

10 lines

R600/

fmul.ll

25 lines

llvm.sin.ll

111 lines

X86/

fmul-combines.ll

99 lines

Diff 11903

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

	Show First 20 Lines • Show All 672 Lines • ▼ Show 20 Lines
	if (CN && UndefElements.none() &&			if (CN && UndefElements.none() &&
	CN->getValueType(0) == N.getValueType().getScalarType())			CN->getValueType(0) == N.getValueType().getScalarType())
	return CN;			return CN;
	}			}

	return nullptr;			return nullptr;
	}			}

				// \brief Returns the SDNode if it is a constant splat BuildVector or constant
				// float.
				static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
				if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
				return CN;

				if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
				BitVector UndefElements;
				ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);

				// BuildVectors can truncate their operands. Ignore that case here.
				hfinkelUnsubmitted Not Done Reply Inline Actions BV can only truncate integer operands, not FP ones. If you want to check that the types agree, please make it an assert. hfinkel: BV can only truncate integer operands, not FP ones. If you want to check that the types agree…
				arsenmAuthorUnsubmitted Not Done Reply Inline Actions This was already added in another patch which also used this. I can fix this separately arsenm: This was already added in another patch which also used this. I can fix this separately
				hfinkelUnsubmitted Not Done Reply Inline Actions Okay, thanks! (please do). hfinkel: Okay, thanks! (please do).
				// FIXME: We blindly ignore splats which include undef which is overly
				// pessimistic.
				if (CN && UndefElements.none() &&
				CN->getValueType(0) == N.getValueType().getScalarType())
				return CN;
				}

				return nullptr;
				}

	SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,			SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
	SDValue N0, SDValue N1) {			SDValue N0, SDValue N1) {
	EVT VT = N0.getValueType();			EVT VT = N0.getValueType();
	if (N0.getOpcode() == Opc) {			if (N0.getOpcode() == Opc) {
	if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) {			if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) {
	if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) {			if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) {
	// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))			// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
	SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R);			SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R);
	▲ Show 20 Lines • Show All 1,982 Lines • ▼ Show 20 Lines
	}			}

	return SDValue();			return SDValue();
	}			}

	SDValue DAGCombiner::visitFMUL(SDNode *N) {			SDValue DAGCombiner::visitFMUL(SDNode *N) {
	SDValue N0 = N->getOperand(0);			SDValue N0 = N->getOperand(0);
	SDValue N1 = N->getOperand(1);			SDValue N1 = N->getOperand(1);
	ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);			ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
	ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);			ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
	EVT VT = N->getValueType(0);			EVT VT = N->getValueType(0);
	const TargetLowering &TLI = DAG.getTargetLoweringInfo();			const TargetLowering &TLI = DAG.getTargetLoweringInfo();

	// fold vector ops			// fold vector ops
	if (VT.isVector()) {			if (VT.isVector()) {
	SDValue FoldedVOp = SimplifyVBinOp(N);			SDValue FoldedVOp = SimplifyVBinOp(N);
	if (FoldedVOp.getNode()) return FoldedVOp;			if (FoldedVOp.getNode()) return FoldedVOp;
	}			}

	// fold (fmul c1, c2) -> c1*c2			// fold (fmul c1, c2) -> c1*c2
	if (N0CFP && N1CFP)			if (N0CFP && N1CFP)
	return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);			return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);
	// canonicalize constant to RHS			// canonicalize constant to RHS
	if (N0CFP && !N1CFP)			if (N0CFP && !N1CFP)
	return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);			return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);
	// fold (fmul A, 0) -> 0			// fold (fmul A, 0) -> 0
	if (DAG.getTarget().Options.UnsafeFPMath &&			if (DAG.getTarget().Options.UnsafeFPMath &&
	N1CFP && N1CFP->getValueAPF().isZero())			N1CFP && N1CFP->getValueAPF().isZero())
	return N1;			return N1;
	// fold (fmul A, 0) -> 0, vector edition.
	if (DAG.getTarget().Options.UnsafeFPMath &&
	ISD::isBuildVectorAllZeros(N1.getNode()))
	return N1;
	// fold (fmul A, 1.0) -> A			// fold (fmul A, 1.0) -> A
	if (N1CFP && N1CFP->isExactlyValue(1.0))			if (N1CFP && N1CFP->isExactlyValue(1.0))
	return N0;			return N0;

				if (DAG.getTarget().Options.UnsafeFPMath) {
				hfinkelUnsubmitted Not Done Reply Inline Actions As a general note, it looks like we don't currently preserve FastMathFlags at the SDAG level. We've fixed that for NSW/NUW, and we should probably fix that for FP ops too (obviously this is a separate issue from what you're addressing here). hfinkel: As a general note, it looks like we don't currently preserve FastMathFlags at the SDAG level.
				// If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
				if (N1CFP && N0.getOpcode() == ISD::FMUL &&
				N0.getNode()->hasOneUse() && isConstOrConstSplatFP(N0.getOperand(1))) {
				SDLoc SL(N);
				SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(1), N1);
				return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
				}

				// If allowed, fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
				// Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
				// during an early run of DAGCombiner, it can prevent folding with fmuls
				// inserted during lowering.
				hfinkelUnsubmitted Not Done Reply Inline Actions Okay, but what happens next? Wouldn't we get (fmul x, (fadd c, c))? hfinkel: Okay, but what happens next? Wouldn't we get (fmul x, (fadd c, c))?
				arsenmAuthorUnsubmitted Not Done Reply Inline Actions The fmul 2.0, c will be constant folded into 2c which is the goal arsenm: The fmul 2.0, c will be constant folded into 2c which is the goal
				hfinkelUnsubmitted Not Done Reply Inline Actions Okay, makes sense. ;) hfinkel: Okay, makes sense. ;)
				if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
				SDLoc SL(N);
				const SDValue Two = DAG.getConstantFP(2.0, VT);
				SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1);
				return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts);
				}
				}

	// fold (fmul X, 2.0) -> (fadd X, X)			// fold (fmul X, 2.0) -> (fadd X, X)
	if (N1CFP && N1CFP->isExactlyValue(+2.0))			if (N1CFP && N1CFP->isExactlyValue(+2.0))
	return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);			return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);
	// fold (fmul X, -1.0) -> (fneg X)			// fold (fmul X, -1.0) -> (fneg X)
	if (N1CFP && N1CFP->isExactlyValue(-1.0))			if (N1CFP && N1CFP->isExactlyValue(-1.0))
	if (!LegalOperations \|\| TLI.isOperationLegal(ISD::FNEG, VT))			if (!LegalOperations \|\| TLI.isOperationLegal(ISD::FNEG, VT))
	return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);			return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

	// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)			// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
	if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,			if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
	&DAG.getTarget().Options)) {			&DAG.getTarget().Options)) {
	if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,			if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
	&DAG.getTarget().Options)) {			&DAG.getTarget().Options)) {
	// Both can be negated for free, check to see if at least one is cheaper			// Both can be negated for free, check to see if at least one is cheaper
	// negated.			// negated.
	if (LHSNeg == 2 \|\| RHSNeg == 2)			if (LHSNeg == 2 \|\| RHSNeg == 2)
	return DAG.getNode(ISD::FMUL, SDLoc(N), VT,			return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
	GetNegatedExpression(N0, DAG, LegalOperations),			GetNegatedExpression(N0, DAG, LegalOperations),
	GetNegatedExpression(N1, DAG, LegalOperations));			GetNegatedExpression(N1, DAG, LegalOperations));
	}			}
	}			}

	// If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
	if (DAG.getTarget().Options.UnsafeFPMath &&
	N1CFP && N0.getOpcode() == ISD::FMUL &&
	N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
	return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
	DAG.getNode(ISD::FMUL, SDLoc(N), VT,
	N0.getOperand(1), N1));

	return SDValue();			return SDValue();
	}			}

	SDValue DAGCombiner::visitFMA(SDNode *N) {			SDValue DAGCombiner::visitFMA(SDNode *N) {
	SDValue N0 = N->getOperand(0);			SDValue N0 = N->getOperand(0);
	SDValue N1 = N->getOperand(1);			SDValue N1 = N->getOperand(1);
	SDValue N2 = N->getOperand(2);			SDValue N2 = N->getOperand(2);
	ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);			ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
	▲ Show 20 Lines • Show All 991 Lines • Show Last 20 Lines

test/CodeGen/Mips/msa/arithmetic_float.ll

	Show First 20 Lines • Show All 270 Lines • ▼ Show 20 Lines

	define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {			define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
	; CHECK: fexp2_v4f32_2:			; CHECK: fexp2_v4f32_2:

	%1 = load <4 x float>* %a			%1 = load <4 x float>* %a
	; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)			; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
	%2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)			%2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
	%3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2			%3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2
	; CHECK-DAG: lui [[R3:\$[0-9]+]], 16384			; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
	; CHECK-DAG: fill.w [[R4:\$w[0-9]+]], [[R3]]			; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
	; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]			; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
	store <4 x float> %3, <4 x float>* %c			store <4 x float> %3, <4 x float>* %c
	; CHECK-DAG: st.w [[R5]], 0($4)			; CHECK-DAG: st.w [[R5]], 0($4)

	ret void			ret void
	; CHECK: .size fexp2_v4f32_2			; CHECK: .size fexp2_v4f32_2
	}			}

	define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {			define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
	; CHECK: .8byte 4611686018427387904
	; CHECK-NEXT: .8byte 4611686018427387904
	; CHECK: fexp2_v2f64_2:			; CHECK: fexp2_v2f64_2:

	%1 = load <2 x double>* %a			%1 = load <2 x double>* %a
	; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)			; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
	%2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)			%2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
	%3 = fmul <2 x double> <double 2.0, double 2.0>, %2			%3 = fmul <2 x double> <double 2.0, double 2.0>, %2
	; CHECK-DAG: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($			; CHECK-DAG: ldi.d [[R2:\$w[0-9]+]], 1
	; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[G_PTR]])			; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R2]]
	; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]			; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
	store <2 x double> %3, <2 x double>* %c			store <2 x double> %3, <2 x double>* %c
	; CHECK-DAG: st.d [[R4]], 0($4)			; CHECK-DAG: st.d [[R4]], 0($4)

	ret void			ret void
	; CHECK: .size fexp2_v2f64_2			; CHECK: .size fexp2_v2f64_2
	}			}

	▲ Show 20 Lines • Show All 150 Lines • Show Last 20 Lines

test/CodeGen/R600/fmul.ll

	Show First 20 Lines • Show All 42 Lines • ▼ Show 20 Lines
	define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {			define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
	%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1			%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
	%a = load <4 x float> addrspace(1) * %in			%a = load <4 x float> addrspace(1) * %in
	%b = load <4 x float> addrspace(1) * %b_ptr			%b = load <4 x float> addrspace(1) * %b_ptr
	%result = fmul <4 x float> %a, %b			%result = fmul <4 x float> %a, %b
	store <4 x float> %result, <4 x float> addrspace(1)* %out			store <4 x float> %result, <4 x float> addrspace(1)* %out
	ret void			ret void
	}			}

				; FUNC-LABEL: @test_mul_2_k
				; SI: V_MUL_F32
				; SI-NOT: V_MUL_F32
				; SI: S_ENDPGM
				define void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
				%y = fmul float %x, 2.0
				%z = fmul float %y, 3.0
				store float %z, float addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: @test_mul_2_k_inv
				; SI: V_MUL_F32
				; SI-NOT: V_MUL_F32
				; SI-NOT: V_MAD_F32
				; SI: S_ENDPGM
				define void @test_mul_2_k_inv(float addrspace(1)* %out, float %x) #0 {
				%y = fmul float %x, 3.0
				%z = fmul float %y, 2.0
				store float %z, float addrspace(1)* %out
				ret void
				}

				attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }

test/CodeGen/R600/llvm.sin.ll

	;RUN: llc -march=r600 -mcpu=redwood < %s \| FileCheck -check-prefix=EG -check-prefix=FUNC %s			; RUN: llc -march=r600 -mcpu=redwood < %s \| FileCheck -check-prefix=EG -check-prefix=FUNC %s
	;RUN: llc -march=r600 -mcpu=SI < %s \| FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s			; RUN: llc -march=r600 -mcpu=SI < %s \| FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
	;RUN: llc -march=r600 -mcpu=SI -enable-unsafe-fp-math < %s \| FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s			; RUN: llc -march=r600 -mcpu=SI -enable-unsafe-fp-math < %s \| FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s

	;FUNC-LABEL: test			; FUNC-LABEL: sin_f32
	;EG: MULADD_IEEE *			; EG: MULADD_IEEE *
	;EG: FRACT *			; EG: FRACT *
	;EG: ADD *			; EG: ADD *
	;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}			; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
	;EG-NOT: SIN			; EG-NOT: SIN
	;SI: V_MUL_F32			; SI: V_MUL_F32
	;SI: V_FRACT_F32			; SI: V_FRACT_F32
	;SI: V_SIN_F32			; SI: V_SIN_F32
	;SI-NOT: V_SIN_F32			; SI-NOT: V_SIN_F32

	define void @test(float addrspace(1)* %out, float %x) #1 {			define void @sin_f32(float addrspace(1)* %out, float %x) #1 {
	%sin = call float @llvm.sin.f32(float %x)			%sin = call float @llvm.sin.f32(float %x)
	store float %sin, float addrspace(1)* %out			store float %sin, float addrspace(1)* %out
	ret void			ret void
	}			}

	;FUNC-LABEL: testf			; FUNC-LABEL: @sin_3x_f32
	;SI-UNSAFE: 4.774			; SI-UNSAFE-NOT: V_ADD_F32
				; SI-UNSAFE: 4.774648e-01
	;SI-UNSAFE: V_MUL_F32			; SI-UNSAFE: V_MUL_F32
	;SI-SAFE: V_MUL_F32			; SI-SAFE: V_MUL_F32
	;SI-SAFE: V_MUL_F32			; SI-SAFE: V_MUL_F32
	;SI: V_FRACT_F32			; SI: V_FRACT_F32
	;SI: V_SIN_F32			; SI: V_SIN_F32
	;SI-NOT: V_SIN_F32			; SI-NOT: V_SIN_F32
				define void @sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
	define void @testf(float addrspace(1)* %out, float %x) #1 {
	%y = fmul float 3.0, %x			%y = fmul float 3.0, %x
	%sin = call float @llvm.sin.f32(float %y)			%sin = call float @llvm.sin.f32(float %y)
	store float %sin, float addrspace(1)* %out			store float %sin, float addrspace(1)* %out
	ret void			ret void
	}			}

	;FUNC-LABEL: testv			; FUNC-LABEL: @sin_2x_f32
				; SI-UNSAFE-NOT: V_ADD_F32
				; SI-UNSAFE: 3.183099e-01
				; SI-UNSAFE: V_MUL_F32
				; SI-SAFE: V_ADD_F32
				; SI-SAFE: V_MUL_F32
				; SI: V_FRACT_F32
				; SI: V_SIN_F32
				; SI-NOT: V_SIN_F32
				define void @sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
				%y = fmul float 2.0, %x
				%sin = call float @llvm.sin.f32(float %y)
				store float %sin, float addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: @test_2sin_f32
				; SI-UNSAFE: 3.183099e-01
				; SI-UNSAFE: V_MUL_F32
				; SI-SAFE: V_ADD_F32
				; SI-SAFE: V_MUL_F32
				; SI: V_FRACT_F32
				; SI: V_SIN_F32
				; SI-NOT: V_SIN_F32
				define void @test_2sin_f32(float addrspace(1)* %out, float %x) #1 {
				%y = fmul float 2.0, %x
				%sin = call float @llvm.sin.f32(float %y)
				store float %sin, float addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: @sin_v4f32
	;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}			; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
	;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}			; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
	;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}			; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
	;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}			; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
	;EG-NOT: SIN			; EG-NOT: SIN
	;SI: V_SIN_F32			; SI: V_SIN_F32
	;SI: V_SIN_F32			; SI: V_SIN_F32
	;SI: V_SIN_F32			; SI: V_SIN_F32
	;SI: V_SIN_F32			; SI: V_SIN_F32
	;SI-NOT: V_SIN_F32			; SI-NOT: V_SIN_F32

	define void @testv(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {			define void @sin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
	%sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)			%sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
	store <4 x float> %sin, <4 x float> addrspace(1)* %out			store <4 x float> %sin, <4 x float> addrspace(1)* %out
	ret void			ret void
	}			}

	declare float @llvm.sin.f32(float) readnone			declare float @llvm.sin.f32(float) readnone
	declare <4 x float> @llvm.sin.v4f32(<4 x float>) readnone			declare <4 x float> @llvm.sin.v4f32(<4 x float>) readnone

	attributes #0 = { "ShaderType"="0" }			attributes #0 = { "ShaderType"="0" }

test/CodeGen/X86/fmul-combines.ll

This file was added.

				; RUN: llc -march=x86-64 < %s \| FileCheck %s

				; CHECK-LABEL: fmul2_f32:
				; CHECK: addss %xmm0, %xmm0
				define float @fmul2_f32(float %x) {
				%y = fmul float %x, 2.0
				ret float %y
				}

				; fmul 2.0, x -> fadd x, x for vectors.

				; CHECK-LABEL: fmul2_v4f32:
				; CHECK: addps %xmm0, %xmm0
				; CHECK-NEXT: retq
				define <4 x float> @fmul2_v4f32(<4 x float> %x) {
				%y = fmul <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
				ret <4 x float> %y
				}

				; CHECK-LABEL: constant_fold_fmul_v4f32:
				; CHECK: movaps
				; CHECK-NEXT: ret
				define <4 x float> @constant_fold_fmul_v4f32(<4 x float> %x) {
				%y = fmul <4 x float> <float 4.0, float 4.0, float 4.0, float 4.0>, <float 2.0, float 2.0, float 2.0, float 2.0>
				ret <4 x float> %y
				}

				; CHECK-LABEL: fmul0_v4f32:
				; CHECK: xorps %xmm0, %xmm0
				; CHECK-NEXT: retq
				define <4 x float> @fmul0_v4f32(<4 x float> %x) #0 {
				%y = fmul <4 x float> %x, <float 0.0, float 0.0, float 0.0, float 0.0>
				ret <4 x float> %y
				}

				; CHECK-LABEL: fmul_c2_c4_v4f32:
				; CHECK-NOT: addps
				; CHECK: mulps
				; CHECK-NOT: mulps
				; CHECK-NEXT: ret
				define <4 x float> @fmul_c2_c4_v4f32(<4 x float> %x) #0 {
				%y = fmul <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
				%z = fmul <4 x float> %y, <float 4.0, float 4.0, float 4.0, float 4.0>
				ret <4 x float> %z
				}

				; CHECK-LABEL: fmul_c3_c4_v4f32:
				; CHECK-NOT: addps
				; CHECK: mulps
				; CHECK-NOT: mulps
				; CHECK-NEXT: ret
				define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) #0 {
				%y = fmul <4 x float> %x, <float 3.0, float 3.0, float 3.0, float 3.0>
				%z = fmul <4 x float> %y, <float 4.0, float 4.0, float 4.0, float 4.0>
				ret <4 x float> %z
				}

				; CHECK-LABEL: fmul_c2_c4_f32:
				; CHECK-NOT: addss
				; CHECK: mulss
				; CHECK-NOT: mulss
				; CHECK-NEXT: ret
				define float @fmul_c2_c4_f32(float %x) #0 {
				%y = fmul float %x, 2.0
				%z = fmul float %y, 4.0
				ret float %z
				}

				; CHECK-LABEL: fmul_c3_c4_f32:
				; CHECK-NOT: addss
				; CHECK: mulss
				; CHECK-NOT: mulss
				; CHECK-NEXT: ret
				define float @fmul_c3_c4_f32(float %x) #0 {
				%y = fmul float %x, 3.0
				%z = fmul float %y, 4.0
				ret float %z
				}

				; CHECK-LABEL: fmul_fneg_fneg_f32:
				; CHECK: mulss %xmm1, %xmm0
				; CHECK-NEXT: retq
				define float @fmul_fneg_fneg_f32(float %x, float %y) {
				%x.neg = fsub float -0.0, %x
				%y.neg = fsub float -0.0, %y
				%mul = fmul float %x.neg, %y.neg
				ret float %mul
				}
				; CHECK-LABEL: fmul_fneg_fneg_v4f32:
				; CHECK: mulps %xmm1, %xmm0
				; CHECK-NEXT: retq
				define <4 x float> @fmul_fneg_fneg_v4f32(<4 x float> %x, <4 x float> %y) {
				%x.neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %x
				%y.neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %y
				%mul = fmul <4 x float> %x.neg, %y.neg
				ret <4 x float> %mul
				}

				attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }