This is an archive of the discontinued LLVM Phabricator instance.

I think we can fold (fmul X, -1.0) -> (fsub -0.0, X) unconditionally; at least, Alive2 is happy with it. If we can do that, we can just let the existing fsub code figure out the rest. Does that sound like a plan?

In D109446#2990532, @efriedma wrote:

I think we can fold (fmul X, -1.0) -> (fsub -0.0, X) unconditionally; at least, Alive2 is happy with it. If we can do that, we can just let the existing fsub code figure out the rest. Does that sound like a plan?

That works (although it is a code size regression for AMDGPU). We have other combines like this that we have to undo later for this reason anyway.

Use fsub instead

Herald added a subscriber: nemanjai. · View Herald Transcript · Sep 13 2021, 9:31 AM

Harbormaster completed remote builds in B123693: Diff 372280. · Sep 13 2021, 10:56 AM

LGTM

This revision is now accepted and ready to land. · Sep 13 2021, 11:16 AM

54d755a034362814bd7a0b90f172cbba39729cf4

Revision Contents

Path

Size

llvm/

lib/

CodeGen/

SelectionDAG/

DAGCombiner.cpp

11 lines

test/

CodeGen/

AArch64/

arm64-fmadd.ll

6 lines

fp16_intrinsic_scalar_3op.ll

6 lines

AMDGPU/

fneg-combines.ll

45 lines

ARM/

fnegs.ll

2 lines

Hexagon/

opt-fneg.ll

16 lines

PowerPC/

combine-fneg.ll

8 lines

Diff 372280

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 13,998 Lines • ▼ Show 20 Lines	if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);		return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
}		}
}		}

// fold (fmul X, 2.0) -> (fadd X, X)		// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))		if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, DL, VT, N0, N0);		return DAG.getNode(ISD::FADD, DL, VT, N0, N0);

// fold (fmul X, -1.0) -> (fneg X)		// fold (fmul X, -1.0) -> (fsub -0.0, X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))		if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
if (!LegalOperations \|\| TLI.isOperationLegal(ISD::FNEG, VT))		if (!LegalOperations \|\| TLI.isOperationLegal(ISD::FSUB, VT)) {
return DAG.getNode(ISD::FNEG, DL, VT, N0);		return DAG.getNode(ISD::FSUB, DL, VT,
		Lint: Pre-merge checks: clang-format: please reformat the code. Suggested change: replace "return DAG.getNode(ISD::FSUB, DL, VT," / "DAG.getConstantFP(-0.0, DL, VT), N0, Flags);" with "return DAG.getNode(ISD::FSUB, DL, VT, DAG.getConstantFP(-0.0, DL, VT), N0," / "Flags);"
		DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
		}
		}

// -N0 * -N1 --> N0 * N1		// -N0 * -N1 --> N0 * N1
TargetLowering::NegatibleCost CostN0 =		TargetLowering::NegatibleCost CostN0 =
TargetLowering::NegatibleCost::Expensive;		TargetLowering::NegatibleCost::Expensive;
TargetLowering::NegatibleCost CostN1 =		TargetLowering::NegatibleCost CostN1 =
TargetLowering::NegatibleCost::Expensive;		TargetLowering::NegatibleCost::Expensive;
SDValue NegN0 =		SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);		TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
▲ Show 20 Lines • Show All 9,596 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/arm64-fmadd.ll

	Show First 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	}			}

	define double @fms64(double %a, double %b, double %c) nounwind readnone ssp {			define double @fms64(double %a, double %b, double %c) nounwind readnone ssp {
	; CHECK-LABEL: fms64:			; CHECK-LABEL: fms64:
	; CHECK: // %bb.0: // %entry			; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: fmsub d0, d0, d1, d2			; CHECK-NEXT: fmsub d0, d0, d1, d2
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	entry:			entry:
	%mul = fmul double %b, -1.000000e+00			%mul = fneg double %b
	%0 = tail call double @llvm.fma.f64(double %a, double %mul, double %c)			%0 = tail call double @llvm.fma.f64(double %a, double %mul, double %c)
	ret double %0			ret double %0
	}			}

	define double @fms64_com(double %a, double %b, double %c) nounwind readnone ssp {			define double @fms64_com(double %a, double %b, double %c) nounwind readnone ssp {
	; CHECK-LABEL: fms64_com:			; CHECK-LABEL: fms64_com:
	; CHECK: // %bb.0: // %entry			; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: fmsub d0, d1, d0, d2			; CHECK-NEXT: fmsub d0, d1, d0, d2
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	entry:			entry:
	%mul = fmul double %b, -1.000000e+00			%mul = fneg double %b
	%0 = tail call double @llvm.fma.f64(double %mul, double %a, double %c)			%0 = tail call double @llvm.fma.f64(double %mul, double %a, double %c)
	ret double %0			ret double %0
	}			}

	define double @fnms64(double %a, double %b, double %c) nounwind readnone ssp {			define double @fnms64(double %a, double %b, double %c) nounwind readnone ssp {
	; CHECK-LABEL: fnms64:			; CHECK-LABEL: fnms64:
	; CHECK: // %bb.0: // %entry			; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: fnmsub d0, d0, d1, d2			; CHECK-NEXT: fnmsub d0, d0, d1, d2
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	entry:			entry:
	%mul = fmul double %c, -1.000000e+00			%mul = fneg double %c
	%0 = tail call double @llvm.fma.f64(double %a, double %b, double %mul)			%0 = tail call double @llvm.fma.f64(double %a, double %b, double %mul)
	ret double %0			ret double %0
	}			}

	; This would crash while trying getNegatedExpression().			; This would crash while trying getNegatedExpression().

	define float @negated_constant(float %x) {			define float @negated_constant(float %x) {
	; CHECK-LABEL: negated_constant:			; CHECK-LABEL: negated_constant:
	Show All 16 Lines

llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll

Show All 16 Lines	entry:
%mul = fmul half %0, -1.000000e+00		%mul = fmul half %0, -1.000000e+00
ret half %mul		ret half %mul
}		}

define half @fms16(half %a, half %b, half %c) nounwind readnone ssp {		define half @fms16(half %a, half %b, half %c) nounwind readnone ssp {
; CHECK-LABEL: fms16:		; CHECK-LABEL: fms16:
; CHECK: fmsub h0, h0, h1, h2		; CHECK: fmsub h0, h0, h1, h2
entry:		entry:
%mul = fmul half %b, -1.000000e+00		%mul = fneg half %b
%0 = tail call half @llvm.fma.f16(half %a, half %mul, half %c)		%0 = tail call half @llvm.fma.f16(half %a, half %mul, half %c)
ret half %0		ret half %0
}		}

define half @fms16_com(half %a, half %b, half %c) nounwind readnone ssp {		define half @fms16_com(half %a, half %b, half %c) nounwind readnone ssp {
; CHECK-LABEL: fms16_com:		; CHECK-LABEL: fms16_com:
; CHECK: fmsub h0, h1, h0, h2		; CHECK: fmsub h0, h1, h0, h2
; CHECK-NEXT: ret		; CHECK-NEXT: ret
entry:		entry:
%mul = fmul half %b, -1.000000e+00		%mul = fneg half %b
%0 = tail call half @llvm.fma.f16(half %mul, half %a, half %c)		%0 = tail call half @llvm.fma.f16(half %mul, half %a, half %c)
ret half %0		ret half %0
}		}

define half @fnms16(half %a, half %b, half %c) nounwind readnone ssp {		define half @fnms16(half %a, half %b, half %c) nounwind readnone ssp {
; CHECK-LABEL: fnms16:		; CHECK-LABEL: fnms16:
; CHECK: fnmsub h0, h0, h1, h2		; CHECK: fnmsub h0, h0, h1, h2
; CHECK-NEXT: ret		; CHECK-NEXT: ret
entry:		entry:
%mul = fmul half %c, -1.000000e+00		%mul = fneg half %c
%0 = tail call half @llvm.fma.f16(half %a, half %b, half %mul)		%0 = tail call half @llvm.fma.f16(half %a, half %b, half %mul)
ret half %0		ret half %0
}		}

define half @test_fmsub(half %a, half %b, half %c) {		define half @test_fmsub(half %a, half %b, half %c) {
; CHECK-LABEL: test_fmsub:		; CHECK-LABEL: test_fmsub:
; CHECK: fmsub h0, h0, h1, h2		; CHECK: fmsub h0, h0, h1, h2
; CHECK-NEXT: ret		; CHECK-NEXT: ret
Show All 39 Lines

llvm/test/CodeGen/AMDGPU/fneg-combines.ll

	Show First 20 Lines • Show All 2,591 Lines • ▼ Show 20 Lines
	bb:			bb:
	%i3 = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %arg1, <2 x float> %arg2, <2 x float> zeroinitializer)			%i3 = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %arg1, <2 x float> %arg2, <2 x float> zeroinitializer)
	%i4 = fadd fast <2 x float> %i3, %arg			%i4 = fadd fast <2 x float> %i3, %arg
	%i5 = fneg <2 x float> %i4			%i5 = fneg <2 x float> %i4
	%i6 = fmul fast <2 x float> %i5, %arg2			%i6 = fmul fast <2 x float> %i5, %arg2
	ret <2 x float> %i6			ret <2 x float> %i6
	}			}

				; This expects denormal flushing, so can't turn this fmul into fneg
				; TODO: Keeping this as fmul saves encoding size
				; GCN-LABEL: {{^}}nnan_fmul_neg1_to_fneg:
				; GCN: v_sub_f32_e32 [[TMP:v[0-9]+]], 0x80000000, v0
				; GCN-NEXT: v_mul_f32_e32 v0, [[TMP]], v1
				define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 {
				%mul = fmul float %x, -1.0
				%add = fmul nnan float %mul, %y
				ret float %add
				}

				; It's legal to turn this fmul into an fneg since denormals are
				; preserved and we know an snan can't happen from the flag.
				; GCN-LABEL: {{^}}denormal_fmul_neg1_to_fneg:
				; GCN: v_mul_f32_e64 v0, -v0, v1
				; GCN-NEXT: s_setpc_b64
				define float @denormal_fmul_neg1_to_fneg(float %x, float %y) {
				%mul = fmul nnan float %x, -1.0
				%add = fmul float %mul, %y
				ret float %add
				}

				; know the source can't be an snan
				; GCN-LABEL: {{^}}denorm_snan_fmul_neg1_to_fneg:
				; GCN: v_mul_f32_e64 [[TMP:v[0-9]+]], v0, -v0
				; GCN: v_mul_f32_e32 v0, [[TMP]], v1
				; GCN-NEXT: s_setpc_b64
				define float @denorm_snan_fmul_neg1_to_fneg(float %x, float %y) {
				%canonical = fmul float %x, %x
				%mul = fmul float %canonical, -1.0
				%add = fmul float %mul, %y
				ret float %add
				}

				; GCN-LABEL: {{^}}flush_snan_fmul_neg1_to_fneg:
				; GCN: v_mul_f32_e32 [[TMP0:v[0-9]+]], 1.0, v0
				; GCN: v_sub_f32_e32 [[TMP1:v[0-9]+]], 0x80000000, [[TMP0]]
				; GCN-NEXT: v_mul_f32_e32 v0, [[TMP1]], v1
				define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 {
				%quiet = call float @llvm.canonicalize.f32(float %x)
				%mul = fmul float %quiet, -1.0
				%add = fmul float %mul, %y
				ret float %add
				}

	declare i32 @llvm.amdgcn.workitem.id.x() #1			declare i32 @llvm.amdgcn.workitem.id.x() #1
	declare float @llvm.fma.f32(float, float, float) #1			declare float @llvm.fma.f32(float, float, float) #1
	declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)			declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
	declare float @llvm.fmuladd.f32(float, float, float) #1			declare float @llvm.fmuladd.f32(float, float, float) #1
	declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1			declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
	declare float @llvm.sin.f32(float) #1			declare float @llvm.sin.f32(float) #1
	declare float @llvm.trunc.f32(float) #1			declare float @llvm.trunc.f32(float) #1
	declare float @llvm.round.f32(float) #1			declare float @llvm.round.f32(float) #1
	Show All 20 Lines

llvm/test/CodeGen/ARM/fnegs.ll

	Show First 20 Lines • Show All 43 Lines • ▼ Show 20 Lines
	; CORTEXA8U: vneg.f32 d{{.}}, d{{.}}			; CORTEXA8U: vneg.f32 d{{.}}, d{{.}}

	; CORTEXA9-LABEL: test1:			; CORTEXA9-LABEL: test1:
	; CORTEXA9: vneg.f32 s{{.}}, s{{.}}			; CORTEXA9: vneg.f32 s{{.}}, s{{.}}

	define float @test2(float* %a) {			define float @test2(float* %a) {
	entry:			entry:
	%0 = load float, float* %a, align 4 ; <float> [#uses=2]			%0 = load float, float* %a, align 4 ; <float> [#uses=2]
	%1 = fmul float -1.000000e+00, %0 ; <float> [#uses=2]			%1 = fneg float %0 ; <float> [#uses=2]
	%2 = fpext float %1 to double ; <double> [#uses=1]			%2 = fpext float %1 to double ; <double> [#uses=1]
	%3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1]			%3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1]
	%retval = select i1 %3, float %1, float %0 ; <float> [#uses=1]			%retval = select i1 %3, float %1, float %0 ; <float> [#uses=1]
	ret float %retval			ret float %retval
	}			}
	; VFP2-LABEL: test2:			; VFP2-LABEL: test2:
	; VFP2: vneg.f32 s{{.}}, s{{.}}			; VFP2: vneg.f32 s{{.}}, s{{.}}

	▲ Show 20 Lines • Show All 61 Lines • Show Last 20 Lines

llvm/test/CodeGen/Hexagon/opt-fneg.ll

	; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s \| FileCheck %s			; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s \| FileCheck %s
	; Optimize fneg to togglebit in V5.			; Optimize fneg to togglebit in V5.

	define float @foo(float %x) nounwind {			define float @foo(float %x) nounwind {
	entry:			entry:
				; CHECK-LABEL: foo:
	; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)			; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)
	%x.addr = alloca float, align 4			%x.addr = alloca float, align 4
	store float %x, float* %x.addr, align 4			store float %x, float* %x.addr, align 4
	%0 = load float, float* %x.addr, align 4			%0 = load float, float* %x.addr, align 4
	%sub = fsub float -0.000000e+00, %0			%sub = fsub float -0.000000e+00, %0
	ret float %sub			ret float %sub
	}			}

	define float @bar(float %x) nounwind {			define float @bar(float %x) nounwind {
	entry:			entry:
				; CHECK-LABEL: bar:
	; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)			; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)
	%sub = fsub float -0.000000e+00, %x			%sub = fsub float -0.000000e+00, %x
	ret float %sub			ret float %sub
	}			}

	define float @baz(float %x) nounwind {			define float @baz0(float %x) nounwind {
	entry:			entry:
				; CHECK-LABEL: baz0:
	; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)			; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)
	%conv1 = fmul float %x, -1.000000e+00			%conv1 = fmul nnan float %x, -1.000000e+00
				ret float %conv1
				}

				define float @baz1(float %x) nounwind {
				entry:
				%not.nan = fadd nnan float %x, %x
				; CHECK-LABEL: baz1:
				; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)
				%conv1 = fmul float %not.nan, -1.000000e+00
	ret float %conv1			ret float %conv1
	}			}

llvm/test/CodeGen/PowerPC/combine-fneg.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown \| FileCheck %s			; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown \| FileCheck %s

	; Infinite loop identified in D62963.			; Infinite loop identified in D62963.
	define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {			define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
	; CHECK-LABEL: fneg_fdiv_splat:			; CHECK-LABEL: fneg_fdiv_splat:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha			; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
	; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1			; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
	; CHECK-NEXT: xxspltd 0, 1, 0			; CHECK-NEXT: xxspltd 0, 1, 0
	; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l			; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
	; CHECK-NEXT: lxvd2x 1, 0, 3			; CHECK-NEXT: lxvd2x 1, 0, 3
	; CHECK-NEXT: xvredp 2, 0			; CHECK-NEXT: xvredp 2, 0
	; CHECK-NEXT: xxswapd 1, 1			; CHECK-NEXT: xxswapd 1, 1
	; CHECK-NEXT: xxlor 3, 1, 1			; CHECK-NEXT: xxlor 3, 1, 1
	; CHECK-NEXT: xvnmsubadp 3, 0, 2			; CHECK-NEXT: xvmaddadp 3, 0, 2
	; CHECK-NEXT: xvmaddadp 2, 2, 3			; CHECK-NEXT: xvnmsubadp 2, 2, 3
	; CHECK-NEXT: xvnmsubadp 1, 0, 2			; CHECK-NEXT: xvmaddadp 1, 0, 2
	; CHECK-NEXT: xvnmaddadp 2, 2, 1			; CHECK-NEXT: xvmsubadp 2, 2, 1
	; CHECK-NEXT: xvmuldp 34, 34, 2			; CHECK-NEXT: xvmuldp 34, 34, 2
	; CHECK-NEXT: xvmuldp 35, 35, 2			; CHECK-NEXT: xvmuldp 35, 35, 2
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
	entry:			entry:
	%splat.splatinsert = insertelement <4 x double> undef, double %a0, i32 0			%splat.splatinsert = insertelement <4 x double> undef, double %a0, i32 0
	%splat.splat = shufflevector <4 x double> %splat.splatinsert, <4 x double> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x double> %splat.splatinsert, <4 x double> undef, <4 x i32> zeroinitializer
	%div = fdiv contract reassoc nsz arcp ninf <4 x double> %a1, %splat.splat			%div = fdiv contract reassoc nsz arcp ninf <4 x double> %a1, %splat.splat
	%sub = fsub contract reassoc nsz <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, %div			%sub = fsub contract reassoc nsz <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, %div
	ret <4 x double> %sub			ret <4 x double> %sub
	}			}

This is an archive of the discontinued LLVM Phabricator instance.

DAG: Fix incorrect folding of fmul -1 to fneg · Closed · Public

Details

Diff Detail

Event Timeline