This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/CodeGen/SelectionDAG/
-
CodeGen/
-
SelectionDAG/
-
DAGCombiner.cpp
-
test/CodeGen/AArch64/
-
CodeGen/
-
AArch64/
-
fdiv-combine.ll

Differential D118343

[DAGCombiner] Fix invalid size request in combineRepeatedFPDivisors
ClosedPublic

Authored by c-rhodes on Jan 27 2022, 3:15 AM.

Download Raw Diff

Details

Reviewers

sdesmalen
david-arm
dmgreen

Commits

rG5d089d9a832c: [DAGCombiner] Fix invalid size request in combineRepeatedFPDivisors

Summary

If we have a vector FP division with a splatted divisor, use
getVectorMinNumElements when scaling the number of uses by the splat factor.

For AArch64 the combine kicks in for the <vscale x 4 x float> case since it's
above the fdiv threshold (3) when scaling num uses by splat factor, but the
codegen is worse (splat + vector fdiv + vector fmul) than the <vscale x 2 x
double> case (splat + vector fdiv).

If the combine could be converted into a scalar FP division by
scalarizeBinOpOfSplats it may be cheaper, but it looks like this is predicated
on the isExtractVecEltCheap TLI function, which is implemented for x86 but not
AArch64. Perhaps for now combineRepeatedFPDivisors should only scale the number
of uses by the splat factor if the division can be converted into a scalar op.

Diff Detail

Unit Tests Failed

	Time	Test
	4,940 ms	x64 debian > libarcher.races::critical-unrelated.c
	4,140 ms	x64 debian > libarcher.races::lock-nested-unrelated.c
	4,160 ms	x64 debian > libarcher.races::lock-unrelated.c
	4,760 ms	x64 debian > libarcher.races::parallel-simple.c
	4,480 ms	x64 debian > libarcher.races::task-dependency.c
		View Full Test Results (10 Failed)

Event Timeline

c-rhodes created this revision. Jan 27 2022, 3:15 AM

Herald added subscribers: ecnelises, pengfei, hiraditya, kristof.beyls. · View Herald Transcript · Jan 27 2022, 3:15 AM

c-rhodes requested review of this revision. Jan 27 2022, 3:15 AM

Herald added a project: Restricted Project. · View Herald Transcript · Jan 27 2022, 3:15 AM

Combine test with existing llvm/test/CodeGen/AArch64/fdiv-combine.ll

Added some more splat tests (including NEON). Will post a follow-up patch to prevent scaling of num uses by splat factor unless division can be converted into scalar op.

c-rhodes added a child revision: D118356: [DAGCombiner] Limit scaling of repeated FP divisor by splat factor. Jan 27 2022, 5:47 AM

Harbormaster completed remote builds in B146004: Diff 403622. Jan 27 2022, 11:13 AM

LGTM!

This revision is now accepted and ready to land. Jan 28 2022, 5:17 AM

This revision was landed with ongoing or failed builds. Jan 28 2022, 9:01 AM

Closed by commit rG5d089d9a832c: [DAGCombiner] Fix invalid size request in combineRepeatedFPDivisors (authored by c-rhodes). · Explain Why

This revision was automatically updated to reflect the committed changes.

c-rhodes added a commit: rG5d089d9a832c: [DAGCombiner] Fix invalid size request in combineRepeatedFPDivisors.

Revision Contents

Path

Size

llvm/

lib/

CodeGen/

SelectionDAG/

DAGCombiner.cpp

2 lines

test/

CodeGen/

AArch64/

fdiv-combine.ll

178 lines

Diff 403622

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 14,531 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
// possibly be enough uses of the divisor to make the transform worthwhile.		// possibly be enough uses of the divisor to make the transform worthwhile.
unsigned MinUses = TLI.combineRepeatedFPDivisors();		unsigned MinUses = TLI.combineRepeatedFPDivisors();

// For splat vectors, scale the number of uses by the splat factor. If we can		// For splat vectors, scale the number of uses by the splat factor. If we can
// convert the division into a scalar op, that will likely be much faster.		// convert the division into a scalar op, that will likely be much faster.
unsigned NumElts = 1;		unsigned NumElts = 1;
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
if (VT.isVector() && DAG.isSplatValue(N1))		if (VT.isVector() && DAG.isSplatValue(N1))
NumElts = VT.getVectorNumElements();		NumElts = VT.getVectorMinNumElements();

if (!MinUses \|\| (N1->use_size() * NumElts) < MinUses)		if (!MinUses \|\| (N1->use_size() * NumElts) < MinUses)
return SDValue();		return SDValue();

// Find all FDIV users of the same divisor.		// Find all FDIV users of the same divisor.
// Use a set because duplicates may be present in the user list.		// Use a set because duplicates may be present in the user list.
SetVector<SDNode *> Users;		SetVector<SDNode *> Users;
for (auto *U : N1->uses()) {		for (auto *U : N1->uses()) {
▲ Show 20 Lines • Show All 9,551 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/fdiv-combine.ll

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=aarch64-unknown-unknown < %s \| FileCheck %s			; RUN: llc -mtriple=aarch64-unknown-unknown < %s \| FileCheck %s

	; Following test cases check:			; Following test cases check:
	; a / D; b / D; c / D;			; a / D; b / D; c / D;
	; =>			; =>
	; recip = 1.0 / D; a * recip; b * recip; c * recip;			; recip = 1.0 / D; a * recip; b * recip; c * recip;
	define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {			define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
	; CHECK-LABEL: three_fdiv_float:			; CHECK-LABEL: three_fdiv_float:
	; CHECK: fdiv s			; CHECK: // %bb.0:
	; CHECK-NOT: fdiv			; CHECK-NEXT: fmov s4, #1.00000000
	; CHECK: fmul			; CHECK-NEXT: fdiv s4, s4, s0
	; CHECK: fmul			; CHECK-NEXT: fmul s0, s1, s4
	; CHECK: fmul			; CHECK-NEXT: fmul s1, s2, s4
				; CHECK-NEXT: fmul s2, s3, s4
				; CHECK-NEXT: b foo_3f
	%div = fdiv float %a, %D			%div = fdiv float %a, %D
	%div1 = fdiv float %b, %D			%div1 = fdiv float %b, %D
	%div2 = fdiv float %c, %D			%div2 = fdiv float %c, %D
	tail call void @foo_3f(float %div, float %div1, float %div2)			tail call void @foo_3f(float %div, float %div1, float %div2)
	ret void			ret void
	}			}

	define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {			define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
	; CHECK-LABEL: three_fdiv_double:			; CHECK-LABEL: three_fdiv_double:
	; CHECK: fdiv d			; CHECK: // %bb.0:
	; CHECK-NOT: fdiv			; CHECK-NEXT: fmov d4, #1.00000000
	; CHECK: fmul			; CHECK-NEXT: fdiv d4, d4, d0
	; CHECK: fmul			; CHECK-NEXT: fmul d0, d1, d4
	; CHECK: fmul			; CHECK-NEXT: fmul d1, d2, d4
				; CHECK-NEXT: fmul d2, d3, d4
				; CHECK-NEXT: b foo_3d
	%div = fdiv double %a, %D			%div = fdiv double %a, %D
	%div1 = fdiv double %b, %D			%div1 = fdiv double %b, %D
	%div2 = fdiv double %c, %D			%div2 = fdiv double %c, %D
	tail call void @foo_3d(double %div, double %div1, double %div2)			tail call void @foo_3d(double %div, double %div1, double %div2)
	ret void			ret void
	}			}

	define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {			define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
	; CHECK-LABEL: three_fdiv_4xfloat:			; CHECK-LABEL: three_fdiv_4xfloat:
	; CHECK: fdiv v			; CHECK: // %bb.0:
	; CHECK-NOT: fdiv			; CHECK-NEXT: fmov v4.4s, #1.00000000
	; CHECK: fmul			; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
	; CHECK: fmul			; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
	; CHECK: fmul			; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
				; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
				; CHECK-NEXT: b foo_3_4xf
	%div = fdiv <4 x float> %a, %D			%div = fdiv <4 x float> %a, %D
	%div1 = fdiv <4 x float> %b, %D			%div1 = fdiv <4 x float> %b, %D
	%div2 = fdiv <4 x float> %c, %D			%div2 = fdiv <4 x float> %c, %D
	tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)			tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
	ret void			ret void
	}			}

	define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {			define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
	; CHECK-LABEL: three_fdiv_2xdouble:			; CHECK-LABEL: three_fdiv_2xdouble:
	; CHECK: fdiv v			; CHECK: // %bb.0:
	; CHECK-NOT: fdiv			; CHECK-NEXT: fmov v4.2d, #1.00000000
	; CHECK: fmul			; CHECK-NEXT: fdiv v4.2d, v4.2d, v0.2d
	; CHECK: fmul			; CHECK-NEXT: fmul v0.2d, v1.2d, v4.2d
	; CHECK: fmul			; CHECK-NEXT: fmul v1.2d, v2.2d, v4.2d
				; CHECK-NEXT: fmul v2.2d, v3.2d, v4.2d
				; CHECK-NEXT: b foo_3_2xd
	%div = fdiv <2 x double> %a, %D			%div = fdiv <2 x double> %a, %D
	%div1 = fdiv <2 x double> %b, %D			%div1 = fdiv <2 x double> %b, %D
	%div2 = fdiv <2 x double> %c, %D			%div2 = fdiv <2 x double> %c, %D
	tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)			tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
	ret void			ret void
	}			}

	; Following test cases check we never combine two FDIVs if neither of them			; Following test cases check we never combine two FDIVs if neither of them
	; calculates a reciprocal.			; calculates a reciprocal.
	define void @two_fdiv_float(float %D, float %a, float %b) #0 {			define void @two_fdiv_float(float %D, float %a, float %b) #0 {
	; CHECK-LABEL: two_fdiv_float:			; CHECK-LABEL: two_fdiv_float:
	; CHECK: fdiv s			; CHECK: // %bb.0:
	; CHECK: fdiv s			; CHECK-NEXT: fdiv s3, s1, s0
	; CHECK-NOT: fmul			; CHECK-NEXT: fdiv s1, s2, s0
				; CHECK-NEXT: fmov s0, s3
				; CHECK-NEXT: b foo_2f
	%div = fdiv float %a, %D			%div = fdiv float %a, %D
	%div1 = fdiv float %b, %D			%div1 = fdiv float %b, %D
	tail call void @foo_2f(float %div, float %div1)			tail call void @foo_2f(float %div, float %div1)
	ret void			ret void
	}			}

	define void @two_fdiv_double(double %D, double %a, double %b) #0 {			define void @two_fdiv_double(double %D, double %a, double %b) #0 {
	; CHECK-LABEL: two_fdiv_double:			; CHECK-LABEL: two_fdiv_double:
	; CHECK: fdiv d			; CHECK: // %bb.0:
	; CHECK: fdiv d			; CHECK-NEXT: fdiv d3, d1, d0
	; CHECK-NOT: fmul			; CHECK-NEXT: fdiv d1, d2, d0
				; CHECK-NEXT: fmov d0, d3
				; CHECK-NEXT: b foo_2d
	%div = fdiv double %a, %D			%div = fdiv double %a, %D
	%div1 = fdiv double %b, %D			%div1 = fdiv double %b, %D
	tail call void @foo_2d(double %div, double %div1)			tail call void @foo_2d(double %div, double %div1)
	ret void			ret void
	}			}

				define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
				; CHECK-LABEL: splat_three_fdiv_4xfloat:
				; CHECK: // %bb.0:
				; CHECK-NEXT: fmov v4.4s, #1.00000000
				; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
				; CHECK-NEXT: dup v0.4s, v0.s[0]
				; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
				; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
				; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
				; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
				; CHECK-NEXT: b foo_3_4xf
				%D.ins = insertelement <4 x float> poison, float %D, i64 0
				%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
				%div = fdiv <4 x float> %a, %splat
				%div1 = fdiv <4 x float> %b, %splat
				%div2 = fdiv <4 x float> %c, %splat
				tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
				ret void
				}

				define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
				; CHECK-LABEL: splat_fdiv_v4f32:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: fmov v2.4s, #1.00000000
				; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
				; CHECK-NEXT: dup v0.4s, v0.s[0]
				; CHECK-NEXT: fdiv v0.4s, v2.4s, v0.4s
				; CHECK-NEXT: fmul v0.4s, v1.4s, v0.4s
				; CHECK-NEXT: ret
				entry:
				%D.ins = insertelement <4 x float> poison, float %D, i64 0
				%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
				%div = fdiv <4 x float> %a, %splat
				ret <4 x float> %div
				}

				define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
				; CHECK-LABEL: splat_fdiv_nxv4f32:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
				; CHECK-NEXT: fmov z2.s, #1.00000000
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: mov z0.s, s0
				; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z2.s
				; CHECK-NEXT: fmul z0.s, z1.s, z0.s
				; CHECK-NEXT: ret
				entry:
				%D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
				%splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
				%div = fdiv <vscale x 4 x float> %a, %splat
				ret <vscale x 4 x float> %div
				}

				define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
				; CHECK-LABEL: splat_three_fdiv_nxv4f32:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
				; CHECK-NEXT: fmov z4.s, #1.00000000
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: mov z0.s, s0
				; CHECK-NEXT: fdiv z4.s, p0/m, z4.s, z0.s
				; CHECK-NEXT: fmul z0.s, z1.s, z4.s
				; CHECK-NEXT: fmul z1.s, z2.s, z4.s
				; CHECK-NEXT: fmul z2.s, z3.s, z4.s
				; CHECK-NEXT: b foo_3_nxv4f32
				entry:
				%D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
				%splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
				%div = fdiv <vscale x 4 x float> %a, %splat
				%div1 = fdiv <vscale x 4 x float> %b, %splat
				%div2 = fdiv <vscale x 4 x float> %c, %splat
				tail call void @foo_3_nxv4f32(<vscale x 4 x float> %div, <vscale x 4 x float> %div1, <vscale x 4 x float> %div2)
				ret void
				}

				define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #1 {
				; CHECK-LABEL: splat_fdiv_nxv2f64:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: mov z0.d, d0
				; CHECK-NEXT: fdivr z0.d, p0/m, z0.d, z1.d
				; CHECK-NEXT: ret
				entry:
				%D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
				%splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
				%div = fdiv <vscale x 2 x double> %a, %splat
				ret <vscale x 2 x double> %div
				}

				define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
				; CHECK-LABEL: splat_two_fdiv_nxv2f64:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
				; CHECK-NEXT: fmov z3.d, #1.00000000
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: mov z0.d, d0
				; CHECK-NEXT: fdiv z3.d, p0/m, z3.d, z0.d
				; CHECK-NEXT: fmul z0.d, z1.d, z3.d
				; CHECK-NEXT: fmul z1.d, z2.d, z3.d
				; CHECK-NEXT: b foo_2_nxv2f64
				entry:
				%D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
				%splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
				%div = fdiv <vscale x 2 x double> %a, %splat
				%div1 = fdiv <vscale x 2 x double> %b, %splat
				tail call void @foo_2_nxv2f64(<vscale x 2 x double> %div, <vscale x 2 x double> %div1)
				ret void
				}

	declare void @foo_3f(float, float, float)			declare void @foo_3f(float, float, float)
	declare void @foo_3d(double, double, double)			declare void @foo_3d(double, double, double)
	declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)			declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
	declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)			declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
	declare void @foo_2f(float, float)			declare void @foo_2f(float, float)
	declare void @foo_2d(double, double)			declare void @foo_2d(double, double)
				declare void @foo_3_nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
				declare void @foo_2_nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

	attributes #0 = { "unsafe-fp-math"="true" }			attributes #0 = { "unsafe-fp-math"="true" }
				attributes #1 = { "unsafe-fp-math"="true" "target-features"="+sve" }