Diff 396180

llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp

	Show First 20 Lines • Show All 576 Lines • ▼ Show 20 Lines
	}			}

	void SILowerControlFlow::optimizeEndCf() {			void SILowerControlFlow::optimizeEndCf() {
	// If the only instruction immediately following this END_CF is another			// If the only instruction immediately following this END_CF is another
	// END_CF in the only successor we can avoid emitting exec mask restore here.			// END_CF in the only successor we can avoid emitting exec mask restore here.
	if (!RemoveRedundantEndcf)			if (!RemoveRedundantEndcf)
	return;			return;

	for (MachineInstr *MI : LoweredEndCf) {			for (MachineInstr *MI : llvm::reverse(LoweredEndCf)) {
				foad (inline comment; unsubmitted, not done): There's a "using namespace llvm" at the top of this file.
				cdevadas (author; inline comment; unsubmitted, done): Thanks for pointing it out. Will simplify it.
	MachineBasicBlock &MBB = *MI->getParent();			MachineBasicBlock &MBB = *MI->getParent();
	auto Next =			auto Next =
	skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI->getIterator()));			skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI->getIterator()));
	if (Next == MBB.end() \|\| !LoweredEndCf.count(&*Next))			if (Next == MBB.end() \|\| !LoweredEndCf.count(&*Next))
	continue;			continue;
	// Only skip inner END_CF if outer ENDCF belongs to SI_IF.			// Only skip inner END_CF if outer ENDCF belongs to SI_IF.
	// If that belongs to SI_ELSE then saved mask has an inverted value.			// If that belongs to SI_ELSE then saved mask has an inverted value.
	Register SavedExec			Register SavedExec
	▲ Show 20 Lines • Show All 319 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/collapse-endcf.mir

Show First 20 Lines • Show All 691 Lines • ▼ Show 20 Lines	bb.4:
S_ENDPGM 0		S_ENDPGM 0


bb.5:		bb.5:
SI_END_CF %0:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec		SI_END_CF %0:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec

bb.6:		bb.6:
S_BRANCH %bb.4		S_BRANCH %bb.4
		...

		---
		# While collapsing inner endcf, certain blocks ended up getting two S_BRANCH instructions.
		# It happens in the absence of BranchFolding (mostly at -O0) when the irregularly placed BBs are traversed
		rampitec (inline comment; unsubmitted, not done): It seems you need a test running at -O0 to cover it.
		# in the forward direction and the intervening block between a predecessor and its successor gets optimized
		# away in subsequent iterations, leaving 2 S_BRANCH instructions in the predecessor block.
		# The issue was fixed by iterating the blocks from bottom-up to ensure all endcf pseudos at the bottom of the
		# function are processed first.
		# This test ensures there are no multiple S_BRANCH instructions inserted in any block.

		name: multi_unconditional_branch
		tracksRegLiveness: true
		body: \|
		; GCN-LABEL: name: multi_unconditional_branch
		; GCN: bb.0:
		rampitec (inline comment; unsubmitted, not done): Wasn't update_mir_test_checks.py changed to use -NEXT checks?
		; GCN: successors: %bb.1(0x40000000), %bb.14(0x40000000)
		; GCN: {{ $}}
		; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
		; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF]], implicit $exec
		; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
		; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
		; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
		; GCN: S_CBRANCH_EXECZ %bb.14, implicit $exec
		; GCN: S_BRANCH %bb.1
		; GCN: {{ $}}
		; GCN: bb.1:
		; GCN: successors: %bb.2(0x40000000), %bb.14(0x40000000)
		; GCN: {{ $}}
		; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
		; GCN: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF1]], implicit $exec
		; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
		; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], killed [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
		; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]]
		; GCN: S_CBRANCH_EXECZ %bb.14, implicit $exec
		; GCN: S_BRANCH %bb.2
		; GCN: {{ $}}
		; GCN: bb.2:
		; GCN: successors: %bb.3(0x40000000), %bb.7(0x40000000)
		; GCN: {{ $}}
		; GCN: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
		; GCN: [[V_CMP_EQ_U32_e64_2:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF2]], implicit $exec
		; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
		; GCN: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], killed [[V_CMP_EQ_U32_e64_2]], implicit-def dead $scc
		; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_2]]
		; GCN: S_CBRANCH_EXECZ %bb.7, implicit $exec
		; GCN: S_BRANCH %bb.3
		; GCN: {{ $}}
		; GCN: bb.3:
		; GCN: successors: %bb.4(0x40000000), %bb.7(0x40000000)
		; GCN: {{ $}}
		; GCN: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
		; GCN: [[V_CMP_EQ_U32_e64_3:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF3]], implicit $exec
		; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
		; GCN: [[S_AND_B64_3:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY3]], killed [[V_CMP_EQ_U32_e64_3]], implicit-def dead $scc
		; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_3]]
		; GCN: S_CBRANCH_EXECZ %bb.7, implicit $exec
		; GCN: S_BRANCH %bb.4
		; GCN: {{ $}}
		; GCN: bb.4:
		; GCN: successors: %bb.7(0x80000000)
		; GCN: {{ $}}
		; GCN: S_BRANCH %bb.7
		; GCN: {{ $}}
		; GCN: bb.7:
		; GCN: successors: %bb.8(0x80000000)
		; GCN: {{ $}}
		; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
		; GCN: S_BRANCH %bb.8
		; GCN: {{ $}}
		; GCN: bb.8:
		; GCN: successors: %bb.9(0x80000000)
		; GCN: {{ $}}
		; GCN: S_BRANCH %bb.9
		; GCN: {{ $}}
		; GCN: bb.9:
		; GCN: successors: %bb.11(0x40000000), %bb.12(0x40000000)
		; GCN: {{ $}}
		; GCN: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
		; GCN: [[V_CMP_EQ_U32_e64_4:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF4]], implicit $exec
		; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
		; GCN: [[S_AND_B64_4:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], killed [[V_CMP_EQ_U32_e64_4]], implicit-def dead $scc
		; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_4]], [[COPY4]], implicit-def dead $scc
		; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_4]]
		; GCN: S_CBRANCH_EXECZ %bb.12, implicit $exec
		; GCN: S_BRANCH %bb.11
		; GCN: {{ $}}
		; GCN: bb.10:
		; GCN: successors: %bb.14(0x80000000)
		; GCN: {{ $}}
		; GCN: S_BRANCH %bb.14
		; GCN: {{ $}}
		; GCN: bb.11:
		; GCN: successors: %bb.12(0x80000000)
		; GCN: {{ $}}
		; GCN: S_BRANCH %bb.12
		; GCN: {{ $}}
		; GCN: bb.12:
		; GCN: successors: %bb.10(0x40000000), %bb.14(0x40000000)
		; GCN: {{ $}}
		; GCN: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 [[S_XOR_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
		; GCN: [[S_AND_B64_5:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc
		; GCN: $exec = S_XOR_B64_term $exec, [[S_AND_B64_5]], implicit-def $scc
		; GCN: S_CBRANCH_EXECZ %bb.14, implicit $exec
		; GCN: S_BRANCH %bb.10
		; GCN: {{ $}}
		; GCN: bb.14:
		; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc
		; GCN: S_ENDPGM 0
		bb.0:
		successors: %bb.1, %bb.14

		%0:vgpr_32 = IMPLICIT_DEF
		%1:sreg_64 = V_CMP_EQ_U32_e64 0, killed %0:vgpr_32, implicit $exec
		%2:sreg_64 = SI_IF %1:sreg_64, %bb.14, implicit-def $exec, implicit-def dead $scc, implicit $exec
		S_BRANCH %bb.1

		bb.1:
		; predecessors: %bb.0
		successors: %bb.2, %bb.6

		%3:vgpr_32 = IMPLICIT_DEF
		%4:sreg_64 = V_CMP_EQ_U32_e64 0, killed %3:vgpr_32, implicit $exec
		%5:sreg_64 = SI_IF killed %4:sreg_64, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
		S_BRANCH %bb.2

		bb.2:
		; predecessors: %bb.1
		successors: %bb.3, %bb.7

		%6:vgpr_32 = IMPLICIT_DEF
		%7:sreg_64 = V_CMP_EQ_U32_e64 0, killed %6:vgpr_32, implicit $exec
		%8:sreg_64 = SI_IF killed %7:sreg_64, %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
		S_BRANCH %bb.3

		bb.3:
		; predecessors: %bb.2
		successors: %bb.4, %bb.5

		%9:vgpr_32 = IMPLICIT_DEF
		%10:sreg_64 = V_CMP_EQ_U32_e64 0, killed %9:vgpr_32, implicit $exec
		%11:sreg_64 = SI_IF killed %10:sreg_64, %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
		S_BRANCH %bb.4

		bb.4:
		; predecessors: %bb.3
		successors: %bb.5

		S_BRANCH %bb.5

		bb.5:
		; predecessors: %bb.3, %bb.4
		successors: %bb.7

		SI_END_CF %11:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
		S_BRANCH %bb.7

		bb.6:
		; predecessors: %bb.1, %bb.13
		successors: %bb.14

		SI_END_CF %5:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
		S_BRANCH %bb.14

		bb.7:
		; predecessors: %bb.2, %bb.5
		successors: %bb.8

		SI_END_CF %8:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
		S_BRANCH %bb.8

		bb.8:
		; predecessors: %bb.7
		successors: %bb.9

		S_BRANCH %bb.9

		bb.9:
		; predecessors: %bb.8
		successors: %bb.11, %bb.12

		%12:vgpr_32 = IMPLICIT_DEF
		%13:sreg_64 = V_CMP_EQ_U32_e64 0, killed %12:vgpr_32, implicit $exec
		%14:sreg_64 = SI_IF killed %13:sreg_64, %bb.12, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
		S_BRANCH %bb.11

		bb.10:
		; predecessors: %bb.12
		successors: %bb.13

		S_BRANCH %bb.13

		bb.11:
		; predecessors: %bb.9
		successors: %bb.12

		S_BRANCH %bb.12

		bb.12:
		; predecessors: %bb.9, %bb.11
		successors: %bb.10, %bb.13

		%15:sreg_64 = SI_ELSE %14:sreg_64, %bb.13, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
		S_BRANCH %bb.10

		bb.13:
		; predecessors: %bb.10, %bb.12
		successors: %bb.6

		SI_END_CF %15:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
		S_BRANCH %bb.6

		bb.14:
		; predecessors: %bb.0, %bb.6

		SI_END_CF %2:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
		S_ENDPGM 0
...		...

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Iterate LoweredEndCf in the reverse order
ClosedPublic

Details

Diff Detail

Unit Tests: Failed

Event Timeline

Revision Contents

Diff 396180

llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp

llvm/test/CodeGen/AMDGPU/collapse-endcf.mir

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Iterate LoweredEndCf in the reverse order
ClosedPublic

Details

Diff Detail

Unit Tests: Failed

Event Timeline

Revision Contents

Diff 396180

llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp

llvm/test/CodeGen/AMDGPU/collapse-endcf.mir

[AMDGPU] Iterate LoweredEndCf in the reverse order
ClosedPublic