This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/GlobalISel: Fix masked control flow with fallthrough blocks
ClosedPublic

Authored by arsenm on May 17 2020, 12:53 PM.

Download Raw Diff

Details

Reviewers

nhaehnle
foad
sameerds
kerbowa

Summary

Unlike SelectionDAGBuilder, IRTranslator omits the unconditional branch
in fallthrough cases. Confusingly, the control flow pseudos function
in the opposite way from how the intrinsics are used, and the branch targets
always need to be swapped. We're inverting the target blocks, so we
need to figure out the old fallthrough block and insert a branch to
the original unconditional branch target.

Diff Detail

Event Timeline

arsenm created this revision. May 17 2020, 12:53 PM

Herald added a project: Restricted Project. · View Herald Transcript · May 17 2020, 12:53 PM

Herald added subscribers: hiraditya, t-tye, tpr and 6 others. · View Herald Transcript

I think the patch is okay, but have a couple of doubts about the tests.

llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
40	For this and all other similar updates, does this mean that the existing testcases were actually wrong?
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
232	"backward" is a bit of an understatement when describing this very interesting artifact. I don't know the SI intrinsics very well, but the name suggests it should be a backedge. But this use suggests that the meaning is more general than that: the presence of SI_LOOP instead of a simple conditional branch indicates that the edge is either a backedge or a loop exit.

arsenm marked 2 inline comments as done. May 22 2020, 5:54 AM

arsenm added inline comments.

llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
40	Yes
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
232	Yes, I've been confused by this every time I've ever looked at this. If you look at how SI_LOOP is lowered, it's the s_cbranch_execnz which should be the backedge, but the loop intrinsic on return true exits the loop

LGTM!

This revision is now accepted and ready to land. May 22 2020, 7:18 AM

66fe60220ca2b1932e06093294c72b246be54ec8

Revision Contents

Path

Size

llvm/

lib/

Target/

AMDGPU/

AMDGPULegalizerInfo.cpp

52 lines

SIISelLowering.cpp

1 line

test/

CodeGen/

AMDGPU/

GlobalISel/

divergent-control-flow.ll

2 lines

legalize-amdgcn.if.xfail.mir

21 lines

130 lines

2 lines

12 lines

12 lines

Diff 264511

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Show First 20 Lines • Show All 2,272 Lines • ▼ Show 20 Lines	bool AMDGPULegalizerInfo::legalizeBuildVector(

MI.eraseFromParent();		MI.eraseFromParent();
return true;		return true;
}		}

// Return the use branch instruction, otherwise null if the usage is invalid.		// Return the use branch instruction, otherwise null if the usage is invalid.
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,		static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI,		MachineRegisterInfo &MRI,
MachineInstr *&Br) {		MachineInstr *&Br,
		MachineBasicBlock *&UncondBrTarget) {
Register CondDef = MI.getOperand(0).getReg();		Register CondDef = MI.getOperand(0).getReg();
if (!MRI.hasOneNonDBGUse(CondDef))		if (!MRI.hasOneNonDBGUse(CondDef))
return nullptr;		return nullptr;

		MachineBasicBlock *Parent = MI.getParent();
MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);		MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
if (UseMI.getParent() != MI.getParent() \|\|		if (UseMI.getParent() != Parent \|\|
UseMI.getOpcode() != AMDGPU::G_BRCOND)		UseMI.getOpcode() != AMDGPU::G_BRCOND)
return nullptr;		return nullptr;

// Make sure the cond br is followed by a G_BR		// Make sure the cond br is followed by a G_BR, or is the last instruction.
MachineBasicBlock::iterator Next = std::next(UseMI.getIterator());		MachineBasicBlock::iterator Next = std::next(UseMI.getIterator());
if (Next != MI.getParent()->end()) {		if (Next == Parent->end()) {
		MachineFunction::iterator NextMBB = std::next(Parent->getIterator());
		if (NextMBB == Parent->getParent()->end()) // Illegal intrinsic use.
		return nullptr;
		UncondBrTarget = &*NextMBB;
		} else {
if (Next->getOpcode() != AMDGPU::G_BR)		if (Next->getOpcode() != AMDGPU::G_BR)
return nullptr;		return nullptr;
Br = &*Next;		Br = &*Next;
		UncondBrTarget = Br->getOperand(0).getMBB();
}		}

return &UseMI;		return &UseMI;
}		}

Register AMDGPULegalizerInfo::insertLiveInCopy(MachineIRBuilder &B,		Register AMDGPULegalizerInfo::insertLiveInCopy(MachineIRBuilder &B,
MachineRegisterInfo &MRI,		MachineRegisterInfo &MRI,
Register LiveIn,		Register LiveIn,
▲ Show 20 Lines • Show All 1,800 Lines • ▼ Show 20 Lines	bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI = *B.getMRI();		MachineRegisterInfo &MRI = *B.getMRI();

// Replace the use G_BRCOND with the exec manipulate and branch pseudos.		// Replace the use G_BRCOND with the exec manipulate and branch pseudos.
auto IntrID = MI.getIntrinsicID();		auto IntrID = MI.getIntrinsicID();
switch (IntrID) {		switch (IntrID) {
case Intrinsic::amdgcn_if:		case Intrinsic::amdgcn_if:
case Intrinsic::amdgcn_else: {		case Intrinsic::amdgcn_else: {
MachineInstr *Br = nullptr;		MachineInstr *Br = nullptr;
if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) {		MachineBasicBlock *UncondBrTarget = nullptr;
		if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
const SIRegisterInfo *TRI		const SIRegisterInfo *TRI
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());		= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());

B.setInstr(*BrCond);		B.setInstr(*BrCond);
Register Def = MI.getOperand(1).getReg();		Register Def = MI.getOperand(1).getReg();
Register Use = MI.getOperand(3).getReg();		Register Use = MI.getOperand(3).getReg();

MachineBasicBlock *BrTarget = BrCond->getOperand(1).getMBB();		MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
if (Br)
BrTarget = Br->getOperand(0).getMBB();

if (IntrID == Intrinsic::amdgcn_if) {		if (IntrID == Intrinsic::amdgcn_if) {
B.buildInstr(AMDGPU::SI_IF)		B.buildInstr(AMDGPU::SI_IF)
.addDef(Def)		.addDef(Def)
.addUse(Use)		.addUse(Use)
.addMBB(BrTarget);		.addMBB(UncondBrTarget);
} else {		} else {
B.buildInstr(AMDGPU::SI_ELSE)		B.buildInstr(AMDGPU::SI_ELSE)
.addDef(Def)		.addDef(Def)
.addUse(Use)		.addUse(Use)
.addMBB(BrTarget)		.addMBB(UncondBrTarget)
.addImm(0);		.addImm(0);
}		}

if (Br)		if (Br) {
Br->getOperand(0).setMBB(BrCond->getOperand(1).getMBB());		Br->getOperand(0).setMBB(CondBrTarget);
		} else {
		// The IRTranslator skips inserting the G_BR for fallthrough cases, but
		// since we're swapping branch targets it needs to be reinserted.
		// FIXME: IRTranslator should probably not do this
		B.buildBr(*CondBrTarget);
		}

MRI.setRegClass(Def, TRI->getWaveMaskRegClass());		MRI.setRegClass(Def, TRI->getWaveMaskRegClass());
MRI.setRegClass(Use, TRI->getWaveMaskRegClass());		MRI.setRegClass(Use, TRI->getWaveMaskRegClass());
MI.eraseFromParent();		MI.eraseFromParent();
BrCond->eraseFromParent();		BrCond->eraseFromParent();
return true;		return true;
}		}

return false;		return false;
}		}
case Intrinsic::amdgcn_loop: {		case Intrinsic::amdgcn_loop: {
MachineInstr *Br = nullptr;		MachineInstr *Br = nullptr;
if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) {		MachineBasicBlock *UncondBrTarget = nullptr;
		if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
const SIRegisterInfo *TRI		const SIRegisterInfo *TRI
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());		= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());

B.setInstr(*BrCond);		B.setInstr(*BrCond);

MachineBasicBlock *BrTarget = BrCond->getOperand(1).getMBB();		MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
if (Br)
BrTarget = Br->getOperand(0).getMBB();

Register Reg = MI.getOperand(2).getReg();		Register Reg = MI.getOperand(2).getReg();
B.buildInstr(AMDGPU::SI_LOOP)		B.buildInstr(AMDGPU::SI_LOOP)
.addUse(Reg)		.addUse(Reg)
.addMBB(BrTarget);		.addMBB(UncondBrTarget);

if (Br)		if (Br)
Br->getOperand(0).setMBB(BrCond->getOperand(1).getMBB());		Br->getOperand(0).setMBB(CondBrTarget);
		else
		B.buildBr(*CondBrTarget);

MI.eraseFromParent();		MI.eraseFromParent();
BrCond->eraseFromParent();		BrCond->eraseFromParent();
MRI.setRegClass(Reg, TRI->getWaveMaskRegClass());		MRI.setRegClass(Reg, TRI->getWaveMaskRegClass());
return true;		return true;
}		}

return false;		return false;
▲ Show 20 Lines • Show All 121 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 4,510 Lines • ▼ Show 20 Lines	SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
if (Intr->getOpcode() == ISD::SETCC) {		if (Intr->getOpcode() == ISD::SETCC) {
// As long as we negate the condition everything is fine		// As long as we negate the condition everything is fine
SetCC = Intr;		SetCC = Intr;
Intr = SetCC->getOperand(0).getNode();		Intr = SetCC->getOperand(0).getNode();

} else {		} else {
// Get the target from BR if we don't negate the condition		// Get the target from BR if we don't negate the condition
BR = findUser(BRCOND, ISD::BR);		BR = findUser(BRCOND, ISD::BR);
		assert(BR && "brcond missing unconditional branch user");
Target = BR->getOperand(1);		Target = BR->getOperand(1);
}		}

unsigned CFNode = isCFIntrinsic(Intr);		unsigned CFNode = isCFIntrinsic(Intr);
if (CFNode == 0) {		if (CFNode == 0) {
// This is a uniform branch so we don't need to legalize.		// This is a uniform branch so we don't need to legalize.
return BRCOND;		return BRCOND;
}		}
▲ Show 20 Lines • Show All 6,502 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll

	Show All 31 Lines

	define i32 @divergent_if_swap_brtarget_order1(i32 %value) {			define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
	; CHECK-LABEL: divergent_if_swap_brtarget_order1:			; CHECK-LABEL: divergent_if_swap_brtarget_order1:
	; CHECK: ; %bb.0: ; %entry			; CHECK: ; %bb.0: ; %entry
	; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)			; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0			; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
	; CHECK-NEXT: ; implicit-def: $vgpr0			; CHECK-NEXT: ; implicit-def: $vgpr0
	; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc			; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
	; CHECK-NEXT: s_cbranch_execnz BB1_2			; CHECK-NEXT: s_cbranch_execz BB1_2
				sameerdsUnsubmitted Not Done Reply Inline Actions For this and all other similar updates, does this mean that the existing testcases were actually wrong? sameerds: For this and all other similar updates, does this mean that the existing testcases were…
				arsenmAuthorUnsubmitted Done Reply Inline Actions Yes arsenm: Yes
	; CHECK-NEXT: ; %bb.1: ; %if.true			; CHECK-NEXT: ; %bb.1: ; %if.true
	; CHECK-NEXT: global_load_dword v0, v[0:1], off			; CHECK-NEXT: global_load_dword v0, v[0:1], off
	; CHECK-NEXT: BB1_2: ; %endif			; CHECK-NEXT: BB1_2: ; %endif
	; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]			; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
	; CHECK-NEXT: s_waitcnt vmcnt(0)			; CHECK-NEXT: s_waitcnt vmcnt(0)
	; CHECK-NEXT: s_setpc_b64 s[30:31]			; CHECK-NEXT: s_setpc_b64 s[30:31]
	entry:			entry:
	%c = icmp ne i32 %value, 0			%c = icmp ne i32 %value, 0
	▲ Show 20 Lines • Show All 204 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir

This file was added.

				# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 \| FileCheck -check-prefix=ERR %s

				# Make sure there's no crash if there is somehow no successor block.

				# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_no_succ_block)

				---
				name: brcond_si_if_no_succ_block
				body: \|
				bb.0:
				S_NOP 0

				bb.1:
				successors: %bb.1
				liveins: $vgpr0, $vgpr1
				%0:_(s32) = COPY $vgpr0
				%1:_(s32) = COPY $vgpr1
				%2:_(s1) = G_ICMP intpred(ne), %0, %1
				%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
				G_BRCOND %3, %bb.1
				...

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir

	Show First 20 Lines • Show All 102 Lines • ▼ Show 20 Lines
	body: \|			body: \|
	; WAVE64-LABEL: name: brcond_si_if			; WAVE64-LABEL: name: brcond_si_if
	; WAVE64: bb.0:			; WAVE64: bb.0:
	; WAVE64: successors: %bb.1(0x80000000)			; WAVE64: successors: %bb.1(0x80000000)
	; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0			; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
	; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1			; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
	; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]			; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
	; WAVE64: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec			; WAVE64: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
				; WAVE64: G_BR %bb.1
	; WAVE64: bb.1:			; WAVE64: bb.1:
	; WAVE32-LABEL: name: brcond_si_if			; WAVE32-LABEL: name: brcond_si_if
	; WAVE32: bb.0:			; WAVE32: bb.0:
	; WAVE32: successors: %bb.1(0x80000000)			; WAVE32: successors: %bb.1(0x80000000)
	; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0			; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
	; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1			; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
	; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]			; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
	; WAVE32: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec			; WAVE32: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
				; WAVE32: G_BR %bb.1
	; WAVE32: bb.1:			; WAVE32: bb.1:
	bb.0:			bb.0:
	successors: %bb.1			successors: %bb.1
	liveins: $vgpr0, $vgpr1			liveins: $vgpr0, $vgpr1
	%0:_(s32) = COPY $vgpr0			%0:_(s32) = COPY $vgpr0
	%1:_(s32) = COPY $vgpr1			%1:_(s32) = COPY $vgpr1
	%2:_(s1) = G_ICMP intpred(ne), %0, %1			%2:_(s1) = G_ICMP intpred(ne), %0, %1
	%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2			%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
	G_BRCOND %3, %bb.1			G_BRCOND %3, %bb.1

	bb.1:			bb.1:
	...			...

	---			---
	name: brcond_si_else			name: brcond_si_else
	body: \|			body: \|
	; WAVE64-LABEL: name: brcond_si_else			; WAVE64-LABEL: name: brcond_si_else
	; WAVE64: bb.0:			; WAVE64: bb.0:
	; WAVE64: successors: %bb.1(0x80000000)			; WAVE64: successors: %bb.1(0x80000000)
	; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0			; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
	; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1			; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
	; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]			; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
	; WAVE64: [[SI_ELSE:%[0-9]+]]:sreg_64_xexec(s64) = SI_ELSE [[ICMP]](s1), %bb.1, 0, implicit-def $exec, implicit-def $scc, implicit $exec			; WAVE64: [[SI_ELSE:%[0-9]+]]:sreg_64_xexec(s64) = SI_ELSE [[ICMP]](s1), %bb.1, 0, implicit-def $exec, implicit-def $scc, implicit $exec
				; WAVE64: G_BR %bb.1
	; WAVE64: bb.1:			; WAVE64: bb.1:
	; WAVE32-LABEL: name: brcond_si_else			; WAVE32-LABEL: name: brcond_si_else
	; WAVE32: bb.0:			; WAVE32: bb.0:
	; WAVE32: successors: %bb.1(0x80000000)			; WAVE32: successors: %bb.1(0x80000000)
	; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0			; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
	; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1			; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
	; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]			; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
	; WAVE32: [[SI_ELSE:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_ELSE [[ICMP]](s1), %bb.1, 0, implicit-def $exec, implicit-def $scc, implicit $exec			; WAVE32: [[SI_ELSE:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_ELSE [[ICMP]](s1), %bb.1, 0, implicit-def $exec, implicit-def $scc, implicit $exec
				; WAVE32: G_BR %bb.1
	; WAVE32: bb.1:			; WAVE32: bb.1:
	bb.0:			bb.0:
	successors: %bb.1			successors: %bb.1
	liveins: $vgpr0, $vgpr1			liveins: $vgpr0, $vgpr1
	%0:_(s32) = COPY $vgpr0			%0:_(s32) = COPY $vgpr0
	%1:_(s32) = COPY $vgpr1			%1:_(s32) = COPY $vgpr1
	%2:_(s1) = G_ICMP intpred(ne), %0, %1			%2:_(s1) = G_ICMP intpred(ne), %0, %1
	%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %2			%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %2
	G_BRCOND %3, %bb.1			G_BRCOND %3, %bb.1

	bb.1:			bb.1:
	...			...

	---			---
	name: brcond_si_loop			name: brcond_si_loop_brcond
				tracksRegLiveness: true
	body: \|			body: \|
	; WAVE64-LABEL: name: brcond_si_loop			; WAVE64-LABEL: name: brcond_si_loop_brcond
	; WAVE64: bb.0:			; WAVE64: bb.0:
	; WAVE64: successors: %bb.1(0x80000000)			; WAVE64: successors: %bb.1(0x80000000)
				; WAVE64: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
	; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0			; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
	; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1			; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
	; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1			; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
	; WAVE64: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
	; WAVE64: bb.1:			; WAVE64: bb.1:
	; WAVE32-LABEL: name: brcond_si_loop			; WAVE64: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; WAVE64: S_NOP 0
				; WAVE64: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
				; WAVE64: G_BR %bb.2
				; WAVE64: bb.2:
				; WAVE64: S_NOP 0
				; WAVE32-LABEL: name: brcond_si_loop_brcond
	; WAVE32: bb.0:			; WAVE32: bb.0:
	; WAVE32: successors: %bb.1(0x80000000)			; WAVE32: successors: %bb.1(0x80000000)
				; WAVE32: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
	; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0			; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
	; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1			; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
	; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1			; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
				; WAVE32: bb.1:
				; WAVE32: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; WAVE32: S_NOP 0
	; WAVE32: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec			; WAVE32: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
				; WAVE32: G_BR %bb.2
				; WAVE32: bb.2:
				; WAVE32: S_NOP 0
				bb.0:
				liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
				%0:_(s32) = COPY $vgpr0
				%1:_(s32) = COPY $vgpr1
				%2:_(s64) = COPY $sgpr0_sgpr1

				bb.1:
				successors: %bb.1, %bb.2
				S_NOP 0
				%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
				G_BRCOND %3, %bb.2
				G_BR %bb.1

				bb.2:
				S_NOP 0
				...

				# This usage is backwards from how the intrinsic is supposed to be
				# used.
				---
				name: brcond_si_loop_brcond_back
				tracksRegLiveness: true
				body: \|
				; WAVE64-LABEL: name: brcond_si_loop_brcond_back
				; WAVE64: bb.0:
				; WAVE64: successors: %bb.1(0x80000000)
				; WAVE64: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
				; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
				; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
				; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
				; WAVE64: bb.1:
				; WAVE64: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; WAVE64: S_NOP 0
				; WAVE64: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
				sameerdsUnsubmitted Not Done Reply Inline Actions "backward" is a bit of an understatement when describing this very interesting artifact. I don't know the SI intrinsics very well, but the name suggests it should be a backedge. But this use suggests that the meaning is more general than that: the presence of SI_LOOP instead of a simple conditional branch indicates that the edge is either a backedge or a loop exit. sameerds: "backward" is a bit of an understatement when describing this very interesting artifact. I…
				arsenmAuthorUnsubmitted Done Reply Inline Actions Yes, I've been confused by this every time I've ever looked at this. If you look at how SI_LOOP is lowered, it's the s_cbranch_execnz which should be the backedge, but the loop intrinsic on return true exits the loop arsenm: Yes, I've been confused by this every time I've ever looked at this. If you look at how SI_LOOP…
				; WAVE64: G_BR %bb.1
				; WAVE64: bb.2:
				; WAVE64: S_NOP 0
				; WAVE32-LABEL: name: brcond_si_loop_brcond_back
				; WAVE32: bb.0:
				; WAVE32: successors: %bb.1(0x80000000)
				; WAVE32: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
				; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
				; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
				; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
	; WAVE32: bb.1:			; WAVE32: bb.1:
				; WAVE32: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; WAVE32: S_NOP 0
				; WAVE32: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
				; WAVE32: G_BR %bb.1
				; WAVE32: bb.2:
				; WAVE32: S_NOP 0
	bb.0:			bb.0:
	successors: %bb.1
	liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1			liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
	%0:_(s32) = COPY $vgpr0			%0:_(s32) = COPY $vgpr0
	%1:_(s32) = COPY $vgpr1			%1:_(s32) = COPY $vgpr1
	%2:_(s64) = COPY $sgpr0_sgpr1			%2:_(s64) = COPY $sgpr0_sgpr1

				bb.1:
				successors: %bb.1, %bb.2
				S_NOP 0
	%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2			%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
	G_BRCOND %3, %bb.1			G_BRCOND %3, %bb.1
				G_BR %bb.2

				bb.2:
				S_NOP 0
				...

				# This usage is backwards from how the intrinsic is supposed to be
				# used.
				---
				name: brcond_si_loop_brcond_back_fallthrough
				tracksRegLiveness: true
				body: \|
				; WAVE64-LABEL: name: brcond_si_loop_brcond_back_fallthrough
				; WAVE64: bb.0:
				; WAVE64: successors: %bb.1(0x80000000)
				; WAVE64: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
				; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
				; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
				; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
				; WAVE64: bb.1:
				; WAVE64: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; WAVE64: S_NOP 0
				; WAVE64: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
				; WAVE64: G_BR %bb.1
				; WAVE64: bb.2:
				; WAVE32-LABEL: name: brcond_si_loop_brcond_back_fallthrough
				; WAVE32: bb.0:
				; WAVE32: successors: %bb.1(0x80000000)
				; WAVE32: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
				; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
				; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
				; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
				; WAVE32: bb.1:
				; WAVE32: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; WAVE32: S_NOP 0
				; WAVE32: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
				; WAVE32: G_BR %bb.1
				; WAVE32: bb.2:
				bb.0:
				liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
				%0:_(s32) = COPY $vgpr0
				%1:_(s32) = COPY $vgpr1
				%2:_(s64) = COPY $sgpr0_sgpr1

	bb.1:			bb.1:
				successors: %bb.1, %bb.2
				S_NOP 0
				%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
				G_BRCOND %3, %bb.1

				bb.2:
	...			...

llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll

	Show First 20 Lines • Show All 158 Lines • ▼ Show 20 Lines
	; GFX9: ; %bb.0: ; %entry			; GFX9: ; %bb.0: ; %entry
	; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)			; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; GFX9-NEXT: v_and_b32_e32 v0, 1, v0			; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
	; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0			; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
	; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1			; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
	; GFX9-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]			; GFX9-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
	; GFX9-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]			; GFX9-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
	; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[6:7]			; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
	; GFX9-NEXT: s_cbranch_execnz BB2_2			; GFX9-NEXT: s_cbranch_execz BB2_2
	; GFX9-NEXT: ; %bb.1: ; %bb1			; GFX9-NEXT: ; %bb.1: ; %bb1
	; GFX9-NEXT: s_getpc_b64 s[6:7]			; GFX9-NEXT: s_getpc_b64 s[6:7]
	; GFX9-NEXT: s_add_u32 s6, s6, static.gv2@rel32@lo+4			; GFX9-NEXT: s_add_u32 s6, s6, static.gv2@rel32@lo+4
	; GFX9-NEXT: s_addc_u32 s7, s7, static.gv2@rel32@hi+4			; GFX9-NEXT: s_addc_u32 s7, s7, static.gv2@rel32@hi+4
	; GFX9-NEXT: v_mov_b32_e32 v0, s6			; GFX9-NEXT: v_mov_b32_e32 v0, s6
	; GFX9-NEXT: v_mov_b32_e32 v1, s7			; GFX9-NEXT: v_mov_b32_e32 v1, s7
	; GFX9-NEXT: v_mov_b32_e32 v2, 0			; GFX9-NEXT: v_mov_b32_e32 v2, 0
	; GFX9-NEXT: s_getpc_b64 s[6:7]			; GFX9-NEXT: s_getpc_b64 s[6:7]
	▲ Show 20 Lines • Show All 46 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll

	Show All 10 Lines
	; CHECK-NEXT: v_or_b32_e32 v5, v1, v3			; CHECK-NEXT: v_or_b32_e32 v5, v1, v3
	; CHECK-NEXT: v_mov_b32_e32 v4, 0			; CHECK-NEXT: v_mov_b32_e32 v4, 0
	; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]			; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
	; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1			; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
	; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]			; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
	; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5			; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
	; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]			; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
	; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]			; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
	; CHECK-NEXT: s_cbranch_execnz BB0_2			; CHECK-NEXT: s_cbranch_execz BB0_2
	; CHECK-NEXT: ; %bb.1:			; CHECK-NEXT: ; %bb.1:
	; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2			; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2
	; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v3			; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v3
	; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v2			; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v2
	; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc			; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc
	; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5			; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
	; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4			; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4
	; CHECK-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4			; CHECK-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
	▲ Show 20 Lines • Show All 613 Lines • ▼ Show 20 Lines
	; CGP-NEXT: v_or_b32_e32 v1, v9, v5			; CGP-NEXT: v_or_b32_e32 v1, v9, v5
	; CGP-NEXT: v_mov_b32_e32 v0, 0			; CGP-NEXT: v_mov_b32_e32 v0, 0
	; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]			; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
	; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1			; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
	; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]			; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
	; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1			; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
	; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]			; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
	; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]			; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
	; CGP-NEXT: s_cbranch_execnz BB2_2			; CGP-NEXT: s_cbranch_execz BB2_2
	; CGP-NEXT: ; %bb.1:			; CGP-NEXT: ; %bb.1:
	; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4			; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
	; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5			; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5
	; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v4			; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v4
	; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc			; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc
	; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1			; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
	; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0			; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
	; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0			; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
	▲ Show 20 Lines • Show All 151 Lines • ▼ Show 20 Lines
	; CGP-NEXT: v_or_b32_e32 v5, v3, v7			; CGP-NEXT: v_or_b32_e32 v5, v3, v7
	; CGP-NEXT: v_mov_b32_e32 v4, 0			; CGP-NEXT: v_mov_b32_e32 v4, 0
	; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]			; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
	; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1			; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
	; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]			; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
	; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5			; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
	; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]			; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
	; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]			; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
	; CGP-NEXT: s_cbranch_execnz BB2_6			; CGP-NEXT: s_cbranch_execz BB2_6
	; CGP-NEXT: ; %bb.5:			; CGP-NEXT: ; %bb.5:
	; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6			; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6
	; CGP-NEXT: v_cvt_f32_u32_e32 v5, v7			; CGP-NEXT: v_cvt_f32_u32_e32 v5, v7
	; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v6			; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v6
	; CGP-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc			; CGP-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc
	; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5			; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
	; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4			; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
	; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4			; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
	▲ Show 20 Lines • Show All 1,487 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2			; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2
	; CHECK-NEXT: v_or_b32_e32 v7, v1, v5			; CHECK-NEXT: v_or_b32_e32 v7, v1, v5
	; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]			; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
	; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1			; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
	; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]			; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
	; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3			; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
	; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]			; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
	; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]			; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
	; CHECK-NEXT: s_cbranch_execnz BB7_2			; CHECK-NEXT: s_cbranch_execz BB7_2
	; CHECK-NEXT: ; %bb.1:			; CHECK-NEXT: ; %bb.1:
	; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v4			; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v4
	; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v5			; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v5
	; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4			; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4
	; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v5, vcc			; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v5, vcc
	; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3			; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
	; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2			; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
	; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2			; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
	▲ Show 20 Lines • Show All 432 Lines • ▼ Show 20 Lines
	; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6			; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6
	; CGP-NEXT: v_or_b32_e32 v1, v7, v11			; CGP-NEXT: v_or_b32_e32 v1, v7, v11
	; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]			; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
	; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1			; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
	; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]			; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
	; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1			; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
	; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]			; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
	; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]			; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
	; CGP-NEXT: s_cbranch_execnz BB8_2			; CGP-NEXT: s_cbranch_execz BB8_2
	; CGP-NEXT: ; %bb.1:			; CGP-NEXT: ; %bb.1:
	; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10			; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10
	; CGP-NEXT: v_cvt_f32_u32_e32 v1, v11			; CGP-NEXT: v_cvt_f32_u32_e32 v1, v11
	; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v10			; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v10
	; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v11, vcc			; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v11, vcc
	; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1			; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
	; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0			; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
	; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0			; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
	▲ Show 20 Lines • Show All 151 Lines • ▼ Show 20 Lines
	; CGP-NEXT: v_or_b32_e32 v5, v3, v9			; CGP-NEXT: v_or_b32_e32 v5, v3, v9
	; CGP-NEXT: v_mov_b32_e32 v4, 0			; CGP-NEXT: v_mov_b32_e32 v4, 0
	; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]			; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
	; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1			; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
	; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]			; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
	; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5			; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
	; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]			; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
	; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]			; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
	; CGP-NEXT: s_cbranch_execnz BB8_6			; CGP-NEXT: s_cbranch_execz BB8_6
	; CGP-NEXT: ; %bb.5:			; CGP-NEXT: ; %bb.5:
	; CGP-NEXT: v_cvt_f32_u32_e32 v4, v8			; CGP-NEXT: v_cvt_f32_u32_e32 v4, v8
	; CGP-NEXT: v_cvt_f32_u32_e32 v5, v9			; CGP-NEXT: v_cvt_f32_u32_e32 v5, v9
	; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v8			; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v8
	; CGP-NEXT: v_subb_u32_e32 v7, vcc, 0, v9, vcc			; CGP-NEXT: v_subb_u32_e32 v7, vcc, 0, v9, vcc
	; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5			; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
	; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4			; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
	; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4			; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
	▲ Show 20 Lines • Show All 519 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll

	Show All 10 Lines
	; CHECK-NEXT: v_or_b32_e32 v5, v1, v3			; CHECK-NEXT: v_or_b32_e32 v5, v1, v3
	; CHECK-NEXT: v_mov_b32_e32 v4, 0			; CHECK-NEXT: v_mov_b32_e32 v4, 0
	; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]			; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
	; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1			; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
	; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]			; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
	; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5			; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
	; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]			; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
	; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]			; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
	; CHECK-NEXT: s_cbranch_execnz BB0_2			; CHECK-NEXT: s_cbranch_execz BB0_2
	; CHECK-NEXT: ; %bb.1:			; CHECK-NEXT: ; %bb.1:
	; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2			; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2
	; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v3			; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v3
	; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v2			; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v2
	; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc			; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc
	; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5			; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
	; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4			; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4
	; CHECK-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4			; CHECK-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
	▲ Show 20 Lines • Show All 609 Lines • ▼ Show 20 Lines
	; CGP-NEXT: v_or_b32_e32 v1, v9, v5			; CGP-NEXT: v_or_b32_e32 v1, v9, v5
	; CGP-NEXT: v_mov_b32_e32 v0, 0			; CGP-NEXT: v_mov_b32_e32 v0, 0
	; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]			; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
	; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1			; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
	; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]			; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
	; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1			; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
	; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]			; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
	; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]			; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
	; CGP-NEXT: s_cbranch_execnz BB2_2			; CGP-NEXT: s_cbranch_execz BB2_2
	; CGP-NEXT: ; %bb.1:			; CGP-NEXT: ; %bb.1:
	; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4			; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
	; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5			; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5
	; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v4			; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v4
	; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc			; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc
	; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1			; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
	; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0			; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
	; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0			; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
	▲ Show 20 Lines • Show All 150 Lines • ▼ Show 20 Lines
	; CGP-NEXT: v_or_b32_e32 v5, v3, v7			; CGP-NEXT: v_or_b32_e32 v5, v3, v7
	; CGP-NEXT: v_mov_b32_e32 v4, 0			; CGP-NEXT: v_mov_b32_e32 v4, 0
	; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]			; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
	; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1			; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
	; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]			; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
	; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5			; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
	; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]			; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
	; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]			; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
	; CGP-NEXT: s_cbranch_execnz BB2_6			; CGP-NEXT: s_cbranch_execz BB2_6
	; CGP-NEXT: ; %bb.5:			; CGP-NEXT: ; %bb.5:
	; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6			; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6
	; CGP-NEXT: v_cvt_f32_u32_e32 v5, v7			; CGP-NEXT: v_cvt_f32_u32_e32 v5, v7
	; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v6			; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v6
	; CGP-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc			; CGP-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc
	; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5			; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
	; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4			; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
	; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4			; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
	▲ Show 20 Lines • Show All 1,466 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2			; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2
	; CHECK-NEXT: v_or_b32_e32 v7, v1, v5			; CHECK-NEXT: v_or_b32_e32 v7, v1, v5
	; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]			; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
	; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1			; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
	; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]			; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
	; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3			; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
	; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]			; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
	; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]			; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
	; CHECK-NEXT: s_cbranch_execnz BB7_2			; CHECK-NEXT: s_cbranch_execz BB7_2
	; CHECK-NEXT: ; %bb.1:			; CHECK-NEXT: ; %bb.1:
	; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v4			; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v4
	; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v5			; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v5
	; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4			; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4
	; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v5, vcc			; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v5, vcc
	; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3			; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
	; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2			; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
	; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2			; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
	▲ Show 20 Lines • Show All 429 Lines • ▼ Show 20 Lines
	; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6			; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6
	; CGP-NEXT: v_or_b32_e32 v1, v7, v11			; CGP-NEXT: v_or_b32_e32 v1, v7, v11
	; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]			; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
	; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1			; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
	; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]			; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
	; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1			; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
	; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]			; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
	; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]			; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
	; CGP-NEXT: s_cbranch_execnz BB8_2			; CGP-NEXT: s_cbranch_execz BB8_2
	; CGP-NEXT: ; %bb.1:			; CGP-NEXT: ; %bb.1:
	; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10			; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10
	; CGP-NEXT: v_cvt_f32_u32_e32 v1, v11			; CGP-NEXT: v_cvt_f32_u32_e32 v1, v11
	; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v10			; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v10
	; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v11, vcc			; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v11, vcc
	; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1			; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
	; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0			; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
	; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0			; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
	▲ Show 20 Lines • Show All 150 Lines • ▼ Show 20 Lines
	; CGP-NEXT: v_or_b32_e32 v5, v3, v9			; CGP-NEXT: v_or_b32_e32 v5, v3, v9
	; CGP-NEXT: v_mov_b32_e32 v4, 0			; CGP-NEXT: v_mov_b32_e32 v4, 0
	; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]			; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
	; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1			; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
	; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]			; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
	; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5			; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
	; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]			; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
	; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]			; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
	; CGP-NEXT: s_cbranch_execnz BB8_6			; CGP-NEXT: s_cbranch_execz BB8_6
	; CGP-NEXT: ; %bb.5:			; CGP-NEXT: ; %bb.5:
	; CGP-NEXT: v_cvt_f32_u32_e32 v4, v8			; CGP-NEXT: v_cvt_f32_u32_e32 v4, v8
	; CGP-NEXT: v_cvt_f32_u32_e32 v5, v9			; CGP-NEXT: v_cvt_f32_u32_e32 v5, v9
	; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v8			; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v8
	; CGP-NEXT: v_subb_u32_e32 v7, vcc, 0, v9, vcc			; CGP-NEXT: v_subb_u32_e32 v7, vcc, 0, v9, vcc
	; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5			; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
	; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4			; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
	; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4			; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
	▲ Show 20 Lines • Show All 520 Lines • Show Last 20 Lines