Diff 341004

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Show First 20 Lines • Show All 940 Lines • ▼ Show 20 Lines	public:

/// Legalize all operands in this instruction. This function may create new		/// Legalize all operands in this instruction. This function may create new
/// instructions and control-flow around \p MI. If present, \p MDT is		/// instructions and control-flow around \p MI. If present, \p MDT is
/// updated.		/// updated.
/// \returns A new basic block that contains \p MI if new blocks were created.		/// \returns A new basic block that contains \p MI if new blocks were created.
MachineBasicBlock *		MachineBasicBlock *
legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;		legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;

		/// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
		/// was moved to VGPR. \returns true if succeeded.
		bool moveFlatAddrToVALU(MachineInstr &Inst) const;

/// Replace this instruction's opcode with the equivalent VALU		/// Replace this instruction's opcode with the equivalent VALU
/// opcode. This function will also move the users of \p MI to the		/// opcode. This function will also move the users of \p MI to the
/// VALU if necessary. If present, \p MDT is updated.		/// VALU if necessary. If present, \p MDT is updated.
MachineBasicBlock *moveToVALU(MachineInstr &MI,		MachineBasicBlock *moveToVALU(MachineInstr &MI,
MachineDominatorTree *MDT = nullptr) const;		MachineDominatorTree *MDT = nullptr) const;

void insertNoop(MachineBasicBlock &MBB,		void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;		MachineBasicBlock::iterator MI) const override;
▲ Show 20 Lines • Show All 240 Lines • ▼ Show 20 Lines	namespace AMDGPU {

LLVM_READONLY		LLVM_READONLY
int getSOPKOp(uint16_t Opcode);		int getSOPKOp(uint16_t Opcode);

LLVM_READONLY		LLVM_READONLY
int getGlobalSaddrOp(uint16_t Opcode);		int getGlobalSaddrOp(uint16_t Opcode);

LLVM_READONLY		LLVM_READONLY
		int getGlobalVaddrOp(uint16_t Opcode);

		LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);		int getVCMPXNoSDstOp(uint16_t Opcode);

LLVM_READONLY		LLVM_READONLY
int getFlatScratchInstSTfromSS(uint16_t Opcode);		int getFlatScratchInstSTfromSS(uint16_t Opcode);

LLVM_READONLY		LLVM_READONLY
int getFlatScratchInstSSfromSV(uint16_t Opcode);		int getFlatScratchInstSSfromSV(uint16_t Opcode);

Show All 29 Lines

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,006 Lines • ▼ Show 20 Lines	void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
}		}
MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff);		MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff);
if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) {		if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) {
Register SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI);		Register SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI);
SOff->setReg(SGPR);		SOff->setReg(SGPR);
}		}
}		}

		// Rewrite the SADDR form of a segment-specific FLAT instruction in place to
		// its VADDR form. Returns true only if the opcode was switched; every
		// early-out below happens before the instruction is modified.
		bool SIInstrInfo::moveFlatAddrToVALU(MachineInstr &Inst) const {
		unsigned Opc = Inst.getOpcode();
		int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
		// No saddr operand means there is nothing to convert.
		if (OldSAddrIdx < 0)
		return false;

		assert(isSegmentSpecificFLAT(Inst));

		// Look up the VADDR counterpart; a negative result means this opcode has
		// no such mapping.
		int NewOpc = AMDGPU::getGlobalVaddrOp(Opc);
		if (NewOpc < 0)
		return false;

		MachineRegisterInfo &MRI = Inst.getParent()->getParent()->getRegInfo();
		foadUnsubmitted Done Reply Inline Actions Use `Inst.getMF()` ? foad: Use `Inst.getMF()` ?
		MachineOperand &SAddr = Inst.getOperand(OldSAddrIdx);
		// Leave the instruction alone while saddr is still held in an SGPR.
		if (RI.isSGPRReg(MRI, SAddr.getReg()))
		return false;

		// The replacement opcode must have a vaddr operand to receive the address.
		int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
		if (NewVAddrIdx < 0)
		return false;

		int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
		assert(OldVAddrIdx >= 0);
		Joe_NashUnsubmitted Done Reply Inline Actions I don't understand why this assert is true. Does some previous check guarantee that? Other than that LGTM, but please wait for @arsenm Joe_Nash: I don't understand why this assert is true. Does some previous check guarantee that? Other than…
		rampitecAuthorUnsubmitted Done Reply Inline Actions All flat global SADDR instructions have $vaddr component and other instructions cannot get to this point as getGlobalVaddrOp() will return -1. This changes in the followup D101408 which can also process flat scratch. rampitec: All flat global SADDR instructions have $vaddr component and other instructions cannot get to…
		Joe_NashUnsubmitted Done Reply Inline Actions I see it now, thanks. Joe_Nash: I see it now, thanks.

		// Check vaddr, it shall be zero
		// i.e. its register must be uniquely defined by a V_MOV_B32_e32 whose
		// source operand is the immediate 0; otherwise give up.
		MachineOperand &VAddr = Inst.getOperand(OldVAddrIdx);
		MachineInstr *Def = MRI.getUniqueVRegDef(VAddr.getReg());
		if (!Def \|\| Def->getOpcode() != AMDGPU::V_MOV_B32_e32 \|\|
		!Def->getOperand(1).isImm() \|\| Def->getOperand(1).getImm() != 0)
		return false;

		// All checks passed: switch the instruction descriptor to the VADDR
		// opcode, then fix up the operand list to match it.
		const MCInstrDesc &NewDesc = get(NewOpc);
		Inst.setDesc(NewDesc);

		if (OldVAddrIdx == NewVAddrIdx) {
		assert(OldVAddrIdx == NewVAddrIdx);
		foadUnsubmitted Done Reply Inline Actions Redundant assert, you've just tested this condition. foad: Redundant assert, you've just tested this condition.
		// vaddr keeps its index in the new form: take the old (zero) vaddr off
		// its register use list, move the saddr operand into that slot, and
		// remove the now-vacated saddr operand.
		MachineOperand &NewVAddr = Inst.getOperand(NewVAddrIdx);
		MRI.removeRegOperandFromUseList(&NewVAddr);
		MRI.moveOperands(&NewVAddr, &SAddr, 1);
		Inst.RemoveOperand(OldSAddrIdx);
		} else {
		// Otherwise the old saddr slot is expected to be the new vaddr slot, so
		// only the old (zero) vaddr operand has to be removed.
		assert(OldSAddrIdx == NewVAddrIdx);
		foadUnsubmitted Done Reply Inline Actions Are these last two lines really necessary? Hasn't MRI.moveOperands already handled this? foad: Are these last two lines really necessary? Hasn't MRI.moveOperands already handled this?
		rampitecAuthorUnsubmitted Done Reply Inline Actions It did not. I have added the comment. rampitec: It did not. I have added the comment.
		Inst.RemoveOperand(OldVAddrIdx);
		}

		return true;
		arsenmUnsubmitted Done Reply Inline Actions Probably should add a comment explaining why this nightmare is here arsenm: Probably should add a comment explaining why this nightmare is here
		}

		foadUnsubmitted Done Reply Inline Actions VAddrDef can't be 0 here, that was already checked earlier. foad: VAddrDef can't be 0 here, that was already checked earlier.
		rampitecAuthorUnsubmitted Done Reply Inline Actions Right, this is a part of D101408. Removed from this patch. rampitec: Right, this is a part of D101408. Removed from this patch.
// FIXME: Remove this when SelectionDAG is obsoleted.		// FIXME: Remove this when SelectionDAG is obsoleted.
		arsenmUnsubmitted Done Reply Inline Actions At this point wouldn't it be simpler to just create a fresh new instruction and delete the old one? arsenm: At this point wouldn't it be simpler to just create a fresh new instruction and delete the old…
		rampitecAuthorUnsubmitted Done Reply Inline Actions Unfortunately callers expect iterator to be intact. Otherwise probably yes, although it would not be less code. rampitec: Unfortunately callers expect iterator to be intact. Otherwise probably yes, although it would…
		arsenmUnsubmitted Done Reply Inline Actions I think in some contexts in SIFixSGPRCopies the original instruction is erased, but it's a bit of a mess arsenm: I think in some contexts in SIFixSGPRCopies the original instruction is erased, but it's a bit…
void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,		void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,
MachineInstr &MI) const {		MachineInstr &MI) const {
if (!isSegmentSpecificFLAT(MI))		if (!isSegmentSpecificFLAT(MI))
return;		return;

// Fixup SGPR operands in VGPRs. We only select these when the DAG divergence		// Fixup SGPR operands in VGPRs. We only select these when the DAG divergence
// thinks they are uniform, so a readfirstlane should be valid.		// thinks they are uniform, so a readfirstlane should be valid.
MachineOperand *SAddr = getNamedOperand(MI, AMDGPU::OpName::saddr);		MachineOperand *SAddr = getNamedOperand(MI, AMDGPU::OpName::saddr);
if (!SAddr \|\| RI.isSGPRClass(MRI.getRegClass(SAddr->getReg())))		if (!SAddr \|\| RI.isSGPRClass(MRI.getRegClass(SAddr->getReg())))
return;		return;

		if (moveFlatAddrToVALU(MI))
		return;

Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI);		Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI);
SAddr->setReg(ToSGPR);		SAddr->setReg(ToSGPR);
}		}

void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,		void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
MachineBasicBlock::iterator I,		MachineBasicBlock::iterator I,
const TargetRegisterClass *DstRC,		const TargetRegisterClass *DstRC,
MachineOperand &Op,		MachineOperand &Op,
▲ Show 20 Lines • Show All 2,690 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIInstrInfo.td

	Show First 20 Lines • Show All 2,499 Lines • ▼ Show 20 Lines
	def getGlobalSaddrOp : InstrMapping {			def getGlobalSaddrOp : InstrMapping {
	let FilterClass = "GlobalSaddrTable";			let FilterClass = "GlobalSaddrTable";
	let RowFields = ["SaddrOp"];			let RowFields = ["SaddrOp"];
	let ColFields = ["IsSaddr"];			let ColFields = ["IsSaddr"];
	let KeyCol = ["0"];			let KeyCol = ["0"];
	let ValueCols = [["1"]];			let ValueCols = [["1"]];
	}			}

				// Maps a GLOBAL SADDR to its VADDR form.
				// Instructions of class GlobalSaddrTable are grouped into rows by their
				// SaddrOp field; the lookup key is the entry with IsSaddr = 1 and the
				// returned value is the entry in the same row with IsSaddr = 0.
				def getGlobalVaddrOp : InstrMapping {
				let FilterClass = "GlobalSaddrTable";
				let RowFields = ["SaddrOp"];
				let ColFields = ["IsSaddr"];
				let KeyCol = ["1"];
				let ValueCols = [["0"]];
				}

	// Maps a v_cmpx opcode with sdst to opcode without sdst.			// Maps a v_cmpx opcode with sdst to opcode without sdst.
	def getVCMPXNoSDstOp : InstrMapping {			def getVCMPXNoSDstOp : InstrMapping {
	let FilterClass = "VCMPXNoSDstTable";			let FilterClass = "VCMPXNoSDstTable";
	let RowFields = ["NoSDstOp"];			let RowFields = ["NoSDstOp"];
	let ColFields = ["HasSDst"];			let ColFields = ["HasSDst"];
	let KeyCol = ["1"];			let KeyCol = ["1"];
	let ValueCols = [["0"]];			let ValueCols = [["0"]];
	}			}
	Show All 31 Lines

llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir

This file was added.

				# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
				# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s \| FileCheck --check-prefix=GCN %s

				---
				name: global_load_saddr_to_valu
				tracksRegLiveness: true
				body: \|
				; GCN-LABEL: name: global_load_saddr_to_valu
				; GCN: bb.0:
				; GCN: successors: %bb.1(0x80000000)
				; GCN: liveins: $vgpr0_vgpr1
				; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
				; GCN: bb.1:
				; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1
				; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
				; GCN: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PHI]], 0, 0, implicit $exec
				; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
				; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
				; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
				; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
				; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
				; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE]], implicit-def $vcc, implicit $exec
				; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec
				; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
				; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
				; GCN: bb.2:
				; GCN: S_ENDPGM 0
				bb.0:
				liveins: $vgpr0_vgpr1
				%0:sreg_64 = COPY $vgpr0_vgpr1

				bb.1:
				%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
				%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
				%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec
				arsenmUnsubmitted Done Reply Inline Actions Can you also add an IR test where this matters? I'm not understanding why the DAG divergence analysis would let this happen arsenm: Can you also add an IR test where this matters? I'm not understanding why the DAG divergence…
				rampitecAuthorUnsubmitted Done Reply Inline Actions The test case shall be more than 100 instructions long, this is the threshold for memory dependency analysis to give up on the "noclobber" check. The divergence analysis tells it is ok, it is uniform. But noclobber is not set, so the address registers are not known not to be clobbered and we are here. It really happens in large basic blocks. Probably it can also happen if we just had no SALU instructions to do some computations, so ended up with VALU which was propagated, but that is not the case I have started with. I.e. it is really uniform, and readfirstlane is valid, but we don't want to issue it. I am not really sure we want a test of that size. It will be more than obscure in the first place. rampitec: The test case shall be more than 100 instructions long, this is the threshold for memory…
				foadUnsubmitted Done Reply Inline Actions You could test with -memdep-block-scan-limit=1 ? foad: You could test with -memdep-block-scan-limit=1 ?
				rampitecAuthorUnsubmitted Done Reply Inline Actions I will check if I can produce a reasonable IR test. rampitec: I will check if I can produce a reasonable IR test.
				arsenmUnsubmitted Not Done Reply Inline Actions Whether or not this can be converted to SMRD is a different problem arsenm: Whether or not this can be converted to SMRD is a different problem
				%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
				S_CMP_LG_U64 %2, 0, implicit-def $scc
				S_CBRANCH_SCC1 %bb.1, implicit $scc

				bb.2:
				S_ENDPGM 0
				...

				---
				name: global_load_saddr_to_valu_non_zero_vaddr
				tracksRegLiveness: true
				body: \|
				; GCN-LABEL: name: global_load_saddr_to_valu_non_zero_vaddr
				; GCN: bb.0:
				; GCN: successors: %bb.1(0x80000000)
				; GCN: liveins: $vgpr0_vgpr1
				; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
				; GCN: bb.1:
				; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1
				; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
				; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
				; GCN: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
				; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
				; GCN: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec
				; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
				; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
				; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
				; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
				; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
				; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE1]], implicit-def $vcc, implicit $exec
				; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec
				; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
				; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
				; GCN: bb.2:
				; GCN: S_ENDPGM 0
				bb.0:
				liveins: $vgpr0_vgpr1
				%0:sreg_64 = COPY $vgpr0_vgpr1

				bb.1:
				%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
				%3:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
				%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec
				%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
				S_CMP_LG_U64 %2, 0, implicit-def $scc
				S_CBRANCH_SCC1 %bb.1, implicit $scc

				bb.2:
				S_ENDPGM 0
				...


				---
				name: global_load_saddr_to_valu_undef_vaddr
				tracksRegLiveness: true
				body: \|
				; GCN-LABEL: name: global_load_saddr_to_valu_undef_vaddr
				; GCN: bb.0:
				; GCN: successors: %bb.1(0x80000000)
				; GCN: liveins: $vgpr0_vgpr1
				; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
				; GCN: bb.1:
				; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1
				; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
				; GCN: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
				; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
				; GCN: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], undef %4:vgpr_32, 0, 0, implicit $exec
				; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
				; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
				; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
				; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
				; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
				; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE1]], implicit-def $vcc, implicit $exec
				; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec
				; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
				; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
				; GCN: bb.2:
				; GCN: S_ENDPGM 0
				bb.0:
				liveins: $vgpr0_vgpr1
				%0:sreg_64 = COPY $vgpr0_vgpr1

				bb.1:
				%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
				%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, undef %3:vgpr_32, 0, 0, implicit $exec
				%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
				S_CMP_LG_U64 %2, 0, implicit-def $scc
				S_CBRANCH_SCC1 %bb.1, implicit $scc

				bb.2:
				S_ENDPGM 0
				...

				---
				name: global_store_saddr_to_valu
				tracksRegLiveness: true
				body: \|
				; GCN-LABEL: name: global_store_saddr_to_valu
				; GCN: bb.0:
				; GCN: successors: %bb.1(0x80000000)
				; GCN: liveins: $vgpr0_vgpr1
				; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
				; GCN: bb.1:
				; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1
				; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
				; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
				; GCN: GLOBAL_STORE_DWORD [[PHI]], [[DEF]], 0, 0, implicit $exec
				; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
				; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
				; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
				; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
				; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
				; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE]], implicit-def $vcc, implicit $exec
				; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec
				; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
				; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
				; GCN: bb.2:
				; GCN: S_ENDPGM 0
				bb.0:
				liveins: $vgpr0_vgpr1
				%0:sreg_64 = COPY $vgpr0_vgpr1

				bb.1:
				%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
				%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
				%4:vgpr_32 = IMPLICIT_DEF
				GLOBAL_STORE_DWORD_SADDR %3, %4, %1, 0, 0, implicit $exec
				%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
				S_CMP_LG_U64 %2, 0, implicit-def $scc
				S_CBRANCH_SCC1 %bb.1, implicit $scc

				bb.2:
				S_ENDPGM 0
				...

				---
				name: global_addtid_load_saddr_to_valu
				tracksRegLiveness: true
				body: \|
				; GCN-LABEL: name: global_addtid_load_saddr_to_valu
				; GCN: bb.0:
				; GCN: successors: %bb.1(0x80000000)
				; GCN: liveins: $vgpr0_vgpr1
				; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
				; GCN: bb.1:
				; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, %bb.1
				; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
				; GCN: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
				; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
				; GCN: [[GLOBAL_LOAD_DWORD_ADDTID_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_ADDTID_SADDR [[REG_SEQUENCE]], 0, 0, implicit $exec
				; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
				; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
				; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
				; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
				; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
				; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE1]], implicit-def $vcc, implicit $exec
				; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec
				; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
				; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
				; GCN: bb.2:
				; GCN: S_ENDPGM 0
				bb.0:
				liveins: $vgpr0_vgpr1
				%0:sreg_64 = COPY $vgpr0_vgpr1

				bb.1:
				%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
				%4:vgpr_32 = GLOBAL_LOAD_DWORD_ADDTID_SADDR %1, 0, 0, implicit $exec
				%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
				S_CMP_LG_U64 %2, 0, implicit-def $scc
				S_CBRANCH_SCC1 %bb.1, implicit $scc

				bb.2:
				S_ENDPGM 0
				...

				---
				name: global_store_addtid_saddr_to_valu
				tracksRegLiveness: true
				body: \|
				; GCN-LABEL: name: global_store_addtid_saddr_to_valu
				; GCN: bb.0:
				; GCN: successors: %bb.1(0x80000000)
				; GCN: liveins: $vgpr0_vgpr1
				; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
				; GCN: bb.1:
				; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, %bb.1
				; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
				; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
				; GCN: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
				; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
				; GCN: GLOBAL_STORE_DWORD_ADDTID_SADDR [[DEF]], [[REG_SEQUENCE]], 0, 0, implicit $exec
				; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
				; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
				; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
				; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
				; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
				; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE1]], implicit-def $vcc, implicit $exec
				; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec
				; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
				; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
				; GCN: bb.2:
				; GCN: S_ENDPGM 0
				bb.0:
				liveins: $vgpr0_vgpr1
				%0:sreg_64 = COPY $vgpr0_vgpr1

				bb.1:
				%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
				%4:vgpr_32 = IMPLICIT_DEF
				GLOBAL_STORE_DWORD_ADDTID_SADDR %4, %1, 0, 0, implicit $exec
				%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
				S_CMP_LG_U64 %2, 0, implicit-def $scc
				S_CBRANCH_SCC1 %bb.1, implicit $scc

				bb.2:
				S_ENDPGM 0
				...

				---
				name: global_atomic_noret_saddr_to_valu
				tracksRegLiveness: true
				body: \|
				; GCN-LABEL: name: global_atomic_noret_saddr_to_valu
				; GCN: bb.0:
				; GCN: successors: %bb.1(0x80000000)
				; GCN: liveins: $vgpr0_vgpr1
				; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
				; GCN: bb.1:
				; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, %bb.1
				; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
				; GCN: GLOBAL_ATOMIC_ADD [[PHI]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec
				; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
				; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
				; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
				; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
				; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
				; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE]], implicit-def $vcc, implicit $exec
				; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec
				; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
				; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
				; GCN: bb.2:
				; GCN: S_ENDPGM 0
				bb.0:
				liveins: $vgpr0_vgpr1
				%0:sreg_64 = COPY $vgpr0_vgpr1

				bb.1:
				%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
				%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
				GLOBAL_ATOMIC_ADD_SADDR %3, %3, %1, 0, 0, implicit $exec
				%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
				S_CMP_LG_U64 %2, 0, implicit-def $scc
				S_CBRANCH_SCC1 %bb.1, implicit $scc

				bb.2:
				S_ENDPGM 0
				...

				---
				name: global_atomic_rtn_saddr_to_valu
				tracksRegLiveness: true
				body: \|
				; GCN-LABEL: name: global_atomic_rtn_saddr_to_valu
				; GCN: bb.0:
				; GCN: successors: %bb.1(0x80000000)
				; GCN: liveins: $vgpr0_vgpr1
				; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
				; GCN: bb.1:
				; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1
				; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
				; GCN: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PHI]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec
				; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
				; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
				; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
				; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
				; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
				; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE]], implicit-def $vcc, implicit $exec
				; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec
				; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
				; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
				; GCN: bb.2:
				; GCN: S_ENDPGM 0
				bb.0:
				liveins: $vgpr0_vgpr1
				%0:sreg_64 = COPY $vgpr0_vgpr1

				bb.1:
				%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
				%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
				%4:vgpr_32 = GLOBAL_ATOMIC_ADD_SADDR_RTN %3, %3, %1, 0, 0, implicit $exec
				%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
				S_CMP_LG_U64 %2, 0, implicit-def $scc
				S_CBRANCH_SCC1 %bb.1, implicit $scc

				bb.2:
				S_ENDPGM 0
				...

				---
				name: scratch_load_saddr_to_valu
				tracksRegLiveness: true
				body: \|
				; GCN-LABEL: name: scratch_load_saddr_to_valu
				; GCN: bb.0:
				; GCN: successors: %bb.1(0x80000000)
				; GCN: liveins: $vgpr0
				; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
				; GCN: bb.1:
				; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; GCN: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1
				; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]], implicit $exec
				; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[V_READFIRSTLANE_B32_]], 0, 0, implicit $exec, implicit $flat_scr
				; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec
				; GCN: V_CMP_NE_U32_e32 0, [[V_AND_B32_e64_]], implicit-def $vcc, implicit $exec
				; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec
				; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
				; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
				; GCN: bb.2:
				; GCN: S_ENDPGM 0
				bb.0:
				liveins: $vgpr0
				%0:sgpr_32 = COPY $vgpr0

				bb.1:
				%1:sgpr_32 = PHI %0, %bb.0, %2, %bb.1
				%4:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %1, 0, 0, implicit $exec, implicit $flat_scr
				%2:sgpr_32 = S_AND_B32 %1, 1, implicit-def $scc
				S_CMP_LG_U32 %2, 0, implicit-def $scc
				S_CBRANCH_SCC1 %bb.1, implicit $scc

				bb.2:
				S_ENDPGM 0
				...

				---
				name: scratch_store_saddr_to_valu
				tracksRegLiveness: true
				body: \|
				; GCN-LABEL: name: scratch_store_saddr_to_valu
				; GCN: bb.0:
				; GCN: successors: %bb.1(0x80000000)
				; GCN: liveins: $vgpr0
				; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
				; GCN: bb.1:
				; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
				; GCN: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1
				; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
				; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]], implicit $exec
				; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], [[V_READFIRSTLANE_B32_]], 0, 0, implicit $exec, implicit $flat_scr
				; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec
				; GCN: V_CMP_NE_U32_e32 0, [[V_AND_B32_e64_]], implicit-def $vcc, implicit $exec
				; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec
				; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
				; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
				; GCN: bb.2:
				; GCN: S_ENDPGM 0
				bb.0:
				liveins: $vgpr0
				%0:sgpr_32 = COPY $vgpr0

				bb.1:
				%1:sgpr_32 = PHI %0, %bb.0, %2, %bb.1
				%4:vgpr_32 = IMPLICIT_DEF
				SCRATCH_STORE_DWORD_SADDR %4, %1, 0, 0, implicit $exec, implicit $flat_scr
				%2:sgpr_32 = S_AND_B32 %1, 1, implicit-def $scc
				S_CMP_LG_U32 %2, 0, implicit-def $scc
				S_CBRANCH_SCC1 %bb.1, implicit $scc

				bb.2:
				S_ENDPGM 0
				...

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Change FLAT SADDR to VADDR form in moveToVALU
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 341004

llvm/lib/Target/AMDGPU/SIInstrInfo.h

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

llvm/lib/Target/AMDGPU/SIInstrInfo.td

llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Change FLAT SADDR to VADDR form in moveToVALUClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 341004

llvm/lib/Target/AMDGPU/SIInstrInfo.h

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

llvm/lib/Target/AMDGPU/SIInstrInfo.td

llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir

[AMDGPU] Change FLAT SADDR to VADDR form in moveToVALU
ClosedPublic