This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Fix FoldImmediate for 16 bit operand
ClosedPublic

Authored by rampitec on May 4 2020, 12:52 PM.

Download Raw Diff

Details

Reviewers

arsenm
vpykhtin

Commits

rG9ef166e65748: [AMDGPU] Fix FoldImmediate for 16 bit operand

Diff Detail

Event Timeline

rampitec created this revision. · May 4 2020, 12:52 PM

Herald added a project: Restricted Project. · View Herald Transcript · May 4 2020, 12:52 PM

Herald added subscribers: kerbowa, hiraditya, t-tye and 7 others. · View Herald Transcript

arsenm added inline comments. · May 4 2020, 1:45 PM

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
2514	I think just checking op 0 is sufficient
llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir
1	Probably should generate these checks
12	Adding a use instruction for all of these wouldn't hurt in case peephole-opt ever decides to prune dead instructions
109	Needs test with skipped hi16

Addressed review comments.
Added processing of the hi16 src subreg; it is just an immediate shift.

Always drop target flags on the source operand. SubReg and TargetFlags share the same field, whose meaning changes depending on the operand type. If I do not drop it, I get something like:

S_MOV_B32 target-flags(amdgpu-gotprel32-lo) 2048

Technically it could happen with 32-bit subregs as well, although it does not seem to happen.

LGTM.

This revision is now accepted and ready to land. · May 5 2020, 1:46 AM

Closed by commit rG9ef166e65748: [AMDGPU] Fix FoldImmediate for 16 bit operand (authored by rampitec). · Explain Why · May 5 2020, 10:47 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

lib/

Target/

AMDGPU/

SIInstrInfo.h

6 lines

SIInstrInfo.cpp

26 lines

test/

CodeGen/

AMDGPU/

fold_16bit_imm.mir

109 lines

Diff 261912

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Show First 20 Lines • Show All 821 Lines • ▼ Show 20 Lines	public:
}		}

/// This form should usually be preferred since it handles operands		/// This form should usually be preferred since it handles operands
/// with unknown register classes.		/// with unknown register classes.
unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {		unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
const MachineOperand &MO = MI.getOperand(OpNo);		const MachineOperand &MO = MI.getOperand(OpNo);
if (MO.isReg()) {		if (MO.isReg()) {
if (unsigned SubReg = MO.getSubReg()) {		if (unsigned SubReg = MO.getSubReg()) {
assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(		return RI.getSubRegIdxSize(SubReg) / 8;
MI.getParent()->getParent()->getRegInfo().
getRegClass(MO.getReg()), SubReg)) >= 32 &&
"Sub-dword subregs are not supported");
return RI.getNumChannelsFromSubReg(SubReg) * 4;
}		}
}		}
return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;		return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
}		}

/// Legalize the \p OpIndex operand of this instruction by inserting		/// Legalize the \p OpIndex operand of this instruction by inserting
/// a MOV. For example:		/// a MOV. For example:
/// ADD_I32_e32 VGPR0, 15		/// ADD_I32_e32 VGPR0, 15
▲ Show 20 Lines • Show All 335 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Show First 20 Lines • Show All 2,503 Lines • ▼ Show 20 Lines	bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);		const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
assert(ImmOp);		assert(ImmOp);
// FIXME: We could handle FrameIndex values here.		// FIXME: We could handle FrameIndex values here.
if (!ImmOp->isImm())		if (!ImmOp->isImm())
return false;		return false;

unsigned Opc = UseMI.getOpcode();		unsigned Opc = UseMI.getOpcode();
if (Opc == AMDGPU::COPY) {		if (Opc == AMDGPU::COPY) {
bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());		Register DstReg = UseMI.getOperand(0).getReg();
		Register SrcReg = UseMI.getOperand(1).getReg();
		bool Is16Bit = getOpSize(UseMI, 0) == 2 \|\| getOpSize(UseMI, 1) == 2;
		arsenmUnsubmitted Done Reply Inline Actions I think just checking op 0 is sufficient arsenm: I think just checking op 0 is sufficient
		bool isVGPRCopy = RI.isVGPR(*MRI, DstReg);
unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;		unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
if (RI.isAGPR(*MRI, UseMI.getOperand(0).getReg())) {
		if (RI.isAGPR(*MRI, DstReg)) {
if (!isInlineConstant(*ImmOp, AMDGPU::OPERAND_REG_INLINE_AC_INT32))		if (!isInlineConstant(*ImmOp, AMDGPU::OPERAND_REG_INLINE_AC_INT32))
return false;		return false;
NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32;		NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32;
}		}

		if (Is16Bit) {
		if (isVGPRCopy)
		return false; // Do not clobber vgpr_hi16

		if (DstReg.isVirtual() &&
		UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
		return false;

		UseMI.getOperand(0).setSubReg(0);
		UseMI.getOperand(1).setSubReg(0);
		if (DstReg.isPhysical()) {
		DstReg = RI.get32BitRegister(DstReg);
		UseMI.getOperand(0).setReg(DstReg);
		}
		assert(SrcReg.isVirtual());
		}

UseMI.setDesc(get(NewOpc));		UseMI.setDesc(get(NewOpc));
UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());		UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());		UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
return true;		return true;
}		}

if (Opc == AMDGPU::V_MAD_F32 \|\| Opc == AMDGPU::V_MAC_F32_e64 \|\|		if (Opc == AMDGPU::V_MAD_F32 \|\| Opc == AMDGPU::V_MAC_F32_e64 \|\|
Opc == AMDGPU::V_MAD_F16 \|\| Opc == AMDGPU::V_MAC_F16_e64 \|\|		Opc == AMDGPU::V_MAD_F16 \|\| Opc == AMDGPU::V_MAC_F16_e64 \|\|
▲ Show 20 Lines • Show All 4,385 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir

This file was added.

				# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass peephole-opt -o - %s \| FileCheck -check-prefix=GCN %s
				arsenmUnsubmitted Done Reply Inline Actions Probably should generate these checks arsenm: Probably should generate these checks

				# GCN-LABEL: name: fold_simm_16_sub_to_lo
				# GCN: %1:sgpr_lo16 = COPY killed %0.lo16
				---
				name: fold_simm_16_sub_to_lo
				body: \|
				bb.0:

				%0:sreg_32 = S_MOV_B32 2048
				%1:sgpr_lo16 = COPY killed %0.lo16

				arsenmUnsubmitted Done Reply Inline Actions Adding a use instruction for all of these wouldn't hurt in case peephole-opt ever decides to prune dead instructions arsenm: Adding a use instruction for all of these wouldn't hurt in case peephole-opt ever decides to…
				...

				# GCN-LABEL: name: fold_simm_16_sub_to_sub
				# GCN: %1:sreg_32 = S_MOV_B32 2048
				---
				name: fold_simm_16_sub_to_sub
				body: \|
				bb.0:

				%0:sreg_32 = S_MOV_B32 2048
				%1.lo16:sreg_32 = COPY killed %0.lo16

				...

				# GCN-LABEL: name: fold_simm_16_sub_to_phys
				# GCN: $sgpr0 = S_MOV_B32 2048
				---
				name: fold_simm_16_sub_to_phys
				body: \|
				bb.0:

				%0:sreg_32 = S_MOV_B32 2048
				$sgpr0_lo16 = COPY killed %0.lo16

				...

				# GCN-LABEL: name: fold_aimm_16_sub_to_sub_2048
				# GCN: %1.lo16:agpr_32 = COPY killed %0.lo16
				---
				name: fold_aimm_16_sub_to_sub_2048
				body: \|
				bb.0:

				%0:sreg_32 = S_MOV_B32 2048
				%1.lo16:agpr_32 = COPY killed %0.lo16

				...

				# GCN-LABEL: name: fold_aimm_16_sub_to_sub_0
				# GCN: %1:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec
				---
				name: fold_aimm_16_sub_to_sub_0
				body: \|
				bb.0:

				%0:sreg_32 = S_MOV_B32 0
				%1.lo16:agpr_32 = COPY killed %0.lo16

				...

				# GCN-LABEL: name: fold_aimm_16_sub_to_phys
				# GCN: $agpr0 = V_ACCVGPR_WRITE_B32 0, implicit $exec
				---
				name: fold_aimm_16_sub_to_phys
				body: \|
				bb.0:

				%0:sreg_32 = S_MOV_B32 0
				$agpr0_lo16 = COPY killed %0.lo16

				...

				# GCN-LABEL: name: fold_vimm_16_sub_to_lo
				# GCN: %1:vgpr_lo16 = COPY killed %0.lo16
				---
				name: fold_vimm_16_sub_to_lo
				body: \|
				bb.0:

				%0:sreg_32 = S_MOV_B32 2048
				%1:vgpr_lo16 = COPY killed %0.lo16

				...

				# GCN-LABEL: name: fold_vimm_16_sub_to_sub
				# GCN: %1.lo16:vgpr_32 = COPY killed %0.lo16
				---
				name: fold_vimm_16_sub_to_sub
				body: \|
				bb.0:

				%0:sreg_32 = S_MOV_B32 2048
				%1.lo16:vgpr_32 = COPY killed %0.lo16

				...

				# GCN-LABEL: name: fold_vimm_16_sub_to_phys
				# GCN: $vgpr0_lo16 = COPY killed %0.lo16
				---
				name: fold_vimm_16_sub_to_phys
				body: \|
				bb.0:

				%0:sreg_32 = S_MOV_B32 2048
				$vgpr0_lo16 = COPY killed %0.lo16

				...
				arsenmUnsubmitted Done Reply Inline Actions Needs test with skipped hi16 arsenm: Needs test with skipped hi16