This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Fix FoldImmediate for 16 bit operand
ClosedPublic

Authored by rampitec on May 4 2020, 12:52 PM.

Download Raw Diff

Details

Reviewers

arsenm
vpykhtin

Commits

rG9ef166e65748: [AMDGPU] Fix FoldImmediate for 16 bit operand

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

rampitec created this revision. (May 4 2020, 12:52 PM)

Herald added a project: Restricted Project. · View Herald Transcript (May 4 2020, 12:52 PM)

Herald added subscribers: kerbowa, hiraditya, t-tye and 7 others. · View Herald Transcript

arsenm added inline comments. (May 4 2020, 1:45 PM)

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
2514	I think just checking op 0 is sufficient
llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir
2	Probably should generate these checks
13	Adding a use instruction for all of these wouldn't hurt in case peephole-opt ever decides to prune dead instructions
110	Needs test with skipped hi16

Addressed review comments.
Added processing of the hi16 source subregister; it is just a 16-bit shift of the immediate.

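Always drop the target flags on the source operand. SubReg and TargetFlags share the same storage field, and its meaning depends on the operand type. If I do not clear it, the leftover subregister index is reinterpreted as target flags once the operand is changed to an immediate, and I get something like: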

S_MOV_B32 target-flags(amdgpu-gotprel32-lo) 2048

Technically it could happen with 32-bit subregs as well, although it does not seem to happen in practice.

LGTM.

This revision is now accepted and ready to land. (May 5 2020, 1:46 AM)

Closed by commit rG9ef166e65748: [AMDGPU] Fix FoldImmediate for 16 bit operand (authored by rampitec). · Explain Why (May 5 2020, 10:47 AM)

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

lib/

Target/

AMDGPU/

SIInstrInfo.h

6 lines

SIInstrInfo.cpp

34 lines

test/

CodeGen/

AMDGPU/

fold_16bit_imm.mir

257 lines

Diff 262164

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Show First 20 Lines • Show All 821 Lines • ▼ Show 20 Lines	public:
}		}

/// This form should usually be preferred since it handles operands		/// This form should usually be preferred since it handles operands
/// with unknown register classes.		/// with unknown register classes.
unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {		unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
const MachineOperand &MO = MI.getOperand(OpNo);		const MachineOperand &MO = MI.getOperand(OpNo);
if (MO.isReg()) {		if (MO.isReg()) {
if (unsigned SubReg = MO.getSubReg()) {		if (unsigned SubReg = MO.getSubReg()) {
assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(		return RI.getSubRegIdxSize(SubReg) / 8;
MI.getParent()->getParent()->getRegInfo().
getRegClass(MO.getReg()), SubReg)) >= 32 &&
"Sub-dword subregs are not supported");
return RI.getNumChannelsFromSubReg(SubReg) * 4;
}		}
}		}
return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;		return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
}		}

/// Legalize the \p OpIndex operand of this instruction by inserting		/// Legalize the \p OpIndex operand of this instruction by inserting
/// a MOV. For example:		/// a MOV. For example:
/// ADD_I32_e32 VGPR0, 15		/// ADD_I32_e32 VGPR0, 15
▲ Show 20 Lines • Show All 335 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Show First 20 Lines • Show All 2,503 Lines • ▼ Show 20 Lines	bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);		const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
assert(ImmOp);		assert(ImmOp);
// FIXME: We could handle FrameIndex values here.		// FIXME: We could handle FrameIndex values here.
if (!ImmOp->isImm())		if (!ImmOp->isImm())
return false;		return false;

unsigned Opc = UseMI.getOpcode();		unsigned Opc = UseMI.getOpcode();
if (Opc == AMDGPU::COPY) {		if (Opc == AMDGPU::COPY) {
bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());		Register DstReg = UseMI.getOperand(0).getReg();
		Register SrcReg = UseMI.getOperand(1).getReg();
		bool Is16Bit = getOpSize(UseMI, 0) == 2;
		[Inline comment, marked done] arsenm: I think just checking op 0 is sufficient
		bool isVGPRCopy = RI.isVGPR(*MRI, DstReg);
unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;		unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
if (RI.isAGPR(*MRI, UseMI.getOperand(0).getReg())) {		APInt Imm(32, ImmOp->getImm());
if (!isInlineConstant(*ImmOp, AMDGPU::OPERAND_REG_INLINE_AC_INT32))
		if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16)
		Imm = Imm.ashr(16);

		if (RI.isAGPR(*MRI, DstReg)) {
		if (!isInlineConstant(Imm))
return false;		return false;
NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32;		NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32;
}		}

		if (Is16Bit) {
		if (isVGPRCopy)
		return false; // Do not clobber vgpr_hi16

		if (DstReg.isVirtual() &&
		UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
		return false;

		UseMI.getOperand(0).setSubReg(0);
		if (DstReg.isPhysical()) {
		DstReg = RI.get32BitRegister(DstReg);
		UseMI.getOperand(0).setReg(DstReg);
		}
		assert(SrcReg.isVirtual());
		}

UseMI.setDesc(get(NewOpc));		UseMI.setDesc(get(NewOpc));
UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());		UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
		UseMI.getOperand(1).setTargetFlags(0);
UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());		UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
return true;		return true;
}		}

if (Opc == AMDGPU::V_MAD_F32 \|\| Opc == AMDGPU::V_MAC_F32_e64 \|\|		if (Opc == AMDGPU::V_MAD_F32 \|\| Opc == AMDGPU::V_MAC_F32_e64 \|\|
Opc == AMDGPU::V_MAD_F16 \|\| Opc == AMDGPU::V_MAC_F16_e64 \|\|		Opc == AMDGPU::V_MAD_F16 \|\| Opc == AMDGPU::V_MAC_F16_e64 \|\|
Opc == AMDGPU::V_FMA_F32 \|\| Opc == AMDGPU::V_FMAC_F32_e64 \|\|		Opc == AMDGPU::V_FMA_F32 \|\| Opc == AMDGPU::V_FMAC_F32_e64 \|\|
Opc == AMDGPU::V_FMA_F16 \|\| Opc == AMDGPU::V_FMAC_F16_e64) {		Opc == AMDGPU::V_FMA_F16 \|\| Opc == AMDGPU::V_FMAC_F16_e64) {
▲ Show 20 Lines • Show All 4,383 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir

This file was added.

				# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
				# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass peephole-opt -o - %s \| FileCheck -check-prefix=GCN %s
				[Inline comment, marked done] arsenm: Probably should generate these checks

				---
				name: fold_simm_16_sub_to_lo
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_simm_16_sub_to_lo
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
				; GCN: [[COPY:%[0-9]+]]:sgpr_lo16 = COPY killed [[S_MOV_B32_]].lo16
				; GCN: SI_RETURN_TO_EPILOG [[COPY]]
				%0:sreg_32 = S_MOV_B32 2048
				[Inline comment, marked done] arsenm: Adding a use instruction for all of these wouldn't hurt in case peephole-opt ever decides to prune dead instructions
				%1:sgpr_lo16 = COPY killed %0.lo16
				SI_RETURN_TO_EPILOG %1

				...

				---
				name: fold_simm_16_sub_to_sub
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_simm_16_sub_to_sub
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
				; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
				; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
				%0:sreg_32 = S_MOV_B32 2048
				%1.lo16:sreg_32 = COPY killed %0.lo16
				SI_RETURN_TO_EPILOG %1

				...

				---
				name: fold_simm_16_sub_to_phys
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_simm_16_sub_to_phys
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
				; GCN: $sgpr0 = S_MOV_B32 2048
				; GCN: SI_RETURN_TO_EPILOG $sgpr0_lo16
				%0:sreg_32 = S_MOV_B32 2048
				$sgpr0_lo16 = COPY killed %0.lo16
				SI_RETURN_TO_EPILOG $sgpr0_lo16

				...

				---
				name: fold_aimm_16_sub_to_sub_2048
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_aimm_16_sub_to_sub_2048
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
				; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].lo16
				; GCN: SI_RETURN_TO_EPILOG %1
				%0:sreg_32 = S_MOV_B32 2048
				%1.lo16:agpr_32 = COPY killed %0.lo16
				SI_RETURN_TO_EPILOG %1

				...

				---
				name: fold_aimm_16_sub_to_sub_0
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_aimm_16_sub_to_sub_0
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
				; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec
				; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
				%0:sreg_32 = S_MOV_B32 0
				%1.lo16:agpr_32 = COPY killed %0.lo16
				SI_RETURN_TO_EPILOG %1

				...

				---
				name: fold_aimm_16_sub_to_phys
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_aimm_16_sub_to_phys
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
				; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 0, implicit $exec
				; GCN: SI_RETURN_TO_EPILOG $agpr0_lo16
				%0:sreg_32 = S_MOV_B32 0
				$agpr0_lo16 = COPY killed %0.lo16
				SI_RETURN_TO_EPILOG $agpr0_lo16

				...

				---
				name: fold_vimm_16_sub_to_lo
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_vimm_16_sub_to_lo
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
				; GCN: [[COPY:%[0-9]+]]:vgpr_lo16 = COPY killed [[S_MOV_B32_]].lo16
				; GCN: SI_RETURN_TO_EPILOG [[COPY]]
				%0:sreg_32 = S_MOV_B32 2048
				%1:vgpr_lo16 = COPY killed %0.lo16
				SI_RETURN_TO_EPILOG %1

				...

				---
				name: fold_vimm_16_sub_to_sub
				[Inline comment, marked done] arsenm: Needs test with skipped hi16
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_vimm_16_sub_to_sub
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
				; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16
				; GCN: SI_RETURN_TO_EPILOG %1
				%0:sreg_32 = S_MOV_B32 2048
				%1.lo16:vgpr_32 = COPY killed %0.lo16
				SI_RETURN_TO_EPILOG %1

				...

				---
				name: fold_vimm_16_sub_to_phys
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_vimm_16_sub_to_phys
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
				; GCN: $vgpr0_lo16 = COPY killed [[S_MOV_B32_]].lo16
				; GCN: SI_RETURN_TO_EPILOG $vgpr0_lo16
				%0:sreg_32 = S_MOV_B32 2048
				$vgpr0_lo16 = COPY killed %0.lo16
				SI_RETURN_TO_EPILOG $vgpr0_lo16

				...

				---
				name: fold_vimm_16_lo_to_hi
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_vimm_16_lo_to_hi
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
				; GCN: %1.hi16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16
				; GCN: SI_RETURN_TO_EPILOG %1
				%0:sreg_32 = S_MOV_B32 2048
				%1.hi16:vgpr_32 = COPY killed %0.lo16
				SI_RETURN_TO_EPILOG %1

				...

				---
				name: fold_vimm_16_hi_to_lo
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_vimm_16_hi_to_lo
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
				; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].hi16
				; GCN: SI_RETURN_TO_EPILOG %1
				%0:sreg_32 = S_MOV_B32 2048
				%1.lo16:vgpr_32 = COPY killed %0.hi16
				SI_RETURN_TO_EPILOG %1

				...

				---
				name: fold_simm_16_sub_to_sub_lo_to_hi
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_simm_16_sub_to_sub_lo_to_hi
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
				; GCN: %1.hi16:sreg_32 = COPY killed [[S_MOV_B32_]].lo16
				; GCN: SI_RETURN_TO_EPILOG %1
				%0:sreg_32 = S_MOV_B32 2048
				%1.hi16:sreg_32 = COPY killed %0.lo16
				SI_RETURN_TO_EPILOG %1

				...

				---
				name: fold_simm_16_sub_to_sub_hi_to_lo_2048
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_2048
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
				; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
				; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
				%0:sreg_32 = S_MOV_B32 2048
				%1.lo16:sreg_32 = COPY killed %0.hi16
				SI_RETURN_TO_EPILOG %1

				...

				---
				name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728
				; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
				; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
				%0:sreg_32 = S_MOV_B32 134217728
				%1.lo16:sreg_32 = COPY killed %0.hi16
				SI_RETURN_TO_EPILOG %1

				...

				---
				name: fold_aimm_16_sub_to_sub_hi_to_lo_2048
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_2048
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
				; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec
				; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
				%0:sreg_32 = S_MOV_B32 2048
				%1.lo16:agpr_32 = COPY killed %0.hi16
				SI_RETURN_TO_EPILOG %1

				...

				---
				name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65536
				; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 1, implicit $exec
				; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
				%0:sreg_32 = S_MOV_B32 65536
				%1.lo16:agpr_32 = COPY killed %0.hi16
				SI_RETURN_TO_EPILOG %1

				...

				---
				name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048
				body: \|
				bb.0:

				; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048
				; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728
				; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].hi16
				; GCN: SI_RETURN_TO_EPILOG %1
				%0:sreg_32 = S_MOV_B32 134217728
				%1.lo16:agpr_32 = COPY killed %0.hi16
				SI_RETURN_TO_EPILOG %1

				...