This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies
ClosedPublic

Authored by • tstellarAMD on Aug 11 2016, 5:08 AM.

Download Raw Diff

Details

Reviewers

Commits

rG2add8a114054: AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies
rL280744: AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies

Summary

I put this code here, because I want to re-use it in a few other places.
This supersedes some of the immediate folding code we have in SIFoldOperands.
I think the peephole optimizer is probably a better place for folding
immediates into copies, since it does some register coalescing at the same time.

This will also make it easier to transition SIFoldOperands into a smarter pass,
where it looks at all uses of an instruction at once to determine the optimal way to
fold operands. Right now, the pass just considers one operand at a time.

Diff Detail

Repository: rL LLVM

Event Timeline

• tstellarAMD updated this revision to Diff 67675.Aug 11 2016, 5:08 AM

• tstellarAMD retitled this revision from to AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies.

• tstellarAMD updated this object.

• tstellarAMD added a reviewer: arsenm.

• tstellarAMD added a subscriber: llvm-commits.

Herald added subscribers: kzhuravl, arsenm. · View Herald TranscriptAug 11 2016, 5:08 AM

arsenm added inline comments.Aug 11 2016, 11:31 AM

lib/Target/AMDGPU/SIInstrInfo.cpp
1225–1226 ↗	(On Diff #67675)	I don't think this supports modifiers. I don't think there's any reason to ever use the v_mov_b32 as vop3 so it can probably just be skipped

arsenm added inline comments.Sep 1 2016, 3:38 PM

lib/Target/AMDGPU/SIInstrFormats.td
296–297 ↗	(On Diff #67675)	This isn't necessary here since they aren't used for any SOP instructions here

Rebase and drop code for folding v_mov_b32_e64.

Herald added subscribers: nhaehnle, wdng. · View Herald TranscriptSep 6 2016, 9:32 AM

LGTM

This revision is now accepted and ready to land.Sep 6 2016, 9:37 AM

Closed by commit rL280744: AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies (authored by tstellar). · Explain WhySep 6 2016, 1:08 PM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

AMDGPU/

SIInstrInfo.cpp

29 lines

SOPInstructions.td

3 lines

Diff 70461

llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp

Show First 20 Lines • Show All 1,178 Lines • ▼ Show 20 Lines	static void removeModOperands(MachineInstr &MI) {
int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,		int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::src2_modifiers);		AMDGPU::OpName::src2_modifiers);

MI.RemoveOperand(Src2ModIdx);		MI.RemoveOperand(Src2ModIdx);
MI.RemoveOperand(Src1ModIdx);		MI.RemoveOperand(Src1ModIdx);
MI.RemoveOperand(Src0ModIdx);		MI.RemoveOperand(Src0ModIdx);
}		}

// TODO: Maybe this should be removed, with everything custom-folded in
// SIFoldOperands instead?
bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,		bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned Reg, MachineRegisterInfo *MRI) const {		unsigned Reg, MachineRegisterInfo *MRI) const {
if (!MRI->hasOneNonDBGUse(Reg))		if (!MRI->hasOneNonDBGUse(Reg))
return false;		return false;

unsigned Opc = UseMI.getOpcode();		unsigned Opc = UseMI.getOpcode();
		if (Opc == AMDGPU::COPY) {
		bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
		switch (DefMI.getOpcode()) {
		default:
		return false;
		case AMDGPU::S_MOV_B64:
		// TODO: We could fold 64-bit immediates, but this get compilicated
		// when there are sub-registers.
		return false;

		case AMDGPU::V_MOV_B32_e32:
		case AMDGPU::S_MOV_B32:
		break;
		}
		unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
		const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
		assert(ImmOp);
		// FIXME: We could handle FrameIndex values here.
		if (!ImmOp->isImm()) {
		return false;
		}
		UseMI.setDesc(get(NewOpc));
		UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
		UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
		return true;
		}

if (Opc == AMDGPU::V_MAD_F32 \|\| Opc == AMDGPU::V_MAC_F32_e64) {		if (Opc == AMDGPU::V_MAD_F32 \|\| Opc == AMDGPU::V_MAC_F32_e64) {
// Don't fold if we are using source modifiers. The new VOP2 instructions		// Don't fold if we are using source modifiers. The new VOP2 instructions
// don't have them.		// don't have them.
if (hasModifiersSet(UseMI, AMDGPU::OpName::src0_modifiers) \|\|		if (hasModifiersSet(UseMI, AMDGPU::OpName::src0_modifiers) \|\|
hasModifiersSet(UseMI, AMDGPU::OpName::src1_modifiers) \|\|		hasModifiersSet(UseMI, AMDGPU::OpName::src1_modifiers) \|\|
hasModifiersSet(UseMI, AMDGPU::OpName::src2_modifiers)) {		hasModifiersSet(UseMI, AMDGPU::OpName::src2_modifiers)) {
return false;		return false;
}		}
▲ Show 20 Lines • Show All 2,016 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td

Show All 19 Lines	class SOP1_Pseudo <string opName, dag outs, dag ins,
let SubtargetPredicate = isGCN;		let SubtargetPredicate = isGCN;

let mayLoad = 0;		let mayLoad = 0;
let mayStore = 0;		let mayStore = 0;
let hasSideEffects = 0;		let hasSideEffects = 0;
let SALU = 1;		let SALU = 1;
let SOP1 = 1;		let SOP1 = 1;
let SchedRW = [WriteSALU];		let SchedRW = [WriteSALU];
		let UseNamedOperandTable = 1;

string Mnemonic = opName;		string Mnemonic = opName;
string AsmOperands = asmOps;		string AsmOperands = asmOps;

bits<1> has_src0 = 1;		bits<1> has_src0 = 1;
bits<1> has_sdst = 1;		bits<1> has_sdst = 1;
}		}

▲ Show 20 Lines • Show All 1,059 Lines • ▼ Show 20 Lines
def S_CMPK_LE_U32_vi : SOPK_Real_vi <0x0D, S_CMPK_LE_U32>;		def S_CMPK_LE_U32_vi : SOPK_Real_vi <0x0D, S_CMPK_LE_U32>;
def S_ADDK_I32_vi : SOPK_Real_vi <0x0E, S_ADDK_I32>;		def S_ADDK_I32_vi : SOPK_Real_vi <0x0E, S_ADDK_I32>;
def S_MULK_I32_vi : SOPK_Real_vi <0x0F, S_MULK_I32>;		def S_MULK_I32_vi : SOPK_Real_vi <0x0F, S_MULK_I32>;
def S_CBRANCH_I_FORK_vi : SOPK_Real_vi <0x10, S_CBRANCH_I_FORK>;		def S_CBRANCH_I_FORK_vi : SOPK_Real_vi <0x10, S_CBRANCH_I_FORK>;
def S_GETREG_B32_vi : SOPK_Real_vi <0x11, S_GETREG_B32>;		def S_GETREG_B32_vi : SOPK_Real_vi <0x11, S_GETREG_B32>;
def S_SETREG_B32_vi : SOPK_Real_vi <0x12, S_SETREG_B32>;		def S_SETREG_B32_vi : SOPK_Real_vi <0x12, S_SETREG_B32>;
//def S_GETREG_REGRD_B32_vi : SOPK_Real_vi <0x13, S_GETREG_REGRD_B32>; // see pseudo for comments		//def S_GETREG_REGRD_B32_vi : SOPK_Real_vi <0x13, S_GETREG_REGRD_B32>; // see pseudo for comments
def S_SETREG_IMM32_B32_vi : SOPK_Real64<0x14, S_SETREG_IMM32_B32>,		def S_SETREG_IMM32_B32_vi : SOPK_Real64<0x14, S_SETREG_IMM32_B32>,
Select_vi<S_SETREG_IMM32_B32.Mnemonic>;		Select_vi<S_SETREG_IMM32_B32.Mnemonic>;
No newline at end of file