This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies
ClosedPublic

Authored by • tstellarAMD on Aug 11 2016, 5:08 AM.

Download Raw Diff

Details

Reviewers

Commits

rG2add8a114054: AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies
rL280744: AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies

Summary

I put this code here, because I want to re-use it in a few other places.
This supersedes some of the immediate folding code we have in SIFoldOperands.
I think the peephole optimizer is probably a better place for folding
immediates into copies, since it does some register coalescing at the same time.

This will also make it easier to transition SIFoldOperands into a smarter pass,
where it looks at all uses of an instruction at once to determine the optimal way to
fold operands. Right now, the pass just considers one operand at a time.

Diff Detail

Event Timeline

• tstellarAMD updated this revision to Diff 67675.Aug 11 2016, 5:08 AM

• tstellarAMD retitled this revision from to AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies.

• tstellarAMD updated this object.

• tstellarAMD added a reviewer: arsenm.

• tstellarAMD added a subscriber: llvm-commits.

Herald added subscribers: kzhuravl, arsenm. · View Herald TranscriptAug 11 2016, 5:08 AM

arsenm added inline comments.Aug 11 2016, 11:31 AM

lib/Target/AMDGPU/SIInstrInfo.cpp
1225–1226	I don't think this supports modifiers. I don't think there's any reason to ever use the v_mov_b32 as vop3 so it can probably just be skipped

arsenm added inline comments.Sep 1 2016, 3:38 PM

lib/Target/AMDGPU/SIInstrFormats.td
296–297	This isn't necessary here since they aren't used for any SOP instructions here

Rebase and drop code for folding v_mov_b32_e64.

Herald added subscribers: nhaehnle, wdng. · View Herald TranscriptSep 6 2016, 9:32 AM

LGTM

This revision is now accepted and ready to land.Sep 6 2016, 9:37 AM

Closed by commit rL280744: AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies (authored by tstellar). · Explain WhySep 6 2016, 1:08 PM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

lib/

Target/

AMDGPU/

SIInstrFormats.td

2 lines

SIInstrInfo.cpp

32 lines

Diff 67675

lib/Target/AMDGPU/SIInstrFormats.td

	Show First 20 Lines • Show All 287 Lines • ▼ Show 20 Lines
	class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :			class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
	InstSI<outs, ins, asm, pattern> {			InstSI<outs, ins, asm, pattern> {
	let mayLoad = 0;			let mayLoad = 0;
	let mayStore = 0;			let mayStore = 0;
	let hasSideEffects = 0;			let hasSideEffects = 0;
	let isCodeGenOnly = 0;			let isCodeGenOnly = 0;
	let SALU = 1;			let SALU = 1;
	let SOP1 = 1;			let SOP1 = 1;

				let UseNamedOperandTable = 1;
				arsenmUnsubmitted Not Done Reply Inline Actions This isn't necessary here since they aren't used for any SOP instructions here arsenm: This isn't necessary here since they aren't used for any SOP instructions here
	}			}

	class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :			class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
	InstSI <outs, ins, asm, pattern> {			InstSI <outs, ins, asm, pattern> {

	let mayLoad = 0;			let mayLoad = 0;
	let mayStore = 0;			let mayStore = 0;
	let hasSideEffects = 0;			let hasSideEffects = 0;
	▲ Show 20 Lines • Show All 405 Lines • Show Last 20 Lines

lib/Target/AMDGPU/SIInstrInfo.cpp

Show First 20 Lines • Show All 1,204 Lines • ▼ Show 20 Lines	static void removeModOperands(MachineInstr &MI) {
int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,		int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::src2_modifiers);		AMDGPU::OpName::src2_modifiers);

MI.RemoveOperand(Src2ModIdx);		MI.RemoveOperand(Src2ModIdx);
MI.RemoveOperand(Src1ModIdx);		MI.RemoveOperand(Src1ModIdx);
MI.RemoveOperand(Src0ModIdx);		MI.RemoveOperand(Src0ModIdx);
}		}

// TODO: Maybe this should be removed, and we should custom fold everything in
// SIFoldOperands?
bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,		bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned Reg, MachineRegisterInfo *MRI) const {		unsigned Reg, MachineRegisterInfo *MRI) const {
if (!MRI->hasOneNonDBGUse(Reg))		if (!MRI->hasOneNonDBGUse(Reg))
return false;		return false;

unsigned Opc = UseMI.getOpcode();		unsigned Opc = UseMI.getOpcode();
		if (Opc == AMDGPU::COPY) {
		bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
		switch (DefMI.getOpcode()) {
		default:
		return false;
		case AMDGPU::V_MOV_B32_e64:
		if (hasModifiersSet(DefMI, AMDGPU::OpName::src0_modifiers))
		return false;
		arsenmUnsubmitted Not Done Reply Inline Actions I don't think this supports modifiers. I don't think there's any reason to ever use the v_mov_b32 as vop3 so it can probably just be skipped arsenm: I don't think this supports modifiers. I don't think there's any reason to ever use the…
		case AMDGPU::S_MOV_B64:
		// TODO: We could fold 64-bit immediates, but this gets complicated
		// when there are sub-registers.
		return false;

		case AMDGPU::V_MOV_B32_e32:
		case AMDGPU::S_MOV_B32:
		break;
		}
		unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
		const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
		assert(ImmOp);
		// FIXME: We could handle FrameIndex values here.
		if (!ImmOp->isImm()) {
		return false;
		}
		UseMI.setDesc(get(NewOpc));
		UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
		UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
		return true;
		}

if (Opc == AMDGPU::V_MAD_F32 \|\| Opc == AMDGPU::V_MAC_F32_e64) {		if (Opc == AMDGPU::V_MAD_F32 \|\| Opc == AMDGPU::V_MAC_F32_e64) {
// Don't fold if we are using source modifiers. The new VOP2 instructions		// Don't fold if we are using source modifiers. The new VOP2 instructions
// don't have them.		// don't have them.
if (hasModifiersSet(UseMI, AMDGPU::OpName::src0_modifiers) \|\|		if (hasModifiersSet(UseMI, AMDGPU::OpName::src0_modifiers) \|\|
hasModifiersSet(UseMI, AMDGPU::OpName::src1_modifiers) \|\|		hasModifiersSet(UseMI, AMDGPU::OpName::src1_modifiers) \|\|
hasModifiersSet(UseMI, AMDGPU::OpName::src2_modifiers)) {		hasModifiersSet(UseMI, AMDGPU::OpName::src2_modifiers)) {
return false;		return false;
}		}
▲ Show 20 Lines • Show All 1,989 Lines • Show Last 20 Lines