Diff 116708

lib/Target/AMDGPU/SIInsertSkips.cpp

Show First 20 Lines • Show All 126 Lines • ▼ Show 20 Lines	for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();

// When a uniform loop is inside non-uniform control flow, the branch		// When a uniform loop is inside non-uniform control flow, the branch
// leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken		// leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
// when EXEC = 0. We should skip the loop lest it becomes infinite.		// when EXEC = 0. We should skip the loop lest it becomes infinite.
if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ \|\|		if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ \|\|
I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)		I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
return true;		return true;

		// Since V_READFIRSTLANE and V_READLANE produce
		// scalar result there must be the scalar instruction
		// that consumes it. Thus this scalar instructions may
		rampitec Unsubmitted Not Done Reply Inline Actions The comment is misleading. Scalar instructions are executed even if exec = 0 (contrary to what the comment says). It is also unclear whether there must be a scalar instruction consuming the result of readlane, since an SGPR can be an operand of a vector instruction. rampitec: The comment is misleading. Scalar instructions are executed even if exec = 0 (contrary to what the…
		// not be executed when exec mask is zero.
		if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) \|\|
		(I->getOpcode() == AMDGPU::V_READLANE_B32))
		{
		return true;
		}

if (I->isInlineAsm()) {		if (I->isInlineAsm()) {
		rampitec Unsubmitted Not Done Reply Inline Actions What if a user is actually hoisted out of the block? What if it is the terminator that uses it? In both cases the users would then read undefined data. rampitec: What if a user is actually hoisted out of the block? What if it is the terminator that uses it? In both…
		rampitec Unsubmitted Not Done Reply Inline Actions It is a named operand, "src0"; using getOperand(1) is not desirable. Also, VReg is misleading: it reads like "virtual register". rampitec: It is a named operand, "src0"; using getOperand(1) is not desirable. Also, VReg is misleading: it…
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();		const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
const char *AsmStr = I->getOperand(0).getSymbolName();		const char *AsmStr = I->getOperand(0).getSymbolName();

		rampitec Unsubmitted Not Done Reply Inline Actions Interestingly, a user can also be the lane select operand of another v_readlane_b32. The subsequent v_readlane_b32 would be executed even with exec = 0 and would read undefined data. rampitec: Interestingly, a user can also be the lane select operand of another v_readlane_b32.
// inlineasm length estimate is number of bytes assuming the longest		// inlineasm length estimate is number of bytes assuming the longest
// instruction.		// instruction.
uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);		uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
NumInstr += MaxAsmSize / MAI->getMaxInstLength();		NumInstr += MaxAsmSize / MAI->getMaxInstLength();
} else {		} else {
++NumInstr;		++NumInstr;
}		}

		rampitecUnsubmitted Not Done Reply Inline Actions Post increment is broken here. rampitec: Post increment is broken here.
if (NumInstr >= SkipThreshold)		if (NumInstr >= SkipThreshold)
		rampitec Unsubmitted Not Done Reply Inline Actions Please follow the general and surrounding style: brace on the same line as the expression. rampitec: Please follow the general and surrounding style: brace on the same line as the expression.
return true;		return true;
}		}
}		}

return false;		return false;
}		}

bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {		bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
MachineBasicBlock &MBB = *MI.getParent();		MachineBasicBlock &MBB = *MI.getParent();
MachineFunction *MF = MBB.getParent();		MachineFunction *MF = MBB.getParent();

if (MF->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS \|\|		if (MF->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS \|\|
!shouldSkip(MBB, MBB.getParent()->back()))		!shouldSkip(MBB, MBB.getParent()->back()))
return false;		return false;

MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator());		MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator());

const DebugLoc &DL = MI.getDebugLoc();		const DebugLoc &DL = MI.getDebugLoc();
		rampitec Unsubmitted Not Done Reply Inline Actions That is broken. Remember that the register is physical. Not to mention that a whole-function scan is expensive. rampitec: That is broken. Remember that the register is physical. Not to mention that a whole-function scan is…

// If the exec mask is non-zero, skip the next two instructions		// If the exec mask is non-zero, skip the next two instructions
BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))		BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
.addMBB(&NextBB);		.addMBB(&NextBB);

MachineBasicBlock::iterator Insert = SkipBB->begin();		MachineBasicBlock::iterator Insert = SkipBB->begin();

// Exec mask is zero: Export to NULL target...		// Exec mask is zero: Export to NULL target...
▲ Show 20 Lines • Show All 176 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

Avoid predicated execution of the basic blocks containing scalar instructions
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 116708

lib/Target/AMDGPU/SIInsertSkips.cpp

This is an archive of the discontinued LLVM Phabricator instance.

Avoid predicated execution of the basic blocks containing scalar instructions
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 116708

lib/Target/AMDGPU/SIInsertSkips.cpp

Avoid predicated execution of the basic blocks containing scalar instructions
ClosedPublic