Diff 109787

llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp

Show First 20 Lines • Show All 48 Lines • ▼ Show 20 Lines
///		///
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "AMDGPU.h"		#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"		#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"		#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"		#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/DenseMap.h"		#include "llvm/ADT/DenseMap.h"
		#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"		#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"		#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/LiveInterval.h"		#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"		#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"		#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"		#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"		#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"		#include "llvm/CodeGen/MachineInstr.h"
Show All 38 Lines	if (PS.State & StateExact) {
OS << "Exact";		OS << "Exact";
}		}

return OS;		return OS;
}		}

struct InstrInfo {		struct InstrInfo {
char Needs = 0;		char Needs = 0;
		char Disabled = 0;
char OutNeeds = 0;		char OutNeeds = 0;
};		};

struct BlockInfo {		struct BlockInfo {
char Needs = 0;		char Needs = 0;
char InNeeds = 0;		char InNeeds = 0;
char OutNeeds = 0;		char OutNeeds = 0;
};		};
Show All 18 Lines	private:
DenseMap<MachineBasicBlock *, BlockInfo> Blocks;		DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
SmallVector<MachineInstr *, 1> LiveMaskQueries;		SmallVector<MachineInstr *, 1> LiveMaskQueries;
SmallVector<MachineInstr *, 4> LowerToCopyInstrs;		SmallVector<MachineInstr *, 4> LowerToCopyInstrs;

void printInfo();		void printInfo();

void markInstruction(MachineInstr &MI, char Flag,		void markInstruction(MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist);		std::vector<WorkItem> &Worklist);
void markUsesWQM(const MachineInstr &MI, std::vector<WorkItem> &Worklist);		void markInstructionUses(const MachineInstr &MI, char Flag,
		std::vector<WorkItem> &Worklist);
char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);		char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);		void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);		void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
char analyzeFunction(MachineFunction &MF);		char analyzeFunction(MachineFunction &MF);

bool requiresCorrectState(const MachineInstr &MI) const;		bool requiresCorrectState(const MachineInstr &MI) const;

MachineBasicBlock::iterator saveSCC(MachineBasicBlock &MBB,		MachineBasicBlock::iterator saveSCC(MachineBasicBlock &MBB,
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines	for (const auto &BII : Blocks) {
}		}
}		}
}		}

void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,		void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist) {		std::vector<WorkItem> &Worklist) {
InstrInfo &II = Instructions[&MI];		InstrInfo &II = Instructions[&MI];

assert(Flag == StateWQM || Flag == StateExact);		assert(Flag == StateWQM);

// Ignore if the instruction is already marked. The typical case is that we		// Remove any disabled states from the flag. The user that required it gets
// mark an instruction WQM multiple times, but for atomics it can happen that		// an undefined value in the helper lanes. For example, this can happen if
// Flag is StateWQM, but Needs is already set to StateExact. In this case,		// the result of an atomic is used by an instruction that requires WQM, where
// letting the atomic run in StateExact is correct as per the relevant specs.		// ignoring the request for WQM is correct as per the relevant specs.
if (II.Needs)		Flag &= ~II.Disabled;

		// Ignore if the flag is already encompassed by the existing needs, or we
		// just disabled everything.
		if ((II.Needs & Flag) == Flag)
return;		return;

II.Needs = Flag;		II.Needs |= Flag;
Worklist.push_back(&MI);		Worklist.push_back(&MI);
}		}

/// Mark all instructions defining the uses in \p MI as WQM.		/// Mark all instructions defining the uses in \p MI with \p Flag.
void SIWholeQuadMode::markUsesWQM(const MachineInstr &MI,		void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist) {		std::vector<WorkItem> &Worklist) {
		assert(Flag == StateWQM);
for (const MachineOperand &Use : MI.uses()) {		for (const MachineOperand &Use : MI.uses()) {
if (!Use.isReg() || !Use.isUse())		if (!Use.isReg() || !Use.isUse())
continue;		continue;

unsigned Reg = Use.getReg();		unsigned Reg = Use.getReg();

// Handle physical registers that we need to track; this is mostly relevant		// Handle physical registers that we need to track; this is mostly relevant
// for VCC, which can appear as the (implicit) input of a uniform branch,		// for VCC, which can appear as the (implicit) input of a uniform branch,
// e.g. when a loop counter is stored in a VGPR.		// e.g. when a loop counter is stored in a VGPR.
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {		if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
if (Reg == AMDGPU::EXEC)		if (Reg == AMDGPU::EXEC)
continue;		continue;

for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {		for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
LiveRange &LR = LIS->getRegUnit(*RegUnit);		LiveRange &LR = LIS->getRegUnit(*RegUnit);
const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();		const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();
if (!Value)		if (!Value)
continue;		continue;

// Since we're in machine SSA, we do not need to track physical		// Since we're in machine SSA, we do not need to track physical
// registers across basic blocks.		// registers across basic blocks.
if (Value->isPHIDef())		if (Value->isPHIDef())
continue;		continue;

markInstruction(*LIS->getInstructionFromIndex(Value->def), StateWQM,		markInstruction(*LIS->getInstructionFromIndex(Value->def), Flag,
Worklist);		Worklist);
}		}

continue;		continue;
}		}

for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))		for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))
markInstruction(DefMI, StateWQM, Worklist);		markInstruction(DefMI, Flag, Worklist);
}		}
}		}

// Scan instructions to determine which ones require an Exact execmask and		// Scan instructions to determine which ones require an Exact execmask and
// which ones seed WQM requirements.		// which ones seed WQM requirements.
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,		char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
std::vector<WorkItem> &Worklist) {		std::vector<WorkItem> &Worklist) {
char GlobalFlags = 0;		char GlobalFlags = 0;
bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");		bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");

for (auto BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) {		// We need to visit the basic blocks in reverse post-order so that we visit
MachineBasicBlock &MBB = *BI;		// defs before uses, in particular so that we don't accidentally mark an
		// instruction as needing e.g. WQM before visiting it and realizing it needs
		// WQM disabled.
		ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
		for (auto BI = RPOT.begin(), BE = RPOT.end(); BI != BE; ++BI) {
		MachineBasicBlock &MBB = **BI;
		BlockInfo &BBI = Blocks[&MBB];

for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {		for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
MachineInstr &MI = *II;		MachineInstr &MI = *II;
		InstrInfo &III = Instructions[&MI];
unsigned Opcode = MI.getOpcode();		unsigned Opcode = MI.getOpcode();
char Flags = 0;		char Flags = 0;

if (TII->isDS(Opcode)) {		if (TII->isDS(Opcode)) {
Flags = StateWQM;		Flags = StateWQM;
} else if (TII->isWQM(Opcode)) {		} else if (TII->isWQM(Opcode)) {
// Sampling instructions don't need to produce results for all pixels		// Sampling instructions don't need to produce results for all pixels
// in a quad, they just require all inputs of a quad to have been		// in a quad, they just require all inputs of a quad to have been
// computed for derivatives.		// computed for derivatives.
markUsesWQM(MI, Worklist);		markInstructionUses(MI, StateWQM, Worklist);
GlobalFlags |= StateWQM;		GlobalFlags |= StateWQM;
continue;		continue;
} else if (Opcode == AMDGPU::WQM) {		} else if (Opcode == AMDGPU::WQM) {
// The WQM intrinsic requires its output to have all the helper lanes		// The WQM intrinsic requires its output to have all the helper lanes
// correct, so we need it to be in WQM.		// correct, so we need it to be in WQM.
Flags = StateWQM;		Flags = StateWQM;
LowerToCopyInstrs.push_back(&MI);		LowerToCopyInstrs.push_back(&MI);
} else if (TII->isDisableWQM(MI)) {		} else if (TII->isDisableWQM(MI)) {
Flags = StateExact;		BBI.Needs |= StateExact;
		if (!(BBI.InNeeds & StateExact)) {
		BBI.InNeeds |= StateExact;
		Worklist.push_back(&MBB);
		}
		GlobalFlags |= StateExact;
		III.Disabled = StateWQM;
		continue;
} else {		} else {
if (Opcode == AMDGPU::SI_PS_LIVE) {		if (Opcode == AMDGPU::SI_PS_LIVE) {
LiveMaskQueries.push_back(&MI);		LiveMaskQueries.push_back(&MI);
} else if (WQMOutputs) {		} else if (WQMOutputs) {
// The function is in machine SSA form, which means that physical		// The function is in machine SSA form, which means that physical
// VGPRs correspond to shader inputs and outputs. Inputs are		// VGPRs correspond to shader inputs and outputs. Inputs are
// only used, outputs are only defined.		// only used, outputs are only defined.
for (const MachineOperand &MO : MI.defs()) {		for (const MachineOperand &MO : MI.defs()) {
Show All 25 Lines
void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,		void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
std::vector<WorkItem>& Worklist) {		std::vector<WorkItem>& Worklist) {
MachineBasicBlock *MBB = MI.getParent();		MachineBasicBlock *MBB = MI.getParent();
InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references		InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references
BlockInfo &BI = Blocks[MBB];		BlockInfo &BI = Blocks[MBB];

// Control flow-type instructions and stores to temporary memory that are		// Control flow-type instructions and stores to temporary memory that are
// followed by WQM computations must themselves be in WQM.		// followed by WQM computations must themselves be in WQM.
if ((II.OutNeeds & StateWQM) && !II.Needs &&		if ((II.OutNeeds & StateWQM) && !(II.Disabled & StateWQM) &&
(MI.isTerminator() || (TII->usesVM_CNT(MI) && MI.mayStore()))) {		(MI.isTerminator() || (TII->usesVM_CNT(MI) && MI.mayStore()))) {
Instructions[&MI].Needs = StateWQM;		Instructions[&MI].Needs = StateWQM;
II.Needs = StateWQM;		II.Needs = StateWQM;
}		}

// Propagate to block level		// Propagate to block level
BI.Needs |= II.Needs;		if (II.Needs & StateWQM) {
if ((BI.InNeeds | II.Needs) != BI.InNeeds) {		BI.Needs |= StateWQM;
BI.InNeeds |= II.Needs;		if (!(BI.InNeeds & StateWQM)) {
		BI.InNeeds |= StateWQM;
Worklist.push_back(MBB);		Worklist.push_back(MBB);
}		}
		}

// Propagate backwards within block		// Propagate backwards within block
if (MachineInstr *PrevMI = MI.getPrevNode()) {		if (MachineInstr *PrevMI = MI.getPrevNode()) {
char InNeeds = II.Needs | II.OutNeeds;		char InNeeds = II.Needs | II.OutNeeds;
if (!PrevMI->isPHI()) {		if (!PrevMI->isPHI()) {
InstrInfo &PrevII = Instructions[PrevMI];		InstrInfo &PrevII = Instructions[PrevMI];
if ((PrevII.OutNeeds | InNeeds) != PrevII.OutNeeds) {		if ((PrevII.OutNeeds | InNeeds) != PrevII.OutNeeds) {
PrevII.OutNeeds |= InNeeds;		PrevII.OutNeeds |= InNeeds;
Worklist.push_back(PrevMI);		Worklist.push_back(PrevMI);
}		}
}		}
}		}

// Propagate WQM flag to instruction inputs		// Propagate WQM flag to instruction inputs
assert(II.Needs != (StateWQM | StateExact));		assert(!(II.Needs & StateExact));

if (II.Needs == StateWQM)		if (II.Needs != 0)
markUsesWQM(MI, Worklist);		markInstructionUses(MI, II.Needs, Worklist);
}		}

void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,		void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,
std::vector<WorkItem>& Worklist) {		std::vector<WorkItem>& Worklist) {
BlockInfo BI = Blocks[&MBB]; // Make a copy to prevent dangling references.		BlockInfo BI = Blocks[&MBB]; // Make a copy to prevent dangling references.

// Propagate through instructions		// Propagate through instructions
if (!MBB.empty()) {		if (!MBB.empty()) {
▲ Show 20 Lines • Show All 204 Lines • ▼ Show 20 Lines	void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,

auto II = MBB.getFirstNonPHI(), IE = MBB.end();		auto II = MBB.getFirstNonPHI(), IE = MBB.end();
if (isEntry)		if (isEntry)
++II; // Skip the instruction that saves LiveMask		++II; // Skip the instruction that saves LiveMask

MachineBasicBlock::iterator First = IE;		MachineBasicBlock::iterator First = IE;
for (;;) {		for (;;) {
MachineBasicBlock::iterator Next = II;		MachineBasicBlock::iterator Next = II;
char Needs = 0;		char Needs = StateExact | StateWQM;
char OutNeeds = 0;		char OutNeeds = 0;

if (First == IE)		if (First == IE)
First = II;		First = II;

if (II != IE) {		if (II != IE) {
MachineInstr &MI = *II;		MachineInstr &MI = *II;

if (requiresCorrectState(MI)) {		if (requiresCorrectState(MI)) {
auto III = Instructions.find(&MI);		auto III = Instructions.find(&MI);
if (III != Instructions.end()) {		if (III != Instructions.end()) {
Needs = III->second.Needs;		if (III->second.Needs & StateWQM)
		Needs = StateWQM;
		else
		Needs &= ~III->second.Disabled;
OutNeeds = III->second.OutNeeds;		OutNeeds = III->second.OutNeeds;
}		}
}		}

if (MI.isTerminator() && !Needs && OutNeeds == StateExact)		if (MI.isTerminator() && OutNeeds == StateExact)
Needs = StateExact;		Needs = StateExact;

if (MI.getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)		if (MI.getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)
MI.getOperand(3).setImm(1);		MI.getOperand(3).setImm(1);

++Next;		++Next;
} else {		} else {
// End of basic block		// End of basic block
if (BI.OutNeeds & StateWQM)		if (BI.OutNeeds & StateWQM)
Needs = StateWQM;		Needs = StateWQM;
else if (BI.OutNeeds == StateExact)		else if (BI.OutNeeds == StateExact)
Needs = StateExact;		Needs = StateExact;
		else
		Needs = StateWQM | StateExact;
}		}

if (Needs) {		if (!(Needs & State)) {
if (Needs != State) {
MachineBasicBlock::iterator Before =		MachineBasicBlock::iterator Before =
prepareInsertion(MBB, First, II, Needs == StateWQM,		prepareInsertion(MBB, First, II, Needs == StateWQM,
Needs == StateExact || WQMFromExec);		Needs == StateExact || WQMFromExec);

if (Needs == StateExact) {		if (Needs == StateExact) {
if (!WQMFromExec && (OutNeeds & StateWQM))		if (!WQMFromExec && (OutNeeds & StateWQM))
SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);		SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

toExact(MBB, Before, SavedWQMReg, LiveMaskReg);		toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
		State = StateExact;
} else {		} else {
		assert(Needs == StateWQM);
assert(WQMFromExec == (SavedWQMReg == 0));		assert(WQMFromExec == (SavedWQMReg == 0));

toWQM(MBB, Before, SavedWQMReg);		toWQM(MBB, Before, SavedWQMReg);

if (SavedWQMReg) {		if (SavedWQMReg) {
LIS->createAndComputeVirtRegInterval(SavedWQMReg);		LIS->createAndComputeVirtRegInterval(SavedWQMReg);
SavedWQMReg = 0;		SavedWQMReg = 0;
}		}
		State = StateWQM;
}		}

State = Needs;		First = IE;
}		}

		if (Needs != (StateExact | StateWQM))
First = IE;		First = IE;
}

if (II == IE)		if (II == IE)
break;		break;
II = Next;		II = Next;
}		}
}		}

void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {		void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
▲ Show 20 Lines • Show All 82 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] refactor WQM pass in preparation for WWM (NFCI)
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 109787

llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] refactor WQM pass in preparation for WWM (NFCI)
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 109787

llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp

[AMDGPU] refactor WQM pass in preparation for WWM (NFCI)
ClosedPublic