This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Teach the WQM pass about Whole Wavefront Mode and wqm_ctrl
AbandonedPublic

Authored by cwabbott on Jun 27 2017, 3:48 PM.

Download Raw Diff

Details

Reviewers

tstellar
arsenm

Summary

Whole Wavefront Mode (WWM) is required for implementing wavefront
reductions in non-uniform control flow, where we need to use the
inactive lanes to propagate intermediate results, so they need to be
enabled. We need to propagate WWM to uses (unless they're explicitly
marked as exact) so that they also propagate intermediate results
correctly. We do the analysis and exec mask munging during the WQM pass,
since we may get other, non-WWM instructions mixed in with the WWM
instructions, and we'd like to avoid the overhead of switching back and
forth if we can, but only the WQM pass has this information. For
simplicity, WWM is entirely block-local -- WWM is never active on entry
to or exit from a block, and WWM is not propagated to block inputs/outputs.
This means that computations involving WWM cannot involve control flow,
but we only ever plan to use WWM for a few limited purposes (none of
which involve control flow) anyways.

Right now, the only way to specify WWM is through a pseudo operand on
DPP instructions (added in a separate change). This commit also adds
support for WQM on DPP instructions through wqm_ctrl.

Diff Detail

Build Status

Buildable 7878
Build 7878: arc lint + arc unit

Event Timeline

cwabbott created this revision.Jun 27 2017, 3:48 PM

Harbormaster completed remote builds in B7688: Diff 104292.Jun 27 2017, 3:48 PM

Herald added subscribers: t-tye, tpr, dstuttard and 4 others. · View Herald TranscriptJun 27 2017, 3:48 PM

cwabbott added a parent revision: D34718: [AMDGPU] Add llvm.amdgpu.update.dpp intrinsic.Jun 27 2017, 5:25 PM

cwabbott removed a parent revision: D34718: [AMDGPU] Add llvm.amdgpu.update.dpp intrinsic.

cwabbott added a child revision: D34718: [AMDGPU] Add llvm.amdgpu.update.dpp intrinsic.

cwabbott added a parent revision: D34716: [AMDGPU] Add pseudo "old" and "wqm_mode" source to all DPP instructions.Jun 27 2017, 5:29 PM

cwabbott added a subscriber: llvm-commits.Jun 28 2017, 12:01 PM

cwabbott mentioned this in D34847: [AMDGPU] Mark all export instructions as DisableWQM.Jun 30 2017, 5:20 PM

Actually disable WWM on exit of a block.

Needs tests

Abandon in favor of D35524. While I based that change off of this one, things have changed so much that it's probably better to abandon this and do the review there.

Revision Contents

Path

Size

lib/

Target/

AMDGPU/

SIWholeQuadMode.cpp

113 lines

Diff 104968

lib/Target/AMDGPU/SIWholeQuadMode.cpp

Show First 20 Lines • Show All 78 Lines • ▼ Show 20 Lines
using namespace llvm;		using namespace llvm;

#define DEBUG_TYPE "si-wqm"		#define DEBUG_TYPE "si-wqm"

namespace {		namespace {

enum {		enum {
StateWQM = 0x1,		StateWQM = 0x1,
StateExact = 0x2,		StateWWM = 0x2,
		StateExact = 0x4,
};		};

struct PrintState {		struct PrintState {
public:		public:
int State;		int State;

explicit PrintState(int State) : State(State) {}		explicit PrintState(int State) : State(State) {}
};		};

static raw_ostream &operator<<(raw_ostream &OS, const PrintState &PS) {		static raw_ostream &operator<<(raw_ostream &OS, const PrintState &PS) {
if (PS.State & StateWQM)		if (PS.State & StateWQM)
OS << "WQM";		OS << "WQM";
if (PS.State & StateExact) {		if (PS.State & StateWWM) {
if (PS.State & StateWQM)		if (PS.State & StateWQM)
OS << '\|';		OS << '\|';
		OS << "WWM";
		}
		if (PS.State & StateExact) {
		if (PS.State & (StateWQM \| StateWWM))
		OS << '\|';
OS << "Exact";		OS << "Exact";
}		}

return OS;		return OS;
}		}

struct InstrInfo {		struct InstrInfo {
char Needs = 0;		char Needs = 0;
Show All 12 Lines	struct WorkItem {

WorkItem() = default;		WorkItem() = default;
WorkItem(MachineBasicBlock *MBB) : MBB(MBB) {}		WorkItem(MachineBasicBlock *MBB) : MBB(MBB) {}
WorkItem(MachineInstr *MI) : MI(MI) {}		WorkItem(MachineInstr *MI) : MI(MI) {}
};		};

class SIWholeQuadMode : public MachineFunctionPass {		class SIWholeQuadMode : public MachineFunctionPass {
private:		private:
		CallingConv::ID callingConv;
const SIInstrInfo *TII;		const SIInstrInfo *TII;
const SIRegisterInfo *TRI;		const SIRegisterInfo *TRI;
MachineRegisterInfo *MRI;		MachineRegisterInfo *MRI;
LiveIntervals *LIS;		LiveIntervals *LIS;

DenseMap<const MachineInstr *, InstrInfo> Instructions;		DenseMap<const MachineInstr *, InstrInfo> Instructions;
DenseMap<MachineBasicBlock *, BlockInfo> Blocks;		DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
SmallVector<MachineInstr *, 1> LiveMaskQueries;		SmallVector<MachineInstr *, 1> LiveMaskQueries;

void printInfo();		void printInfo();

void markInstruction(MachineInstr &MI, char Flag,		void markInstruction(MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist);		std::vector<WorkItem> &Worklist);
void markUsesWQM(const MachineInstr &MI, std::vector<WorkItem> &Worklist);		void markInstructionUses(const MachineInstr &MI, char Flag,
		std::vector<WorkItem> &Worklist);
char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);		char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);		void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);		void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
char analyzeFunction(MachineFunction &MF);		char analyzeFunction(MachineFunction &MF);

bool requiresCorrectState(const MachineInstr &MI) const;		bool requiresCorrectState(const MachineInstr &MI) const;

MachineBasicBlock::iterator saveSCC(MachineBasicBlock &MBB,		MachineBasicBlock::iterator saveSCC(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before);		MachineBasicBlock::iterator Before);
MachineBasicBlock::iterator		MachineBasicBlock::iterator
prepareInsertion(MachineBasicBlock &MBB, MachineBasicBlock::iterator First,		prepareInsertion(MachineBasicBlock &MBB, MachineBasicBlock::iterator First,
MachineBasicBlock::iterator Last, bool PreferLast,		MachineBasicBlock::iterator Last, bool PreferLast,
bool SaveSCC);		bool SaveSCC);
void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,		void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
unsigned SaveWQM, unsigned LiveMaskReg);		unsigned SaveWQM, unsigned LiveMaskReg);
void toWQM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,		void toWQM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
unsigned SavedWQM);		unsigned SavedWQM);
		void toWWM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
		unsigned SaveOrig);
		void fromWWM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
		unsigned SavedOrig);
void processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, bool isEntry);		void processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, bool isEntry);

void lowerLiveMaskQueries(unsigned LiveMaskReg);		void lowerLiveMaskQueries(unsigned LiveMaskReg);

public:		public:
static char ID;		static char ID;

SIWholeQuadMode() :		SIWholeQuadMode() :
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	for (const auto &BII : Blocks) {
}		}
}		}
}		}

void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,		void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist) {		std::vector<WorkItem> &Worklist) {
InstrInfo &II = Instructions[&MI];		InstrInfo &II = Instructions[&MI];

assert(Flag == StateWQM \|\| Flag == StateExact);		assert(Flag == StateWQM \|\| Flag == StateWWM \|\| Flag == StateExact);

// Ignore if the instruction is already marked. The typical case is that we		// Ignore if the instruction is already marked. The typical case is that we
// mark an instruction WQM multiple times, but for atomics it can happen that		// mark an instruction WQM multiple times, but for atomics it can happen that
// Flag is StateWQM, but Needs is already set to StateExact. In this case,		// Flag is StateWQM, but Needs is already set to StateExact. In this case,
// letting the atomic run in StateExact is correct as per the relevant specs.		// letting the atomic run in StateExact is correct as per the relevant specs.
if (II.Needs)		if (II.Needs)
return;		return;

II.Needs = Flag;		II.Needs = Flag;
Worklist.push_back(&MI);		Worklist.push_back(&MI);
}		}

/// Mark all instructions defining the uses in \p MI as WQM.		/// Mark all instructions defining the uses in \p MI with the given Flag.
void SIWholeQuadMode::markUsesWQM(const MachineInstr &MI,		void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist) {		std::vector<WorkItem> &Worklist) {
for (const MachineOperand &Use : MI.uses()) {		for (const MachineOperand &Use : MI.uses()) {
if (!Use.isReg() \|\| !Use.isUse())		if (!Use.isReg() \|\| !Use.isUse())
continue;		continue;

unsigned Reg = Use.getReg();		unsigned Reg = Use.getReg();

// Handle physical registers that we need to track; this is mostly relevant		// Handle physical registers that we need to track; this is mostly relevant
// for VCC, which can appear as the (implicit) input of a uniform branch,		// for VCC, which can appear as the (implicit) input of a uniform branch,
// e.g. when a loop counter is stored in a VGPR.		// e.g. when a loop counter is stored in a VGPR.
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {		if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
if (Reg == AMDGPU::EXEC)		if (Reg == AMDGPU::EXEC)
continue;		continue;

for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {		for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
LiveRange &LR = LIS->getRegUnit(*RegUnit);		LiveRange &LR = LIS->getRegUnit(*RegUnit);
const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();		const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();
if (!Value)		if (!Value)
continue;		continue;

// Since we're in machine SSA, we do not need to track physical		// Since we're in machine SSA, we do not need to track physical
// registers across basic blocks.		// registers across basic blocks.
if (Value->isPHIDef())		if (Value->isPHIDef())
continue;		continue;

markInstruction(*LIS->getInstructionFromIndex(Value->def), StateWQM,		markInstruction(*LIS->getInstructionFromIndex(Value->def), Flag,
Worklist);		Worklist);
}		}

continue;		continue;
}		}

for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))		for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))
markInstruction(DefMI, StateWQM, Worklist);		markInstruction(DefMI, Flag, Worklist);
}		}
}		}

// Scan instructions to determine which ones require an Exact execmask and		// Scan instructions to determine which ones require an Exact execmask and
// which ones seed WQM requirements.		// which ones seed WQM requirements.
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,		char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
std::vector<WorkItem> &Worklist) {		std::vector<WorkItem> &Worklist) {
char GlobalFlags = 0;		char GlobalFlags = 0;
bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");		bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");

for (auto BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) {		for (auto BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;		MachineBasicBlock &MBB = *BI;

for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {		for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
MachineInstr &MI = *II;		MachineInstr &MI = *II;
unsigned Opcode = MI.getOpcode();		unsigned Opcode = MI.getOpcode();
char Flags = 0;		char Flags = 0;

if (TII->isDS(Opcode)) {		if (TII->isDS(Opcode) && callingConv == CallingConv::AMDGPU_PS) {
Flags = StateWQM;		Flags = StateWQM;
		} else if (TII->isDPP(Opcode)) {
		unsigned wqmType = MI.getOperand(1).getImm();
		if (wqmType == 0) {
		Flags = StateExact;
		} else if (wqmType == 1) {
		Flags = StateWQM;
		} else {
		assert(wqmType == 2);
		Flags = StateWWM;
		}
} else if (TII->isWQM(Opcode)) {		} else if (TII->isWQM(Opcode)) {
// Sampling instructions don't need to produce results for all pixels		// Sampling instructions don't need to produce results for all pixels
// in a quad, they just require all inputs of a quad to have been		// in a quad, they just require all inputs of a quad to have been
// computed for derivatives.		// computed for derivatives.
markUsesWQM(MI, Worklist);		markInstructionUses(MI, StateWQM, Worklist);
GlobalFlags \|= StateWQM;		GlobalFlags \|= StateWQM;
continue;		continue;
} else if (TII->isDisableWQM(MI)) {		} else if (TII->isDisableWQM(MI)) {
Flags = StateExact;		Flags = StateExact;
} else {		} else {
if (Opcode == AMDGPU::SI_PS_LIVE) {		if (Opcode == AMDGPU::SI_PS_LIVE) {
LiveMaskQueries.push_back(&MI);		LiveMaskQueries.push_back(&MI);
} else if (WQMOutputs) {		} else if (WQMOutputs) {
Show All 37 Lines	void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
if ((II.OutNeeds & StateWQM) && !II.Needs &&		if ((II.OutNeeds & StateWQM) && !II.Needs &&
(MI.isTerminator() \|\| (TII->usesVM_CNT(MI) && MI.mayStore()))) {		(MI.isTerminator() \|\| (TII->usesVM_CNT(MI) && MI.mayStore()))) {
Instructions[&MI].Needs = StateWQM;		Instructions[&MI].Needs = StateWQM;
II.Needs = StateWQM;		II.Needs = StateWQM;
}		}

// Propagate to block level		// Propagate to block level
BI.Needs \|= II.Needs;		BI.Needs \|= II.Needs;
if ((BI.InNeeds \| II.Needs) != BI.InNeeds) {		if ((BI.InNeeds \| (II.Needs & ~StateWWM)) != BI.InNeeds) {
BI.InNeeds \|= II.Needs;		BI.InNeeds \|= (II.Needs & ~StateWWM);
Worklist.push_back(MBB);		Worklist.push_back(MBB);
}		}

// Propagate backwards within block		// Propagate backwards within block
if (MachineInstr *PrevMI = MI.getPrevNode()) {		if (MachineInstr *PrevMI = MI.getPrevNode()) {
char InNeeds = II.Needs \| II.OutNeeds;		char InNeeds = (II.Needs & ~StateWWM) \| II.OutNeeds;
if (!PrevMI->isPHI()) {		if (!PrevMI->isPHI()) {
InstrInfo &PrevII = Instructions[PrevMI];		InstrInfo &PrevII = Instructions[PrevMI];
if ((PrevII.OutNeeds \| InNeeds) != PrevII.OutNeeds) {		if ((PrevII.OutNeeds \| InNeeds) != PrevII.OutNeeds) {
PrevII.OutNeeds \|= InNeeds;		PrevII.OutNeeds \|= InNeeds;
Worklist.push_back(PrevMI);		Worklist.push_back(PrevMI);
}		}
}		}
}		}

// Propagate WQM flag to instruction inputs		// Propagate WQM and WWM flags to instruction inputs
assert(II.Needs != (StateWQM \| StateExact));		assert(II.Needs != (StateWQM \| StateExact));

if (II.Needs == StateWQM)		if (II.Needs != 0 && II.Needs != StateExact)
markUsesWQM(MI, Worklist);		markInstructionUses(MI, II.Needs, Worklist);
}		}

void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,		void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,
std::vector<WorkItem>& Worklist) {		std::vector<WorkItem>& Worklist) {
BlockInfo BI = Blocks[&MBB]; // Make a copy to prevent dangling references.		BlockInfo BI = Blocks[&MBB]; // Make a copy to prevent dangling references.

// Propagate through instructions		// Propagate through instructions
if (!MBB.empty()) {		if (!MBB.empty()) {
▲ Show 20 Lines • Show All 175 Lines • ▼ Show 20 Lines	if (SavedWQM) {
MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),		MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
AMDGPU::EXEC)		AMDGPU::EXEC)
.addReg(AMDGPU::EXEC);		.addReg(AMDGPU::EXEC);
}		}

LIS->InsertMachineInstrInMaps(*MI);		LIS->InsertMachineInstrInMaps(*MI);
}		}

		void SIWholeQuadMode::toWWM(MachineBasicBlock &MBB,
		MachineBasicBlock::iterator Before,
		unsigned SaveOrig)
		{
		MachineInstr *MI;

		assert(SaveOrig);
		MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), SaveOrig)
		.addReg(AMDGPU::EXEC);
		LIS->InsertMachineInstrInMaps(*MI);
		MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
		.addImm(-1);
		LIS->InsertMachineInstrInMaps(*MI);
		}

		void SIWholeQuadMode::fromWWM(MachineBasicBlock &MBB,
		MachineBasicBlock::iterator Before,
		unsigned SavedOrig)
		{
		MachineInstr *MI;

		assert(SavedOrig);
		MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::EXEC)
		.addReg(SavedOrig);
		LIS->InsertMachineInstrInMaps(*MI);
		}

void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,		void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
bool isEntry) {		bool isEntry) {
auto BII = Blocks.find(&MBB);		auto BII = Blocks.find(&MBB);
if (BII == Blocks.end())		if (BII == Blocks.end())
return;		return;

const BlockInfo &BI = BII->second;		const BlockInfo &BI = BII->second;

if (!(BI.InNeeds & StateWQM))
return;

// This is a non-entry block that is WQM throughout, so no need to do		// This is a non-entry block that is WQM throughout, so no need to do
// anything.		// anything.
if (!isEntry && !(BI.Needs & StateExact) && BI.OutNeeds != StateExact)		if (!isEntry && BI.Needs == StateWQM && BI.OutNeeds != StateExact)
return;		return;

DEBUG(dbgs() << "\nProcessing block BB#" << MBB.getNumber() << ":\n");		DEBUG(dbgs() << "\nProcessing block BB#" << MBB.getNumber() << ":\n");

unsigned SavedWQMReg = 0;		unsigned SavedWQMReg = 0;
		unsigned SavedNonWWMReg = 0;
bool WQMFromExec = isEntry;		bool WQMFromExec = isEntry;
char State = isEntry ? StateExact : StateWQM;		char State = isEntry ? StateExact : StateWQM;
		char NonWWMState = 0;

auto II = MBB.getFirstNonPHI(), IE = MBB.end();		auto II = MBB.getFirstNonPHI(), IE = MBB.end();
if (isEntry)		if (isEntry)
++II; // Skip the instruction that saves LiveMask		++II; // Skip the instruction that saves LiveMask

MachineBasicBlock::iterator First = IE;		MachineBasicBlock::iterator First = IE;
for (;;) {		for (;;) {
MachineBasicBlock::iterator Next = II;		MachineBasicBlock::iterator Next = II;
Show All 22 Lines	if (II != IE) {

++Next;		++Next;
} else {		} else {
// End of basic block		// End of basic block
if (BI.OutNeeds & StateWQM)		if (BI.OutNeeds & StateWQM)
Needs = StateWQM;		Needs = StateWQM;
else if (BI.OutNeeds == StateExact)		else if (BI.OutNeeds == StateExact)
Needs = StateExact;		Needs = StateExact;
		else
		Needs = StateWQM \| StateExact; // Make sure WWM is disabled
}		}

if (Needs) {		if (Needs) {
if (Needs != State) {		if (!(Needs & State)) {
MachineBasicBlock::iterator Before =		MachineBasicBlock::iterator Before =
prepareInsertion(MBB, First, II, Needs == StateWQM,		prepareInsertion(MBB, First, II, Needs == StateWQM,
Needs == StateExact \|\| WQMFromExec);		Needs == StateExact \|\| WQMFromExec);

if (Needs == StateExact) {		if (State == StateWWM) {
		State = NonWWMState;
		fromWWM(MBB, Before, SavedNonWWMReg);
		}

		if (Needs == StateWWM) {
		NonWWMState = State;
		SavedNonWWMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
		toWWM(MBB, Before, SavedNonWWMReg);
		} else if (Needs == StateExact) {
if (!WQMFromExec && (OutNeeds & StateWQM))		if (!WQMFromExec && (OutNeeds & StateWQM))
SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);		SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

toExact(MBB, Before, SavedWQMReg, LiveMaskReg);		toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
} else {		} else if (Needs == StateWQM) {
assert(WQMFromExec == (SavedWQMReg == 0));		assert(WQMFromExec == (SavedWQMReg == 0));

toWQM(MBB, Before, SavedWQMReg);		toWQM(MBB, Before, SavedWQMReg);

if (SavedWQMReg) {		if (SavedWQMReg) {
LIS->createAndComputeVirtRegInterval(SavedWQMReg);		LIS->createAndComputeVirtRegInterval(SavedWQMReg);
SavedWQMReg = 0;		SavedWQMReg = 0;
}		}
Show All 20 Lines	MachineInstr *Copy =
.addReg(LiveMaskReg);		.addReg(LiveMaskReg);

LIS->ReplaceMachineInstrInMaps(MI, Copy);		LIS->ReplaceMachineInstrInMaps(MI, Copy);
MI->eraseFromParent();		MI->eraseFromParent();
}		}
}		}

bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {		bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
if (MF.getFunction()->getCallingConv() != CallingConv::AMDGPU_PS)
return false;

Instructions.clear();		Instructions.clear();
Blocks.clear();		Blocks.clear();
LiveMaskQueries.clear();		LiveMaskQueries.clear();
		callingConv = MF.getFunction()->getCallingConv();

const SISubtarget &ST = MF.getSubtarget<SISubtarget>();		const SISubtarget &ST = MF.getSubtarget<SISubtarget>();

TII = ST.getInstrInfo();		TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();		TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();		MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();		LIS = &getAnalysis<LiveIntervals>();

char GlobalFlags = analyzeFunction(MF);		char GlobalFlags = analyzeFunction(MF);
if (!(GlobalFlags & StateWQM)) {		if (!(GlobalFlags & (StateWQM \| StateWWM))) {
lowerLiveMaskQueries(AMDGPU::EXEC);		lowerLiveMaskQueries(AMDGPU::EXEC);
return !LiveMaskQueries.empty();		return !LiveMaskQueries.empty();
}		}

// Store a copy of the original live mask when required		// Store a copy of the original live mask when required
unsigned LiveMaskReg = 0;		unsigned LiveMaskReg = 0;
{		{
MachineBasicBlock &Entry = MF.front();		MachineBasicBlock &Entry = MF.front();
Show All 37 Lines