Diff 93218

llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.h

Show First 20 Lines • Show All 450 Lines • ▼ Show 20 Lines	public:
}		}

MachineBasicBlock *getBB() { return BB; }		MachineBasicBlock *getBB() { return BB; }
MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }		MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }		MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
LiveIntervals *getLIS() { return LIS; }		LiveIntervals *getLIS() { return LIS; }
MachineRegisterInfo *getMRI() { return &MRI; }		MachineRegisterInfo *getMRI() { return &MRI; }
const TargetRegisterInfo *getTRI() { return TRI; }		const TargetRegisterInfo *getTRI() { return TRI; }
		ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
SUnit& getEntrySU() { return EntrySU; }		SUnit& getEntrySU() { return EntrySU; }
SUnit& getExitSU() { return ExitSU; }		SUnit& getExitSU() { return ExitSU; }

void restoreSULinksLeft();		void restoreSULinksLeft();

template<typename _Iterator> void fillVgprSgprCost(_Iterator First,		template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
_Iterator End,		_Iterator End,
unsigned &VgprUsage,		unsigned &VgprUsage,
Show All 40 Lines

llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.cpp

Show First 20 Lines • Show All 656 Lines • ▼ Show 20 Lines	void SIScheduleBlockCreator::colorHighLatenciesAlone() {
for (unsigned i = 0, e = DAGSize; i != e; ++i) {		for (unsigned i = 0, e = DAGSize; i != e; ++i) {
SUnit *SU = &DAG->SUnits[i];		SUnit *SU = &DAG->SUnits[i];
if (DAG->IsHighLatencySU[SU->NodeNum]) {		if (DAG->IsHighLatencySU[SU->NodeNum]) {
CurrentColoring[SU->NodeNum] = NextReservedID++;		CurrentColoring[SU->NodeNum] = NextReservedID++;
}		}
}		}
}		}

		static bool
		hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
		for (const auto &PredDep : SU.Preds) {
		if (PredDep.getSUnit() == &FromSU &&
		PredDep.getKind() == llvm::SDep::Data)
		return true;
		}
		return false;
		}

void SIScheduleBlockCreator::colorHighLatenciesGroups() {		void SIScheduleBlockCreator::colorHighLatenciesGroups() {
unsigned DAGSize = DAG->SUnits.size();		unsigned DAGSize = DAG->SUnits.size();
unsigned NumHighLatencies = 0;		unsigned NumHighLatencies = 0;
unsigned GroupSize;		unsigned GroupSize;
unsigned Color = NextReservedID;		int Color = NextReservedID;
unsigned Count = 0;		unsigned Count = 0;
std::set<unsigned> FormingGroup;		std::set<unsigned> FormingGroup;

for (unsigned i = 0, e = DAGSize; i != e; ++i) {		for (unsigned i = 0, e = DAGSize; i != e; ++i) {
SUnit *SU = &DAG->SUnits[i];		SUnit *SU = &DAG->SUnits[i];
if (DAG->IsHighLatencySU[SU->NodeNum])		if (DAG->IsHighLatencySU[SU->NodeNum])
++NumHighLatencies;		++NumHighLatencies;
}		}

if (NumHighLatencies == 0)		if (NumHighLatencies == 0)
return;		return;

if (NumHighLatencies <= 6)		if (NumHighLatencies <= 6)
GroupSize = 2;		GroupSize = 2;
else if (NumHighLatencies <= 12)		else if (NumHighLatencies <= 12)
GroupSize = 3;		GroupSize = 3;
else		else
GroupSize = 4;		GroupSize = 4;

for (unsigned i = 0, e = DAGSize; i != e; ++i) {		for (unsigned SUNum : DAG->TopDownIndex2SU) {
SUnit *SU = &DAG->SUnits[i];		const SUnit &SU = DAG->SUnits[SUNum];
if (DAG->IsHighLatencySU[SU->NodeNum]) {		if (DAG->IsHighLatencySU[SU.NodeNum]) {
unsigned CompatibleGroup = true;		unsigned CompatibleGroup = true;
unsigned ProposedColor = Color;		int ProposedColor = Color;
		std::vector<int> AdditionalElements;

		// We don't want to put in the same block
		// two high latency instructions that depend
		// on each other.
		// One way would be to check canAddEdge
		// in both directions, but that currently is not
		// enough because there the high latency order is
		// enforced (via links).
		// Instead, look at the dependencies between the
		// high latency instructions and deduce if it is
		// a data dependency or not.
for (unsigned j : FormingGroup) {		for (unsigned j : FormingGroup) {
// TODO: Currently CompatibleGroup will always be false,		bool HasSubGraph;
// because the graph enforces the load order. This		std::vector<int> SubGraph;
// can be fixed, but as keeping the load order is often		// By construction (topological order), if SU and
// good for performance that causes a performance hit (both		// DAG->SUnits[j] are linked, DAG->SUnits[j] is necessarily
// the default scheduler and this scheduler).		// in the parent graph of SU.
// When this scheduler determines a good load order,		#ifndef NDEBUG
// this can be fixed.		SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
if (!DAG->canAddEdge(SU, &DAG->SUnits[j]) ||		HasSubGraph);
!DAG->canAddEdge(&DAG->SUnits[j], SU))		assert(!HasSubGraph);
		#endif
		SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
		HasSubGraph);
		if (!HasSubGraph)
		continue; // No dependencies between each other
		else if (SubGraph.size() > 5) {
		// Too many elements would be required to be added to the block.
		CompatibleGroup = false;
		break;
		}
		else {
		// Check the type of dependency
		for (unsigned k : SubGraph) {
		// If in the path to join the two instructions,
		// there is another high latency instruction,
		// or instructions colored for another block
		// abort the merge.
		if (DAG->IsHighLatencySU[k] ||
		(CurrentColoring[k] != ProposedColor &&
		CurrentColoring[k] != 0)) {
CompatibleGroup = false;		CompatibleGroup = false;
		break;
}		}
if (!CompatibleGroup || ++Count == GroupSize) {		// If one of the SU in the subgraph depends on the result of SU j,
		// there'll be a data dependency.
		if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
		CompatibleGroup = false;
		break;
		}
		}
		if (!CompatibleGroup)
		break;
		// Same check for the SU
		if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
		CompatibleGroup = false;
		break;
		}
		// Add all the required instructions to the block
		// These cannot live in another block (because they
		// depend (order dependency) on one of the
		// instruction in the block, and are required for the
		// high latency instruction we add.
		AdditionalElements.insert(AdditionalElements.end(),
		SubGraph.begin(), SubGraph.end());
		}
		}
		if (CompatibleGroup) {
		FormingGroup.insert(SU.NodeNum);
		for (unsigned j : AdditionalElements)
		CurrentColoring[j] = ProposedColor;
		CurrentColoring[SU.NodeNum] = ProposedColor;
		++Count;
		}
		// Found one incompatible instruction,
		// or has filled a big enough group.
		// -> start a new one.
		if (!CompatibleGroup) {
		FormingGroup.clear();
		Color = ++NextReservedID;
		ProposedColor = Color;
		FormingGroup.insert(SU.NodeNum);
		CurrentColoring[SU.NodeNum] = ProposedColor;
		Count = 0;
		} else if (Count == GroupSize) {
FormingGroup.clear();		FormingGroup.clear();
Color = ++NextReservedID;		Color = ++NextReservedID;
if (!CompatibleGroup) {
ProposedColor = Color;		ProposedColor = Color;
FormingGroup.insert(SU->NodeNum);
}
Count = 0;		Count = 0;
} else {
FormingGroup.insert(SU->NodeNum);
}		}
CurrentColoring[SU->NodeNum] = ProposedColor;
}		}
}		}
}		}

void SIScheduleBlockCreator::colorComputeReservedDependencies() {		void SIScheduleBlockCreator::colorComputeReservedDependencies() {
unsigned DAGSize = DAG->SUnits.size();		unsigned DAGSize = DAG->SUnits.size();
std::map<std::set<unsigned>, unsigned> ColorCombinations;		std::map<std::set<unsigned>, unsigned> ColorCombinations;

▲ Show 20 Lines • Show All 1,199 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/SI: Update SI scheduler colorHighLatenciesGroups
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 93218

llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.h

llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.cpp

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/SI: Update SI scheduler colorHighLatenciesGroups
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 93218

llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.h

llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.cpp

AMDGPU/SI: Update SI scheduler colorHighLatenciesGroups
ClosedPublic