llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
//===----------------------------------------------------------------------===//

#include "AMDGPUIGroupLP.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

namespace {
static cl::opt<Optional<unsigned>>
    LDRGroupMaxSize("amdgpu-igrouplp-ldr-group-size", cl::init(None),
                    cl::Hidden,
                    cl::desc("The maximum number of instructions to include "
                             "in lds/gds read group."));

static cl::opt<Optional<unsigned>>
    LDWGroupMaxSize("amdgpu-igrouplp-ldw-group-size", cl::init(None),
                    cl::Hidden,
                    cl::desc("The maximum number of instructions to include "
                             "in lds/gds write group."));
// Components of the mask that determines which instruction types may be
// classified into a SchedGroup.
enum class SchedGroupMask {
  NONE = 0u,
  ALU = 1u << 0,
  VALU = 1u << 1,
  SALU = 1u << 2,
  MFMA = 1u << 3,
  VMEM = 1u << 4,
  VMEM_READ = 1u << 5,
  VMEM_WRITE = 1u << 6,
  DS = 1u << 7,
  DS_READ = 1u << 8,
  DS_WRITE = 1u << 9,
  ALL = ALU | VALU | SALU | MFMA | VMEM | VMEM_READ | VMEM_WRITE | DS |
        DS_READ | DS_WRITE,
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};
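// Illustrative sketch (assumption, not in the patch): LLVM_MARK_AS_BITMASK_ENUM
// gives SchedGroupMask the usual bitwise operators, so masks compose and test
// like plain bit flags:
//   SchedGroupMask M = SchedGroupMask::DS_READ | SchedGroupMask::MFMA;
//   bool HasMFMA = (M & SchedGroupMask::MFMA) != SchedGroupMask::NONE; // true
//   bool HasVMEM = (M & SchedGroupMask::VMEM) != SchedGroupMask::NONE; // false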
// Classify instructions into groups to enable fine tuned control over the
// scheduler. These groups may be more specific than current SchedModel
// instruction classes.
class SchedGroup {
private:
  // Mask that defines which instruction types can be classified into this
  // SchedGroup. The instruction types correspond to the mask from SCHED_BARRIER
  // and SCHED_GROUP_BARRIER.
  SchedGroupMask SGMask;

  // Maximum number of SUnits that can be added to this group.
  Optional<unsigned> MaxSize;

  // SchedGroups will only synchronize with other SchedGroups that have the same
  // SyncID.
  int SyncID = 0;

  // Collection of SUnits that are classified as members of this group.
  SmallVector<SUnit *, 32> Collection;

  ScheduleDAGInstrs *DAG;
  const SIInstrInfo *TII;

  // Try to add an edge from SU A to SU B.
  bool tryAddEdge(SUnit *A, SUnit *B);

  // Use SGMask to determine whether we can classify MI as a member of this
  // SchedGroup object.
  bool canAddMI(const MachineInstr &MI) const;

  // Returns true if SU can be added to this SchedGroup.
  bool canAddSU(SUnit &SU) const;

  // Returns true if no more instructions may be added to this group.
  bool isFull() const;

  // Add SU to the SchedGroup.
  void add(SUnit &SU) {
    LLVM_DEBUG(dbgs() << "For SchedGroup with mask "
                      << format_hex((int)SGMask, 10, true) << " adding "
                      << *SU.getInstr());
    Collection.push_back(&SU);
  }

public:
  // Add DAG dependencies from all SUnits in this SchedGroup and this SU. If
  // MakePred is true, SU will be a predecessor of the SUnits in this
  // SchedGroup, otherwise SU will be a successor.
  void link(SUnit &SU, bool MakePred = false);

  // Add DAG dependencies from all SUnits in this SchedGroup and this SU. Use
  // the predicate to determine whether SU should be a predecessor (P = true)
  // or a successor (P = false) of this SchedGroup.
  void link(SUnit &SU, function_ref<bool(const SUnit *A, const SUnit *B)> P);

  // Add DAG dependencies such that SUnits in this group shall be ordered
  // before SUnits in OtherGroup.
  void link(SchedGroup &OtherGroup);

  // Identify and add all relevant SUs from the DAG to this SchedGroup.
  void initSchedGroup();

  // Add instructions to the SchedGroup bottom up starting from RIter.
  // ConflictedInstrs is a set of instructions that should not be added to the
  // SchedGroup even when the other conditions for adding it are satisfied.
  // RIter will be added to the SchedGroup as well, and dependencies will be
  // added so that RIter will always be scheduled at the end of the group.
  void initSchedGroup(std::vector<SUnit>::reverse_iterator RIter,
                      DenseSet<SUnit *> &ConflictedInstrs);

  int getSyncID() { return SyncID; }

  SchedGroup(SchedGroupMask SGMask, Optional<unsigned> MaxSize,
             ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
      : SGMask(SGMask), MaxSize(MaxSize), DAG(DAG), TII(TII) {}

  SchedGroup(SchedGroupMask SGMask, Optional<unsigned> MaxSize, int SyncID,
             ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
      : SGMask(SGMask), MaxSize(MaxSize), SyncID(SyncID), DAG(DAG), TII(TII) {}
};
class IGroupLPDAGMutation : public ScheduleDAGMutation {
public:
  const SIInstrInfo *TII;
  ScheduleDAGMI *DAG;

  IGroupLPDAGMutation() = default;
  void apply(ScheduleDAGInstrs *DAGInstrs) override;
};

// DAG mutation that coordinates with the SCHED_BARRIER instruction and
// corresponding builtin. The mutation adds edges from specific instruction
// classes determined by the SCHED_BARRIER mask so that they cannot be
// scheduled around the SCHED_BARRIER.
class SchedBarrierDAGMutation : public ScheduleDAGMutation {
private:
  const SIInstrInfo *TII;
  ScheduleDAGMI *DAG;

  // Organize lists of SchedGroups by their SyncID. SchedGroups /
  // SCHED_GROUP_BARRIERs with different SyncIDs will have no edges added
  // between them.
  DenseMap<int, SmallVector<SchedGroup, 4>> SyncedSchedGroupsMap;

  // Used to track instructions that have already been added to a different
  // SchedGroup with the same SyncID.
  DenseMap<int, DenseSet<SUnit *>> SyncedInstrsMap;

  // Add DAG edges that enforce SCHED_BARRIER ordering.
  void addSchedBarrierEdges(SUnit &SU);

  // Use a SCHED_BARRIER's mask to identify instruction SchedGroups that should
  // not be reordered across the SCHED_BARRIER. This is used for the base
  // SCHED_BARRIER, and not SCHED_GROUP_BARRIER. The difference is that
  // SCHED_BARRIER will always block all instructions that can be classified
  // into a particular SchedClass, whereas SCHED_GROUP_BARRIER has a fixed size
  // and may only synchronize with some SchedGroups. Returns the inverse of
  // Mask. SCHED_BARRIER's mask describes which instruction types should be
  // allowed to be scheduled across it. Invert the mask to get the
  // SchedGroupMask of instructions that should be barred.
  SchedGroupMask invertSchedBarrierMask(SchedGroupMask Mask) const;

jrbyrnes: I find it confusing that SchedBarrier uses inversion while SchedGroupBarrier doesn't.

  // Create SchedGroups for a SCHED_GROUP_BARRIER.
  void initSchedGroupBarrier(std::vector<SUnit>::reverse_iterator RIter);

  // Add DAG edges that try to enforce ordering defined by SCHED_GROUP_BARRIER
  // instructions.
  void addSchedGroupBarrierEdges();

public:
  void apply(ScheduleDAGInstrs *DAGInstrs) override;

  SchedBarrierDAGMutation() = default;
};
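// Usage sketch (hedged): the corresponding clang builtins take these mask bits
// as immediates. Assuming the bit assignments in SchedGroupMask above, a
// kernel could request a "2 DS reads, 1 MFMA, 2 DS writes" pipeline within
// sync group 0 roughly as follows; the authoritative mask values live in
// IntrinsicsAMDGPU.td, not here:
//   __builtin_amdgcn_sched_group_barrier(0x100, 2, 0); // 2 x DS_READ
//   __builtin_amdgcn_sched_group_barrier(0x008, 1, 0); // 1 x MFMA
//   __builtin_amdgcn_sched_group_barrier(0x200, 2, 0); // 2 x DS_WRITE
// A plain __builtin_amdgcn_sched_barrier(0) blocks all instruction types from
// crossing it.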
bool SchedGroup::tryAddEdge(SUnit *A, SUnit *B) {
  if (A != B && DAG->canAddEdge(B, A)) {
    DAG->addEdge(B, SDep(A, SDep::Artificial));
    LLVM_DEBUG(dbgs() << "Adding edge...\n"
                      << "from: SU(" << A->NodeNum << ") " << *A->getInstr()
                      << "to: SU(" << B->NodeNum << ") " << *B->getInstr());
    return true;
  }
  return false;
}
bool SchedGroup::canAddMI(const MachineInstr &MI) const {
  bool Result = false;
  if (MI.isMetaInstruction() || MI.getOpcode() == AMDGPU::SCHED_BARRIER ||
      MI.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER)
    Result = false;

  else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) &&
           (TII->isVALU(MI) || TII->isMFMA(MI) || TII->isSALU(MI)))
    Result = true;

  else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) &&
           TII->isVALU(MI) && !TII->isMFMA(MI))
    Result = true;

  else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) &&
           TII->isSALU(MI))
    Result = true;

  else if (((SGMask & SchedGroupMask::MFMA) != SchedGroupMask::NONE) &&
           TII->isMFMA(MI))
    Result = true;

  else if (((SGMask & SchedGroupMask::VMEM) != SchedGroupMask::NONE) &&
           (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
    Result = true;

  else if (((SGMask & SchedGroupMask::VMEM_READ) != SchedGroupMask::NONE) &&
           MI.mayLoad() &&
           (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
    Result = true;

  else if (((SGMask & SchedGroupMask::VMEM_WRITE) != SchedGroupMask::NONE) &&
           MI.mayStore() &&
           (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
    Result = true;

  else if (((SGMask & SchedGroupMask::DS) != SchedGroupMask::NONE) &&
           TII->isDS(MI))
    Result = true;

  else if (((SGMask & SchedGroupMask::DS_READ) != SchedGroupMask::NONE) &&
           MI.mayLoad() && TII->isDS(MI))
    Result = true;

  else if (((SGMask & SchedGroupMask::DS_WRITE) != SchedGroupMask::NONE) &&
           MI.mayStore() && TII->isDS(MI))
    Result = true;

  LLVM_DEBUG(
      dbgs() << "For SchedGroup with mask " << format_hex((int)SGMask, 10, true)
             << (Result ? " could classify " : " unable to classify ") << MI);

  return Result;
}
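// Worked example (illustrative): for a ds_read_b64 (TII->isDS() and
// MI.mayLoad() both true), canAddMI returns true for groups whose mask
// includes ALL, DS, or DS_READ, and false for VMEM_* or *ALU groups. Note the
// chain is first-match: a mask of DS | DS_WRITE still accepts the load via the
// DS test before the DS_WRITE test is reached.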
void SchedGroup::link(SUnit &SU, bool MakePred) {
  for (auto A : Collection) {
    SUnit *B = &SU;
    if (MakePred)
      std::swap(A, B);

    tryAddEdge(A, B);
  }
}

void SchedGroup::link(SUnit &SU,
                      function_ref<bool(const SUnit *A, const SUnit *B)> P) {
  for (auto A : Collection) {
    SUnit *B = &SU;
    if (P(A, B))
      std::swap(A, B);

    tryAddEdge(A, B);
  }
}

void SchedGroup::link(SchedGroup &OtherGroup) {
  for (auto B : OtherGroup.Collection)
    link(*B);
}
bool SchedGroup::isFull() const {
  return MaxSize && Collection.size() >= *MaxSize;
}

jrbyrnes: As in the update to IGroupLP.cpp in trunk, seems like we are not supposed to use hasValue.

uabelho: Compiling with gcc, I get a warning that this function is unused.
I'm wondering, there seems to…

kerbowa: Removed in 7898426a72, thanks!

bool SchedGroup::canAddSU(SUnit &SU) const {
  MachineInstr &MI = *SU.getInstr();
  if (MI.getOpcode() != TargetOpcode::BUNDLE)
    return canAddMI(MI);

  // Special case for bundled MIs.
  const MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::instr_iterator B = MI.getIterator(), E = ++B;
  while (E != MBB->end() && E->isBundledWithPred())
    ++E;

  // Return true if all of the bundled MIs can be added to this group.
  return std::all_of(B, E, [this](MachineInstr &MI) { return canAddMI(MI); });
}
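// Illustration (assumption about the bundle contents): if a BUNDLE wraps two
// ds_write instructions, a DS_WRITE group accepts it because every bundled MI
// passes canAddMI; a bundle mixing a ds_write with a global_load would be
// rejected, since std::all_of requires all members to match the mask.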
void SchedGroup::initSchedGroup() {
  for (auto &SU : DAG->SUnits) {
    if (isFull())
      break;

    if (canAddSU(SU))
      add(SU);
  }
}
static bool canFitIntoPipeline(SUnit &SU, ScheduleDAGInstrs *DAG,
                               DenseSet<SUnit *> &ConflictedInstrs) {
  return std::all_of(
      ConflictedInstrs.begin(), ConflictedInstrs.end(),
      [DAG, &SU](SUnit *SuccSU) { return DAG->canAddEdge(SuccSU, &SU); });
}

void SchedGroup::initSchedGroup(std::vector<SUnit>::reverse_iterator RIter,
                                DenseSet<SUnit *> &ConflictedInstrs) {
  SUnit &InitSU = *RIter;
  for (auto E = DAG->SUnits.rend(); RIter != E; ++RIter) {
    auto &SU = *RIter;
    if (isFull())
      break;

    if (canAddSU(SU) && !ConflictedInstrs.count(&SU) &&
        canFitIntoPipeline(SU, DAG, ConflictedInstrs)) {
      add(SU);
      ConflictedInstrs.insert(&SU);
    }
  }

  add(InitSU);
  assert(MaxSize);
  (*MaxSize)++;
}

jrbyrnes: Not possible to have unsized groups?
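// Worked example (illustrative MIR order): given
//   ds_read_b32 ...; v_mfma_... ...; ds_read_b32 ...; SCHED_GROUP_BARRIER 256, 2, 0
// RIter starts at the barrier and walks the DAG in reverse. Both ds_reads are
// claimed (the MFMA fails canAddMI for mask DS_READ = 0x100), each claimed SU
// is recorded in ConflictedInstrs so a later group with the same SyncID cannot
// reuse it, and the barrier SU itself is added last with MaxSize bumped so it
// still fits.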
// Create a pipeline from the SchedGroups in PipelineOrderGroups such that we
// try to enforce the relative ordering of instructions in each group.
static void makePipeline(SmallVectorImpl<SchedGroup> &PipelineOrderGroups) {
  auto I = PipelineOrderGroups.begin();
  auto E = PipelineOrderGroups.end();
  for (; I != E; ++I) {
    auto &GroupA = *I;
    for (auto J = std::next(I); J != E; ++J) {
      auto &GroupB = *J;
      GroupA.link(GroupB);
    }
  }
}

// Same as makePipeline but with reverse ordering.
static void
makeReversePipeline(SmallVectorImpl<SchedGroup> &PipelineOrderGroups) {
  auto I = PipelineOrderGroups.rbegin();
  auto E = PipelineOrderGroups.rend();
  for (; I != E; ++I) {
    auto &GroupA = *I;
    for (auto J = std::next(I); J != E; ++J) {
      auto &GroupB = *J;
      GroupA.link(GroupB);
    }
  }
}
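// Illustration (assumption): for PipelineOrderGroups = {G0, G1, G2},
// makePipeline calls G0.link(G1), G0.link(G2), then G1.link(G2), so every SU
// in an earlier group becomes an artificial predecessor of every SU in each
// later group (where DAG->canAddEdge permits it). makeReversePipeline visits
// the same pairs starting from the back, which matters for
// SCHED_GROUP_BARRIERs collected in reverse program order.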
void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
  const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
  if (!TSchedModel || DAG->SUnits.empty())
    return;

  LLVM_DEBUG(dbgs() << "Applying IGroupLPDAGMutation...\n");

  // The order of InstructionGroups in this vector defines the
  // order in which edges will be added. In other words, given the
  // present ordering, we will try to make each VMEMRead instruction
  // a predecessor of each DSRead instruction, and so on.
  SmallVector<SchedGroup, 4> PipelineOrderGroups = {
      SchedGroup(SchedGroupMask::VMEM, VMEMGroupMaxSize, DAG, TII),
      SchedGroup(SchedGroupMask::DS_READ, LDRGroupMaxSize, DAG, TII),
      SchedGroup(SchedGroupMask::MFMA, MFMAGroupMaxSize, DAG, TII),
      SchedGroup(SchedGroupMask::DS_WRITE, LDWGroupMaxSize, DAG, TII)};

  for (auto &SG : PipelineOrderGroups)
    SG.initSchedGroup();

  makePipeline(PipelineOrderGroups);
}
// Remove all existing edges from a SCHED_BARRIER or SCHED_GROUP_BARRIER.
static void resetEdges(SUnit &SU, ScheduleDAGInstrs *DAG) {
  assert(SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER ||
         SU.getInstr()->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER);

  while (!SU.Preds.empty())
    for (auto &P : SU.Preds)
      SU.removePred(P);

  while (!SU.Succs.empty())
    for (auto &S : SU.Succs)
      for (auto &SP : S.getSUnit()->Preds)
        if (SP.getSUnit() == &SU)
          S.getSUnit()->removePred(SP);
}
void SchedBarrierDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
  if (!TSchedModel || DAGInstrs->SUnits.empty())
    return;

  LLVM_DEBUG(dbgs() << "Applying SchedBarrierDAGMutation...\n");

  const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
  SyncedInstrsMap.clear();
  SyncedSchedGroupsMap.clear();
  for (auto R = DAG->SUnits.rbegin(), E = DAG->SUnits.rend(); R != E; ++R) {
    if (R->getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER)
      addSchedBarrierEdges(*R);

    else if (R->getInstr()->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER)
      initSchedGroupBarrier(R);
  }

  // SCHED_GROUP_BARRIER edges can only be added after we have found and
  // initialized all of the SCHED_GROUP_BARRIER SchedGroups.
  addSchedGroupBarrierEdges();
}

jrbyrnes: If both types of barriers are present -- the SchedBarriers are handled first. However, if there is a conflict between SchedBarrier and SchedGroupBarrier, should SchedBarrier always get the priority? Maybe SchedBarrier should only handle groups not present in SchedGroupBarrier?
void SchedBarrierDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
  MachineInstr &MI = *SchedBarrier.getInstr();
  assert(MI.getOpcode() == AMDGPU::SCHED_BARRIER);
  // Remove all existing edges from the SCHED_BARRIER that were added due to the
  // instruction having side effects.
  resetEdges(SchedBarrier, DAG);
  auto InvertedMask =
      invertSchedBarrierMask((SchedGroupMask)MI.getOperand(0).getImm());
  SchedGroup SG(InvertedMask, None, DAG, TII);
  SG.initSchedGroup();
  // Preserve original instruction ordering relative to the SCHED_BARRIER.
  SG.link(
      SchedBarrier,
      (function_ref<bool(const SUnit *A, const SUnit *B)>)[](
          const SUnit *A, const SUnit *B) { return A->NodeNum > B->NodeNum; });
}
SchedGroupMask
SchedBarrierDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const {
  // Invert mask and erase bits for types of instructions that are implied to be
  // allowed past the SCHED_BARRIER.
  SchedGroupMask InvertedMask = ~Mask;

  // ALU implies VALU, SALU, MFMA.
  if ((InvertedMask & SchedGroupMask::ALU) == SchedGroupMask::NONE)
    InvertedMask &=
        ~SchedGroupMask::VALU & ~SchedGroupMask::SALU & ~SchedGroupMask::MFMA;
  // VALU, SALU, MFMA implies ALU.
  else if ((InvertedMask & SchedGroupMask::VALU) == SchedGroupMask::NONE ||
           (InvertedMask & SchedGroupMask::SALU) == SchedGroupMask::NONE ||
           (InvertedMask & SchedGroupMask::MFMA) == SchedGroupMask::NONE)
    InvertedMask &= ~SchedGroupMask::ALU;

  // VMEM implies VMEM_READ, VMEM_WRITE.
  if ((InvertedMask & SchedGroupMask::VMEM) == SchedGroupMask::NONE)
    InvertedMask &= ~SchedGroupMask::VMEM_READ & ~SchedGroupMask::VMEM_WRITE;
  // VMEM_READ, VMEM_WRITE implies VMEM.
  else if ((InvertedMask & SchedGroupMask::VMEM_READ) == SchedGroupMask::NONE ||
           (InvertedMask & SchedGroupMask::VMEM_WRITE) == SchedGroupMask::NONE)
    InvertedMask &= ~SchedGroupMask::VMEM;

  // DS implies DS_READ, DS_WRITE.
  if ((InvertedMask & SchedGroupMask::DS) == SchedGroupMask::NONE)
    InvertedMask &= ~SchedGroupMask::DS_READ & ~SchedGroupMask::DS_WRITE;
  // DS_READ, DS_WRITE implies DS.
  else if ((InvertedMask & SchedGroupMask::DS_READ) == SchedGroupMask::NONE ||
           (InvertedMask & SchedGroupMask::DS_WRITE) == SchedGroupMask::NONE)
    InvertedMask &= ~SchedGroupMask::DS;

  return InvertedMask;
}
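// Worked example (illustrative): a SCHED_BARRIER with mask 0x80 (DS) means
// only DS instructions may cross. ~Mask sets every other bit and leaves DS
// clear; because DS is allowed, the "DS implies DS_READ, DS_WRITE" rule then
// clears DS_READ and DS_WRITE as well, so the blocking SchedGroup is built
// from all non-DS instruction types.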
void SchedBarrierDAGMutation::initSchedGroupBarrier(
    std::vector<SUnit>::reverse_iterator RIter) {
  // Remove all existing edges from the SCHED_GROUP_BARRIER that were added due
  // to the instruction having side effects.
  resetEdges(*RIter, DAG);
  MachineInstr &SGB = *RIter->getInstr();
  assert(SGB.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER);
  int32_t SGMask = SGB.getOperand(0).getImm();
  int32_t Size = SGB.getOperand(1).getImm();
  int32_t SyncID = SGB.getOperand(2).getImm();

  // Create a new SchedGroup and add it to a list that is mapped to the SyncID.
  // SchedGroups only enforce ordering between SchedGroups with the same SyncID.
  auto &SG = SyncedSchedGroupsMap[SyncID].emplace_back((SchedGroupMask)SGMask,
                                                       Size, SyncID, DAG, TII);

  // SyncedInstrsMap is used here to avoid adding the same SUs in multiple
  // SchedGroups that have the same SyncID. This only matters for
  // SCHED_GROUP_BARRIER and not SCHED_BARRIER.
  SG.initSchedGroup(RIter, SyncedInstrsMap[SG.getSyncID()]);
}
void SchedBarrierDAGMutation::addSchedGroupBarrierEdges() {
  // Since we traversed the DAG in reverse order when initializing
  // SCHED_GROUP_BARRIERs we need to reverse the order in the vector to maintain
  // user intentions and program order.
  for (auto &SchedGroups : SyncedSchedGroupsMap)
    makeReversePipeline(SchedGroups.second);
}
} // namespace

namespace llvm {

std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation() {
  return EnableIGroupLP ? std::make_unique<IGroupLPDAGMutation>() : nullptr;
}

std::unique_ptr<ScheduleDAGMutation> createSchedBarrierDAGMutation() {
  return std::make_unique<SchedBarrierDAGMutation>();
}

} // end namespace llvm