Index: llvm/trunk/include/llvm/CodeGen/DFAPacketizer.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/DFAPacketizer.h (revision 216123)
+++ llvm/trunk/include/llvm/CodeGen/DFAPacketizer.h (revision 216124)
@@ -1,167 +1,165 @@
//=- llvm/CodeGen/DFAPacketizer.h - DFA Packetizer for VLIW ---*- C++ -*-=====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This class implements a deterministic finite automaton (DFA) based
// packetizing mechanism for VLIW architectures. It provides APIs to
// determine whether there exists a legal mapping of instructions to
// functional unit assignments in a packet. The DFA is auto-generated from
// the target's Schedule.td file.
//
// A DFA consists of 3 major elements: states, inputs, and transitions. For
// the packetizing mechanism, the input is the set of instruction classes for
// a target. The state models all possible combinations of functional unit
// consumption for a given set of instructions in a packet. A transition
// models the addition of an instruction to a packet. In the DFA constructed
// by this class, if an instruction can be added to a packet, then a valid
// transition exists from the corresponding state. Invalid transitions
// indicate that the instruction cannot be added to the current packet.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_DFAPACKETIZER_H
#define LLVM_CODEGEN_DFAPACKETIZER_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include <map>
namespace llvm {
class MCInstrDesc;
class MachineInstr;
class MachineLoopInfo;
class MachineDominatorTree;
class InstrItineraryData;
class DefaultVLIWScheduler;
class SUnit;
class DFAPacketizer {
private:
typedef std::pair<unsigned, unsigned> UnsignPair;
const InstrItineraryData *InstrItins;
int CurrentState;
const int (*DFAStateInputTable)[2];
const unsigned *DFAStateEntryTable;
// CachedTable is a map from <FromState, Input> to ToState.
DenseMap<UnsignPair, unsigned> CachedTable;
// ReadTable - Read the DFA transition table and update CachedTable.
void ReadTable(unsigned int state);
public:
DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
const unsigned *SET);
// Reset the current state to make all resources available.
void clearResources() {
CurrentState = 0;
}
// canReserveResources - Check if the resources occupied by a MCInstrDesc
// are available in the current state.
bool canReserveResources(const llvm::MCInstrDesc *MID);
// reserveResources - Reserve the resources occupied by a MCInstrDesc and
// change the current state to reflect that change.
void reserveResources(const llvm::MCInstrDesc *MID);
// canReserveResources - Check if the resources occupied by a machine
// instruction are available in the current state.
bool canReserveResources(llvm::MachineInstr *MI);
// reserveResources - Reserve the resources occupied by a machine
// instruction and change the current state to reflect that change.
void reserveResources(llvm::MachineInstr *MI);
const InstrItineraryData *getInstrItins() const { return InstrItins; }
};
// VLIWPacketizerList - Implements a simple VLIW packetizer using DFA. The
// packetizer works on machine basic blocks. For each instruction I in BB, the
// packetizer consults the DFA to see if machine resources are available to
// execute I. If so, the packetizer checks if I depends on any instruction J in
// the current packet. If no dependency is found, I is added to the current packet
// and the machine resources are marked as taken. If any dependency is found, a target
// API call is made to prune the dependence.
class VLIWPacketizerList {
protected:
const TargetMachine &TM;
const MachineFunction &MF;
const TargetInstrInfo *TII;
// The VLIW Scheduler.
DefaultVLIWScheduler *VLIWScheduler;
// Vector of instructions assigned to the current packet.
std::vector<MachineInstr*> CurrentPacketMIs;
// DFA resource tracker.
DFAPacketizer *ResourceTracker;
// Generate MI -> SU map.
std::map<MachineInstr*, SUnit*> MIToSUnit;
public:
- VLIWPacketizerList(
- MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- bool IsPostRA);
+ VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, bool IsPostRA);
virtual ~VLIWPacketizerList();
// PacketizeMIs - Implement this API in the backend to bundle instructions.
void PacketizeMIs(MachineBasicBlock *MBB,
MachineBasicBlock::iterator BeginItr,
MachineBasicBlock::iterator EndItr);
// getResourceTracker - return ResourceTracker
DFAPacketizer *getResourceTracker() {return ResourceTracker;}
// addToPacket - Add MI to the current packet.
virtual MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
MachineBasicBlock::iterator MII = MI;
CurrentPacketMIs.push_back(MI);
ResourceTracker->reserveResources(MI);
return MII;
}
// endPacket - End the current packet.
void endPacket(MachineBasicBlock *MBB, MachineInstr *MI);
// initPacketizerState - perform initialization before packetizing
// an instruction. This function is supposed to be overridden by
// the target-dependent packetizer.
virtual void initPacketizerState() { return; }
// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
virtual bool ignorePseudoInstruction(MachineInstr *I,
MachineBasicBlock *MBB) {
return false;
}
// isSoloInstruction - return true if instruction MI cannot be packetized
// with any other instruction, which means that MI itself is a packet.
virtual bool isSoloInstruction(MachineInstr *MI) {
return true;
}
// isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
// together.
virtual bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
return false;
}
// isLegalToPruneDependencies - Is it legal to prune the dependence between SUI
// and SUJ.
virtual bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
return false;
}
};
}
#endif
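For orientation, the resource check described in the header comment above is a query-then-commit pair on the DFA-backed resource tracker. The following is a minimal, illustrative sketch only (the helper name tryAddToPacket is invented here, not part of LLVM); it uses nothing beyond the DFAPacketizer API declared in this header.

#include "llvm/CodeGen/DFAPacketizer.h"

// Hypothetical helper: ask the DFA whether MI fits into the current packet,
// and if it does, take the transition so MI's functional units are consumed.
static bool tryAddToPacket(llvm::DFAPacketizer &RT, llvm::MachineInstr *MI) {
  if (!RT.canReserveResources(MI))
    return false;             // no valid DFA transition: MI must start a new packet
  RT.reserveResources(MI);    // commit: advance the DFA to the next state
  return true;
}
// When a packet is closed, RT.clearResources() resets the DFA to state 0 so
// that all functional units become available for the next packet.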
Index: llvm/trunk/include/llvm/CodeGen/MachineScheduler.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/MachineScheduler.h (revision 216123)
+++ llvm/trunk/include/llvm/CodeGen/MachineScheduler.h (revision 216124)
@@ -1,953 +1,953 @@
//==- MachineScheduler.h - MachineInstr Scheduling Pass ----------*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides an interface for customizing the standard MachineScheduler
// pass. Note that the entire pass may be replaced as follows:
//
// <Target>TargetMachine::createPassConfig(PassManagerBase &PM) {
// PM.substitutePass(&MachineSchedulerID, &CustomSchedulerPassID);
// ...}
//
// The MachineScheduler pass is only responsible for choosing the regions to be
// scheduled. Targets can override the DAG builder and scheduler without
// replacing the pass as follows:
//
// ScheduleDAGInstrs *<Target>PassConfig::
// createMachineScheduler(MachineSchedContext *C) {
// return new CustomMachineScheduler(C);
// }
//
// The default scheduler, ScheduleDAGMILive, builds the DAG and drives list
// scheduling while updating the instruction stream, register pressure, and live
// intervals. Most targets don't need to override the DAG builder and list
// scheduler, but subtargets that require custom scheduling heuristics may
// plug in an alternate MachineSchedStrategy. The strategy is responsible for
// selecting the highest priority node from the list:
//
// ScheduleDAGInstrs *<Target>PassConfig::
// createMachineScheduler(MachineSchedContext *C) {
// return new ScheduleDAGMI(C, CustomStrategy(C));
// }
//
// The DAG builder can also be customized in a sense by adding DAG mutations
// that will run after DAG building and before list scheduling. DAG mutations
// can adjust dependencies based on target-specific knowledge or add weak edges
// to aid heuristics:
//
// ScheduleDAGInstrs *<Target>PassConfig::
// createMachineScheduler(MachineSchedContext *C) {
// ScheduleDAGMI *DAG = new ScheduleDAGMI(C, CustomStrategy(C));
// DAG->addMutation(new CustomDependencies(DAG->TII, DAG->TRI));
// return DAG;
// }
//
// A target that supports alternative schedulers can use the
// MachineSchedRegistry to allow command line selection. This can be done by
// implementing the following boilerplate:
//
// static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) {
// return new CustomMachineScheduler(C);
// }
// static MachineSchedRegistry
// SchedCustomRegistry("custom", "Run my target's custom scheduler",
// createCustomMachineSched);
//
//
// Finally, subtargets that don't need to implement custom heuristics but would
// like to configure the GenericScheduler's policy for a given scheduler region,
// including scheduling direction and register pressure tracking policy, can do
// this:
//
// void <SubTarget>Subtarget::
// overrideSchedPolicy(MachineSchedPolicy &Policy,
// MachineInstr *begin,
// MachineInstr *end,
// unsigned NumRegionInstrs) const {
// Policy.<Flag> = true;
// }
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_MACHINESCHEDULER_H
#define LLVM_CODEGEN_MACHINESCHEDULER_H
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include <memory>
namespace llvm {
extern cl::opt<bool> ForceTopDown;
extern cl::opt<bool> ForceBottomUp;
class AliasAnalysis;
class LiveIntervals;
class MachineDominatorTree;
class MachineLoopInfo;
class RegisterClassInfo;
class ScheduleDAGInstrs;
class SchedDFSResult;
class ScheduleHazardRecognizer;
/// MachineSchedContext provides enough context from the MachineScheduler pass
/// for the target to instantiate a scheduler.
struct MachineSchedContext {
MachineFunction *MF;
const MachineLoopInfo *MLI;
const MachineDominatorTree *MDT;
const TargetPassConfig *PassConfig;
AliasAnalysis *AA;
LiveIntervals *LIS;
RegisterClassInfo *RegClassInfo;
MachineSchedContext();
virtual ~MachineSchedContext();
};
/// MachineSchedRegistry provides a selection of available machine instruction
/// schedulers.
class MachineSchedRegistry : public MachinePassRegistryNode {
public:
typedef ScheduleDAGInstrs *(*ScheduleDAGCtor)(MachineSchedContext *);
// RegisterPassParser requires a (misnamed) FunctionPassCtor type.
typedef ScheduleDAGCtor FunctionPassCtor;
static MachinePassRegistry Registry;
MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C)
: MachinePassRegistryNode(N, D, (MachinePassCtor)C) {
Registry.Add(this);
}
~MachineSchedRegistry() { Registry.Remove(this); }
// Accessors.
//
MachineSchedRegistry *getNext() const {
return (MachineSchedRegistry *)MachinePassRegistryNode::getNext();
}
static MachineSchedRegistry *getList() {
return (MachineSchedRegistry *)Registry.getList();
}
static void setListener(MachinePassRegistryListener *L) {
Registry.setListener(L);
}
};
class ScheduleDAGMI;
/// Define a generic scheduling policy for targets that don't provide their own
/// MachineSchedStrategy. This can be overridden for each scheduling region
/// before building the DAG.
struct MachineSchedPolicy {
// Allow the scheduler to disable register pressure tracking.
bool ShouldTrackPressure;
// Allow the scheduler to force top-down or bottom-up scheduling. If neither
// is true, the scheduler runs in both directions and converges.
bool OnlyTopDown;
bool OnlyBottomUp;
MachineSchedPolicy(): ShouldTrackPressure(false), OnlyTopDown(false),
OnlyBottomUp(false) {}
};
/// MachineSchedStrategy - Interface to the scheduling algorithm used by
/// ScheduleDAGMI.
///
/// Initialization sequence:
/// initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots
class MachineSchedStrategy {
virtual void anchor();
public:
virtual ~MachineSchedStrategy() {}
/// Optionally override the per-region scheduling policy.
virtual void initPolicy(MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned NumRegionInstrs) {}
/// Check if pressure tracking is needed before building the DAG and
/// initializing this strategy. Called after initPolicy.
virtual bool shouldTrackPressure() const { return true; }
/// Initialize the strategy after building the DAG for a new region.
virtual void initialize(ScheduleDAGMI *DAG) = 0;
/// Notify this strategy that all roots have been released (including those
/// that depend on EntrySU or ExitSU).
virtual void registerRoots() {}
/// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
/// schedule the node at the top of the unscheduled region. Otherwise it will
/// be scheduled at the bottom.
virtual SUnit *pickNode(bool &IsTopNode) = 0;
/// \brief Scheduler callback to notify that a new subtree is scheduled.
virtual void scheduleTree(unsigned SubtreeID) {}
/// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an
/// instruction and updated scheduled/remaining flags in the DAG nodes.
virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
/// When all predecessor dependencies have been resolved, free this node for
/// top-down scheduling.
virtual void releaseTopNode(SUnit *SU) = 0;
/// When all successor dependencies have been resolved, free this node for
/// bottom-up scheduling.
virtual void releaseBottomNode(SUnit *SU) = 0;
};
/// Mutate the DAG as a postpass after normal DAG building.
class ScheduleDAGMutation {
virtual void anchor();
public:
virtual ~ScheduleDAGMutation() {}
virtual void apply(ScheduleDAGMI *DAG) = 0;
};
/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply
/// schedules machine instructions according to the given MachineSchedStrategy
/// without much extra book-keeping. This is the common functionality between
/// PreRA and PostRA MachineScheduler.
class ScheduleDAGMI : public ScheduleDAGInstrs {
protected:
AliasAnalysis *AA;
std::unique_ptr<MachineSchedStrategy> SchedImpl;
/// Topo - A topological ordering for SUnits which permits fast IsReachable
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
/// Ordered list of DAG postprocessing steps.
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
/// The top of the unscheduled zone.
MachineBasicBlock::iterator CurrentTop;
/// The bottom of the unscheduled zone.
MachineBasicBlock::iterator CurrentBottom;
/// Record the next node in a scheduled cluster.
const SUnit *NextClusterPred;
const SUnit *NextClusterSucc;
#ifndef NDEBUG
/// The number of instructions scheduled so far. Used to cut off the
/// scheduler at the point determined by misched-cutoff.
unsigned NumInstrsScheduled;
#endif
public:
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
bool IsPostRA)
- : ScheduleDAGInstrs(*C->MF, C->MLI, C->MDT, IsPostRA,
+ : ScheduleDAGInstrs(*C->MF, C->MLI, IsPostRA,
/*RemoveKillFlags=*/IsPostRA, C->LIS),
AA(C->AA), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU), CurrentTop(),
CurrentBottom(), NextClusterPred(nullptr), NextClusterSucc(nullptr) {
#ifndef NDEBUG
NumInstrsScheduled = 0;
#endif
}
// Provide a vtable anchor
~ScheduleDAGMI() override;
/// Return true if this DAG supports VReg liveness and RegPressure.
virtual bool hasVRegLiveness() const { return false; }
/// Add a postprocessing step to the DAG builder.
/// Mutations are applied in the order that they are added after normal DAG
/// building and before MachineSchedStrategy initialization.
///
/// ScheduleDAGMI takes ownership of the Mutation object.
void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
Mutations.push_back(std::move(Mutation));
}
/// \brief True if an edge can be added from PredSU to SuccSU without creating
/// a cycle.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
/// \brief Add a DAG edge to the given SU with the given predecessor
/// dependence data.
///
/// \returns true if the edge may be added without creating a cycle OR if an
/// equivalent edge already existed (false indicates failure).
bool addEdge(SUnit *SuccSU, const SDep &PredDep);
MachineBasicBlock::iterator top() const { return CurrentTop; }
MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
/// Implement the ScheduleDAGInstrs interface for handling the next scheduling
/// region. This covers all instructions in a block, while schedule() may only
/// cover a subset.
void enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
unsigned regioninstrs) override;
/// Implement ScheduleDAGInstrs interface for scheduling a sequence of
/// reorderable instructions.
void schedule() override;
/// Change the position of an instruction within the basic block and update
/// live ranges and region boundary iterators.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
const SUnit *getNextClusterPred() const { return NextClusterPred; }
const SUnit *getNextClusterSucc() const { return NextClusterSucc; }
void viewGraph(const Twine &Name, const Twine &Title) override;
void viewGraph() override;
protected:
// Top-Level entry points for the schedule() driver...
/// Apply each ScheduleDAGMutation step in order. This allows different
/// instances of ScheduleDAGMI to perform custom DAG postprocessing.
void postprocessDAG();
/// Release ExitSU predecessors and setup scheduler queues.
void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);
/// Update scheduler DAG and queues after scheduling an instruction.
void updateQueues(SUnit *SU, bool IsTopNode);
/// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
void placeDebugValues();
/// \brief dump the scheduled Sequence.
void dumpSchedule() const;
// Lesser helpers...
bool checkSchedLimit();
void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
SmallVectorImpl<SUnit*> &BotRoots);
void releaseSucc(SUnit *SU, SDep *SuccEdge);
void releaseSuccessors(SUnit *SU);
void releasePred(SUnit *SU, SDep *PredEdge);
void releasePredecessors(SUnit *SU);
};
/// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules
/// machine instructions while updating LiveIntervals and tracking regpressure.
class ScheduleDAGMILive : public ScheduleDAGMI {
protected:
RegisterClassInfo *RegClassInfo;
/// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees
/// will be empty.
SchedDFSResult *DFSResult;
BitVector ScheduledTrees;
MachineBasicBlock::iterator LiveRegionEnd;
// Map each SU to its summary of pressure changes. This array is updated for
// liveness during bottom-up scheduling. Top-down scheduling may proceed but
// has no effect on the pressure diffs.
PressureDiffs SUPressureDiffs;
/// Register pressure in this region computed by initRegPressure.
bool ShouldTrackPressure;
IntervalPressure RegPressure;
RegPressureTracker RPTracker;
/// List of pressure sets that exceed the target's pressure limit before
/// scheduling, listed in increasing set ID order. Each pressure set is paired
/// with its max pressure in the currently scheduled regions.
std::vector<PressureChange> RegionCriticalPSets;
/// The top of the unscheduled zone.
IntervalPressure TopPressure;
RegPressureTracker TopRPTracker;
/// The bottom of the unscheduled zone.
IntervalPressure BotPressure;
RegPressureTracker BotRPTracker;
public:
ScheduleDAGMILive(MachineSchedContext *C,
std::unique_ptr<MachineSchedStrategy> S)
: ScheduleDAGMI(C, std::move(S), /*IsPostRA=*/false),
RegClassInfo(C->RegClassInfo), DFSResult(nullptr),
ShouldTrackPressure(false), RPTracker(RegPressure),
TopRPTracker(TopPressure), BotRPTracker(BotPressure) {}
virtual ~ScheduleDAGMILive();
/// Return true if this DAG supports VReg liveness and RegPressure.
bool hasVRegLiveness() const override { return true; }
/// \brief Return true if register pressure tracking is enabled.
bool isTrackingPressure() const { return ShouldTrackPressure; }
/// Get current register pressure for the top scheduled instructions.
const IntervalPressure &getTopPressure() const { return TopPressure; }
const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
/// Get current register pressure for the bottom scheduled instructions.
const IntervalPressure &getBotPressure() const { return BotPressure; }
const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
/// Get register pressure for the entire scheduling region before scheduling.
const IntervalPressure &getRegPressure() const { return RegPressure; }
const std::vector<PressureChange> &getRegionCriticalPSets() const {
return RegionCriticalPSets;
}
PressureDiff &getPressureDiff(const SUnit *SU) {
return SUPressureDiffs[SU->NodeNum];
}
/// Compute a DFSResult after DAG building is complete, and before any
/// queue comparisons.
void computeDFSResult();
/// Return a non-null DFS result if the scheduling strategy initialized it.
const SchedDFSResult *getDFSResult() const { return DFSResult; }
BitVector &getScheduledTrees() { return ScheduledTrees; }
/// Implement the ScheduleDAGInstrs interface for handling the next scheduling
/// region. This covers all instructions in a block, while schedule() may only
/// cover a subset.
void enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
unsigned regioninstrs) override;
/// Implement ScheduleDAGInstrs interface for scheduling a sequence of
/// reorderable instructions.
void schedule() override;
/// Compute the cyclic critical path through the DAG.
unsigned computeCyclicCriticalPath();
protected:
// Top-Level entry points for the schedule() driver...
/// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking
/// enabled. This sets up three trackers. RPTracker will cover the entire DAG
/// region, TopRPTracker and BotRPTracker will be initialized to the top and
/// bottom of the DAG region without covering any unscheduled instruction.
void buildDAGWithRegPressure();
/// Move an instruction and update register pressure.
void scheduleMI(SUnit *SU, bool IsTopNode);
// Lesser helpers...
void initRegPressure();
void updatePressureDiffs(ArrayRef<unsigned> LiveUses);
void updateScheduledPressure(const SUnit *SU,
const std::vector<unsigned> &NewMaxPressure);
};
//===----------------------------------------------------------------------===//
///
/// Helpers for implementing custom MachineSchedStrategy classes. These take
/// care of the book-keeping associated with list scheduling heuristics.
///
//===----------------------------------------------------------------------===//
/// ReadyQueue encapsulates a vector of "ready" SUnits with basic convenience
/// methods for pushing and removing nodes. ReadyQueues are uniquely identified
/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
///
/// This is a convenience class that may be used by implementations of
/// MachineSchedStrategy.
class ReadyQueue {
unsigned ID;
std::string Name;
std::vector<SUnit*> Queue;
public:
ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
unsigned getID() const { return ID; }
StringRef getName() const { return Name; }
// SU is in this queue if its NodeQueueId is a superset of this ID.
bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }
bool empty() const { return Queue.empty(); }
void clear() { Queue.clear(); }
unsigned size() const { return Queue.size(); }
typedef std::vector<SUnit*>::iterator iterator;
iterator begin() { return Queue.begin(); }
iterator end() { return Queue.end(); }
ArrayRef<SUnit*> elements() { return Queue; }
iterator find(SUnit *SU) {
return std::find(Queue.begin(), Queue.end(), SU);
}
void push(SUnit *SU) {
Queue.push_back(SU);
SU->NodeQueueId |= ID;
}
iterator remove(iterator I) {
(*I)->NodeQueueId &= ~ID;
*I = Queue.back();
unsigned idx = I - Queue.begin();
Queue.pop_back();
return Queue.begin() + idx;
}
void dump();
};
/// Summarize the unscheduled region.
struct SchedRemainder {
// Critical path through the DAG in expected latency.
unsigned CriticalPath;
unsigned CyclicCritPath;
// Scaled count of micro-ops left to schedule.
unsigned RemIssueCount;
bool IsAcyclicLatencyLimited;
// Unscheduled resources
SmallVector<unsigned, 16> RemainingCounts;
void reset() {
CriticalPath = 0;
CyclicCritPath = 0;
RemIssueCount = 0;
IsAcyclicLatencyLimited = false;
RemainingCounts.clear();
}
SchedRemainder() { reset(); }
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
};
/// Each scheduling boundary is associated with ready queues. It tracks the
/// current cycle in the direction of movement, and maintains the state
/// of "hazards" and other interlocks at the current cycle.
class SchedBoundary {
public:
/// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
enum {
TopQID = 1,
BotQID = 2,
LogMaxQID = 2
};
ScheduleDAGMI *DAG;
const TargetSchedModel *SchedModel;
SchedRemainder *Rem;
ReadyQueue Available;
ReadyQueue Pending;
ScheduleHazardRecognizer *HazardRec;
private:
/// True if the pending Q should be checked/updated before scheduling another
/// instruction.
bool CheckPending;
// For heuristics, keep a list of the nodes that immediately depend on the
// most recently scheduled node.
SmallPtrSet<const SUnit*, 8> NextSUs;
/// Number of cycles it takes to issue the instructions scheduled in this
/// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
/// See getStalls().
unsigned CurrCycle;
/// Micro-ops issued in the current cycle
unsigned CurrMOps;
/// MinReadyCycle - Cycle of the soonest available instruction.
unsigned MinReadyCycle;
// The expected latency of the critical path in this scheduled zone.
unsigned ExpectedLatency;
// The latency of dependence chains leading into this zone.
// For each node scheduled bottom-up: DLat = max DLat, N.Depth.
// For each cycle scheduled: DLat -= 1.
unsigned DependentLatency;
/// Count the scheduled (issued) micro-ops that can be retired by
/// time=CurrCycle assuming the first scheduled instr is retired at time=0.
unsigned RetiredMOps;
// Count scheduled resources that have been executed. Resources are
// considered executed if they become ready in the time that it takes to
// saturate any resource including the one in question. Counts are scaled
// for direct comparison with other resources. Counts can be compared with
// MOps * getMicroOpFactor and Latency * getLatencyFactor.
SmallVector<unsigned, 16> ExecutedResCounts;
/// Cache the max count for a single resource.
unsigned MaxExecutedResCount;
// Cache the ID of the critical resource in this scheduled zone.
unsigned ZoneCritResIdx;
// Is the scheduled region resource limited vs. latency limited.
bool IsResourceLimited;
// Record the highest cycle at which each resource has been reserved by a
// scheduled instruction.
SmallVector<unsigned, 16> ReservedCycles;
#ifndef NDEBUG
// Remember the greatest possible stall as an upper bound on the number of
// times we should retry the pending queue because of a hazard.
unsigned MaxObservedStall;
#endif
public:
/// Pending queues extend the ready queues with the same ID and the
/// PendingFlag set.
SchedBoundary(unsigned ID, const Twine &Name):
DAG(nullptr), SchedModel(nullptr), Rem(nullptr), Available(ID, Name+".A"),
Pending(ID << LogMaxQID, Name+".P"),
HazardRec(nullptr) {
reset();
}
~SchedBoundary();
void reset();
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
SchedRemainder *rem);
bool isTop() const {
return Available.getID() == TopQID;
}
/// Number of cycles to issue the instructions scheduled in this zone.
unsigned getCurrCycle() const { return CurrCycle; }
/// Micro-ops issued in the current cycle
unsigned getCurrMOps() const { return CurrMOps; }
/// Return true if the given SU is used by the most recently scheduled
/// instruction.
bool isNextSU(const SUnit *SU) const { return NextSUs.count(SU); }
// The latency of dependence chains leading into this zone.
unsigned getDependentLatency() const { return DependentLatency; }
/// Get the number of latency cycles "covered" by the scheduled
/// instructions. This is the larger of the critical path within the zone
/// and the number of cycles required to issue the instructions.
unsigned getScheduledLatency() const {
return std::max(ExpectedLatency, CurrCycle);
}
unsigned getUnscheduledLatency(SUnit *SU) const {
return isTop() ? SU->getHeight() : SU->getDepth();
}
unsigned getResourceCount(unsigned ResIdx) const {
return ExecutedResCounts[ResIdx];
}
/// Get the scaled count of scheduled micro-ops and resources, including
/// executed resources.
unsigned getCriticalCount() const {
if (!ZoneCritResIdx)
return RetiredMOps * SchedModel->getMicroOpFactor();
return getResourceCount(ZoneCritResIdx);
}
/// Get a scaled count for the minimum execution time of the scheduled
/// micro-ops that are ready to execute by getExecutedCount. Notice the
/// feedback loop.
unsigned getExecutedCount() const {
return std::max(CurrCycle * SchedModel->getLatencyFactor(),
MaxExecutedResCount);
}
unsigned getZoneCritResIdx() const { return ZoneCritResIdx; }
// Is the scheduled region resource limited vs. latency limited.
bool isResourceLimited() const { return IsResourceLimited; }
/// Get the difference between the given SUnit's ready time and the current
/// cycle.
unsigned getLatencyStallCycles(SUnit *SU);
unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
bool checkHazard(SUnit *SU);
unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
unsigned getOtherResourceCount(unsigned &OtherCritIdx);
void releaseNode(SUnit *SU, unsigned ReadyCycle);
void releaseTopNode(SUnit *SU);
void releaseBottomNode(SUnit *SU);
void bumpCycle(unsigned NextCycle);
void incExecutedResources(unsigned PIdx, unsigned Count);
unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);
void bumpNode(SUnit *SU);
void releasePending();
void removeReady(SUnit *SU);
/// Call this before applying any other heuristics to the Available queue.
/// Updates the Available/Pending Q's if necessary and returns the single
/// available instruction, or NULL if there are multiple candidates.
SUnit *pickOnlyChoice();
#ifndef NDEBUG
void dumpScheduledState();
#endif
};
/// Base class for GenericScheduler. This class maintains information about
/// scheduling candidates based on TargetSchedModel making it easy to implement
/// heuristics for either preRA or postRA scheduling.
class GenericSchedulerBase : public MachineSchedStrategy {
public:
/// Represent the type of SchedCandidate found within a single queue.
/// pickNodeBidirectional depends on these listed by decreasing priority.
enum CandReason {
NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax,
ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
#ifndef NDEBUG
static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
#endif
/// Policy for scheduling the next instruction in the candidate's zone.
struct CandPolicy {
bool ReduceLatency;
unsigned ReduceResIdx;
unsigned DemandResIdx;
CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
};
/// Status of an instruction's critical resource consumption.
struct SchedResourceDelta {
// Count critical resources in the scheduled region required by SU.
unsigned CritResources;
// Count critical resources from another region consumed by SU.
unsigned DemandedResources;
SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
bool operator==(const SchedResourceDelta &RHS) const {
return CritResources == RHS.CritResources
&& DemandedResources == RHS.DemandedResources;
}
bool operator!=(const SchedResourceDelta &RHS) const {
return !operator==(RHS);
}
};
/// Store the state used by GenericScheduler heuristics, required for the
/// lifetime of one invocation of pickNode().
struct SchedCandidate {
CandPolicy Policy;
// The best SUnit candidate.
SUnit *SU;
// The reason for this candidate.
CandReason Reason;
// Set of reasons that apply to multiple candidates.
uint32_t RepeatReasonSet;
// Register pressure values for the best candidate.
RegPressureDelta RPDelta;
// Critical resource consumption of the best candidate.
SchedResourceDelta ResDelta;
SchedCandidate(const CandPolicy &policy)
: Policy(policy), SU(nullptr), Reason(NoCand), RepeatReasonSet(0) {}
bool isValid() const { return SU; }
// Copy the status of another candidate without changing policy.
void setBest(SchedCandidate &Best) {
assert(Best.Reason != NoCand && "uninitialized Sched candidate");
SU = Best.SU;
Reason = Best.Reason;
RPDelta = Best.RPDelta;
ResDelta = Best.ResDelta;
}
bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
void initResourceDelta(const ScheduleDAGMI *DAG,
const TargetSchedModel *SchedModel);
};
protected:
const MachineSchedContext *Context;
const TargetSchedModel *SchedModel;
const TargetRegisterInfo *TRI;
SchedRemainder Rem;
protected:
GenericSchedulerBase(const MachineSchedContext *C):
Context(C), SchedModel(nullptr), TRI(nullptr) {}
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
SchedBoundary *OtherZone);
#ifndef NDEBUG
void traceCandidate(const SchedCandidate &Cand);
#endif
};
/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class GenericScheduler : public GenericSchedulerBase {
ScheduleDAGMILive *DAG;
// State of the top and bottom scheduled instruction boundaries.
SchedBoundary Top;
SchedBoundary Bot;
MachineSchedPolicy RegionPolicy;
public:
GenericScheduler(const MachineSchedContext *C):
GenericSchedulerBase(C), DAG(nullptr), Top(SchedBoundary::TopQID, "TopQ"),
Bot(SchedBoundary::BotQID, "BotQ") {}
void initPolicy(MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned NumRegionInstrs) override;
bool shouldTrackPressure() const override {
return RegionPolicy.ShouldTrackPressure;
}
void initialize(ScheduleDAGMI *dag) override;
SUnit *pickNode(bool &IsTopNode) override;
void schedNode(SUnit *SU, bool IsTopNode) override;
void releaseTopNode(SUnit *SU) override {
Top.releaseTopNode(SU);
}
void releaseBottomNode(SUnit *SU) override {
Bot.releaseBottomNode(SU);
}
void registerRoots() override;
protected:
void checkAcyclicLatency();
void tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
SchedBoundary &Zone,
const RegPressureTracker &RPTracker,
RegPressureTracker &TempTracker);
SUnit *pickNodeBidirectional(bool &IsTopNode);
void pickNodeFromQueue(SchedBoundary &Zone,
const RegPressureTracker &RPTracker,
SchedCandidate &Candidate);
void reschedulePhysRegCopies(SUnit *SU, bool isTop);
};
/// PostGenericScheduler - Interface to the scheduling algorithm used by
/// ScheduleDAGMI.
///
/// Callbacks from ScheduleDAGMI:
/// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
class PostGenericScheduler : public GenericSchedulerBase {
ScheduleDAGMI *DAG;
SchedBoundary Top;
SmallVector<SUnit*, 8> BotRoots;
public:
PostGenericScheduler(const MachineSchedContext *C):
GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}
virtual ~PostGenericScheduler() {}
void initPolicy(MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned NumRegionInstrs) override {
/* no configurable policy */
};
/// PostRA scheduling does not track pressure.
bool shouldTrackPressure() const override { return false; }
void initialize(ScheduleDAGMI *Dag) override;
void registerRoots() override;
SUnit *pickNode(bool &IsTopNode) override;
void scheduleTree(unsigned SubtreeID) override {
llvm_unreachable("PostRA scheduler does not support subtree analysis.");
}
void schedNode(SUnit *SU, bool IsTopNode) override;
void releaseTopNode(SUnit *SU) override {
Top.releaseTopNode(SU);
}
// Only called for roots.
void releaseBottomNode(SUnit *SU) override {
BotRoots.push_back(SU);
}
protected:
void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
void pickNodeFromQueue(SchedCandidate &Cand);
};
} // namespace llvm
#endif
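The MachineSchedStrategy callbacks declared above are invoked in the order initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots, followed by pickNode/schedNode and the release callbacks as scheduling proceeds. As a rough, non-authoritative sketch of the minimum a custom strategy must provide (the class name TrivialSchedStrategy and its LIFO top-down behaviour are invented for illustration):

#include "llvm/CodeGen/MachineScheduler.h"
#include <vector>

// Illustrative strategy, not part of LLVM: schedule strictly top-down,
// picking released nodes in LIFO order. Only the pure virtual callbacks
// of MachineSchedStrategy are implemented.
struct TrivialSchedStrategy : public llvm::MachineSchedStrategy {
  std::vector<llvm::SUnit *> Ready;
  void initialize(llvm::ScheduleDAGMI *DAG) override { Ready.clear(); }
  llvm::SUnit *pickNode(bool &IsTopNode) override {
    if (Ready.empty())
      return nullptr;               // nothing left: the region is scheduled
    IsTopNode = true;               // always place the node at the top boundary
    llvm::SUnit *SU = Ready.back();
    Ready.pop_back();
    return SU;
  }
  void schedNode(llvm::SUnit *SU, bool IsTopNode) override {}
  void releaseTopNode(llvm::SUnit *SU) override { Ready.push_back(SU); }
  void releaseBottomNode(llvm::SUnit *SU) override {}
};

Such a strategy would be plugged in through the createMachineScheduler hook shown in the header comment, by constructing the DAG with the strategy wrapped in a std::unique_ptr, e.g. new ScheduleDAGMILive(C, std::unique_ptr<llvm::MachineSchedStrategy>(new TrivialSchedStrategy())).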
Index: llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h (revision 216123)
+++ llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h (revision 216124)
@@ -1,281 +1,279 @@
//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ScheduleDAGInstrs class, which implements
// scheduling for a MachineInstr-based dependency graph.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
#define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
#include "llvm/ADT/SparseMultiSet.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class MachineFrameInfo;
class MachineLoopInfo;
class MachineDominatorTree;
class LiveIntervals;
class RegPressureTracker;
class PressureDiffs;
/// An individual mapping from virtual register number to SUnit.
struct VReg2SUnit {
unsigned VirtReg;
SUnit *SU;
VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {}
unsigned getSparseSetIndex() const {
return TargetRegisterInfo::virtReg2Index(VirtReg);
}
};
/// Record a physical register access.
/// For non-data-dependent uses, OpIdx == -1.
struct PhysRegSUOper {
SUnit *SU;
int OpIdx;
unsigned Reg;
PhysRegSUOper(SUnit *su, int op, unsigned R): SU(su), OpIdx(op), Reg(R) {}
unsigned getSparseSetIndex() const { return Reg; }
};
/// Use a SparseMultiSet to track physical registers. Storage is only
/// allocated once for the pass. It can be cleared in constant time and reused
/// without any frees.
typedef SparseMultiSet<PhysRegSUOper, llvm::identity<unsigned>, uint16_t>
Reg2SUnitsMap;
/// Use SparseSet as a SparseMap by relying on the fact that it never
/// compares ValueT's, only unsigned keys. This allows the set to be cleared
/// between scheduling regions in constant time as long as ValueT does not
/// require a destructor.
typedef SparseSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2SUnitMap;
/// Track local uses of virtual registers. These uses are gathered by the DAG
/// builder and may be consulted by the scheduler to avoid iterating an entire
/// vreg use list.
typedef SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2UseMap;
/// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
/// MachineInstrs.
class ScheduleDAGInstrs : public ScheduleDAG {
protected:
const MachineLoopInfo *MLI;
- const MachineDominatorTree *MDT;
const MachineFrameInfo *MFI;
/// Live Intervals provides reaching defs in preRA scheduling.
LiveIntervals *LIS;
/// TargetSchedModel provides an interface to the machine model.
TargetSchedModel SchedModel;
/// isPostRA flag indicates vregs cannot be present.
bool IsPostRA;
/// True if the DAG builder should remove kill flags (in preparation for
/// rescheduling).
bool RemoveKillFlags;
/// The standard DAG builder does not normally include terminators as DAG
/// nodes because it does not create the necessary dependencies to prevent
/// reordering. A specialized scheduler can override
/// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate
/// it has taken responsibility for scheduling the terminator correctly.
bool CanHandleTerminators;
/// State specific to the current scheduling region.
/// ------------------------------------------------
/// The block in which to insert instructions
MachineBasicBlock *BB;
/// The beginning of the range to be scheduled.
MachineBasicBlock::iterator RegionBegin;
/// The end of the range to be scheduled.
MachineBasicBlock::iterator RegionEnd;
/// Instructions in this region (distance(RegionBegin, RegionEnd)).
unsigned NumRegionInstrs;
/// After calling BuildSchedGraph, each machine instruction in the current
/// scheduling region is mapped to an SUnit.
DenseMap<MachineInstr*, SUnit*> MISUnitMap;
/// After calling BuildSchedGraph, each vreg used in the scheduling region
/// is mapped to a set of SUnits. These include all local vreg uses, not
/// just the uses for a singly defined vreg.
VReg2UseMap VRegUses;
/// State internal to DAG building.
/// -------------------------------
/// Defs, Uses - Remember where defs and uses of each register are as we
/// iterate upward through the instructions. This is allocated here instead
/// of inside BuildSchedGraph to avoid the need for it to be initialized and
/// destructed for each block.
Reg2SUnitsMap Defs;
Reg2SUnitsMap Uses;
/// Track the last instruction in this region defining each virtual register.
VReg2SUnitMap VRegDefs;
/// PendingLoads - Remember where unknown loads are after the most recent
/// unknown store, as we iterate. As with Defs and Uses, this is here
/// to minimize construction/destruction.
std::vector<SUnit *> PendingLoads;
/// DbgValues - Remember instruction that precedes DBG_VALUE.
/// These are generated by buildSchedGraph but persist so they can be
/// referenced when emitting the final schedule.
typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
DbgValueVector;
DbgValueVector DbgValues;
MachineInstr *FirstDbgValue;
/// Set of live physical registers for updating kill flags.
BitVector LiveRegs;
public:
explicit ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo *mli,
- const MachineDominatorTree *mdt,
bool IsPostRAFlag,
bool RemoveKillFlags = false,
LiveIntervals *LIS = nullptr);
virtual ~ScheduleDAGInstrs() {}
bool isPostRA() const { return IsPostRA; }
/// \brief Expose LiveIntervals for use in DAG mutators and such.
LiveIntervals *getLIS() const { return LIS; }
/// \brief Get the machine model for instruction scheduling.
const TargetSchedModel *getSchedModel() const { return &SchedModel; }
/// \brief Resolve and cache a resolved scheduling class for an SUnit.
const MCSchedClassDesc *getSchedClass(SUnit *SU) const {
if (!SU->SchedClass && SchedModel.hasInstrSchedModel())
SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr());
return SU->SchedClass;
}
/// begin - Return an iterator to the top of the current scheduling region.
MachineBasicBlock::iterator begin() const { return RegionBegin; }
/// end - Return an iterator to the bottom of the current scheduling region.
MachineBasicBlock::iterator end() const { return RegionEnd; }
/// newSUnit - Creates a new SUnit and returns a pointer to it.
SUnit *newSUnit(MachineInstr *MI);
/// getSUnit - Return an existing SUnit for this MI, or NULL.
SUnit *getSUnit(MachineInstr *MI) const;
/// startBlock - Prepare to perform scheduling in the given block.
virtual void startBlock(MachineBasicBlock *BB);
/// finishBlock - Clean up after scheduling in the given block.
virtual void finishBlock();
/// Initialize the scheduler state for the next scheduling region.
virtual void enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
unsigned regioninstrs);
/// Notify that the scheduler has finished scheduling the current region.
virtual void exitRegion();
/// buildSchedGraph - Build SUnits from the MachineBasicBlock that is our
/// input.
void buildSchedGraph(AliasAnalysis *AA,
RegPressureTracker *RPTracker = nullptr,
PressureDiffs *PDiffs = nullptr);
/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier. We want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch, or when the live-out registers are
/// used by instructions in the fall-through block.
void addSchedBarrierDeps();
/// schedule - Order nodes according to selected style, filling
/// in the Sequence member.
///
/// Typically, a scheduling algorithm will implement schedule() without
/// overriding enterRegion() or exitRegion().
virtual void schedule() = 0;
/// finalizeSchedule - Allow targets to perform final scheduling actions at
/// the level of the whole MachineFunction. By default does nothing.
virtual void finalizeSchedule() {}
void dumpNode(const SUnit *SU) const override;
/// Return a label for a DAG node that points to an instruction.
std::string getGraphNodeLabel(const SUnit *SU) const override;
/// Return a label for the region of code covered by the DAG.
std::string getDAGName() const override;
/// \brief Fix register kill flags that scheduling has made invalid.
void fixupKills(MachineBasicBlock *MBB);
protected:
void initSUnits();
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
/// \brief PostRA helper for rewriting kill flags.
void startBlockForKills(MachineBasicBlock *BB);
/// \brief Toggle a register operand kill flag.
///
/// Other adjustments may be made to the instruction if necessary. Return
/// true if the operand has been deleted, false if not.
bool toggleKillFlag(MachineInstr *MI, MachineOperand &MO);
};
/// newSUnit - Creates a new SUnit and returns a pointer to it.
inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) {
#ifndef NDEBUG
const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0];
#endif
SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
assert((Addr == nullptr || Addr == &SUnits[0]) &&
"SUnits std::vector reallocated on the fly!");
SUnits.back().OrigNode = &SUnits.back();
return &SUnits.back();
}
/// getSUnit - Return an existing SUnit for this MI, or NULL.
inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const {
DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI);
if (I == MISUnitMap.end())
return nullptr;
return I->second;
}
} // namespace llvm
#endif
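The region protocol declared above (startBlock, enterRegion, schedule, exitRegion, finishBlock) is driven by the scheduling pass for every region it identifies. A minimal sketch of scheduling one already-identified region follows; it assumes the caller owns a concrete ScheduleDAGInstrs subclass and has computed Begin, End and NumInstrs itself (this is illustrative, not code from any LLVM pass):

#include "llvm/CodeGen/ScheduleDAGInstrs.h"

// Illustrative driver for a single region of one basic block.
static void scheduleOneRegion(llvm::ScheduleDAGInstrs &Scheduler,
                              llvm::MachineBasicBlock *MBB,
                              llvm::MachineBasicBlock::iterator Begin,
                              llvm::MachineBasicBlock::iterator End,
                              unsigned NumInstrs) {
  Scheduler.startBlock(MBB);                          // per-block setup
  Scheduler.enterRegion(MBB, Begin, End, NumInstrs);  // define [Begin, End)
  Scheduler.schedule();                               // subclass reorders the region
  Scheduler.exitRegion();
  Scheduler.finishBlock();                            // per-block cleanup
}

In the real MachineScheduler pass, startBlock and finishBlock bracket all regions of a block rather than each region individually; R600Packetizer::runOnMachineFunction further down in this diff shows the same region-splitting pattern on the packetizer side.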
Index: llvm/trunk/lib/Target/R600/R600Packetizer.cpp
===================================================================
--- llvm/trunk/lib/Target/R600/R600Packetizer.cpp (revision 216123)
+++ llvm/trunk/lib/Target/R600/R600Packetizer.cpp (revision 216124)
@@ -1,410 +1,408 @@
//===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass implements instruction packetization for R600. It unsets the isLast
/// bit of instructions inside a bundle and substitutes the src register with
/// PreviousVector when applicable.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "packets"
namespace {
class R600Packetizer : public MachineFunctionPass {
public:
static char ID;
R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
const char *getPassName() const override {
return "R600 Packetizer";
}
bool runOnMachineFunction(MachineFunction &Fn) override;
};
char R600Packetizer::ID = 0;
class R600PacketizerList : public VLIWPacketizerList {
private:
const R600InstrInfo *TII;
const R600RegisterInfo &TRI;
bool VLIW5;
bool ConsideredInstUsesAlreadyWrittenVectorElement;
unsigned getSlot(const MachineInstr *MI) const {
return TRI.getHWRegChan(MI->getOperand(0).getReg());
}
/// \returns the register to PV chan mapping for the bundle/single instruction
/// that immediately precedes I.
DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
const {
DenseMap<unsigned, unsigned> Result;
I--;
if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
return Result;
MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
if (I->isBundle())
BI++;
int LastDstChan = -1;
do {
bool isTrans = false;
int BISlot = getSlot(BI);
if (LastDstChan >= BISlot)
isTrans = true;
LastDstChan = BISlot;
if (TII->isPredicated(BI))
continue;
int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
continue;
int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
if (DstIdx == -1) {
continue;
}
unsigned Dst = BI->getOperand(DstIdx).getReg();
if (isTrans || TII->isTransOnly(BI)) {
Result[Dst] = AMDGPU::PS;
continue;
}
if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
BI->getOpcode() == AMDGPU::DOT4_eg) {
Result[Dst] = AMDGPU::PV_X;
continue;
}
if (Dst == AMDGPU::OQAP) {
continue;
}
unsigned PVReg = 0;
switch (TRI.getHWRegChan(Dst)) {
case 0:
PVReg = AMDGPU::PV_X;
break;
case 1:
PVReg = AMDGPU::PV_Y;
break;
case 2:
PVReg = AMDGPU::PV_Z;
break;
case 3:
PVReg = AMDGPU::PV_W;
break;
default:
llvm_unreachable("Invalid Chan");
}
Result[Dst] = PVReg;
} while ((++BI)->isBundledWithPred());
return Result;
}
void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs)
const {
unsigned Ops[] = {
AMDGPU::OpName::src0,
AMDGPU::OpName::src1,
AMDGPU::OpName::src2
};
for (unsigned i = 0; i < 3; i++) {
int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
if (OperandIdx < 0)
continue;
unsigned Src = MI->getOperand(OperandIdx).getReg();
const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
if (It != PVs.end())
MI->getOperand(OperandIdx).setReg(It->second);
}
}
public:
// Ctor.
- R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
- MachineDominatorTree &MDT)
- : VLIWPacketizerList(MF, MLI, MDT, true),
+ R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI)
+ : VLIWPacketizerList(MF, MLI, true),
TII(static_cast<const R600InstrInfo *>(
MF.getSubtarget().getInstrInfo())),
TRI(TII->getRegisterInfo()) {
VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
}
// initPacketizerState - initialize some internal flags.
void initPacketizerState() override {
ConsideredInstUsesAlreadyWrittenVectorElement = false;
}
// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
bool ignorePseudoInstruction(MachineInstr *MI,
MachineBasicBlock *MBB) override {
return false;
}
// isSoloInstruction - return true if instruction MI cannot be packetized
// with any other instruction, which means that MI itself is a packet.
bool isSoloInstruction(MachineInstr *MI) override {
if (TII->isVector(*MI))
return true;
if (!TII->isALUInstr(MI->getOpcode()))
return true;
if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)
return true;
// XXX: This can be removed once the packetizer properly handles all the
// LDS instruction group restrictions.
if (TII->isLDSInstr(MI->getOpcode()))
return true;
return false;
}
// isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
// together.
bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
if (getSlot(MII) == getSlot(MIJ))
ConsideredInstUsesAlreadyWrittenVectorElement = true;
// Do MII and MIJ share the same pred_sel?
int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
if (PredI != PredJ)
return false;
if (SUJ->isSucc(SUI)) {
for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
const SDep &Dep = SUJ->Succs[i];
if (Dep.getSUnit() != SUI)
continue;
if (Dep.getKind() == SDep::Anti)
continue;
if (Dep.getKind() == SDep::Output)
if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
continue;
return false;
}
}
bool ARDef = TII->definesAddressRegister(MII) ||
TII->definesAddressRegister(MIJ);
bool ARUse = TII->usesAddressRegister(MII) ||
TII->usesAddressRegister(MIJ);
if (ARDef && ARUse)
return false;
return true;
}
// isLegalToPruneDependencies - Is it legal to prune the dependence between SUI
// and SUJ.
bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
return false;
}
void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last);
MI->getOperand(LastOp).setImm(Bit);
}
bool isBundlableWithCurrentPMI(MachineInstr *MI,
const DenseMap<unsigned, unsigned> &PV,
std::vector<R600InstrInfo::BankSwizzle> &BS,
bool &isTransSlot) {
isTransSlot = TII->isTransOnly(MI);
assert (!isTransSlot || VLIW5);
// Is the dst reg sequence legal?
if (!isTransSlot && !CurrentPacketMIs.empty()) {
if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
if (ConsideredInstUsesAlreadyWrittenVectorElement &&
!TII->isVectorOnly(MI) && VLIW5) {
isTransSlot = true;
DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump(););
}
else
return false;
}
}
// Are the constant-read limitations met?
CurrentPacketMIs.push_back(MI);
if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
DEBUG(
dbgs() << "Couldn't pack :\n";
MI->dump();
dbgs() << "with the following packets :\n";
for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
CurrentPacketMIs[i]->dump();
dbgs() << "\n";
}
dbgs() << "because of Consts read limitations\n";
);
CurrentPacketMIs.pop_back();
return false;
}
// Is there a BankSwizzle set that meets the read-port limitations?
if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
PV, BS, isTransSlot)) {
DEBUG(
dbgs() << "Couldn't pack :\n";
MI->dump();
dbgs() << "with the following packets :\n";
for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
CurrentPacketMIs[i]->dump();
dbgs() << "\n";
}
dbgs() << "because of Read port limitations\n";
);
CurrentPacketMIs.pop_back();
return false;
}
// We cannot read LDS source registers from the Trans slot.
if (isTransSlot && TII->readsLDSSrcReg(MI))
return false;
CurrentPacketMIs.pop_back();
return true;
}
MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override {
MachineBasicBlock::iterator FirstInBundle =
CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
const DenseMap<unsigned, unsigned> &PV =
getPreviousVector(FirstInBundle);
std::vector<R600InstrInfo::BankSwizzle> BS;
bool isTransSlot;
if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
MachineInstr *MI = CurrentPacketMIs[i];
unsigned Op = TII->getOperandIdx(MI->getOpcode(),
AMDGPU::OpName::bank_swizzle);
MI->getOperand(Op).setImm(BS[i]);
}
unsigned Op = TII->getOperandIdx(MI->getOpcode(),
AMDGPU::OpName::bank_swizzle);
MI->getOperand(Op).setImm(BS.back());
if (!CurrentPacketMIs.empty())
setIsLastBit(CurrentPacketMIs.back(), 0);
substitutePV(MI, PV);
MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
if (isTransSlot) {
endPacket(std::next(It)->getParent(), std::next(It));
}
return It;
}
endPacket(MI->getParent(), MI);
if (TII->isTransOnly(MI))
return MI;
return VLIWPacketizerList::addToPacket(MI);
}
};
bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
// Instantiate the packetizer.
- R600PacketizerList Packetizer(Fn, MLI, MDT);
+ R600PacketizerList Packetizer(Fn, MLI);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
//
// Loop over all basic blocks and remove KILL pseudo-instructions
// These instructions confuse the dependence analysis. Consider:
// D0 = ... (Insn 0)
// R0 = KILL R0, D0 (Insn 1)
// R0 = ... (Insn 2)
// Here, Insn 1 will result in the dependence graph not emitting an output
// dependence between Insn 0 and Insn 2. This can lead to incorrect
// packetization.
//
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) {
MachineBasicBlock::iterator End = MBB->end();
MachineBasicBlock::iterator MI = MBB->begin();
while (MI != End) {
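// IMPLICIT_DEFs and CF_ALU instructions whose immediate operand is zero
// (presumably empty ALU clause markers) are dropped here as well.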
if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF ||
(MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
MachineBasicBlock::iterator DeleteMI = MI;
++MI;
MBB->erase(DeleteMI);
End = MBB->end();
continue;
}
++MI;
}
}
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) {
// Find scheduling regions and schedule / packetize each region.
unsigned RemainingCount = MBB->size();
for(MachineBasicBlock::iterator RegionEnd = MBB->end();
RegionEnd != MBB->begin();) {
// The next region starts above the previous region. Look backward in the
// instruction stream until we find the nearest boundary.
MachineBasicBlock::iterator I = RegionEnd;
for(;I != MBB->begin(); --I, --RemainingCount) {
if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn))
break;
}
I = MBB->begin();
// Skip empty scheduling regions.
if (I == RegionEnd) {
RegionEnd = std::prev(RegionEnd);
--RemainingCount;
continue;
}
// Skip regions with one instruction.
if (I == std::prev(RegionEnd)) {
RegionEnd = std::prev(RegionEnd);
continue;
}
Packetizer.PacketizeMIs(MBB, I, RegionEnd);
RegionEnd = I;
}
}
return true;
}
} // end anonymous namespace
llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) {
return new R600Packetizer(tm);
}
Index: llvm/trunk/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
===================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp (revision 216123)
+++ llvm/trunk/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp (revision 216124)
@@ -1,1428 +1,1426 @@
//===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a simple VLIW packetizer using DFA. The packetizer works on
// machine basic blocks. For each instruction I in BB, the packetizer consults
// the DFA to see if machine resources are available to execute I. If so, the
// packetizer checks if I depends on any instruction J in the current packet.
// If no dependency is found, I is added to current packet and machine resource
// is marked as taken. If any dependency is found, a target API call is made to
// prune the dependence.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/DFAPacketizer.h"
#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <map>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "packets"
static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
cl::ZeroOrMore, cl::Hidden, cl::init(true),
cl::desc("Allow non-solo packetization of volatile memory references"));
namespace llvm {
void initializeHexagonPacketizerPass(PassRegistry&);
}
namespace {
class HexagonPacketizer : public MachineFunctionPass {
public:
static char ID;
HexagonPacketizer() : MachineFunctionPass(ID) {
initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
const char *getPassName() const override {
return "Hexagon Packetizer";
}
bool runOnMachineFunction(MachineFunction &Fn) override;
};
char HexagonPacketizer::ID = 0;
class HexagonPacketizerList : public VLIWPacketizerList {
private:
// Has the instruction been promoted to a dot-new instruction.
bool PromotedToDotNew;
// Has the instruction been glued to allocframe.
bool GlueAllocframeStore;
// Has the feeder instruction been glued to new value jump.
bool GlueToNewValueJump;
// Check if there is a dependence between some instruction already in this
// packet and this instruction.
bool Dependence;
// Only check for dependence if there are resources available to
// schedule this instruction.
bool FoundSequentialDependence;
/// \brief A handle to the branch probability pass.
const MachineBranchProbabilityInfo *MBPI;
// Track MIs with ignored dependences.
std::vector<MachineInstr*> IgnoreDepMIs;
public:
// Ctor.
HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
- MachineDominatorTree &MDT,
const MachineBranchProbabilityInfo *MBPI);
// initPacketizerState - initialize some internal flags.
void initPacketizerState() override;
// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
bool ignorePseudoInstruction(MachineInstr *MI,
MachineBasicBlock *MBB) override;
// isSoloInstruction - return true if instruction MI cannot be packetized
// with any other instruction, which means that MI itself is a packet.
bool isSoloInstruction(MachineInstr *MI) override;
// isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
// together.
bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override;
// isLegalToPruneDependencies - Is it legal to prune dependence between SUI
// and SUJ.
bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override;
MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override;
private:
bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg);
bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC);
bool CanPromoteToDotNew(MachineInstr* MI, SUnit* PacketSU,
unsigned DepReg,
std::map <MachineInstr*, SUnit*> MIToSUnit,
MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC);
bool CanPromoteToNewValue(MachineInstr* MI, SUnit* PacketSU,
unsigned DepReg,
std::map <MachineInstr*, SUnit*> MIToSUnit,
MachineBasicBlock::iterator &MII);
bool CanPromoteToNewValueStore(MachineInstr* MI, MachineInstr* PacketMI,
unsigned DepReg,
std::map <MachineInstr*, SUnit*> MIToSUnit);
bool DemoteToDotOld(MachineInstr* MI);
bool ArePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2,
std::map <MachineInstr*, SUnit*> MIToSUnit);
bool RestrictingDepExistInPacket(MachineInstr*,
unsigned, std::map <MachineInstr*, SUnit*>);
bool isNewifiable(MachineInstr* MI);
bool isCondInst(MachineInstr* MI);
bool tryAllocateResourcesForConstExt(MachineInstr* MI);
bool canReserveResourcesForConstExt(MachineInstr *MI);
void reserveResourcesForConstExt(MachineInstr* MI);
bool isNewValueInst(MachineInstr* MI);
};
}
INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer",
false, false)
// HexagonPacketizerList Ctor.
HexagonPacketizerList::HexagonPacketizerList(
- MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT,
- const MachineBranchProbabilityInfo *MBPI)
- : VLIWPacketizerList(MF, MLI, MDT, true){
+ MachineFunction &MF, MachineLoopInfo &MLI,
+ const MachineBranchProbabilityInfo *MBPI)
+ : VLIWPacketizerList(MF, MLI, true) {
this->MBPI = MBPI;
}
bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
const MachineBranchProbabilityInfo *MBPI =
&getAnalysis<MachineBranchProbabilityInfo>();
// Instantiate the packetizer.
- HexagonPacketizerList Packetizer(Fn, MLI, MDT, MBPI);
+ HexagonPacketizerList Packetizer(Fn, MLI, MBPI);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
//
// Loop over all basic blocks and remove KILL pseudo-instructions
// These instructions confuse the dependence analysis. Consider:
// D0 = ... (Insn 0)
// R0 = KILL R0, D0 (Insn 1)
// R0 = ... (Insn 2)
// Here, Insn 1 will result in the dependence graph not emitting an output
// dependence between Insn 0 and Insn 2. This can lead to incorrect
// packetization
//
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) {
MachineBasicBlock::iterator End = MBB->end();
MachineBasicBlock::iterator MI = MBB->begin();
while (MI != End) {
if (MI->isKill()) {
MachineBasicBlock::iterator DeleteMI = MI;
++MI;
MBB->erase(DeleteMI);
End = MBB->end();
continue;
}
++MI;
}
}
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) {
// Find scheduling regions and schedule / packetize each region.
unsigned RemainingCount = MBB->size();
for(MachineBasicBlock::iterator RegionEnd = MBB->end();
RegionEnd != MBB->begin();) {
// The next region starts above the previous region. Look backward in the
// instruction stream until we find the nearest boundary.
MachineBasicBlock::iterator I = RegionEnd;
for(;I != MBB->begin(); --I, --RemainingCount) {
if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn))
break;
}
I = MBB->begin();
// Skip empty scheduling regions.
if (I == RegionEnd) {
RegionEnd = std::prev(RegionEnd);
--RemainingCount;
continue;
}
// Skip regions with one instruction.
if (I == std::prev(RegionEnd)) {
RegionEnd = std::prev(RegionEnd);
continue;
}
Packetizer.PacketizeMIs(MBB, I, RegionEnd);
RegionEnd = I;
}
}
return true;
}
static bool IsIndirectCall(MachineInstr* MI) {
return ((MI->getOpcode() == Hexagon::CALLR) ||
(MI->getOpcode() == Hexagon::CALLRv3));
}
// Reserve resources for a constant extender. Trigger an assertion if the
// reservation fails.
void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
MachineFunction *MF = MI->getParent()->getParent();
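// A constant extender consumes an extra slot's worth of resources. Model it
// by creating a throw-away IMMEXT_i pseudo instruction and reserving
// resources for that pseudo in the DFA.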
MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
MI->getDebugLoc());
if (ResourceTracker->canReserveResources(PseudoMI)) {
ResourceTracker->reserveResources(PseudoMI);
MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
} else {
MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
llvm_unreachable("can not reserve resources for constant extender.");
}
return;
}
bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
assert((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
"Should only be called for constant extended instructions");
MachineFunction *MF = MI->getParent()->getParent();
MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
MI->getDebugLoc());
bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
MF->DeleteMachineInstr(PseudoMI);
return CanReserve;
}
// Allocate resources (i.e. 4 bytes) for a constant extender. If successful,
// return true; otherwise return false.
bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
MachineFunction *MF = MI->getParent()->getParent();
MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
MI->getDebugLoc());
if (ResourceTracker->canReserveResources(PseudoMI)) {
ResourceTracker->reserveResources(PseudoMI);
MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
return true;
} else {
MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
return false;
}
}
bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI,
SDep::Kind DepType,
unsigned DepReg) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
const HexagonRegisterInfo *QRI =
(const HexagonRegisterInfo *)TM.getSubtargetImpl()->getRegisterInfo();
// Check for lr dependence
if (DepReg == QRI->getRARegister()) {
return true;
}
if (QII->isDeallocRet(MI)) {
if (DepReg == QRI->getFrameRegister() ||
DepReg == QRI->getStackRegister())
return true;
}
// Check if this is a predicate dependence
const TargetRegisterClass* RC = QRI->getMinimalPhysRegClass(DepReg);
if (RC == &Hexagon::PredRegsRegClass) {
return true;
}
//
// Lastly check for an operand used in an indirect call
// If we had an attribute for checking if an instruction is an indirect call,
// then we could have avoided this relatively brittle implementation of
// IsIndirectCall()
//
// Assumes that the first operand of the CALLr is the function address
//
if (IsIndirectCall(MI) && (DepType == SDep::Data)) {
MachineOperand MO = MI->getOperand(0);
if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) {
return true;
}
}
return false;
}
static bool IsRegDependence(const SDep::Kind DepType) {
return (DepType == SDep::Data || DepType == SDep::Anti ||
DepType == SDep::Output);
}
static bool IsDirectJump(MachineInstr* MI) {
return (MI->getOpcode() == Hexagon::JMP);
}
static bool IsSchedBarrier(MachineInstr* MI) {
switch (MI->getOpcode()) {
case Hexagon::BARRIER:
return true;
}
return false;
}
static bool IsControlFlow(MachineInstr* MI) {
return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
}
static bool IsLoopN(MachineInstr *MI) {
return (MI->getOpcode() == Hexagon::LOOP0_i ||
MI->getOpcode() == Hexagon::LOOP0_r);
}
/// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a
/// callee-saved register.
static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
const TargetRegisterInfo *TRI) {
for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) {
unsigned CalleeSavedReg = *CSR;
if (MI->modifiesRegister(CalleeSavedReg, TRI))
return true;
}
return false;
}
// Returns true if an instruction can be promoted to .new predicate
// or new-value store.
bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
if ( isCondInst(MI) || QII->mayBeNewStore(MI))
return true;
else
return false;
}
bool HexagonPacketizerList::isCondInst (MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
const MCInstrDesc& TID = MI->getDesc();
// bug 5670: until that is fixed,
// this portion is disabled.
if ( TID.isConditionalBranch() // && !IsRegisterJump(MI)) ||
|| QII->isConditionalTransfer(MI)
|| QII->isConditionalALU32(MI)
|| QII->isConditionalLoad(MI)
|| QII->isConditionalStore(MI)) {
return true;
}
return false;
}
// Promote an instruction to its .new form.
// At this time, we have already made a call to CanPromoteToDotNew
// and made sure that it can *indeed* be promoted.
bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI,
SDep::Kind DepType, MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC) {
assert (DepType == SDep::Data);
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
int NewOpcode;
if (RC == &Hexagon::PredRegsRegClass)
NewOpcode = QII->GetDotNewPredOp(MI, MBPI);
else
NewOpcode = QII->GetDotNewOp(MI);
MI->setDesc(QII->get(NewOpcode));
return true;
}
bool HexagonPacketizerList::DemoteToDotOld(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
int NewOpcode = QII->GetDotOldOp(MI->getOpcode());
MI->setDesc(QII->get(NewOpcode));
return true;
}
enum PredicateKind {
PK_False,
PK_True,
PK_Unknown
};
/// Returns PK_True if an instruction is predicated on p0, PK_False if it is
/// predicated on !p0, and PK_Unknown if it is not predicated.
static PredicateKind getPredicateSense(MachineInstr* MI,
const HexagonInstrInfo *QII) {
if (!QII->isPredicated(MI))
return PK_Unknown;
if (QII->isPredicatedTrue(MI))
return PK_True;
return PK_False;
}
static MachineOperand& GetPostIncrementOperand(MachineInstr *MI,
const HexagonInstrInfo *QII) {
assert(QII->isPostIncrement(MI) && "Not a post increment operation.");
#ifndef NDEBUG
// Post-increment means duplicates: the same register appears as both a def
// and a use. Use a dense map to find the duplicate in the operand list.
// Caution: DenseMap initializes with a minimum of 64 buckets, whereas there
// are at most 5 operands in the post-increment instruction.
DenseMap<unsigned, unsigned> DefRegsSet;
for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
if (MI->getOperand(opNum).isReg() &&
MI->getOperand(opNum).isDef()) {
DefRegsSet[MI->getOperand(opNum).getReg()] = 1;
}
for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
if (MI->getOperand(opNum).isReg() &&
MI->getOperand(opNum).isUse()) {
if (DefRegsSet[MI->getOperand(opNum).getReg()]) {
return MI->getOperand(opNum);
}
}
#else
if (MI->getDesc().mayLoad()) {
// The 2nd operand is always the post increment operand in load.
assert(MI->getOperand(1).isReg() &&
"Post increment operand has be to a register.");
return (MI->getOperand(1));
}
if (MI->getDesc().mayStore()) {
// The 1st operand is always the post increment operand in store.
assert(MI->getOperand(0).isReg() &&
"Post increment operand has be to a register.");
return (MI->getOperand(0));
}
#endif
// we should never come here.
llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
}
// get the value being stored
static MachineOperand& GetStoreValueOperand(MachineInstr *MI) {
// value being stored is always the last operand.
return (MI->getOperand(MI->getNumOperands()-1));
}
// Can this be a new value store?
// The following restrictions must be respected when converting a store into
// a new value store.
// 1. If an instruction uses auto-increment, its address register cannot
// be a new-value register. Arch Spec 5.4.2.1
// 2. If an instruction uses absolute-set addressing mode,
// its address register cannot be a new-value register.
// Arch Spec 5.4.2.1. TODO: This is not enabled, as
// absolute-set address mode patterns are not implemented.
// 3. If an instruction produces a 64-bit result, its registers cannot be used
// as new-value registers. Arch Spec 5.4.2.2.
// 4. If the instruction that sets a new-value register is conditional, then
// the instruction that uses the new-value register must also be conditional,
// and both must always have their predicates evaluate identically.
// Arch Spec 5.4.2.3.
// 5. There is an implied restriction that a packet cannot have another store
// if there is a new value store in the packet. Corollary: if there is
// already a store in a packet, there cannot be a new value store.
// Arch Spec: 3.4.4.2
bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
MachineInstr *PacketMI, unsigned DepReg,
std::map <MachineInstr*, SUnit*> MIToSUnit) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
// Make sure we are looking at a store that can be promoted.
if (!QII->mayBeNewStore(MI))
return false;
// Make sure there is a dependency and that it can be new-value'd.
if (GetStoreValueOperand(MI).isReg() &&
GetStoreValueOperand(MI).getReg() != DepReg)
return false;
const HexagonRegisterInfo *QRI =
(const HexagonRegisterInfo *)TM.getSubtargetImpl()->getRegisterInfo();
const MCInstrDesc& MCID = PacketMI->getDesc();
// first operand is always the result
const TargetRegisterClass* PacketRC = QII->getRegClass(MCID, 0, QRI, MF);
// If there is already a store in the packet, a new value store cannot be
// added. Arch Spec 3.4.4.2.
for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
VE = CurrentPacketMIs.end();
(VI != VE); ++VI) {
SUnit* PacketSU = MIToSUnit[*VI];
if (PacketSU->getInstr()->getDesc().mayStore() ||
// if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME,
// then we don't need this
PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME ||
PacketSU->getInstr()->getOpcode() == Hexagon::DEALLOCFRAME)
return false;
}
if (PacketRC == &Hexagon::DoubleRegsRegClass) {
// New value store constraint: double regs cannot feed into a new value store.
// arch spec section: 5.4.2.2
return false;
}
// Make sure it's NOT the post increment register that we are going to
// new value.
if (QII->isPostIncrement(MI) &&
MI->getDesc().mayStore() &&
GetPostIncrementOperand(MI, QII).getReg() == DepReg) {
return false;
}
if (QII->isPostIncrement(PacketMI) &&
PacketMI->getDesc().mayLoad() &&
GetPostIncrementOperand(PacketMI, QII).getReg() == DepReg) {
// If the source is post_inc or absolute-set addressing,
// it cannot feed into a new value store:
// r3 = memw(r2++#4)
// memw(r30 + #-1404) = r2.new -> can not be new value store
// arch spec section: 5.4.2.1
return false;
}
// If the source that feeds the store is predicated, new value store must
// also be predicated.
if (QII->isPredicated(PacketMI)) {
if (!QII->isPredicated(MI))
return false;
// Check to make sure that they both will have their predicates
// evaluate identically
unsigned predRegNumSrc = 0;
unsigned predRegNumDst = 0;
const TargetRegisterClass* predRegClass = nullptr;
// Get predicate register used in the source instruction
for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
if ( PacketMI->getOperand(opNum).isReg())
predRegNumSrc = PacketMI->getOperand(opNum).getReg();
predRegClass = QRI->getMinimalPhysRegClass(predRegNumSrc);
if (predRegClass == &Hexagon::PredRegsRegClass) {
break;
}
}
assert ((predRegClass == &Hexagon::PredRegsRegClass ) &&
("predicate register not found in a predicated PacketMI instruction"));
// Get predicate register used in new-value store instruction
for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
if ( MI->getOperand(opNum).isReg())
predRegNumDst = MI->getOperand(opNum).getReg();
predRegClass = QRI->getMinimalPhysRegClass(predRegNumDst);
if (predRegClass == &Hexagon::PredRegsRegClass) {
break;
}
}
assert ((predRegClass == &Hexagon::PredRegsRegClass ) &&
("predicate register not found in a predicated MI instruction"));
// New-value register producer and user (store) need to satisfy these
// constraints:
// 1) Both instructions should be predicated on the same register.
// 2) If producer of the new-value register is .new predicated then store
// should also be .new predicated and if producer is not .new predicated
// then store should not be .new predicated.
// 3) Both the new-value register producer and the user should have the same
// predicate sense, i.e., either both should be negated or neither should be.
if (( predRegNumDst != predRegNumSrc) ||
QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI) ||
getPredicateSense(MI, QII) != getPredicateSense(PacketMI, QII)) {
return false;
}
}
// Make sure that, other than the new-value register, no other register used
// by the store instruction has been modified in the same packet. Predicate
// registers can be modified, but they should not be modified between the
// producer and the store instruction, as that would make them conditional on
// different values. We already know this to be true for all the instructions
// before and including PacketMI. However, we need to perform the check for
// the remaining instructions in the packet.
std::vector<MachineInstr*>::iterator VI;
std::vector<MachineInstr*>::iterator VE;
unsigned StartCheck = 0;
for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end();
(VI != VE); ++VI) {
SUnit* TempSU = MIToSUnit[*VI];
MachineInstr* TempMI = TempSU->getInstr();
// Following condition is true for all the instructions until PacketMI is
// reached (StartCheck is set to 0 before the for loop).
// StartCheck flag is 1 for all the instructions after PacketMI.
if (TempMI != PacketMI && !StartCheck) // start processing only after
continue; // encountering PacketMI
StartCheck = 1;
if (TempMI == PacketMI) // We don't want to check PacketMI for dependence
continue;
for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
if (MI->getOperand(opNum).isReg() &&
TempSU->getInstr()->modifiesRegister(MI->getOperand(opNum).getReg(),
QRI))
return false;
}
}
// Make sure that for non-POST_INC stores:
// 1. The only use of reg is DepReg and no other registers.
// This handles V4 base+index registers.
// The following store cannot be dot new:
// Eg. r0 = add(r0, #3)
// memw(r1+r0<<#2) = r0
if (!QII->isPostIncrement(MI) &&
GetStoreValueOperand(MI).isReg() &&
GetStoreValueOperand(MI).getReg() == DepReg) {
for(unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
if (MI->getOperand(opNum).isReg() &&
MI->getOperand(opNum).getReg() == DepReg) {
return false;
}
}
// 2. If data definition is because of implicit definition of the register,
// do not newify the store. Eg.
// %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
// STrih_indexed %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
if (PacketMI->getOperand(opNum).isReg() &&
PacketMI->getOperand(opNum).getReg() == DepReg &&
PacketMI->getOperand(opNum).isDef() &&
PacketMI->getOperand(opNum).isImplicit()) {
return false;
}
}
}
// Can be dot new store.
return true;
}
// Can this MI be promoted to either a
// new value store or a new value jump?
bool HexagonPacketizerList::CanPromoteToNewValue( MachineInstr *MI,
SUnit *PacketSU, unsigned DepReg,
std::map <MachineInstr*, SUnit*> MIToSUnit,
MachineBasicBlock::iterator &MII)
{
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
const HexagonRegisterInfo *QRI =
(const HexagonRegisterInfo *)TM.getSubtargetImpl()->getRegisterInfo();
if (!QRI->Subtarget.hasV4TOps() ||
!QII->mayBeNewStore(MI))
return false;
MachineInstr *PacketMI = PacketSU->getInstr();
// Check to see if the store can be new-value'd.
if (CanPromoteToNewValueStore(MI, PacketMI, DepReg, MIToSUnit))
return true;
// Check to see if the compare/jump can be new-value'd.
// This is done in a pass of its own; no need to check it here.
return false;
}
// Check to see if an instruction can be dot new
// There are three kinds.
// 1. dot new on predicate - V2/V3/V4
// 2. dot new on stores NV/ST - V4
// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
SUnit *PacketSU, unsigned DepReg,
std::map <MachineInstr*, SUnit*> MIToSUnit,
MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC )
{
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
// Already a dot new instruction.
if (QII->isDotNewInst(MI) && !QII->mayBeNewStore(MI))
return false;
if (!isNewifiable(MI))
return false;
// predicate .new
if (RC == &Hexagon::PredRegsRegClass && isCondInst(MI))
return true;
else if (RC != &Hexagon::PredRegsRegClass &&
!QII->mayBeNewStore(MI)) // MI is not a new-value store
return false;
else {
// Create a dot new machine instruction to see if resources can be
// allocated. If not, bail out now.
int NewOpcode = QII->GetDotNewOp(MI);
const MCInstrDesc &desc = QII->get(NewOpcode);
DebugLoc dl;
MachineInstr *NewMI =
MI->getParent()->getParent()->CreateMachineInstr(desc, dl);
bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
MI->getParent()->getParent()->DeleteMachineInstr(NewMI);
if (!ResourcesAvailable)
return false;
// New value store only;
// new value jumps are generated in a pass of their own.
if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) {
return false;
}
}
return true;
}
// Go through the packet instructions and search for anti dependency
// between them and DepReg from MI
// Consider this case:
// Trying to add
// a) %R1<def> = TFRI_cdNotPt %P3, 2
// to this packet:
// {
// b) %P0<def> = OR_pp %P3<kill>, %P0<kill>
// c) %P3<def> = TFR_PdRs %R23
// d) %R1<def> = TFRI_cdnPt %P3, 4
// }
// The P3 from a) and d) will be complements after
// a)'s P3 is converted to .new form
// Anti Dep between c) and b) is irrelevant for this case
bool HexagonPacketizerList::RestrictingDepExistInPacket (MachineInstr* MI,
unsigned DepReg,
std::map <MachineInstr*, SUnit*> MIToSUnit) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
SUnit* PacketSUDep = MIToSUnit[MI];
for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
// We only care for dependencies to predicated instructions
if(!QII->isPredicated(*VIN)) continue;
// Scheduling Unit for current insn in the packet
SUnit* PacketSU = MIToSUnit[*VIN];
// Look at dependencies between current members of the packet
// and predicate defining instruction MI.
// Make sure that dependency is on the exact register
// we care about.
if (PacketSU->isSucc(PacketSUDep)) {
for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
if ((PacketSU->Succs[i].getSUnit() == PacketSUDep) &&
(PacketSU->Succs[i].getKind() == SDep::Anti) &&
(PacketSU->Succs[i].getReg() == DepReg)) {
return true;
}
}
}
}
return false;
}
/// Gets the predicate register of a predicated instruction.
static unsigned getPredicatedRegister(MachineInstr *MI,
const HexagonInstrInfo *QII) {
/// We use the following rule: The first predicate register that is a use is
/// the predicate register of a predicated instruction.
assert(QII->isPredicated(MI) && "Must be predicated instruction");
for (MachineInstr::mop_iterator OI = MI->operands_begin(),
OE = MI->operands_end(); OI != OE; ++OI) {
MachineOperand &Op = *OI;
if (Op.isReg() && Op.getReg() && Op.isUse() &&
Hexagon::PredRegsRegClass.contains(Op.getReg()))
return Op.getReg();
}
llvm_unreachable("Unknown instruction operand layout");
return 0;
}
// Given two predicated instructions, this function detects whether
// the predicates are complements
bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
MachineInstr* MI2, std::map <MachineInstr*, SUnit*> MIToSUnit) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
// If we don't know the predicate sense of the instructions, bail out early;
// we need it later.
if (getPredicateSense(MI1, QII) == PK_Unknown ||
getPredicateSense(MI2, QII) == PK_Unknown)
return false;
// Scheduling unit for candidate
SUnit* SU = MIToSUnit[MI1];
// One corner case deals with the following scenario:
// Trying to add
// a) %R24<def> = TFR_cPt %P0, %R25
// to this packet:
//
// {
// b) %R25<def> = TFR_cNotPt %P0, %R24
// c) %P0<def> = CMPEQri %R26, 1
// }
//
// On a general check a) and b) are complements, but
// the presence of c) will convert a) to .new form, and
// then it is no longer a complement.
// We attempt to detect this by analyzing existing
// dependencies in the packet.
// Analyze relationships between all existing members of the packet.
// Look for an anti dependency on the same predicate reg
// as used in the candidate.
for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
// Scheduling Unit for current insn in the packet
SUnit* PacketSU = MIToSUnit[*VIN];
// If this instruction in the packet is succeeded by the candidate...
if (PacketSU->isSucc(SU)) {
for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
// The corner case exists when there is a true data
// dependency between the candidate and one of the current
// packet members, this dep is on a predicate reg, and
// there already exists an anti dep on the same pred in
// the packet.
if (PacketSU->Succs[i].getSUnit() == SU &&
PacketSU->Succs[i].getKind() == SDep::Data &&
Hexagon::PredRegsRegClass.contains(
PacketSU->Succs[i].getReg()) &&
// Here I know that *VIN is predicate setting instruction
// with true data dep to candidate on the register
// we care about - c) in the above example.
// Now I need to see if there is an anti dependency
// from c) to any other instruction in the
// same packet on the pred reg of interest
RestrictingDepExistInPacket(*VIN,PacketSU->Succs[i].getReg(),
MIToSUnit)) {
return false;
}
}
}
}
// If the above case does not apply, check the regular
// complement condition.
// Check that the predicate register is the same and
// that the predicate sense is different.
// We also need to differentiate .old vs. .new:
// !p0 is not complementary to p0.new.
unsigned PReg1 = getPredicatedRegister(MI1, QII);
unsigned PReg2 = getPredicatedRegister(MI2, QII);
return ((PReg1 == PReg2) &&
Hexagon::PredRegsRegClass.contains(PReg1) &&
Hexagon::PredRegsRegClass.contains(PReg2) &&
(getPredicateSense(MI1, QII) != getPredicateSense(MI2, QII)) &&
(QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2)));
}
// initPacketizerState - Initialize packetizer flags
void HexagonPacketizerList::initPacketizerState() {
Dependence = false;
PromotedToDotNew = false;
GlueToNewValueJump = false;
GlueAllocframeStore = false;
FoundSequentialDependence = false;
return;
}
// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
MachineBasicBlock *MBB) {
if (MI->isDebugValue())
return true;
// We must print out inline assembly
if (MI->isInlineAsm())
return false;
// We check if MI has any functional units mapped to it.
// If it doesn't, we ignore the instruction.
const MCInstrDesc& TID = MI->getDesc();
unsigned SchedClass = TID.getSchedClass();
const InstrStage* IS =
ResourceTracker->getInstrItins()->beginStage(SchedClass);
unsigned FuncUnits = IS->getUnits();
return !FuncUnits;
}
// isSoloInstruction - Returns true for instructions that must be
// scheduled in their own packet.
bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) {
if (MI->isInlineAsm())
return true;
if (MI->isEHLabel())
return true;
// From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
// trap, pause, barrier, icinva, isync, and syncht are solo instructions.
// They must not be grouped with other instructions in a packet.
if (IsSchedBarrier(MI))
return true;
return false;
}
// isLegalToPacketizeTogether:
// SUI is the current instruction that is outside of the current packet.
// SUJ is the current instruction inside the current packet against which
// SUI will be packetized.
bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
MachineInstr *I = SUI->getInstr();
MachineInstr *J = SUJ->getInstr();
assert(I && J && "Unable to packetize null instruction!");
const MCInstrDesc &MCIDI = I->getDesc();
const MCInstrDesc &MCIDJ = J->getDesc();
MachineBasicBlock::iterator II = I;
const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
const HexagonRegisterInfo *QRI =
(const HexagonRegisterInfo *)TM.getSubtargetImpl()->getRegisterInfo();
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
// Inline asm cannot go in the packet.
if (I->getOpcode() == Hexagon::INLINEASM)
llvm_unreachable("Should not meet inline asm here!");
if (isSoloInstruction(I))
llvm_unreachable("Should not meet solo instr here!");
// A call that saves the callee-saved registers can only be in a packet
// with instructions that don't write to the callee-saved registers.
if ((QII->isSaveCalleeSavedRegsCall(I) &&
DoesModifyCalleeSavedReg(J, QRI)) ||
(QII->isSaveCalleeSavedRegsCall(J) &&
DoesModifyCalleeSavedReg(I, QRI))) {
Dependence = true;
return false;
}
// Two control flow instructions cannot go in the same packet.
if (IsControlFlow(I) && IsControlFlow(J)) {
Dependence = true;
return false;
}
// A LoopN instruction cannot appear in the same packet as a jump or call.
if (IsLoopN(I) &&
(IsDirectJump(J) || MCIDJ.isCall() || QII->isDeallocRet(J))) {
Dependence = true;
return false;
}
if (IsLoopN(J) &&
(IsDirectJump(I) || MCIDI.isCall() || QII->isDeallocRet(I))) {
Dependence = true;
return false;
}
// dealloc_return cannot appear in the same packet as a conditional or
// unconditional jump.
if (QII->isDeallocRet(I) &&
(MCIDJ.isBranch() || MCIDJ.isCall() || MCIDJ.isBarrier())) {
Dependence = true;
return false;
}
// V4 allows dual stores, but does not allow a second store if the
// first store is not in SLOT0. New value store, new value jump,
// dealloc_return and memop always take SLOT0.
// Arch spec 3.4.4.2
if (QRI->Subtarget.hasV4TOps()) {
if (MCIDI.mayStore() && MCIDJ.mayStore() &&
(QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) {
Dependence = true;
return false;
}
if ((QII->isMemOp(J) && MCIDI.mayStore())
|| (MCIDJ.mayStore() && QII->isMemOp(I))
|| (QII->isMemOp(J) && QII->isMemOp(I))) {
Dependence = true;
return false;
}
// A dealloc_return cannot be packetized with a store.
if (MCIDJ.mayStore() && QII->isDeallocRet(I)) {
Dependence = true;
return false;
}
// If an instruction feeds new value jump, glue it.
MachineBasicBlock::iterator NextMII = I;
++NextMII;
if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) {
MachineInstr *NextMI = NextMII;
bool secondRegMatch = false;
bool maintainNewValueJump = false;
if (NextMI->getOperand(1).isReg() &&
I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
secondRegMatch = true;
maintainNewValueJump = true;
}
if (!secondRegMatch &&
I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
maintainNewValueJump = true;
}
for (std::vector<MachineInstr*>::iterator
VI = CurrentPacketMIs.begin(),
VE = CurrentPacketMIs.end();
(VI != VE && maintainNewValueJump); ++VI) {
SUnit* PacketSU = MIToSUnit[*VI];
// NVJ cannot be part of the dual jump - Arch Spec: section 7.8
if (PacketSU->getInstr()->getDesc().isCall()) {
Dependence = true;
break;
}
// Validate
// 1. Packet does not have a store in it.
// 2. If the first operand of the nvj is newified, and the second
// operand is also a reg, it (second reg) is not defined in
// the same packet.
// 3. If the second operand of the nvj is newified, (which means
// first operand is also a reg), first reg is not defined in
// the same packet.
if (PacketSU->getInstr()->getDesc().mayStore() ||
PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME ||
// Check #2.
(!secondRegMatch && NextMI->getOperand(1).isReg() &&
PacketSU->getInstr()->modifiesRegister(
NextMI->getOperand(1).getReg(), QRI)) ||
// Check #3.
(secondRegMatch &&
PacketSU->getInstr()->modifiesRegister(
NextMI->getOperand(0).getReg(), QRI))) {
Dependence = true;
break;
}
}
if (!Dependence)
GlueToNewValueJump = true;
else
return false;
}
}
if (SUJ->isSucc(SUI)) {
for (unsigned i = 0;
(i < SUJ->Succs.size()) && !FoundSequentialDependence;
++i) {
if (SUJ->Succs[i].getSUnit() != SUI) {
continue;
}
SDep::Kind DepType = SUJ->Succs[i].getKind();
// For direct calls:
// Ignore register dependences for call instructions for
// packetization purposes except for those due to r31 and
// predicate registers.
//
// For indirect calls:
// Same as direct calls + check for true dependences to the register
// used in the indirect call.
//
// We completely ignore Order dependences for call instructions
//
// For returns:
// Ignore register dependences for return instructions like jumpr,
// dealloc return unless we have dependencies on the explicit uses
// of the registers used by jumpr (like r31) or dealloc return
// (like r29 or r30).
//
// TODO: Currently, jumpr handles only the return of r31, so the
// following logic (specifically IsCallDependent) works fine.
// Once jumpr is enabled for registers other than r31, we will need to
// rework the last part of IsCallDependent, where it handles the register
// used by an indirect call. Bug 6216 is opened for this.
//
unsigned DepReg = 0;
const TargetRegisterClass* RC = nullptr;
if (DepType == SDep::Data) {
DepReg = SUJ->Succs[i].getReg();
RC = QRI->getMinimalPhysRegClass(DepReg);
}
if ((MCIDI.isCall() || MCIDI.isReturn()) &&
(!IsRegDependence(DepType) ||
!IsCallDependent(I, DepType, SUJ->Succs[i].getReg()))) {
/* do nothing */
}
// For instructions that can be promoted to dot-new, try to promote.
else if ((DepType == SDep::Data) &&
CanPromoteToDotNew(I, SUJ, DepReg, MIToSUnit, II, RC) &&
PromoteToDotNew(I, DepType, II, RC)) {
PromotedToDotNew = true;
/* do nothing */
}
else if ((DepType == SDep::Data) &&
(QII->isNewValueJump(I))) {
/* do nothing */
}
// For predicated instructions, if the predicates are complements
// then there can be no dependence.
else if (QII->isPredicated(I) &&
QII->isPredicated(J) &&
ArePredicatesComplements(I, J, MIToSUnit)) {
/* do nothing */
}
else if (IsDirectJump(I) &&
!MCIDJ.isBranch() &&
!MCIDJ.isCall() &&
(DepType == SDep::Order)) {
// Ignore Order dependences between unconditional direct branches
// and non-control-flow instructions
/* do nothing */
}
else if (MCIDI.isConditionalBranch() && (DepType != SDep::Data) &&
(DepType != SDep::Output)) {
// Ignore all dependences for jumps except for true and output
// dependences
/* do nothing */
}
// Ignore output dependences due to superregs. We can
// write to two different subregisters of R1:0, for instance,
// in the same cycle.
//
// If neither I nor J defines DepReg, then this is a
// superfluous output dependence. The dependence must be of the
// form:
// R0 = ...
// R1 = ...
// and there is an output dependence between the two instructions
// with
// DepReg = D0
// We want to ignore these dependences.
// Ideally, the dependence constructor should annotate such
// dependences. We can then avoid this relatively expensive check.
//
else if (DepType == SDep::Output) {
// DepReg is the register that's responsible for the dependence.
unsigned DepReg = SUJ->Succs[i].getReg();
// Check if I or J really defines DepReg.
if (I->definesRegister(DepReg) ||
J->definesRegister(DepReg)) {
FoundSequentialDependence = true;
break;
}
}
// We ignore Order dependences for
// 1. Two loads unless they are volatile.
// 2. Two stores in V4 unless they are volatile.
else if ((DepType == SDep::Order) &&
!I->hasOrderedMemoryRef() &&
!J->hasOrderedMemoryRef()) {
if (QRI->Subtarget.hasV4TOps() &&
// hexagonv4 allows dual store.
MCIDI.mayStore() && MCIDJ.mayStore()) {
/* do nothing */
}
// store followed by store-- not OK on V2
// store followed by load -- not OK on all (OK if addresses
// are not aliased)
// load followed by store -- OK on all
// load followed by load -- OK on all
else if ( !MCIDJ.mayStore()) {
/* do nothing */
}
else {
FoundSequentialDependence = true;
break;
}
}
// For V4, special case ALLOCFRAME. Even though there is a dependency
// between ALLOCFRAME and the subsequent store, allow them to be
// packetized in the same packet. This implies that the store is using the
// caller's SP. Hence, the offset needs to be updated accordingly.
else if (DepType == SDep::Data
&& QRI->Subtarget.hasV4TOps()
&& J->getOpcode() == Hexagon::ALLOCFRAME
&& (I->getOpcode() == Hexagon::STrid
|| I->getOpcode() == Hexagon::STriw
|| I->getOpcode() == Hexagon::STrib)
&& I->getOperand(0).getReg() == QRI->getStackRegister()
&& QII->isValidOffset(I->getOpcode(),
I->getOperand(1).getImm() -
(FrameSize + HEXAGON_LRFP_SIZE)))
{
GlueAllocframeStore = true;
// Since this store is to be glued with allocframe in the same
// packet, it will use the SP of the previous stack frame, i.e. the
// caller's SP. Therefore, we need to recalculate the offset according
// to this change.
I->getOperand(1).setImm(I->getOperand(1).getImm() -
(FrameSize + HEXAGON_LRFP_SIZE));
}
//
// Skip over anti-dependences. Two instructions that are
// anti-dependent can share a packet
//
else if (DepType != SDep::Anti) {
FoundSequentialDependence = true;
break;
}
}
if (FoundSequentialDependence) {
Dependence = true;
return false;
}
}
return true;
}
// isLegalToPruneDependencies
bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
MachineInstr *I = SUI->getInstr();
assert(I && SUJ->getInstr() && "Unable to packetize null instruction!");
const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
if (Dependence) {
// Check if the instruction was promoted to a dot-new. If so, demote it
// back into a dot-old.
if (PromotedToDotNew) {
DemoteToDotOld(I);
}
// Check if the instruction (must be a store) was glued with an Allocframe
// instruction. If so, restore its offset to its original value, i.e. use
// current SP instead of caller's SP.
if (GlueAllocframeStore) {
I->getOperand(1).setImm(I->getOperand(1).getImm() +
FrameSize + HEXAGON_LRFP_SIZE);
}
return false;
}
return true;
}
MachineBasicBlock::iterator
HexagonPacketizerList::addToPacket(MachineInstr *MI) {
MachineBasicBlock::iterator MII = MI;
MachineBasicBlock *MBB = MI->getParent();
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
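// If MI feeds a new-value jump, that jump is the very next instruction and
// must go into the same packet, so both are reserved and added here together.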
if (GlueToNewValueJump) {
++MII;
MachineInstr *nvjMI = MII;
assert(ResourceTracker->canReserveResources(MI));
ResourceTracker->reserveResources(MI);
if ((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
!tryAllocateResourcesForConstExt(MI)) {
endPacket(MBB, MI);
ResourceTracker->reserveResources(MI);
assert(canReserveResourcesForConstExt(MI) &&
"Ensure that there is a slot");
reserveResourcesForConstExt(MI);
// Reserve resources for new value jump constant extender.
assert(canReserveResourcesForConstExt(MI) &&
"Ensure that there is a slot");
reserveResourcesForConstExt(nvjMI);
assert(ResourceTracker->canReserveResources(nvjMI) &&
"Ensure that there is a slot");
} else if ( // Extended instruction takes two slots in the packet.
// Try to reserve and allocate 4 bytes in the current packet first.
(QII->isExtended(nvjMI)
&& (!tryAllocateResourcesForConstExt(nvjMI)
|| !ResourceTracker->canReserveResources(nvjMI)))
|| // For non-extended instruction, no need to allocate extra 4 bytes.
(!QII->isExtended(nvjMI) &&
!ResourceTracker->canReserveResources(nvjMI)))
{
endPacket(MBB, MI);
// A new and empty packet starts.
// We are sure that the resource requirements can be satisfied.
// Therefore, we do not need to call "canReserveResources" anymore.
ResourceTracker->reserveResources(MI);
if (QII->isExtended(nvjMI))
reserveResourcesForConstExt(nvjMI);
}
// Here, we are sure that "reserveResources" would succeed.
ResourceTracker->reserveResources(nvjMI);
CurrentPacketMIs.push_back(MI);
CurrentPacketMIs.push_back(nvjMI);
} else {
if ( (QII->isExtended(MI) || QII->isConstExtended(MI))
&& ( !tryAllocateResourcesForConstExt(MI)
|| !ResourceTracker->canReserveResources(MI)))
{
endPacket(MBB, MI);
// Check if the instruction was promoted to a dot-new. If so, demote it
// back into a dot-old
if (PromotedToDotNew) {
DemoteToDotOld(MI);
}
reserveResourcesForConstExt(MI);
}
// In case that "MI" is not an extended insn,
// the resource availability has already been checked.
ResourceTracker->reserveResources(MI);
CurrentPacketMIs.push_back(MI);
}
return MII;
}
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
FunctionPass *llvm::createHexagonPacketizer() {
return new HexagonPacketizer();
}
Index: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (revision 216123)
+++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (revision 216124)
@@ -1,1533 +1,1532 @@
//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <queue>
using namespace llvm;
#define DEBUG_TYPE "misched"
static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
cl::ZeroOrMore, cl::init(false),
cl::desc("Enable use of AA during MI GAD construction"));
static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
cl::init(true), cl::desc("Enable use of TBAA during MI GAD construction"));
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo *mli,
- const MachineDominatorTree *mdt,
bool IsPostRAFlag,
bool RemoveKillFlags,
LiveIntervals *lis)
- : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
+ : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
CanHandleTerminators(false), FirstDbgValue(nullptr) {
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
"Virtual registers must be removed prior to PostRA scheduling");
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
SchedModel.init(*ST.getSchedModel(), &ST, TII);
}
/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
do {
if (const Operator *U = dyn_cast<Operator>(V)) {
// If we find a ptrtoint, we can transfer control back to the
// regular getUnderlyingObjectFromInt.
if (U->getOpcode() == Instruction::PtrToInt)
return U->getOperand(0);
// If we find an add of a constant, a multiplied value, or a phi, it's
// likely that the other operand will lead us to the base
// object. We don't have to worry about the case where the
// object address is somehow being computed by the multiply,
// because our callers only care when the result is an
// identifiable object.
if (U->getOpcode() != Instruction::Add ||
(!isa<ConstantInt>(U->getOperand(1)) &&
Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
!isa<PHINode>(U->getOperand(1))))
return V;
V = U->getOperand(0);
} else {
return V;
}
assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
} while (1);
}
/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static void getUnderlyingObjects(const Value *V,
SmallVectorImpl<Value *> &Objects) {
SmallPtrSet<const Value *, 16> Visited;
SmallVector<const Value *, 4> Working(1, V);
do {
V = Working.pop_back_val();
SmallVector<Value *, 4> Objs;
GetUnderlyingObjects(const_cast<Value *>(V), Objs);
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
I != IE; ++I) {
V = *I;
if (!Visited.insert(V))
continue;
if (Operator::getOpcode(V) == Instruction::IntToPtr) {
const Value *O =
getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
if (O->getType()->isPointerTy()) {
Working.push_back(O);
continue;
}
}
Objects.push_back(const_cast<Value *>(V));
}
} while (!Working.empty());
}
typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
UnderlyingObjectsVector;
/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object.
static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
const MachineFrameInfo *MFI,
UnderlyingObjectsVector &Objects) {
if (!MI->hasOneMemOperand() ||
(!(*MI->memoperands_begin())->getValue() &&
!(*MI->memoperands_begin())->getPseudoValue()) ||
(*MI->memoperands_begin())->isVolatile())
return;
if (const PseudoSourceValue *PSV =
(*MI->memoperands_begin())->getPseudoValue()) {
// For now, ignore PseudoSourceValues which may alias LLVM IR values
// because the code that uses this function has no way to cope with
// such aliases.
if (!PSV->isAliased(MFI)) {
bool MayAlias = PSV->mayAlias(MFI);
Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
}
return;
}
const Value *V = (*MI->memoperands_begin())->getValue();
if (!V)
return;
SmallVector<Value *, 4> Objs;
getUnderlyingObjects(V, Objs);
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
I != IE; ++I) {
V = *I;
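// If any underlying object cannot be identified, clear the list and bail
// out rather than return partial information.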
if (!isIdentifiedObject(V)) {
Objects.clear();
return;
}
Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
}
}
void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
BB = bb;
}
void ScheduleDAGInstrs::finishBlock() {
// Subclasses should no longer refer to the old block.
BB = nullptr;
}
/// Initialize the DAG and common scheduler state for the current scheduling
/// region. This does not actually create the DAG, only clears it. The
/// scheduling driver may call BuildSchedGraph multiple times per scheduling
/// region.
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
unsigned regioninstrs) {
assert(bb == BB && "startBlock should set BB");
RegionBegin = begin;
RegionEnd = end;
NumRegionInstrs = regioninstrs;
}
/// Close the current scheduling region. Don't clear any state in case the
/// driver wants to refer to the previous scheduling region.
void ScheduleDAGInstrs::exitRegion() {
// Nothing to do.
}
/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch or when the live-out registers
/// are used by instructions in the fallthrough block.
void ScheduleDAGInstrs::addSchedBarrierDeps() {
MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
ExitSU.setInstr(ExitMI);
bool AllDepKnown = ExitMI &&
(ExitMI->isCall() || ExitMI->isBarrier());
if (ExitMI && AllDepKnown) {
// If it's a call or a barrier, add dependencies on the defs and uses of
// the instruction.
for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = ExitMI->getOperand(i);
if (!MO.isReg() || MO.isDef()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
if (TRI->isPhysicalRegister(Reg))
Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
else {
assert(!IsPostRA && "Virtual register encountered after regalloc.");
if (MO.readsReg()) // ignore undef operands
addVRegUseDeps(&ExitSU, i);
}
}
} else {
// For others, e.g. fallthrough, conditional branch, assume the exit
// uses all the registers that are livein to the successor blocks.
assert(Uses.empty() && "Uses in set before adding deps?");
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
if (!Uses.contains(Reg))
Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
}
}
}
/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
assert(MO.isDef() && "expect physreg def");
// Ask the target if address-backscheduling is desirable, and if so how much.
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
Alias.isValid(); ++Alias) {
if (!Uses.contains(*Alias))
continue;
for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
SUnit *UseSU = I->SU;
if (UseSU == SU)
continue;
// Adjust the dependence latency using operand def/use information,
// then allow the target to perform its own adjustments.
int UseOp = I->OpIdx;
MachineInstr *RegUse = nullptr;
SDep Dep;
if (UseOp < 0)
Dep = SDep(SU, SDep::Artificial);
else {
// Set the hasPhysRegDefs only for physreg defs that have a use within
// the scheduling region.
SU->hasPhysRegDefs = true;
Dep = SDep(SU, SDep::Data, *Alias);
RegUse = UseSU->getInstr();
}
Dep.setLatency(
SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
UseOp));
ST.adjustSchedDependency(SU, UseSU, Dep);
UseSU->addPred(Dep);
}
}
}
/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
/// this SUnit to following instructions in the same scheduling region that
/// depend on the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
MachineInstr *MI = SU->getInstr();
MachineOperand &MO = MI->getOperand(OperIdx);
// Optionally add output and anti dependencies. For anti
// dependencies we use a latency of 0 because for a multi-issue
// target we want to allow the defining instruction to issue
// in the same cycle as the using instruction.
// TODO: Using a latency of 1 here for output dependencies assumes
// there's no cost for reusing registers.
SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
Alias.isValid(); ++Alias) {
if (!Defs.contains(*Alias))
continue;
for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
SUnit *DefSU = I->SU;
if (DefSU == &ExitSU)
continue;
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(*Alias))) {
if (Kind == SDep::Anti)
DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
else {
SDep Dep(SU, Kind, /*Reg=*/*Alias);
Dep.setLatency(
SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
DefSU->addPred(Dep);
}
}
}
}
if (!MO.isDef()) {
SU->hasPhysRegUses = true;
// Either insert a new Reg2SUnits entry with an empty SUnits list, or
// retrieve the existing SUnits list for this register's uses.
// Push this SUnit on the use list.
Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg()));
if (RemoveKillFlags)
MO.setIsKill(false);
}
else {
addPhysRegDataDeps(SU, OperIdx);
unsigned Reg = MO.getReg();
// clear this register's use list
if (Uses.contains(Reg))
Uses.eraseAll(Reg);
if (!MO.isDead()) {
Defs.eraseAll(Reg);
} else if (SU->isCall) {
// Calls will not be reordered because of chain dependencies (see
// below). Since call operands are dead, calls may continue to be added
// to the DefList making dependence checking quadratic in the size of
// the block. Instead, we leave only one call at the back of the
// DefList.
Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
Reg2SUnitsMap::iterator B = P.first;
Reg2SUnitsMap::iterator I = P.second;
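// Walk backwards from the end of this register's DefList, erasing the
// trailing call entries so that only the current call remains at the back
// once it is pushed below.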
for (bool isBegin = I == B; !isBegin; /* empty */) {
isBegin = (--I) == B;
if (!I->SU->isCall)
break;
I = Defs.erase(I);
}
}
// Defs are pushed in the order they are visited and never reordered.
Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
}
}
/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
const MachineInstr *MI = SU->getInstr();
unsigned Reg = MI->getOperand(OperIdx).getReg();
// Singly defined vregs do not have output/anti dependencies.
// The current operand is a def, so we have at least one.
// Check here if there are any others...
if (MRI.hasOneDef(Reg))
return;
// Add output dependence to the next nearest def of this vreg.
//
// Unless this definition is dead, the output dependence should be
// transitively redundant with antidependencies from this definition's
// uses. We're conservative for now until we have a way to guarantee the uses
// are not eliminated sometime during scheduling. The output dependence edge
// is also useful if output latency exceeds def-use latency.
VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
if (DefI == VRegDefs.end())
VRegDefs.insert(VReg2SUnit(Reg, SU));
else {
SUnit *DefSU = DefI->SU;
if (DefSU != SU && DefSU != &ExitSU) {
SDep Dep(SU, SDep::Output, Reg);
Dep.setLatency(
SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
DefSU->addPred(Dep);
}
DefI->SU = SU;
}
}
/// addVRegUseDeps - Add a register data dependency if the instruction that
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
/// register antidependency from this SUnit to instructions that occur later in
/// the same scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
MachineInstr *MI = SU->getInstr();
unsigned Reg = MI->getOperand(OperIdx).getReg();
// Record this local VReg use.
VReg2UseMap::iterator UI = VRegUses.find(Reg);
for (; UI != VRegUses.end(); ++UI) {
if (UI->SU == SU)
break;
}
if (UI == VRegUses.end())
VRegUses.insert(VReg2SUnit(Reg, SU));
// Lookup this operand's reaching definition.
assert(LIS && "vreg dependencies requires LiveIntervals");
LiveQueryResult LRQ
= LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI));
VNInfo *VNI = LRQ.valueIn();
// VNI will be valid because MachineOperand::readsReg() is checked by caller.
assert(VNI && "No value to read by operand");
MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
// Phis and other noninstructions (after coalescing) have a NULL Def.
if (Def) {
SUnit *DefSU = getSUnit(Def);
if (DefSU) {
// The reaching Def lives within this scheduling region.
// Create a data dependence.
SDep dep(DefSU, SDep::Data, Reg);
// Adjust the dependence latency using operand def/use information, then
// allow the target to perform its own adjustments.
int DefOp = Def->findRegisterDefOperandIdx(Reg);
dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx));
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
SU->addPred(dep);
}
}
// Add antidependence to the following def of the vreg it uses.
VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
if (DefI != VRegDefs.end() && DefI->SU != SU)
DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
}
/// Return true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasOrderedMemoryRef() &&
(!MI->mayLoad() || !MI->isInvariantLoad(AA))))
return true;
return false;
}
// This MI might have either incomplete info, or be known to be unsafe
// to deal with (i.e. a volatile object).
static inline bool isUnsafeMemoryObject(MachineInstr *MI,
const MachineFrameInfo *MFI) {
if (!MI || MI->memoperands_empty())
return true;
// We purposefully do not check for hasOneMemOperand() here,
// in the hope of triggering an assert downstream in order to
// finish the implementation.
if ((*MI->memoperands_begin())->isVolatile() ||
MI->hasUnmodeledSideEffects())
return true;
if ((*MI->memoperands_begin())->getPseudoValue()) {
// Similarly to getUnderlyingObjectForInstr:
// For now, ignore PseudoSourceValues which may alias LLVM IR values
// because the code that uses this function has no way to cope with
// such aliases.
return true;
}
const Value *V = (*MI->memoperands_begin())->getValue();
if (!V)
return true;
SmallVector<Value *, 4> Objs;
getUnderlyingObjects(V, Objs);
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(),
IE = Objs.end(); I != IE; ++I) {
// Does this pointer refer to a distinct and identifiable object?
if (!isIdentifiedObject(*I))
return true;
}
return false;
}
/// This returns true if the two MIs need a chain edge between them.
/// Even if they are not memory operations, we may still need chain deps
/// between them. The real question is: could these two MIs be reordered
/// during scheduling from a memory dependency point of view?
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
MachineInstr *MIa,
MachineInstr *MIb) {
// Cover a trivial case - no edge is needed from an instruction to itself.
if (MIa == MIb)
return false;
// FIXME: Need to handle multiple memory operands to support all targets.
if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
return true;
if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
return true;
// If we are dealing with two "normal" loads, we do not need an edge
// between them - they could be reordered.
if (!MIa->mayStore() && !MIb->mayStore())
return false;
// To this point analysis is generic. From here on we do need AA.
if (!AA)
return true;
MachineMemOperand *MMOa = *MIa->memoperands_begin();
MachineMemOperand *MMOb = *MIb->memoperands_begin();
if (!MMOa->getValue() || !MMOb->getValue())
return true;
// The following interface to AA is fashioned after DAGCombiner::isAlias
// and operates with MachineMemOperand offset with some important
// assumptions:
// - LLVM fundamentally assumes flat address spaces.
// - MachineOperand offset can *only* result from legalization and
// cannot affect queries other than the trivial case of overlap
// checking.
// - These offsets never wrap and never step outside
// of allocated objects.
// - There should never be any negative offsets here.
//
// FIXME: Modify API to hide this math from "user"
// FIXME: Even before we go to AA we can reason locally about some
// memory objects. It can save compile time, and possibly catch some
// corner cases not currently covered.
assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
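// Illustrative (made-up) values: two accesses of size 4 at offsets 0 and 8
// give MinOffset = 0, Overlapa = 4 and Overlapb = 12, so each AA location
// spans its access's full extent measured from the lower offset.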
AliasAnalysis::AliasResult AAResult = AA->alias(
AliasAnalysis::Location(MMOa->getValue(), Overlapa,
UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
AliasAnalysis::Location(MMOb->getValue(), Overlapb,
UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
return (AAResult != AliasAnalysis::NoAlias);
}
/// This recursive function iterates over the chain deps of SUb looking for
/// the "latest" node that needs a chain edge to SUa.
static unsigned
iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
SmallPtrSet<const SUnit*, 16> &Visited) {
if (!SUa || !SUb || SUb == ExitSU)
return *Depth;
// Remember visited nodes.
if (!Visited.insert(SUb))
return *Depth;
// If there is _some_ dependency already in place, do not
// descend any further.
// TODO: Need to make sure that if that dependency got eliminated or ignored
// for any reason in the future, we would not violate DAG topology.
// Currently it does not happen, but makes an implicit assumption about
// future implementation.
//
// Independently, if we encounter a node that is some sort of global
// object (like a call), we already have a full set of dependencies to it
// and we can stop descending.
if (SUa->isSucc(SUb) ||
isGlobalMemoryObject(AA, SUb->getInstr()))
return *Depth;
// If we do need an edge, or we have exceeded the depth budget,
// add that edge to the predecessor chain of SUb,
// and stop descending.
if (*Depth > 200 ||
MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
SUb->addPred(SDep(SUa, SDep::MayAliasMem));
return *Depth;
}
// Track current depth.
(*Depth)++;
// Iterate over chain dependencies only.
for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
I != E; ++I)
if (I->isCtrl())
iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
return *Depth;
}
/// This function assumes that the tail/leaf of the already constructed DAG
/// lies "downward" from SU. It iterates downward and checks whether SU can
/// alias any node dominated by it.
static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList,
unsigned LatencyToLoad) {
if (!SU)
return;
SmallPtrSet<const SUnit*, 16> Visited;
unsigned Depth = 0;
for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
I != IE; ++I) {
if (SU == *I)
continue;
if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
SDep Dep(SU, SDep::MayAliasMem);
Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
(*I)->addPred(Dep);
}
// Now go through all the chain successors and iterate from them.
// Keep track of visited nodes.
for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
JE = (*I)->Succs.end(); J != JE; ++J)
if (J->isCtrl())
iterateChainSucc (AA, MFI, SU, J->getSUnit(),
ExitSU, &Depth, Visited);
}
}
/// Check whether two objects need a chain edge. If so, add it;
/// otherwise, remember the rejected SU.
static inline
void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
SUnit *SUa, SUnit *SUb,
std::set<SUnit *> &RejectList,
unsigned TrueMemOrderLatency = 0,
bool isNormalMemory = false) {
// If this is a false dependency,
// do not add the edge, but remember the rejected node.
if (!AA || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
Dep.setLatency(TrueMemOrderLatency);
SUb->addPred(Dep);
}
else {
// Duplicate entries should be ignored.
RejectList.insert(SUb);
DEBUG(dbgs() << "\tReject chain dep between SU("
<< SUa->NodeNum << ") and SU("
<< SUb->NodeNum << ")\n");
}
}
/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
// We'll be allocating one SUnit for each real instruction in the region,
// which is contained within a basic block.
SUnits.reserve(NumRegionInstrs);
for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
MachineInstr *MI = I;
if (MI->isDebugValue())
continue;
SUnit *SU = newSUnit(MI);
MISUnitMap[MI] = SU;
SU->isCall = MI->isCall();
SU->isCommutable = MI->isCommutable();
// Assign the Latency field of SU using target-provided information.
SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
// If this SUnit uses a reserved or unbuffered resource, mark it as such.
//
// Reserved resources block an instruction from issuing and stall the
// entire pipeline. These are identified by BufferSize=0.
//
// Unbuffered resources prevent execution of subsequent instructions that
// require the same resources. This is used for in-order execution pipelines
// within an out-of-order core. These are identified by BufferSize=1.
if (SchedModel.hasInstrSchedModel()) {
const MCSchedClassDesc *SC = getSchedClass(SU);
for (TargetSchedModel::ProcResIter
PI = SchedModel.getWriteProcResBegin(SC),
PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) {
case 0:
SU->hasReservedResource = true;
break;
case 1:
SU->isUnbuffered = true;
break;
default:
break;
}
}
}
}
}
/// If RegPressure is non-null, compute register pressure as a side effect. The
/// DAG builder is an efficient place to do it because it already visits
/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
RegPressureTracker *RPTracker,
PressureDiffs *PDiffs) {
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
: ST.useAA();
AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
MISUnitMap.clear();
ScheduleDAG::clearDAG();
// Create an SUnit for each real instruction.
initSUnits();
if (PDiffs)
PDiffs->init(SUnits.size());
// We build scheduling units by walking a block's instruction list from bottom
// to top.
// Remember where a generic side-effecting instruction is as we proceed.
SUnit *BarrierChain = nullptr, *AliasChain = nullptr;
// Memory references to specific known memory locations are tracked
// so that they can be given more precise dependencies. We track
// separately the known memory locations that may alias and those
// that are known not to alias
MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
std::set<SUnit*> RejectMemNodes;
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
DbgValues.clear();
FirstDbgValue = nullptr;
assert(Defs.empty() && Uses.empty() &&
"Only BuildGraph should update Defs/Uses");
Defs.setUniverse(TRI->getNumRegs());
Uses.setUniverse(TRI->getNumRegs());
assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
VRegUses.clear();
VRegDefs.setUniverse(MRI.getNumVirtRegs());
VRegUses.setUniverse(MRI.getNumVirtRegs());
// Model data dependencies between instructions being scheduled and the
// ExitSU.
addSchedBarrierDeps();
// Walk the list of instructions, from bottom moving up.
MachineInstr *DbgMI = nullptr;
for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
MII != MIE; --MII) {
MachineInstr *MI = std::prev(MII);
if (MI && DbgMI) {
DbgValues.push_back(std::make_pair(DbgMI, MI));
DbgMI = nullptr;
}
if (MI->isDebugValue()) {
DbgMI = MI;
continue;
}
SUnit *SU = MISUnitMap[MI];
assert(SU && "No SUnit mapped to this MI");
if (RPTracker) {
PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr;
RPTracker->recede(/*LiveUses=*/nullptr, PDiff);
assert(RPTracker->getPos() == std::prev(MII) &&
"RPTracker can't find MI");
}
assert(
(CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) &&
"Cannot schedule terminators or labels!");
// Add register-based dependencies (data, anti, and output).
bool HasVRegDef = false;
for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
const MachineOperand &MO = MI->getOperand(j);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
if (TRI->isPhysicalRegister(Reg))
addPhysRegDeps(SU, j);
else {
assert(!IsPostRA && "Virtual register encountered!");
if (MO.isDef()) {
HasVRegDef = true;
addVRegDefDeps(SU, j);
}
else if (MO.readsReg()) // ignore undef operands
addVRegUseDeps(SU, j);
}
}
// If we haven't seen any uses in this scheduling region, create a
// dependence edge to ExitSU to model the live-out latency. This is required
// for vreg defs with no in-region use, and prefetches with no vreg def.
//
// FIXME: NumDataSuccs would be more precise than NumSuccs here. This
// check currently relies on being called before adding chain deps.
if (SU->NumSuccs == 0 && SU->Latency > 1
&& (HasVRegDef || MI->mayLoad())) {
SDep Dep(SU, SDep::Artificial);
Dep.setLatency(SU->Latency - 1);
ExitSU.addPred(Dep);
}
// Add chain dependencies.
// Chain dependencies used to enforce memory order should have a
// latency of 0 (except for the true dependency of a Store followed by an
// aliased Load... we estimate that with a single cycle of latency,
// assuming the hardware will bypass).
// Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
// after stack slots are lowered to actual addresses.
// TODO: Use an AliasAnalysis and do real alias-analysis queries, and
// produce more precise dependence information.
unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
if (isGlobalMemoryObject(AA, MI)) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
I->second[i]->addPred(SDep(SU, SDep::Barrier));
}
}
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
SDep Dep(SU, SDep::Barrier);
Dep.setLatency(TrueMemOrderLatency);
I->second[i]->addPred(Dep);
}
}
// Add SU to the barrier chain.
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Barrier));
BarrierChain = SU;
// This is a barrier event that acts as a pivotal node in the DAG,
// so it is safe to clear the list of exposed nodes.
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
RejectMemNodes.clear();
NonAliasMemDefs.clear();
NonAliasMemUses.clear();
// fall-through
new_alias_chain:
// Chain all possibly aliasing memory references through SU.
if (AliasChain) {
unsigned ChainLatency = 0;
if (AliasChain->getInstr()->mayLoad())
ChainLatency = TrueMemOrderLatency;
addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes,
ChainLatency);
}
AliasChain = SU;
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes);
}
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
TrueMemOrderLatency);
}
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
PendingLoads.clear();
AliasMemDefs.clear();
AliasMemUses.clear();
} else if (MI->mayStore()) {
UnderlyingObjectsVector Objs;
getUnderlyingObjectsForInstr(MI, MFI, Objs);
if (Objs.empty()) {
// Treat all other stores conservatively.
goto new_alias_chain;
}
bool MayAlias = false;
for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
K != KE; ++K) {
ValueType V = K->getPointer();
bool ThisMayAlias = K->getInt();
if (ThisMayAlias)
MayAlias = true;
// A store to a specific PseudoSourceValue. Add precise dependencies.
// Record the def in MemDefs, first adding a dep if there is
// an existing def.
MapVector<ValueType, std::vector<SUnit *> >::iterator I =
((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
0, true);
// If we're not using AA, then we only need one store per object.
if (!AAForDep)
I->second.clear();
I->second.push_back(SU);
} else {
if (ThisMayAlias) {
if (!AAForDep)
AliasMemDefs[V].clear();
AliasMemDefs[V].push_back(SU);
} else {
if (!AAForDep)
NonAliasMemDefs[V].clear();
NonAliasMemDefs[V].push_back(SU);
}
}
// Handle the uses in MemUses, if there are any.
MapVector<ValueType, std::vector<SUnit *> >::iterator J =
((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
MapVector<ValueType, std::vector<SUnit *> >::iterator JE =
((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes,
TrueMemOrderLatency, true);
J->second.clear();
}
}
if (MayAlias) {
// Add dependencies from all the PendingLoads, i.e. loads
// with no underlying object.
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
// Add dependence on alias chain, if needed.
if (AliasChain)
addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
// But we also should check dependent instructions for the
// SU in question.
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
}
// Add dependence on barrier chain, if needed.
// There is no point in checking aliasing on a barrier event. Even if
// SU and the barrier _could_ be reordered, they should not be. In addition,
// we have lost all RejectMemNodes below the barrier.
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Barrier));
} else if (MI->mayLoad()) {
bool MayAlias = true;
if (MI->isInvariantLoad(AA)) {
// Invariant load, no chain dependencies needed!
} else {
UnderlyingObjectsVector Objs;
getUnderlyingObjectsForInstr(MI, MFI, Objs);
if (Objs.empty()) {
// A load with no underlying object. Depend on all
// potentially aliasing stores.
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
addChainDependency(AAForDep, MFI, SU, I->second[i],
RejectMemNodes);
PendingLoads.push_back(SU);
MayAlias = true;
} else {
MayAlias = false;
}
for (UnderlyingObjectsVector::iterator
J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
ValueType V = J->getPointer();
bool ThisMayAlias = J->getInt();
if (ThisMayAlias)
MayAlias = true;
// A load from a specific PseudoSourceValue. Add precise dependencies.
MapVector<ValueType, std::vector<SUnit *> >::iterator I =
((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE)
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
addChainDependency(AAForDep, MFI, SU, I->second[i],
RejectMemNodes, 0, true);
if (ThisMayAlias)
AliasMemUses[V].push_back(SU);
else
NonAliasMemUses[V].push_back(SU);
}
if (MayAlias)
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
// Add dependencies on alias and barrier chains, if needed.
if (MayAlias && AliasChain)
addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Barrier));
}
}
}
if (DbgMI)
FirstDbgValue = DbgMI;
Defs.clear();
Uses.clear();
VRegDefs.clear();
PendingLoads.clear();
}
/// \brief Initialize register live-range state for updating kills.
void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
// Start with no live registers.
LiveRegs.reset();
// Examine the live-in regs of all successors.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI) {
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
// Repeat, for reg and all subregs.
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
LiveRegs.set(*SubRegs);
}
}
}
bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
// Setting kill flag...
if (!MO.isKill()) {
MO.setIsKill(true);
return false;
}
// If MO itself is live, clear the kill flag...
if (LiveRegs.test(MO.getReg())) {
MO.setIsKill(false);
return false;
}
// If any subreg of MO is live, then create an imp-def for that
// subreg and keep MO marked as killed.
MO.setIsKill(false);
bool AllDead = true;
const unsigned SuperReg = MO.getReg();
MachineInstrBuilder MIB(MF, MI);
for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
if (LiveRegs.test(*SubRegs)) {
MIB.addReg(*SubRegs, RegState::ImplicitDefine);
AllDead = false;
}
}
if (AllDead)
MO.setIsKill(true);
return false;
}
// FIXME: Reuse the LivePhysRegs utility for this.
void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
LiveRegs.resize(TRI->getNumRegs());
BitVector killedRegs(TRI->getNumRegs());
startBlockForKills(MBB);
// Examine block from end to start...
unsigned Count = MBB->size();
for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
I != E; --Count) {
MachineInstr *MI = --I;
if (MI->isDebugValue())
continue;
// Update liveness. Registers that are defined but not used in this
// instruction are now dead. Clear the register and all of its subregs,
// since they are completely defined.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (MO.isRegMask())
LiveRegs.clearBitsNotInMask(MO.getRegMask());
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
if (!MO.isDef()) continue;
// Ignore two-addr defs.
if (MI->isRegTiedToUseOperand(i)) continue;
// Repeat for reg and all subregs.
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
LiveRegs.reset(*SubRegs);
}
// Examine all used registers and set/clear kill flag. When a
// register is used multiple times we only set the kill flag on
// the first use. Don't set kill flags on undef operands.
killedRegs.reset();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
unsigned Reg = MO.getReg();
if ((Reg == 0) || MRI.isReserved(Reg)) continue;
bool kill = false;
if (!killedRegs.test(Reg)) {
kill = true;
// A register is not killed if any subregs are live...
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
if (LiveRegs.test(*SubRegs)) {
kill = false;
break;
}
}
// If no subreg is live, then the register is killed if it became
// live in this instruction.
if (kill)
kill = !LiveRegs.test(Reg);
}
if (MO.isKill() != kill) {
DEBUG(dbgs() << "Fixing " << MO << " in ");
// Warning: toggleKillFlag may invalidate MO.
toggleKillFlag(MI, MO);
DEBUG(MI->dump());
}
killedRegs.set(Reg);
}
// Mark any used register (where the use is not undef) and its subregs as
// now live...
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
unsigned Reg = MO.getReg();
if ((Reg == 0) || MRI.isReserved(Reg)) continue;
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
LiveRegs.set(*SubRegs);
}
}
}
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
SU->getInstr()->dump();
#endif
}
std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
std::string s;
raw_string_ostream oss(s);
if (SU == &EntrySU)
oss << "<entry>";
else if (SU == &ExitSU)
oss << "<exit>";
else
SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true);
return oss.str();
}
/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
return "dag." + BB->getFullName();
}
//===----------------------------------------------------------------------===//
// SchedDFSResult Implementation
//===----------------------------------------------------------------------===//
namespace llvm {
/// \brief Internal state used to compute SchedDFSResult.
class SchedDFSImpl {
SchedDFSResult &R;
/// Join DAG nodes into equivalence classes by their subtree.
IntEqClasses SubtreeClasses;
/// List PredSU, SuccSU pairs that represent data edges between subtrees.
std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
struct RootData {
unsigned NodeID;
unsigned ParentNodeID; // Parent node (member of the parent subtree).
unsigned SubInstrCount; // Instr count in this tree only, not children.
RootData(unsigned id): NodeID(id),
ParentNodeID(SchedDFSResult::InvalidSubtreeID),
SubInstrCount(0) {}
unsigned getSparseSetIndex() const { return NodeID; }
};
SparseSet<RootData> RootSet;
public:
SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
RootSet.setUniverse(R.DFSNodeData.size());
}
/// Return true if this node has been visited by the DFS traversal.
///
/// During visitPostorderNode the Node's SubtreeID is assigned to the Node
/// ID. Later, SubtreeID is updated but remains valid.
bool isVisited(const SUnit *SU) const {
return R.DFSNodeData[SU->NodeNum].SubtreeID
!= SchedDFSResult::InvalidSubtreeID;
}
/// Initialize this node's instruction count. We don't need to flag the node
/// visited until visitPostorder because the DAG cannot have cycles.
void visitPreorder(const SUnit *SU) {
R.DFSNodeData[SU->NodeNum].InstrCount =
SU->getInstr()->isTransient() ? 0 : 1;
}
/// Called once for each node after all predecessors are visited. Revisit this
/// node's predecessors and potentially join them now that we know the ILP of
/// the other predecessors.
void visitPostorderNode(const SUnit *SU) {
// Mark this node as the root of a subtree. It may be joined with its
// successors later.
R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
RootData RData(SU->NodeNum);
RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
// If any predecessors are still in their own subtree, they either cannot be
// joined or are large enough to remain separate. If this parent node's
// total instruction count is not greater than a child subtree by at least
// the subtree limit, then try to join it now since splitting subtrees is
// only useful if multiple high-pressure paths are possible.
unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
for (SUnit::const_pred_iterator
PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
if (PI->getKind() != SDep::Data)
continue;
unsigned PredNum = PI->getSUnit()->NodeNum;
if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
joinPredSubtree(*PI, SU, /*CheckLimit=*/false);
// Either link or merge the TreeData entry from the child to the parent.
if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
// If the predecessor's parent is invalid, this is a tree edge and the
// current node is the parent.
if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
RootSet[PredNum].ParentNodeID = SU->NodeNum;
}
else if (RootSet.count(PredNum)) {
// The predecessor is not a root, but is still in the root set. This
// must be the new parent that it was just joined to. Note that
// RootSet[PredNum].ParentNodeID may either be invalid or may still be
// set to the original parent.
RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
RootSet.erase(PredNum);
}
}
RootSet[SU->NodeNum] = RData;
}
/// Called once for each tree edge after calling visitPostorderNode on the
/// predecessor. Increment the parent node's instruction count and
/// preemptively join this subtree to its parent's if it is small enough.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
R.DFSNodeData[Succ->NodeNum].InstrCount
+= R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
joinPredSubtree(PredDep, Succ);
}
/// Add a connection for cross edges.
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
}
/// Set each node's subtree ID to the representative ID and record connections
/// between trees.
void finalize() {
SubtreeClasses.compress();
R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
assert(SubtreeClasses.getNumClasses() == RootSet.size()
&& "number of roots should match trees");
for (SparseSet<RootData>::const_iterator
RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) {
unsigned TreeID = SubtreeClasses[RI->NodeID];
if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID)
R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID];
R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount;
// Note that SubInstrCount may be greater than InstrCount if we joined
// subtrees across a cross edge. InstrCount will be attributed to the
// original parent, while SubInstrCount will be attributed to the joined
// parent.
}
R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
DEBUG(dbgs() << " SU(" << Idx << ") in tree "
<< R.DFSNodeData[Idx].SubtreeID << '\n');
}
for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator
I = ConnectionPairs.begin(), E = ConnectionPairs.end();
I != E; ++I) {
unsigned PredTree = SubtreeClasses[I->first->NodeNum];
unsigned SuccTree = SubtreeClasses[I->second->NodeNum];
if (PredTree == SuccTree)
continue;
unsigned Depth = I->first->getDepth();
addConnection(PredTree, SuccTree, Depth);
addConnection(SuccTree, PredTree, Depth);
}
}
protected:
/// Join the predecessor subtree with the successor that is its DFS
/// parent. Apply some heuristics before joining.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
bool CheckLimit = true) {
assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
// Check if the predecessor is already joined.
const SUnit *PredSU = PredDep.getSUnit();
unsigned PredNum = PredSU->NodeNum;
if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
return false;
// Four is the magic number of successors before a node is considered a
// pinch point.
unsigned NumDataSucs = 0;
for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(),
SE = PredSU->Succs.end(); SI != SE; ++SI) {
if (SI->getKind() == SDep::Data) {
if (++NumDataSucs >= 4)
return false;
}
}
if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
return false;
R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
SubtreeClasses.join(Succ->NodeNum, PredNum);
return true;
}
/// Called by finalize() to record a connection between trees.
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
if (!Depth)
return;
do {
SmallVectorImpl<SchedDFSResult::Connection> &Connections =
R.SubtreeConnections[FromTree];
for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
I = Connections.begin(), E = Connections.end(); I != E; ++I) {
if (I->TreeID == ToTree) {
I->Level = std::max(I->Level, Depth);
return;
}
}
Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
FromTree = R.DFSTreeData[FromTree].ParentTreeID;
} while (FromTree != SchedDFSResult::InvalidSubtreeID);
}
};
} // namespace llvm
namespace {
/// \brief Manage the stack used by a reverse depth-first search over the DAG.
class SchedDAGReverseDFS {
std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
public:
bool isComplete() const { return DFSStack.empty(); }
void follow(const SUnit *SU) {
DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
}
void advance() { ++DFSStack.back().second; }
const SDep *backtrack() {
DFSStack.pop_back();
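// The parent's iterator was advanced past this edge before the edge was
// followed, so std::prev() recovers the edge we just backtracked across.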
return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
}
const SUnit *getCurr() const { return DFSStack.back().first; }
SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
SUnit::const_pred_iterator getPredEnd() const {
return getCurr()->Preds.end();
}
};
} // anonymous
static bool hasDataSucc(const SUnit *SU) {
for (SUnit::const_succ_iterator
SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) {
if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode())
return true;
}
return false;
}
/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
/// search from this root.
void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
if (!IsBottomUp)
llvm_unreachable("Top-down ILP metric is unimplemented");
SchedDFSImpl Impl(*this);
for (ArrayRef<SUnit>::const_iterator
SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) {
const SUnit *SU = &*SI;
if (Impl.isVisited(SU) || hasDataSucc(SU))
continue;
SchedDAGReverseDFS DFS;
Impl.visitPreorder(SU);
DFS.follow(SU);
for (;;) {
// Traverse the leftmost path as far as possible.
while (DFS.getPred() != DFS.getPredEnd()) {
const SDep &PredDep = *DFS.getPred();
DFS.advance();
// Ignore non-data edges.
if (PredDep.getKind() != SDep::Data
|| PredDep.getSUnit()->isBoundaryNode()) {
continue;
}
// An edge to an already visited node is a cross edge, assuming an acyclic DAG.
if (Impl.isVisited(PredDep.getSUnit())) {
Impl.visitCrossEdge(PredDep, DFS.getCurr());
continue;
}
Impl.visitPreorder(PredDep.getSUnit());
DFS.follow(PredDep.getSUnit());
}
// Visit the top of the stack in postorder and backtrack.
const SUnit *Child = DFS.getCurr();
const SDep *PredDep = DFS.backtrack();
Impl.visitPostorderNode(Child);
if (PredDep)
Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
if (DFS.isComplete())
break;
}
}
Impl.finalize();
}
/// The root of the given SubtreeID was just scheduled. For all subtrees
/// connected to this tree, record the depth of the connection so that the
/// nearest connected subtrees can be prioritized.
void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
for (SmallVectorImpl<Connection>::const_iterator
I = SubtreeConnections[SubtreeID].begin(),
E = SubtreeConnections[SubtreeID].end(); I != E; ++I) {
SubtreeConnectLevels[I->TreeID] =
std::max(SubtreeConnectLevels[I->TreeID], I->Level);
DEBUG(dbgs() << " Tree: " << I->TreeID
<< " @" << SubtreeConnectLevels[I->TreeID] << '\n');
}
}
LLVM_DUMP_METHOD
void ILPValue::print(raw_ostream &OS) const {
OS << InstrCount << " / " << Length << " = ";
if (!Length)
OS << "BADILP";
else
OS << format("%g", ((double)InstrCount / Length));
}
LLVM_DUMP_METHOD
void ILPValue::dump() const {
dbgs() << *this << '\n';
}
namespace llvm {
LLVM_DUMP_METHOD
raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
Val.print(OS);
return OS;
}
} // namespace llvm
Index: llvm/trunk/lib/CodeGen/DFAPacketizer.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/DFAPacketizer.cpp (revision 216123)
+++ llvm/trunk/lib/CodeGen/DFAPacketizer.cpp (revision 216124)
@@ -1,226 +1,225 @@
//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This class implements a deterministic finite automaton (DFA) based
// packetizing mechanism for VLIW architectures. It provides APIs to
// determine whether there exists a legal mapping of instructions to
// functional unit assignments in a packet. The DFA is auto-generated from
// the target's Schedule.td file.
//
// A DFA consists of 3 major elements: states, inputs, and transitions. For
// the packetizing mechanism, the input is the set of instruction classes for
// a target. The state models all possible combinations of functional unit
// consumption for a given set of instructions in a packet. A transition
// models the addition of an instruction to a packet. In the DFA constructed
// by this class, if an instruction can be added to a packet, then a valid
// transition exists from the corresponding state. Invalid transitions
// indicate that the instruction cannot be added to the current packet.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
const unsigned *SET):
InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
DFAStateEntryTable(SET) {}
//
// ReadTable - Read the DFA transition table and update CachedTable.
//
// Format of the transition tables:
// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
// transitions
// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
// for the ith state
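//
// For example (hypothetical tables, not the generated ones): with
// DFAStateEntryTable = {0, 2, 5, ...}, state 1's transitions are
// DFAStateInputTable[2] through DFAStateInputTable[4], i.e. the entries from
// DFAStateEntryTable[1] up to DFAStateEntryTable[2] - 1.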
//
void DFAPacketizer::ReadTable(unsigned int state) {
unsigned ThisState = DFAStateEntryTable[state];
unsigned NextStateInTable = DFAStateEntryTable[state+1];
// Early exit in case CachedTable already contains this
// state's transitions.
if (CachedTable.count(UnsignPair(state,
DFAStateInputTable[ThisState][0])))
return;
for (unsigned i = ThisState; i < NextStateInTable; i++)
CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] =
DFAStateInputTable[i][1];
}
// canReserveResources - Check if the resources occupied by a MCInstrDesc
// are available in the current state.
bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
unsigned FuncUnits = IS->getUnits();
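// The functional-unit mask of the first itinerary stage serves as the DFA
// input: a cached transition on <CurrentState, FuncUnits> means the current
// packet can still accommodate this instruction class.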
UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
ReadTable(CurrentState);
return (CachedTable.count(StateTrans) != 0);
}
// reserveResources - Reserve the resources occupied by a MCInstrDesc and
// change the current state to reflect that change.
void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
unsigned FuncUnits = IS->getUnits();
UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
ReadTable(CurrentState);
assert(CachedTable.count(StateTrans) != 0);
CurrentState = CachedTable[StateTrans];
}
// canReserveResources - Check if the resources occupied by a machine
// instruction are available in the current state.
bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) {
const llvm::MCInstrDesc &MID = MI->getDesc();
return canReserveResources(&MID);
}
// reserveResources - Reserve the resources occupied by a machine
// instruction and change the current state to reflect that change.
void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
const llvm::MCInstrDesc &MID = MI->getDesc();
reserveResources(&MID);
}
namespace llvm {
// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
// the schedule() method to build the dependence graph.
class DefaultVLIWScheduler : public ScheduleDAGInstrs {
public:
DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
- MachineDominatorTree &MDT, bool IsPostRA);
+ bool IsPostRA);
// Schedule - Actual scheduling work.
void schedule() override;
};
}
-DefaultVLIWScheduler::DefaultVLIWScheduler(
- MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- bool IsPostRA) :
- ScheduleDAGInstrs(MF, &MLI, &MDT, IsPostRA) {
+DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
+ MachineLoopInfo &MLI, bool IsPostRA)
+ : ScheduleDAGInstrs(MF, &MLI, IsPostRA) {
CanHandleTerminators = true;
}
void DefaultVLIWScheduler::schedule() {
// Build the scheduling graph.
buildSchedGraph(nullptr);
}
// VLIWPacketizerList Ctor
-VLIWPacketizerList::VLIWPacketizerList(
- MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- bool IsPostRA) : TM(MF.getTarget()), MF(MF) {
+VLIWPacketizerList::VLIWPacketizerList(MachineFunction &MF,
+ MachineLoopInfo &MLI, bool IsPostRA)
+ : TM(MF.getTarget()), MF(MF) {
TII = TM.getSubtargetImpl()->getInstrInfo();
ResourceTracker = TII->CreateTargetScheduleState(&TM, nullptr);
- VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
+ VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, IsPostRA);
}
// VLIWPacketizerList Dtor
VLIWPacketizerList::~VLIWPacketizerList() {
if (VLIWScheduler)
delete VLIWScheduler;
if (ResourceTracker)
delete ResourceTracker;
}
// endPacket - End the current packet, bundle packet instructions and reset
// DFA state.
void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
MachineInstr *MI) {
if (CurrentPacketMIs.size() > 1) {
MachineInstr *MIFirst = CurrentPacketMIs.front();
finalizeBundle(*MBB, MIFirst, MI);
}
CurrentPacketMIs.clear();
ResourceTracker->clearResources();
}
// PacketizeMIs - Bundle machine instructions into packets.
void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
MachineBasicBlock::iterator BeginItr,
MachineBasicBlock::iterator EndItr) {
assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
VLIWScheduler->startBlock(MBB);
VLIWScheduler->enterRegion(MBB, BeginItr, EndItr,
std::distance(BeginItr, EndItr));
VLIWScheduler->schedule();
// Generate MI -> SU map.
MIToSUnit.clear();
for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) {
SUnit *SU = &VLIWScheduler->SUnits[i];
MIToSUnit[SU->getInstr()] = SU;
}
// The main packetizer loop.
for (; BeginItr != EndItr; ++BeginItr) {
MachineInstr *MI = BeginItr;
this->initPacketizerState();
// End the current packet if needed.
if (this->isSoloInstruction(MI)) {
endPacket(MBB, MI);
continue;
}
// Ignore pseudo instructions.
if (this->ignorePseudoInstruction(MI, MBB))
continue;
SUnit *SUI = MIToSUnit[MI];
assert(SUI && "Missing SUnit Info!");
// Ask DFA if machine resource is available for MI.
bool ResourceAvail = ResourceTracker->canReserveResources(MI);
if (ResourceAvail) {
// Dependency check for MI with instructions in CurrentPacketMIs.
for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
MachineInstr *MJ = *VI;
SUnit *SUJ = MIToSUnit[MJ];
assert(SUJ && "Missing SUnit Info!");
// Is it legal to packetize SUI and SUJ together?
if (!this->isLegalToPacketizeTogether(SUI, SUJ)) {
// Allow packetization if dependency can be pruned.
if (!this->isLegalToPruneDependencies(SUI, SUJ)) {
// End the packet if dependency cannot be pruned.
endPacket(MBB, MI);
break;
} // !isLegalToPruneDependencies.
} // !isLegalToPacketizeTogether.
} // For all instructions in CurrentPacketMIs.
} else {
// End the packet if resource is not available.
endPacket(MBB, MI);
}
// Add MI to the current packet.
BeginItr = this->addToPacket(MI);
} // For all instructions in BB.
// End any packet left behind.
endPacket(MBB, EndItr);
VLIWScheduler->exitRegion();
VLIWScheduler->finishBlock();
}
Index: llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp (revision 216123)
+++ llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp (revision 216124)
@@ -1,686 +1,685 @@
//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
#include "AggressiveAntiDepBreaker.h"
#include "AntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "post-RA-sched"
STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");
STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
// Post-RA scheduling is enabled with
// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to
// override the target.
static cl::opt<bool>
EnablePostRAScheduler("post-RA-scheduler",
cl::desc("Enable scheduling after register allocation"),
cl::init(false), cl::Hidden);
static cl::opt<std::string>
EnableAntiDepBreaking("break-anti-dependencies",
cl::desc("Break post-RA scheduling anti-dependencies: "
"\"critical\", \"all\", or \"none\""),
cl::init("none"), cl::Hidden);
// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
static cl::opt<int>
DebugDiv("postra-sched-debugdiv",
cl::desc("Debug control MBBs that are scheduled"),
cl::init(0), cl::Hidden);
static cl::opt<int>
DebugMod("postra-sched-debugmod",
cl::desc("Debug control MBBs that are scheduled"),
cl::init(0), cl::Hidden);
AntiDepBreaker::~AntiDepBreaker() { }
namespace {
class PostRAScheduler : public MachineFunctionPass {
const TargetInstrInfo *TII;
RegisterClassInfo RegClassInfo;
public:
static char ID;
PostRAScheduler() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
AU.addRequired<TargetPassConfig>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
bool runOnMachineFunction(MachineFunction &Fn) override;
bool enablePostRAScheduler(
const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode &Mode,
TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
};
char PostRAScheduler::ID = 0;
class SchedulePostRATDList : public ScheduleDAGInstrs {
/// AvailableQueue - The priority queue to use for the available SUnits.
///
LatencyPriorityQueue AvailableQueue;
/// PendingQueue - This contains all of the instructions whose operands have
/// been issued, but their results are not ready yet (due to the latency of
/// the operation). Once the operands becomes available, the instruction is
/// added to the AvailableQueue.
std::vector<SUnit*> PendingQueue;
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
/// AntiDepBreak - Anti-dependence breaking object, or NULL if none
AntiDepBreaker *AntiDepBreak;
/// AA - AliasAnalysis for making memory reference queries.
AliasAnalysis *AA;
/// The schedule. Null SUnit*'s represent noop instructions.
std::vector<SUnit*> Sequence;
/// The index in BB of RegionEnd.
///
/// This is the instruction number from the top of the current block, not
/// the SlotIndex. It is only used by the AntiDepBreaker.
unsigned EndIndex;
public:
SchedulePostRATDList(
- MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- AliasAnalysis *AA, const RegisterClassInfo&,
- TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
- SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs);
+ MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
+ const RegisterClassInfo &,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs);
~SchedulePostRATDList();
/// startBlock - Initialize register live-range state for scheduling in
/// this block.
///
void startBlock(MachineBasicBlock *BB) override;
// Set the index of RegionEnd within the current BB.
void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; }
/// Initialize the scheduler state for the next scheduling region.
void enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
unsigned regioninstrs) override;
/// Notify that the scheduler has finished scheduling the current region.
void exitRegion() override;
/// Schedule - Schedule the instruction range using list scheduling.
///
void schedule() override;
void EmitSchedule();
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
void Observe(MachineInstr *MI, unsigned Count);
/// finishBlock - Clean up register live-range state.
///
void finishBlock() override;
private:
void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
void ReleaseSuccessors(SUnit *SU);
void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
void ListScheduleTopDown();
void dumpSchedule() const;
void emitNoop(unsigned CurCycle);
};
}
char &llvm::PostRASchedulerID = PostRAScheduler::ID;
INITIALIZE_PASS(PostRAScheduler, "post-RA-sched",
"Post RA top-down list latency scheduler", false, false)
SchedulePostRATDList::SchedulePostRATDList(
- MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- AliasAnalysis *AA, const RegisterClassInfo &RCI,
- TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
- SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
- : ScheduleDAGInstrs(MF, &MLI, &MDT, /*IsPostRA=*/true), AA(AA), EndIndex(0) {
+ MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, &MLI, /*IsPostRA=*/true), AA(AA), EndIndex(0) {
const TargetMachine &TM = MF.getTarget();
const InstrItineraryData *InstrItins =
TM.getSubtargetImpl()->getInstrItineraryData();
HazardRec =
TM.getSubtargetImpl()->getInstrInfo()->CreateTargetPostRAHazardRecognizer(
InstrItins, this);
assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
MRI.tracksLiveness()) &&
"Live-ins must be accurate for anti-dependency breaking");
AntiDepBreak =
((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
(AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ?
(AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : nullptr));
}
SchedulePostRATDList::~SchedulePostRATDList() {
delete HazardRec;
delete AntiDepBreak;
}
/// Initialize state associated with the next scheduling region.
void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
unsigned regioninstrs) {
ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
Sequence.clear();
}
/// Print the schedule before exiting the region.
void SchedulePostRATDList::exitRegion() {
DEBUG({
dbgs() << "*** Final schedule ***\n";
dumpSchedule();
dbgs() << '\n';
});
ScheduleDAGInstrs::exitRegion();
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dumpSchedule - dump the scheduled Sequence.
void SchedulePostRATDList::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
SU->dump(this);
else
dbgs() << "**** NOOP ****\n";
}
}
#endif
bool PostRAScheduler::enablePostRAScheduler(
const TargetSubtargetInfo &ST,
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode &Mode,
TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
Mode = ST.getAntiDepBreakMode();
ST.getCriticalPathRCs(CriticalPathRCs);
return ST.enablePostMachineScheduler() &&
OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
}
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
if (skipOptnoneFunction(*Fn.getFunction()))
return false;
TII = Fn.getSubtarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
RegClassInfo.runOnMachineFunction(Fn);
// Check for explicit enable/disable of post-ra scheduling.
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
TargetSubtargetInfo::ANTIDEP_NONE;
SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;
if (EnablePostRAScheduler.getPosition() > 0) {
if (!EnablePostRAScheduler)
return false;
} else {
// Check that post-RA scheduling is enabled for this target.
// This may upgrade the AntiDepMode.
const TargetSubtargetInfo &ST =
Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(),
AntiDepMode, CriticalPathRCs))
return false;
}
// Check for antidep breaking override...
if (EnableAntiDepBreaking.getPosition() > 0) {
AntiDepMode = (EnableAntiDepBreaking == "all")
? TargetSubtargetInfo::ANTIDEP_ALL
: ((EnableAntiDepBreaking == "critical")
? TargetSubtargetInfo::ANTIDEP_CRITICAL
: TargetSubtargetInfo::ANTIDEP_NONE);
}
DEBUG(dbgs() << "PostRAScheduler\n");
- SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode,
+ SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,
CriticalPathRCs);
// Loop over all of the basic blocks
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) {
#ifndef NDEBUG
// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
if (DebugDiv > 0) {
static int bbcnt = 0;
if (bbcnt++ % DebugDiv != DebugMod)
continue;
dbgs() << "*** DEBUG scheduling " << Fn.getName()
<< ":BB#" << MBB->getNumber() << " ***\n";
}
#endif
// Initialize register live-range state for scheduling in this block.
Scheduler.startBlock(MBB);
// Schedule each sequence of instructions not interrupted by a label
// or anything else that effectively needs to shut down scheduling.
MachineBasicBlock::iterator Current = MBB->end();
unsigned Count = MBB->size(), CurrentCount = Count;
for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
MachineInstr *MI = std::prev(I);
--Count;
// Calls are not scheduling boundaries before register allocation, but
// post-ra we don't gain anything by scheduling across calls since we
// don't need to worry about register pressure.
if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) {
Scheduler.enterRegion(MBB, I, Current, CurrentCount - Count);
Scheduler.setEndIndex(CurrentCount);
Scheduler.schedule();
Scheduler.exitRegion();
Scheduler.EmitSchedule();
Current = MI;
CurrentCount = Count;
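// The boundary instruction itself is never scheduled; let the scheduler
// (and any anti-dependence breaker) observe it so liveness stays accurate.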
Scheduler.Observe(MI, CurrentCount);
}
I = MI;
if (MI->isBundle())
Count -= MI->getBundleSize();
}
assert(Count == 0 && "Instruction count mismatch!");
assert((MBB->begin() == Current || CurrentCount != 0) &&
"Instruction count mismatch!");
Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount);
Scheduler.setEndIndex(CurrentCount);
Scheduler.schedule();
Scheduler.exitRegion();
Scheduler.EmitSchedule();
// Clean up register live-range state.
Scheduler.finishBlock();
// Update register kills
Scheduler.fixupKills(MBB);
}
return true;
}
/// startBlock - Initialize register live-range state for scheduling in
/// this block.
///
void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
// Call the superclass.
ScheduleDAGInstrs::startBlock(BB);
// Reset the hazard recognizer and anti-dep breaker.
HazardRec->Reset();
if (AntiDepBreak)
AntiDepBreak->StartBlock(BB);
}
/// Schedule - Schedule the instruction range using list scheduling.
///
void SchedulePostRATDList::schedule() {
// Build the scheduling graph.
buildSchedGraph(AA);
if (AntiDepBreak) {
unsigned Broken =
AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
EndIndex, DbgValues);
if (Broken != 0) {
// We made changes. Update the dependency graph.
// Theoretically we could update the graph in place:
// When a live range is changed to use a different register, remove
// the def's anti-dependence *and* output-dependence edges due to
// that register, and add new anti-dependence and output-dependence
// edges based on the next live range of the register.
ScheduleDAG::clearDAG();
buildSchedGraph(AA);
NumFixedAnti += Broken;
}
}
DEBUG(dbgs() << "********** List Scheduling **********\n");
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
AvailableQueue.initNodes(SUnits);
ListScheduleTopDown();
AvailableQueue.releaseState();
}
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
if (AntiDepBreak)
AntiDepBreak->Observe(MI, Count, EndIndex);
}
/// finishBlock - Clean up register live-range state.
///
void SchedulePostRATDList::finishBlock() {
if (AntiDepBreak)
AntiDepBreak->FinishBlock();
// Call the superclass.
ScheduleDAGInstrs::finishBlock();
}
//===----------------------------------------------------------------------===//
// Top-Down Scheduling
//===----------------------------------------------------------------------===//
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
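// Weak edges are advisory and never gate readiness; account for them
// separately and return.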
if (SuccEdge->isWeak()) {
--SuccSU->WeakPredsLeft;
return;
}
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
SuccSU->dump(this);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
#endif
--SuccSU->NumPredsLeft;
// Standard scheduler algorithms will recompute the depth of the successor
// here as such:
// SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
//
// However, we lazily compute node depth instead. Note that
// ScheduleNodeTopDown has already updated the depth of this node which causes
// all descendants to be marked dirty. Setting the successor depth explicitly
// here would cause depth to be recomputed for all its ancestors. If the
// successor is not yet ready (because of a transitively redundant edge) then
// this causes depth computation to be quadratic in the size of the DAG.
// If all the node's predecessors are scheduled, this node is ready
// to be scheduled. Ignore the special ExitSU node.
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
PendingQueue.push_back(SuccSU);
}
/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
ReleaseSucc(SU, &*I);
}
}
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() &&
"Node scheduled above its depth!");
SU->setDepthToAtLeast(CurCycle);
ReleaseSuccessors(SU);
SU->isScheduled = true;
AvailableQueue.scheduledNode(SU);
}
/// emitNoop - Add a noop to the current instruction sequence.
void SchedulePostRATDList::emitNoop(unsigned CurCycle) {
DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
HazardRec->EmitNoop();
Sequence.push_back(nullptr); // NULL here means noop
++NumNoops;
}
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
unsigned CurCycle = 0;
// We're scheduling top-down but we're visiting the regions in
// bottom-up order, so we don't know the hazards at the start of a
// region. So assume no hazards (this should usually be ok as most
// blocks are a single region).
HazardRec->Reset();
// Release any successors of the special Entry node.
ReleaseSuccessors(&EntrySU);
// Add all leaves to Available queue.
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
// It is available if it has no predecessors.
if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) {
AvailableQueue.push(&SUnits[i]);
SUnits[i].isAvailable = true;
}
}
// In any cycle where we can't schedule any instructions, we must
// stall or emit a noop, depending on the target.
bool CycleHasInsts = false;
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
std::vector<SUnit*> NotReady;
Sequence.reserve(SUnits.size());
while (!AvailableQueue.empty() || !PendingQueue.empty()) {
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
unsigned MinDepth = ~0u;
for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
if (PendingQueue[i]->getDepth() <= CurCycle) {
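// This unit's depth has been reached, so it is ready to issue: move it to
// the available queue and remove it from PendingQueue by overwriting it
// with the last element (order within PendingQueue does not matter).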
AvailableQueue.push(PendingQueue[i]);
PendingQueue[i]->isAvailable = true;
PendingQueue[i] = PendingQueue.back();
PendingQueue.pop_back();
--i; --e;
} else if (PendingQueue[i]->getDepth() < MinDepth)
MinDepth = PendingQueue[i]->getDepth();
}
DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr;
bool HasNoopHazards = false;
while (!AvailableQueue.empty()) {
SUnit *CurSUnit = AvailableQueue.pop();
ScheduleHazardRecognizer::HazardType HT =
HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
if (HT == ScheduleHazardRecognizer::NoHazard) {
if (HazardRec->ShouldPreferAnother(CurSUnit)) {
if (!NotPreferredSUnit) {
// If this is the first non-preferred node for this cycle, then
// record it and continue searching for a preferred node. If this
// is not the first non-preferred node, then treat it as though
// there had been a hazard.
NotPreferredSUnit = CurSUnit;
continue;
}
} else {
FoundSUnit = CurSUnit;
break;
}
}
// Remember if this is a noop hazard.
HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
NotReady.push_back(CurSUnit);
}
// If we have a non-preferred node, push it back onto the available list.
// If we did not find a preferred node, then schedule this first
// non-preferred node.
if (NotPreferredSUnit) {
if (!FoundSUnit) {
DEBUG(dbgs() << "*** Will schedule a non-preferred instruction...\n");
FoundSUnit = NotPreferredSUnit;
} else {
AvailableQueue.push(NotPreferredSUnit);
}
NotPreferredSUnit = nullptr;
}
// Add the nodes that aren't ready back onto the available list.
if (!NotReady.empty()) {
AvailableQueue.push_all(NotReady);
NotReady.clear();
}
// If we found a node to schedule...
if (FoundSUnit) {
// If we need to emit noops prior to this instruction, then do so.
unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit);
for (unsigned i = 0; i != NumPreNoops; ++i)
emitNoop(CurCycle);
// ... schedule the node...
ScheduleNodeTopDown(FoundSUnit, CurCycle);
HazardRec->EmitInstruction(FoundSUnit);
CycleHasInsts = true;
if (HazardRec->atIssueLimit()) {
DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n');
HazardRec->AdvanceCycle();
++CurCycle;
CycleHasInsts = false;
}
} else {
if (CycleHasInsts) {
DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
HazardRec->AdvanceCycle();
} else if (!HasNoopHazards) {
// Otherwise, we have a pipeline stall, but no other problem,
// just advance the current cycle and try again.
DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
HazardRec->AdvanceCycle();
++NumStalls;
} else {
// Otherwise, we have no instructions to issue and we have instructions
// that will fault if we don't do this right. This is the case for
// processors without pipeline interlocks and other cases.
emitNoop(CurCycle);
}
++CurCycle;
CycleHasInsts = false;
}
}
#ifndef NDEBUG
unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
unsigned Noops = 0;
for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
if (!Sequence[i])
++Noops;
assert(Sequence.size() - Noops == ScheduledNodes &&
"The number of nodes scheduled doesn't match the expected number!");
#endif // NDEBUG
}
// EmitSchedule - Emit the machine code in scheduled order.
void SchedulePostRATDList::EmitSchedule() {
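// Everything in the region is re-spliced immediately before RegionEnd, so
// start with an empty region; RegionBegin is pulled back to the first
// re-inserted instruction below.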
RegionBegin = RegionEnd;
// If first instruction was a DBG_VALUE then put it back.
if (FirstDbgValue)
BB->splice(RegionEnd, BB, FirstDbgValue);
// Then re-insert them according to the given schedule.
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
BB->splice(RegionEnd, BB, SU->getInstr());
else
// Null SUnit* is a noop.
TII->insertNoop(*BB, RegionEnd);
// Update the Begin iterator, as the first instruction in the block
// may have been scheduled later.
if (i == 0)
RegionBegin = std::prev(RegionEnd);
}
// Reinsert any remaining debug_values.
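// Each entry pairs a DBG_VALUE with the instruction it originally followed;
// walk the pairs in reverse and splice each DBG_VALUE back in right after
// that instruction.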
for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
MachineInstr *DbgValue = P.first;
MachineBasicBlock::iterator OrigPrivMI = P.second;
BB->splice(++OrigPrivMI, BB, DbgValue);
}
DbgValues.clear();
FirstDbgValue = nullptr;
}