Skip to content

Commit

Permalink
[DAG, X86] Improve Dependency analysis when doing multi-node
Browse files Browse the repository at this point in the history
Instruction Selection

Cleanup cycle/validity checks in ISel (IsLegalToFold,
HandleMergeInputChains) and X86 (isFusableLoadOpStore). Now do a full
search for cycles / dependencies pruning the search when topological
property of NodeId allows.

As part of this propogate the NodeId-based cutoffs to narrow
hasPreprocessorHelper searches.

Reviewers: craig.topper, bogner

Subscribers: llvm-commits, hiraditya

Differential Revision: https://reviews.llvm.org/D41293

llvm-svn: 324359
  • Loading branch information
niravhdave committed Feb 6, 2018
1 parent 7f24765 commit 27721e8
Showing 18 changed files with 645 additions and 985 deletions.
36 changes: 30 additions & 6 deletions llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Original file line number Diff line number Diff line change
@@ -796,16 +796,38 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
/// searches to be performed in parallel, caching of results across
/// queries and incremental addition to Worklist. Stops early if N is
/// found but will resume. Remember to clear Visited and Worklists
/// if DAG changes.
/// if DAG changes. MaxSteps gives a maximum number of nodes to visit before
/// giving up. The TopologicalPrune flag signals that positive NodeIds are
/// topologically ordered (Operands have strictly smaller node id) and search
/// can be pruned leveraging this.
static bool hasPredecessorHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode *> &Visited,
SmallVectorImpl<const SDNode *> &Worklist,
unsigned int MaxSteps = 0) {
unsigned int MaxSteps = 0,
bool TopologicalPrune = false) {
SmallVector<const SDNode *, 8> DeferredNodes;
if (Visited.count(N))
return true;

// Node Id's are assigned in three places: As a topological
// ordering (> 0), during legalization (results in values set to
// 0), and new nodes (set to -1). If N has a topolgical id then we
// know that all nodes with ids smaller than it cannot be
// successors and we need not check them. Filter out all node
// that can't be matches. We add them to the worklist before exit
// in case of multiple calls.

int NId = N->getNodeId();

bool Found = false;
while (!Worklist.empty()) {
const SDNode *M = Worklist.pop_back_val();
bool Found = false;
int MId = M->getNodeId();
if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
(MId > 0) && (MId < NId)) {
DeferredNodes.push_back(M);
continue;
}
for (const SDValue &OpV : M->op_values()) {
SDNode *Op = OpV.getNode();
if (Visited.insert(Op).second)
@@ -814,11 +836,13 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
Found = true;
}
if (Found)
return true;
break;
if (MaxSteps != 0 && Visited.size() >= MaxSteps)
return false;
break;
}
return false;
// Push deferred nodes back on worklist.
Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
return Found;
}

/// Return true if all the users of N are contained in Nodes.
295 changes: 80 additions & 215 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
Original file line number Diff line number Diff line change
@@ -2137,54 +2137,44 @@ static SDNode *findGlueUse(SDNode *N) {
return nullptr;
}

/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
/// This function iteratively traverses up the operand chain, ignoring
/// certain nodes.
static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited,
/// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path
/// beyond "ImmedUse". We may ignore chains as they are checked separately.
static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
bool IgnoreChains) {
// The NodeID's are given uniques ID's where a node ID is guaranteed to be
// greater than all of its (recursive) operands. If we scan to a point where
// 'use' is smaller than the node we're scanning for, then we know we will
// never find it.
//
// The Use may be -1 (unassigned) if it is a newly allocated node. This can
// happen because we scan down to newly selected nodes in the case of glue
// uses.
std::vector<SDNode *> WorkList;
WorkList.push_back(Use);

while (!WorkList.empty()) {
Use = WorkList.back();
WorkList.pop_back();
// NodeId topological order of TokenFactors is not guaranteed. Do not skip.
if (Use->getOpcode() != ISD::TokenFactor &&
Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)
continue;
SmallPtrSet<const SDNode *, 16> Visited;
SmallVector<const SDNode *, 16> WorkList;
// Only check if we have non-immediate uses of Def.
if (ImmedUse->isOnlyUserOf(Def))
return false;

// Don't revisit nodes if we already scanned it and didn't fail, we know we
// won't fail if we scan it again.
if (!Visited.insert(Use).second)
// We don't care about paths to Def that go through ImmedUse so mark it
// visited and mark non-def operands as used.
Visited.insert(ImmedUse);
for (const SDValue &Op : ImmedUse->op_values()) {
SDNode *N = Op.getNode();
// Ignore chain deps (they are validated by
// HandleMergeInputChains) and immediate uses
if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
continue;
if (!Visited.insert(N).second)
continue;
WorkList.push_back(N);
}

for (const SDValue &Op : Use->op_values()) {
// Ignore chain uses, they are validated by HandleMergeInputChains.
if (Op.getValueType() == MVT::Other && IgnoreChains)
continue;

// Initialize worklist to operands of Root.
if (Root != ImmedUse) {
for (const SDValue &Op : Root->op_values()) {
SDNode *N = Op.getNode();
if (N == Def) {
if (Use == ImmedUse || Use == Root)
continue; // We are not looking for immediate use.
assert(N != Root);
return true;
}

// Traverse up the operand chain.
// Ignore chains (they are validated by HandleMergeInputChains)
if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
continue;
if (!Visited.insert(N).second)
continue;
WorkList.push_back(N);
}
}
return false;

return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true);
}

/// IsProfitableToFold - Returns true if it's profitable to fold the specific
@@ -2256,13 +2246,12 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,

// If our query node has a glue result with a use, we've walked up it. If
// the user (which has already been selected) has a chain or indirectly uses
// the chain, our WalkChainUsers predicate will not consider it. Because of
// the chain, HandleMergeInputChains will not consider it. Because of
// this, we cannot ignore chains in this predicate.
IgnoreChains = false;
}

SmallPtrSet<SDNode*, 16> Visited;
return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
return !findNonImmUse(Root, N.getNode(), U, IgnoreChains);
}

void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
@@ -2381,143 +2370,6 @@ void SelectionDAGISel::UpdateChains(
DEBUG(dbgs() << "ISEL: Match complete!\n");
}

enum ChainResult {
CR_Simple,
CR_InducesCycle,
CR_LeadsToInteriorNode
};

/// WalkChainUsers - Walk down the users of the specified chained node that is
/// part of the pattern we're matching, looking at all of the users we find.
/// This determines whether something is an interior node, whether we have a
/// non-pattern node in between two pattern nodes (which prevent folding because
/// it would induce a cycle) and whether we have a TokenFactor node sandwiched
/// between pattern nodes (in which case the TF becomes part of the pattern).
///
/// The walk we do here is guaranteed to be small because we quickly get down to
/// already selected nodes "below" us.
static ChainResult
WalkChainUsers(const SDNode *ChainedNode,
SmallVectorImpl<SDNode *> &ChainedNodesInPattern,
DenseMap<const SDNode *, ChainResult> &TokenFactorResult,
SmallVectorImpl<SDNode *> &InteriorChainedNodes) {
ChainResult Result = CR_Simple;

for (SDNode::use_iterator UI = ChainedNode->use_begin(),
E = ChainedNode->use_end(); UI != E; ++UI) {
// Make sure the use is of the chain, not some other value we produce.
if (UI.getUse().getValueType() != MVT::Other) continue;

SDNode *User = *UI;

if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
continue;

// If we see an already-selected machine node, then we've gone beyond the
// pattern that we're selecting down into the already selected chunk of the
// DAG.
unsigned UserOpcode = User->getOpcode();
if (User->isMachineOpcode() ||
UserOpcode == ISD::CopyToReg ||
UserOpcode == ISD::CopyFromReg ||
UserOpcode == ISD::INLINEASM ||
UserOpcode == ISD::EH_LABEL ||
UserOpcode == ISD::LIFETIME_START ||
UserOpcode == ISD::LIFETIME_END) {
// If their node ID got reset to -1 then they've already been selected.
// Treat them like a MachineOpcode.
if (User->getNodeId() == -1)
continue;
}

// If we have a TokenFactor, we handle it specially.
if (User->getOpcode() != ISD::TokenFactor) {
// If the node isn't a token factor and isn't part of our pattern, then it
// must be a random chained node in between two nodes we're selecting.
// This happens when we have something like:
// x = load ptr
// call
// y = x+4
// store y -> ptr
// Because we structurally match the load/store as a read/modify/write,
// but the call is chained between them. We cannot fold in this case
// because it would induce a cycle in the graph.
if (!std::count(ChainedNodesInPattern.begin(),
ChainedNodesInPattern.end(), User))
return CR_InducesCycle;

// Otherwise we found a node that is part of our pattern. For example in:
// x = load ptr
// y = x+4
// store y -> ptr
// This would happen when we're scanning down from the load and see the
// store as a user. Record that there is a use of ChainedNode that is
// part of the pattern and keep scanning uses.
Result = CR_LeadsToInteriorNode;
InteriorChainedNodes.push_back(User);
continue;
}

// If we found a TokenFactor, there are two cases to consider: first if the
// TokenFactor is just hanging "below" the pattern we're matching (i.e. no
// uses of the TF are in our pattern) we just want to ignore it. Second,
// the TokenFactor can be sandwiched in between two chained nodes, like so:
// [Load chain]
// ^
// |
// [Load]
// ^ ^
// | \ DAG's like cheese
// / \ do you?
// / |
// [TokenFactor] [Op]
// ^ ^
// | |
// \ /
// \ /
// [Store]
//
// In this case, the TokenFactor becomes part of our match and we rewrite it
// as a new TokenFactor.
//
// To distinguish these two cases, do a recursive walk down the uses.
auto MemoizeResult = TokenFactorResult.find(User);
bool Visited = MemoizeResult != TokenFactorResult.end();
// Recursively walk chain users only if the result is not memoized.
if (!Visited) {
auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult,
InteriorChainedNodes);
MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first;
}
switch (MemoizeResult->second) {
case CR_Simple:
// If the uses of the TokenFactor are just already-selected nodes, ignore
// it, it is "below" our pattern.
continue;
case CR_InducesCycle:
// If the uses of the TokenFactor lead to nodes that are not part of our
// pattern that are not selected, folding would turn this into a cycle,
// bail out now.
return CR_InducesCycle;
case CR_LeadsToInteriorNode:
break; // Otherwise, keep processing.
}

// Okay, we know we're in the interesting interior case. The TokenFactor
// is now going to be considered part of the pattern so that we rewrite its
// uses (it may have uses that are not part of the pattern) with the
// ultimate chain result of the generated code. We will also add its chain
// inputs as inputs to the ultimate TokenFactor we create.
Result = CR_LeadsToInteriorNode;
if (!Visited) {
ChainedNodesInPattern.push_back(User);
InteriorChainedNodes.push_back(User);
}
}

return Result;
}

/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
/// operation for when the pattern matched at least one node with a chains. The
/// input vector contains a list of all of the chained nodes that we match. We
@@ -2527,47 +2379,60 @@ WalkChainUsers(const SDNode *ChainedNode,
static SDValue
HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
SelectionDAG *CurDAG) {
// Used for memoization. Without it WalkChainUsers could take exponential
// time to run.
DenseMap<const SDNode *, ChainResult> TokenFactorResult;
// Walk all of the chained nodes we've matched, recursively scanning down the
// users of the chain result. This adds any TokenFactor nodes that are caught
// in between chained nodes to the chained and interior nodes list.
SmallVector<SDNode*, 3> InteriorChainedNodes;
for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
TokenFactorResult,
InteriorChainedNodes) == CR_InducesCycle)
return SDValue(); // Would induce a cycle.
}

// Okay, we have walked all the matched nodes and collected TokenFactor nodes
// that we are interested in. Form our input TokenFactor node.
SmallPtrSet<const SDNode *, 16> Visited;
SmallVector<const SDNode *, 8> Worklist;
SmallVector<SDValue, 3> InputChains;
for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
// Add the input chain of this node to the InputChains list (which will be
// the operands of the generated TokenFactor) if it's not an interior node.
SDNode *N = ChainNodesMatched[i];
if (N->getOpcode() != ISD::TokenFactor) {
if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
continue;
unsigned int Max = 8192;

// Otherwise, add the input chain.
SDValue InChain = ChainNodesMatched[i]->getOperand(0);
assert(InChain.getValueType() == MVT::Other && "Not a chain");
InputChains.push_back(InChain);
continue;
}
// Quick exit on trivial merge.
if (ChainNodesMatched.size() == 1)
return ChainNodesMatched[0]->getOperand(0);

// If we have a token factor, we want to add all inputs of the token factor
// that are not part of the pattern we're matching.
for (const SDValue &Op : N->op_values()) {
if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
Op.getNode()))
InputChains.push_back(Op);
}
// Add chains that aren't already added (internal). Peek through
// token factors.
std::function<void(const SDValue)> AddChains = [&](const SDValue V) {
if (V.getValueType() != MVT::Other)
return;
if (V->getOpcode() == ISD::EntryToken)
return;
// Newly selected nodes (-1) are always added directly.
if (V->getNodeId() == -1)
InputChains.push_back(V);
else if (V->getOpcode() == ISD::TokenFactor) {
for (int i = 0, e = V->getNumOperands(); i != e; ++i)
AddChains(V->getOperand(i));
} else if (!Visited.count(V.getNode()))
InputChains.push_back(V);
};

for (auto *N : ChainNodesMatched) {
Worklist.push_back(N);
Visited.insert(N);
}

while (!Worklist.empty())
AddChains(Worklist.pop_back_val()->getOperand(0));

// Skip the search if there are no chain dependencies.
if (InputChains.size() == 0)
return CurDAG->getEntryNode();

// If one of these chains is a successor of input, we must have a
// node that is both the predecessor and successor of the
// to-be-merged nodes. Fail.
Visited.clear();
for (SDValue V : InputChains)
Worklist.push_back(V.getNode());

for (auto *N : ChainNodesMatched)
if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true))
return SDValue();
// Fail conservatively if we stopped searching early.
if (Visited.size() >= Max)
return SDValue();

// Return merged chain.
if (InputChains.size() == 1)
return InputChains[0];
return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]),
Loading

0 comments on commit 27721e8

Please sign in to comment.