Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -542,13 +542,15 @@ /// that potentially may be merged with St are placed in /// StoreNodes. void getStoreMergeCandidates(StoreSDNode *St, - SmallVectorImpl &StoreNodes); + SmallVectorImpl &StoreNodes, + SDNode *&Root); /// Helper function for MergeConsecutiveStores. Checks if /// candidate stores have indirect dependency through their /// operands. \return True if safe to merge. bool checkMergeStoreCandidatesForDependencies( - SmallVectorImpl &StoreNodes, unsigned NumStores); + SmallVectorImpl &StoreNodes, unsigned NumStores, + SDNode *Root); /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. @@ -13229,7 +13231,8 @@ } void DAGCombiner::getStoreMergeCandidates( - StoreSDNode *St, SmallVectorImpl &StoreNodes) { + StoreSDNode *St, SmallVectorImpl &StoreNodes, + SDNode *&RootNode) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); @@ -13328,7 +13331,7 @@ // FIXME: We should be able to climb and // descend TokenFactors to find candidates as well. - SDNode *RootNode = (St->getChain()).getNode(); + RootNode = St->getChain().getNode(); if (LoadSDNode *Ldn = dyn_cast(RootNode)) { RootNode = Ldn->getChain().getNode(); @@ -13359,21 +13362,39 @@ // through the chain). Check in parallel by searching up from // non-chain operands of candidates. bool DAGCombiner::checkMergeStoreCandidatesForDependencies( - SmallVectorImpl &StoreNodes, unsigned NumStores) { + SmallVectorImpl &StoreNodes, unsigned NumStores, + SDNode *RootNode) { // FIXME: We should be able to truncate a full search of // predecessors by doing a BFS and keeping tabs the originating // stores from which worklist nodes come from in a similar way to // TokenFactor simplfication. - SmallPtrSet Visited; + SmallPtrSet Visited; SmallVector Worklist; - unsigned int Max = 1024; + + // RootNode is a predecessor to all candidates so we need not search + // past it. Add RootNode (peeking through TokenFactors). Do not count + // these towards size check. + + Worklist.push_back(RootNode); + while (!Worklist.empty()) { + auto N = Worklist.pop_back_val(); + if (N->getOpcode() == ISD::TokenFactor) { + for (SDValue Op : N->ops()) + Worklist.push_back(Op.getNode()); + } + Visited.insert(N); + } + + // Don't count pruning nodes towards max. + unsigned int Max = 1024 + Visited.size(); // Search Ops of store candidates. for (unsigned i = 0; i < NumStores; ++i) { - SDNode *n = StoreNodes[i].MemNode; - // Potential loops may happen only through non-chain operands - for (unsigned j = 1; j < n->getNumOperands(); ++j) - Worklist.push_back(n->getOperand(j).getNode()); + SDNode *N = StoreNodes[i].MemNode; + // Loops can only happen through the value operand(1). + auto *Op = N->getOperand(1).getNode(); + if (Visited.insert(Op).second) + Worklist.push_back(Op); } // Search through DAG. We can stop early if we find a store node. for (unsigned i = 0; i < NumStores; ++i) @@ -13417,8 +13438,9 @@ return false; SmallVector StoreNodes; + SDNode *RootNode; // Find potential store merge candidates by searching through chain sub-DAG - getStoreMergeCandidates(St, StoreNodes); + getStoreMergeCandidates(St, StoreNodes, RootNode); // Check if there is anything to merge. if (StoreNodes.size() < 2) @@ -13634,8 +13656,8 @@ } // Check that we can merge these candidates without causing a cycle. - if (!checkMergeStoreCandidatesForDependencies(StoreNodes, - NumStoresToMerge)) { + if (!checkMergeStoreCandidatesForDependencies( + StoreNodes, NumStoresToMerge, RootNode)) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge); continue; @@ -13811,7 +13833,8 @@ } // Check that we can merge these candidates without causing a cycle. - if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem)) { + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, + RootNode)) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); continue; }