Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -115,6 +115,11 @@ "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors")); +static cl::opt LimitInDependenceCheck( + "limit-in-dependence-check", cl::Hidden, cl::init(100), + cl::desc("Limit the number of times to invoke dependence check for a " + "candidate set. ")); + namespace { class DAGCombiner { @@ -152,6 +157,13 @@ /// which have not yet been combined to the worklist. SmallPtrSet CombinedNodes; + /// Map from StoreNode to the pair of RootNode and count. This is used + /// to track how many times the same candidate set is considered in + /// dependence check for store merging. Usually the StoreNode and + /// the RootNode determine the candidate set and the count shows how + /// many times the set appears in dependence check. + DenseMap> StoreRootCountMap; + // AA - Used for DAG load/store alias analysis. AliasAnalysis *AA; @@ -236,6 +248,7 @@ void removeFromWorklist(SDNode *N) { CombinedNodes.erase(N); PruningList.remove(N); + StoreRootCountMap.erase(N); auto It = WorklistMap.find(N); if (It == WorklistMap.end()) @@ -635,7 +648,7 @@ /// \return True if safe to merge. bool checkMergeStoreCandidatesForDependencies( SmallVectorImpl &StoreNodes, unsigned NumStores, - SDNode *RootNode); + SDNode *RootNode, SDNode *St); /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. @@ -15476,7 +15489,7 @@ // non-chain operands of candidates. bool DAGCombiner::checkMergeStoreCandidatesForDependencies( SmallVectorImpl &StoreNodes, unsigned NumStores, - SDNode *RootNode) { + SDNode *RootNode, SDNode *St) { // FIXME: We should be able to truncate a full search of // predecessors by doing a BFS and keeping tabs the originating // stores from which worklist nodes come from in a similar way to @@ -15485,6 +15498,20 @@ SmallPtrSet Visited; SmallVector Worklist; + // Everytime we see the initial store node and the same root node, + // we assume we have the same candidate set. We limit the number + // of times we perform the dependence check for the candidate set + // in order to avoid excessive compile time in extreme case. + auto RootCount = StoreRootCountMap.find(St); + if (StoreRootCountMap.find(St) != StoreRootCountMap.end() && + RootCount->second.first == RootNode) { + RootCount->second.second++; + if (RootCount->second.second > LimitInDependenceCheck) + return false; + } else { + StoreRootCountMap[St] = {RootNode, 1}; + } + // RootNode is a predecessor to all candidates so we need not search // past it. Add RootNode (peeking through TokenFactors). Do not count // these towards size check. @@ -15726,7 +15753,7 @@ // Check that we can merge these candidates without causing a cycle. if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, - RootNode)) { + RootNode, St)) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); NumConsecutiveStores -= NumElem; continue; @@ -15792,7 +15819,7 @@ // Check that we can merge these candidates without causing a cycle. if (!checkMergeStoreCandidatesForDependencies( - StoreNodes, NumStoresToMerge, RootNode)) { + StoreNodes, NumStoresToMerge, RootNode, St)) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge); NumConsecutiveStores -= NumStoresToMerge; @@ -15975,7 +16002,7 @@ // Check that we can merge these candidates without causing a cycle. if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, - RootNode)) { + RootNode, St)) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); NumConsecutiveStores -= NumElem;