Index: include/llvm/CodeGen/TargetLowering.h =================================================================== --- include/llvm/CodeGen/TargetLowering.h +++ include/llvm/CodeGen/TargetLowering.h @@ -1235,6 +1235,10 @@ return GatherAllAliasesMaxDepth; } + unsigned getFindBetterChainsMaxChains() const { + return FindBetterChainsMaxChains; + } + /// Returns the size of the platform's va_list object. virtual unsigned getVaListSizeInBits(const DataLayout &DL) const { return getPointerTy(DL).getSizeInBits(); @@ -2585,6 +2589,14 @@ /// expected to be merged. unsigned GatherAllAliasesMaxDepth; + // Number of chains that are considered when looking back for mergeable stores + // in DAGCombiner::findBetterNeighborChains. This should be larger than the + // maximum number of stores to be merged for a target (e.g. setting this to + // 8 will allow merging 8 consecutive i8 stores to one i64). Increasing this + // value further will allow discovering and merging potentially + // non-consecutive stores. + unsigned FindBetterChainsMaxChains; + /// Specify maximum number of store instructions per memset call. /// /// When lowering \@llvm.memset this field specifies the maximum number of Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17394,7 +17394,7 @@ NewMask.push_back(M < 0 ? -1 : Scale * M + s); return NewMask; }; - + SDValue BC0 = peekThroughOneUseBitcasts(N0); if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { EVT SVT = VT.getScalarType(); @@ -18946,8 +18946,9 @@ // Walk up the chain and look for nodes with offsets from the same // base pointer. Stop when reaching an instruction with a different kind - // or instruction which has a different base pointer. + // or an instruction with a different base pointer. 
StoreSDNode *Index = St; + const unsigned MaxChains = TLI.getFindBetterChainsMaxChains(); while (Index) { // If the chain has more than one use, then we can't reorder the mem ops. if (Index != St && !SDValue(Index, 0)->hasOneUse()) @@ -18974,7 +18975,8 @@ break; } ChainedStores.push_back(STn); - Index = STn; + // Stop if we have too many chains. + Index = ChainedStores.size() >= MaxChains ? nullptr : STn; break; } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { NextInChain = Ldn->getChain().getNode(); @@ -18993,7 +18995,6 @@ // required ordering. bool MadeChangeToSt = false; SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; - for (StoreSDNode *ChainedStore : ChainedStores) { SDValue Chain = ChainedStore->getChain(); SDValue BetterChain = FindBetterChain(ChainedStore, Chain); @@ -19004,7 +19005,6 @@ BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); } } - // Do all replacements after finding the replacements to make to avoid making // the chains more complicated by introducing new TokenFactors. for (auto Replacement : BetterChains) Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -558,6 +558,7 @@ PrefFunctionAlignment = 0; PrefLoopAlignment = 0; GatherAllAliasesMaxDepth = 18; + FindBetterChainsMaxChains = 32; MinStackArgumentAlignment = 1; // TODO: the default will be switched to 0 in the next commit, along // with the Target-specific changes necessary.