Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -298,6 +298,9 @@ SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); + + SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); + SDValue visitSTORE(SDNode *N); SDValue visitINSERT_VECTOR_ELT(SDNode *N); SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); @@ -375,6 +378,10 @@ /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); + /// Do FindBetterChain for a store and any possibly adjacent stores on + /// consecutive chains. + bool findBetterNeighborChains(StoreSDNode *St); + /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. struct MemOpLink { @@ -10777,6 +10784,34 @@ EVT MemVT = St->getMemoryVT(); unsigned Seq = 0; StoreSDNode *Index = St; + + + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + + if (UseAA) { + // Look at other users of the same chain. Stores on the same chain do not + // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized + // to be on the same chain, so don't bother looking at adjacent chains. + + SDValue Chain = St->getChain(); + for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) { + if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { + + if (OtherST->isVolatile() || OtherST->isIndexed()) + continue; + + if (OtherST->getMemoryVT() != MemVT) + continue; + + BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr()); + StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++)); + } + } + + return; + } + while (Index) { // If the chain has more than one use, then we can't reorder the mem ops. 
if (Index != St && !SDValue(Index, 0)->hasOneUse()) @@ -11210,6 +11245,31 @@ return true; } +SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { + SDLoc SL(ST); + SDValue ReplStore; + + // Replace the chain to avoid dependency. + if (ST->isTruncatingStore()) { + ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(), + ST->getBasePtr(), ST->getMemoryVT(), + ST->getMemOperand()); + } else { + ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(), + ST->getMemOperand()); + } + + // Create token to keep both nodes around. + SDValue Token = DAG.getNode(ISD::TokenFactor, SL, + MVT::Other, ST->getChain(), ReplStore); + + // Make sure the new and old chains are cleaned up. + AddToWorklist(Token.getNode()); + + // Don't add users to work list. + return CombineTo(ST, Token, false); +} + SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); SDValue Chain = ST->getChain(); @@ -11340,31 +11400,12 @@ UseAA = false; #endif if (UseAA && ST->isUnindexed()) { - // Walk up chain skipping non-aliasing memory nodes. - SDValue BetterChain = FindBetterChain(N, Chain); - - // If there is a better chain. - if (Chain != BetterChain) { - SDValue ReplStore; - - // Replace the chain to avoid dependency. - if (ST->isTruncatingStore()) { - ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr, - ST->getMemoryVT(), ST->getMemOperand()); - } else { - ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr, - ST->getMemOperand()); - } - - // Create token to keep both nodes around. - SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), - MVT::Other, Chain, ReplStore); - - // Make sure the new and old chains are cleaned up. - AddToWorklist(Token.getNode()); - - // Don't add users to work list. - return CombineTo(N, Token, false); + // Walk up chain skipping non-aliasing memory nodes, on this store and any + // adjacent stores. 
+ if (findBetterNeighborChains(ST)) { + // replaceStoreChain uses CombineTo, which handled all of the worklist + // manipulation. Return the original node to not do anything else. + return SDValue(ST, 0); } } @@ -14147,6 +14188,85 @@ return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } +bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { + // This holds the base pointer, index, and the offset in bytes from the base + // pointer. + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + + // We must have a base and an offset. + if (!BasePtr.Base.getNode()) + return false; + + // Do not handle stores to undef base pointers. + if (BasePtr.Base.getOpcode() == ISD::UNDEF) + return false; + + SmallVector<StoreSDNode *, 8> ChainedStores; + ChainedStores.push_back(St); + + // Walk up the chain and look for nodes with offsets from the same + // base pointer. Stop when reaching an instruction with a different kind + // or instruction which has a different base pointer. + EVT MemVT = St->getMemoryVT(); + unsigned Seq = 0; + StoreSDNode *Index = St; + while (Index) { + // If the chain has more than one use, then we can't reorder the mem ops. + if (Index != St && !SDValue(Index, 0)->hasOneUse()) + break; + + // Find the base pointer and offset for this memory node. + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + + // Check that the base pointer is the same as the original one. + if (!Ptr.equalBaseIndex(BasePtr)) + break; + + if (Index->isVolatile() || Index->isIndexed()) + break; + + // Find the next memory operand in the chain. If the next operand in the + // chain is a store then move up and continue the scan with the next + // memory operand. If the next operand is a load save it and use alias + // information to check if it interferes with anything. + SDNode *NextInChain = Index->getChain().getNode(); + while (true) { + if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { + // We found a store node. Use it for the next iteration. 
+ ChainedStores.push_back(STn); + Index = STn; + break; + } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { + NextInChain = Ldn->getChain().getNode(); + continue; + } else { + Index = nullptr; + break; + } + } + } + + bool MadeChange = false; + SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; + + for (StoreSDNode *ChainedStore : ChainedStores) { + SDValue Chain = ChainedStore->getChain(); + SDValue BetterChain = FindBetterChain(ChainedStore, Chain); + + if (Chain != BetterChain) { + MadeChange = true; + BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); + } + } + + // Do all replacements after finding the replacements to make to avoid making + // the chains more complicated by introducing new TokenFactors. + for (auto Replacement : BetterChains) + replaceStoreChain(Replacement.first, Replacement.second); + + return MadeChange; +} + /// This is the entry point for the file. void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, CodeGenOpt::Level OptLevel) { Index: test/CodeGen/PowerPC/memcpy-vec.ll =================================================================== --- test/CodeGen/PowerPC/memcpy-vec.ll +++ test/CodeGen/PowerPC/memcpy-vec.ll @@ -14,8 +14,11 @@ ; PWR7-LABEL: @foo1 ; PWR7-NOT: bl memcpy -; PWR7: ld {{[0-9]+}}, {{[0-9]+}}(4) -; PWR7: std {{[0-9]+}}, {{[0-9]+}}(3) +; PWR7-DAG: li [[OFFSET:[0-9]+]], 16 +; PWR7-DAG: lxvd2x [[TMP0:[0-9]+]], 4, [[OFFSET]] +; PWR7-DAG: stxvd2x [[TMP0]], 0, 3 +; PWR7-DAG: lxvd2x [[TMP1:[0-9]+]], 0, 4 +; PWR7-DAG: stxvd2x [[TMP1]], 0, 3 ; PWR7: blr ; PWR8-LABEL: @foo1 Index: test/CodeGen/SystemZ/vec-args-04.ll =================================================================== --- test/CodeGen/SystemZ/vec-args-04.ll +++ test/CodeGen/SystemZ/vec-args-04.ll @@ -21,17 +21,25 @@ ; CHECK-VEC-DAG: vrepib %v31, 8 ; CHECK-VEC: brasl %r14, bar@PLT ; + +; CHECK-STACK: .LCPI0_0: +; CHECK-STACK: .quad 795741901033570304 # 0xb0b0b0b00000000 +; CHECK-STACK: .quad 868082074056920076 # 0xc0c0c0c0c0c0c0c +; CHECK-STACK: .LCPI0_1: 
+; CHECK-STACK: .quad 648518346341351424 # 0x900000000000000 +; CHECK-STACK: .quad 723390690146385920 # 0xa0a000000000000 + ; CHECK-STACK-LABEL: foo: ; CHECK-STACK: aghi %r15, -192 -; CHECK-STACK-DAG: llihh [[REG1:%r[0-9]+]], 2304 -; CHECK-STACK-DAG: stg [[REG1]], 160(%r15) -; CHECK-STACK-DAG: llihh [[REG2:%r[0-9]+]], 2570 -; CHECK-STACK-DAG: stg [[REG2]], 168(%r15) -; CHECK-STACK-DAG: llihf [[REG3:%r[0-9]+]], 185273099 -; CHECK-STACK-DAG: stg [[REG3]], 176(%r15) -; CHECK-STACK-DAG: llihf [[REG4:%r[0-9]+]], 202116108 -; CHECK-STACK-DAG: oilf [[REG4]], 202116108 -; CHECK-STACK-DAG: stg [[REG4]], 176(%r15) + +; CHECK-STACK-DAG: larl [[REG1:%r[0-9]+]], .LCPI0_0 +; CHECK-STACK-DAG: vl [[VREG0:%v[0-9]+]], 0([[REG1]]) +; CHECK-STACK-DAG: vst [[VREG0]], 176(%r15) + +; CHECK-STACK-DAG: larl [[REG2:%r[0-9]+]], .LCPI0_1 +; CHECK-STACK-DAG: vl [[VREG1:%v[0-9]+]], 0([[REG2]]) +; CHECK-STACK-DAG: vst [[VREG1]], 160(%r15) + ; CHECK-STACK: brasl %r14, bar@PLT call void @bar (<1 x i8> , Index: test/CodeGen/SystemZ/vec-args-05.ll =================================================================== --- test/CodeGen/SystemZ/vec-args-05.ll +++ test/CodeGen/SystemZ/vec-args-05.ll @@ -14,12 +14,14 @@ ; CHECK-VEC-DAG: vrepib %v26, 2 ; CHECK-VEC: brasl %r14, bar@PLT ; +; CHECK-STACK: .LCPI0_0: +; CHECK-STACK: .quad 217020518463700992 # 0x303030300000000 +; CHECK-STACK: .quad 289360691284934656 # 0x404040400000000 ; CHECK-STACK-LABEL: foo: ; CHECK-STACK: aghi %r15, -176 -; CHECK-STACK-DAG: llihf [[REG1:%r[0-9]+]], 50529027 -; CHECK-STACK-DAG: stg [[REG1]], 160(%r15) -; CHECK-STACK-DAG: llihf [[REG2:%r[0-9]+]], 67372036 -; CHECK-STACK-DAG: stg [[REG2]], 168(%r15) +; CHECK-STACK-DAG: larl [[REG1:%r[0-9]+]], .LCPI0_0 +; CHECK-STACK-DAG: vl [[VREG:%v[0-9]+]], 0([[REG1]]) +; CHECK-STACK-DAG: vst [[VREG]], 160(%r15) ; CHECK-STACK: brasl %r14, bar@PLT call void (<4 x i8>, <4 x i8>, ...) @bar