Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -452,6 +452,10 @@
     /// \return True if some memory operations were changed.
     bool MergeConsecutiveStores(StoreSDNode *N);
 
+    /// \return True if \p prev and \p next store relative to the same base
+    /// and index and the bytes they write partially overlap.
+    bool isOverlap(StoreSDNode* prev, StoreSDNode* next);
+
     /// \brief Try to transform a truncation where C is a constant:
     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
     ///
@@ -11210,6 +11214,35 @@
   return true;
 }
 
+/// Report whether two stores off the same base and index partially overlap.
+///
+/// The store at the lower offset is the one whose width matters: it reaches
+/// into the other store exactly when it writes more bytes than the distance
+/// between the two offsets. Widths come from the memory VT's store size, so
+/// truncating stores and types that are not a whole number of bytes are
+/// measured by the bytes actually written.
+///
+/// \return False when the base/index differ or when both offsets are equal.
+bool DAGCombiner::isOverlap(StoreSDNode* prev, StoreSDNode* next) {
+  BaseIndexOffset prevPtr = BaseIndexOffset::match(prev->getBasePtr(), DAG);
+  BaseIndexOffset nextPtr = BaseIndexOffset::match(next->getBasePtr(), DAG);
+  if (!prevPtr.equalBaseIndex(nextPtr))
+    return false;
+
+  // NOTE(review): equal offsets are not reported as an overlap, exactly as
+  // before this change; confirm callers cope with stores to the same address.
+  if (prevPtr.Offset == nextPtr.Offset)
+    return false;
+
+  const bool prevIsLower = prevPtr.Offset < nextPtr.Offset;
+  StoreSDNode *lower = prevIsLower ? prev : next;
+  const int64_t gap = prevIsLower ? nextPtr.Offset - prevPtr.Offset
+                                  : prevPtr.Offset - nextPtr.Offset;
+  const int64_t lowerBytes =
+      static_cast<int64_t>(lower->getMemoryVT().getStoreSize());
+  return lowerBytes > gap;
+}
+
 void DAGCombiner::getStoreMergeAndAliasCandidates(
     StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
     SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
@@ -11285,15 +11318,25 @@
         break;
 
     // The stored memory type must be the same.
-    if (Index->getMemoryVT() != MemVT)
-      break;
+    if (Index->getMemoryVT() != MemVT) {
+      if (StoreNodes.size() == 1) {
+        StoreNodes.pop_back();
+        MemVT = Index->getMemoryVT();
+        continue;
+      } else {
+        // Merge what we have
+        break;
+      }
+    }
 
-    // We do not allow under-aligned stores in order to prevent
-    // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
-    // be irrelevant here; what MATTERS is that we not move memory
-    // operations that potentially overlap past each-other.
-    if (Index->getAlignment() < MemVT.getStoreSize())
-      break;
+    if (StoreNodes.size() != 0) {
+      StoreSDNode* chain = dyn_cast<StoreSDNode>(StoreNodes[0].MemNode);
+      while (chain && chain != Index) {
+        if (isOverlap(Index, chain))
+          return;
+        chain = dyn_cast<StoreSDNode>(chain->getChain().getNode());
+      }
+    }
 
     // We found a potential memory operand to merge.
     StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
@@ -11392,7 +11435,8 @@
            (LHS.OffsetFromBase == RHS.OffsetFromBase &&
             LHS.SequenceNum < RHS.SequenceNum);
   });
-
+  MemVT = StoreNodes[0].MemNode->getMemoryVT();
+  ElementSizeBytes = MemVT.getSizeInBits() / 8;
   // Scan the memory operations on the chain and find the first non-consecutive
   // store memory address.
   unsigned LastConsecutiveStore = 0;
Index: test/CodeGen/X86/dag-merge-fast-accesses.ll
===================================================================
--- test/CodeGen/X86/dag-merge-fast-accesses.ll
+++ test/CodeGen/X86/dag-merge-fast-accesses.ll
@@ -51,19 +51,11 @@
 }
 
 
-;; TODO: FAST *should* be:
-;;    movups (%rdi), %xmm0
-;;    movups %xmm0, 40(%rdi)
-;; ..but is not currently. See the UseAA FIXME in DAGCombiner.cpp
-;; visitSTORE.
-
 define void @merge_vec_load_and_stores(i64 *%ptr) {
 ; FAST-LABEL: merge_vec_load_and_stores:
 ; FAST:       # BB#0:
-; FAST-NEXT:    movq (%rdi), %rax
-; FAST-NEXT:    movq 8(%rdi), %rcx
-; FAST-NEXT:    movq %rax, 40(%rdi)
-; FAST-NEXT:    movq %rcx, 48(%rdi)
+; FAST-NEXT:    movups (%rdi), %xmm0
+; FAST-NEXT:    movups %xmm0, 40(%rdi)
 ; FAST-NEXT:    retq
 ;
 ; SLOW-LABEL: merge_vec_load_and_stores: