Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10760,6 +10760,8 @@ LatestNodeUsed = i; } + SmallVector Chains; + // The latest Node in the DAG. LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; SDLoc DL(StoreNodes[0].MemNode); @@ -10787,6 +10789,7 @@ if (Val.getValueType() != MemVT) return false; Ops.push_back(Val); + Chains.push_back(St->getChain()); } // Build the extracted vector elements back into a vector. @@ -10806,6 +10809,8 @@ for (unsigned i = 0; i < NumStores; ++i) { unsigned Idx = IsLE ? (NumStores - 1 - i) : i; StoreSDNode *St = cast(StoreNodes[Idx].MemNode); + Chains.push_back(St->getChain()); + SDValue Val = St->getValue(); StoreInt <<= ElementSizeBytes * 8; if (ConstantSDNode *C = dyn_cast(Val)) { @@ -10822,7 +10827,8 @@ StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } - SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, @@ -11274,6 +11280,10 @@ if (NumElem < 2) return false; + // Collect the chains from all merged stores. + SmallVector MergeStoreChains; + MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain()); + // The latest Node in the DAG. unsigned LatestNodeUsed = 0; for (unsigned i=1; igetChain()); } LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; @@ -11300,12 +11312,17 @@ SDLoc LoadDL(LoadNodes[0].MemNode); SDLoc StoreDL(StoreNodes[0].MemNode); + // The merged loads are required to have the same chain, so using the first's + // chain is acceptable. 
SDValue NewLoad = DAG.getLoad( JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); + SDValue NewStoreChain = + DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); + SDValue NewStore = DAG.getStore( - LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(), + NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); // Replace one of the loads with the new load. Index: test/CodeGen/X86/2012-11-28-merge-store-alias.ll =================================================================== --- test/CodeGen/X86/2012-11-28-merge-store-alias.ll +++ test/CodeGen/X86/2012-11-28-merge-store-alias.ll @@ -3,8 +3,8 @@ ; CHECK: merge_stores_can ; CHECK: callq foo ; CHECK: xorps %xmm0, %xmm0 -; CHECK-NEXT: movl 36(%rsp), %ebp ; CHECK-NEXT: movups %xmm0 +; CHECK-NEXT: movl 36(%rsp), %ebp ; CHECK: callq foo ; CHECK: ret declare i32 @foo([10 x i32]* ) Index: test/CodeGen/X86/merge-store-partially-alias-loads.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/merge-store-partially-alias-loads.ll @@ -0,0 +1,52 @@ +; REQUIRES: asserts +; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck -check-prefix=X86 %s +; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -debug-only=isel < %s 2>&1 | FileCheck -check-prefix=DBGDAG %s + +; It's OK to merge the load / store of the first 2 components, but +; they must not be placed on the same chain after merging. 
+ +; X86-LABEL: {{^}}merge_store_partial_overlap_load: +; X86: movw ([[BASEREG:%[a-z]+]]), [[LO2:%[a-z]+]] +; X86-NEXT: movb 2([[BASEREG]]), [[HI1:%[a-z]+]] +; X86-NEXT: movw [[LO2]], 1([[BASEREG]]) +; X86-NEXT: movb [[HI1]], 3([[BASEREG]]) +; X86-NEXT: retq + +; DBGDAG-LABEL: Optimized lowered selection DAG: BB#0 'merge_store_partial_overlap_load:' +; DBGDAG: [[ENTRYTOKEN:t[0-9]+]]: ch = EntryToken +; DBGDAG-DAG: [[TWO:t[0-9]+]]: i64 = Constant<2> +; DBGDAG-DAG: [[BASEPTR:t[0-9]+]]: i64,ch = CopyFromReg [[ENTRYTOKEN]], +; DBGDAG-DAG: [[ADDPTR:t[0-9]+]]: i64 = add [[BASEPTR]], [[TWO]] + +; DBGDAG-DAG: [[LD2:t[0-9]+]]: i16,ch = load [[ENTRYTOKEN]], [[BASEPTR]], t{{[0-9]+}} +; DBGDAG-DAG: [[LD1:t[0-9]+]]: i8,ch = load [[ENTRYTOKEN]], [[ADDPTR]], t{{[0-9]+}} + +; DBGDAG: [[LOADTOKEN:t[0-9]+]]: ch = TokenFactor [[LD2]]:1, [[LD1]]:1 + +; DBGDAG-DAG: [[ST2:t[0-9]+]]: ch = store [[LOADTOKEN]], [[LD2]], t{{[0-9]+}}, t{{[0-9]+}} +; DBGDAG-DAG: [[ST1:t[0-9]+]]: ch = store [[ST2]], [[LD1]], t{{[0-9]+}}, t{{[0-9]+}} +; DBGDAG: X86ISD::RET_FLAG [[ST1]], + +; DBGDAG: Type-legalized selection DAG: BB#0 'merge_store_partial_overlap_load:' +define void @merge_store_partial_overlap_load([4 x i8]* %tmp) { + %tmp8 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 0 + %tmp10 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 1 + %tmp12 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 2 + %tmp14 = getelementptr [4 x i8], [4 x i8]* %tmp, i32 0, i8 3 + + %tmp9 = load i8, i8* %tmp8, align 1 ; base + 0 + %tmp11 = load i8, i8* %tmp10, align 1 ; base + 1 + %tmp13 = load i8, i8* %tmp12, align 1 ; base + 2 + + store i8 %tmp9, i8* %tmp10, align 1 ; base + 1 + store i8 %tmp11, i8* %tmp12, align 1 ; base + 2 + store i8 %tmp13, i8* %tmp14, align 1 ; base + 3 + +; Should emit (loads first; the merged store overwrites base + 2): +; load base + 0, base + 1 +; load base + 2 +; store base + 1, base + 2 +; store base + 3 + + ret void +}