Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10055,19 +10055,19 @@ int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned EarliestNodeUsed = 0; + unsigned LatestNodeUsed = 0; for (unsigned i=0; i < NumElem; ++i) { // Find a chain for the new wide-store operand. Notice that some // of the store nodes that we found may not be selected for inclusion // in the wide store. The chain we use needs to be the chain of the - // earliest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; + // latest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; } - // The earliest Node in the DAG. - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + // The latest Node in the DAG. + LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; SDLoc DL(StoreNodes[0].MemNode); SDValue StoredVal; @@ -10126,17 +10126,17 @@ StoredVal = DAG.getConstant(StoreInt, StoreTy); } - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, + SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstInChain->getAlignment()); - // Replace the first store with the new store - CombineTo(EarliestOp, NewStore); + // Replace the last store with the new store + CombineTo(LatestOp, NewStore); // Erase all other stores. for (unsigned i = 0; i < NumElem ; ++i) { - if (StoreNodes[i].MemNode == EarliestOp) + if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast(StoreNodes[i].MemNode); // ReplaceAllUsesWith will replace all uses that existed when it was @@ -10513,18 +10513,19 @@ if (NumElem < 2) return false; - // The earliest Node in the DAG. - unsigned EarliestNodeUsed = 0; - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + // The latest Node in the DAG. + unsigned LatestNodeUsed = 0; for (unsigned i=1; i StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; + // latest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; } + LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; + // Find if it is better to use vectors or integers to load and store // to memory. EVT JointMemOpVT; @@ -10546,7 +10547,7 @@ false, false, false, FirstLoad->getAlignment()); - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, + SDValue NewStore = DAG.getStore(LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstInChain->getAlignment()); @@ -10564,12 +10565,12 @@ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); } - // Replace the first store with the new store. - CombineTo(EarliestOp, NewStore); + // Replace the last store with the new store. + CombineTo(LatestOp, NewStore); // Erase all other stores. for (unsigned i = 0; i < NumElem ; ++i) { // Remove all Store nodes. - if (StoreNodes[i].MemNode == EarliestOp) + if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast(StoreNodes[i].MemNode); DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); Index: test/CodeGen/AArch64/merge-store.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/merge-store.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=aarch64 %s -o - | FileCheck %s + +@g0 = external global <3 x float>, align 16 +@g1 = external global <3 x float>, align 4 + +; CHECK: ldr s[[R0:[0-9]+]], {{\[}}[[R1:x[0-9]+]]{{\]}}, #4 +; CHECK: ld1.s { v[[R0]] }[1], {{\[}}[[R1]]{{\]}} +; CHECK: str d[[R0]] + +define void @blam() { + %tmp4 = getelementptr inbounds <3 x float>, <3 x float>* @g1, i64 0, i64 0 + %tmp5 = load <3 x float>, <3 x float>* @g0, align 16 + %tmp6 = extractelement <3 x float> %tmp5, i64 0 + store float %tmp6, float* %tmp4 + %tmp7 = getelementptr inbounds float, float* %tmp4, i64 1 + %tmp8 = load <3 x float>, <3 x float>* @g0, align 16 + %tmp9 = extractelement <3 x float> %tmp8, i64 1 + store float %tmp9, float* %tmp7 + ret void; +} Index: test/CodeGen/X86/2012-11-28-merge-store-alias.ll =================================================================== --- test/CodeGen/X86/2012-11-28-merge-store-alias.ll +++ test/CodeGen/X86/2012-11-28-merge-store-alias.ll @@ -2,7 +2,7 @@ ; CHECK: merge_stores_can ; CHECK: callq foo -; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK: xorps %xmm0, %xmm0 ; CHECK-NEXT: movups %xmm0 ; CHECK: callq foo ; CHECK: ret