Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -387,6 +387,13 @@ unsigned SequenceNum; }; + /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a + /// constant build_vector of the stored constant values in Stores. + SDValue getMergedConstantVectorStore(SelectionDAG &DAG, + SDLoc SL, + ArrayRef Stores, + EVT Ty) const; + /// This is a helper function for MergeConsecutiveStores. When the source /// elements of the consecutive stores are all constants or all extracted /// vector elements, try to merge them into one larger store. @@ -10576,6 +10583,17 @@ }; } // namespace +SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, + SDLoc SL, + ArrayRef Stores, + EVT Ty) const { + SmallVector BuildVector; + + for (const MemOpLink &Store : Stores) + BuildVector.push_back(cast(Store.MemNode)->getValue()); + return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector); +} + bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SmallVectorImpl &StoreNodes, EVT MemVT, unsigned NumElem, bool IsConstantSrc, bool UseVector) { @@ -10606,12 +10624,7 @@ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); if (IsConstantSrc) { - // A vector store with a constant source implies that the constant is - // zero; we only handle merging stores of constant zeros because the zero - // can be materialized without a load. - // It may be beneficial to loosen this restriction to allow non-zero - // store merging. - StoredVal = DAG.getConstant(0, DL, Ty); + StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty); } else { SmallVector Ops; for (unsigned i = 0; i < NumElem ; ++i) { Index: test/CodeGen/R600/merge-stores.ll =================================================================== --- test/CodeGen/R600/merge-stores.ll +++ test/CodeGen/R600/merge-stores.ll @@ -89,7 +89,11 @@ } ; GCN-LABEL: {{^}}merge_global_store_2_constants_f32_i32: -; GCN: buffer_store_dwordx2 +; SI-DAG: s_mov_b32 [[SLO:s[0-9]+]], 4.0 +; SI-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b{{$}} +; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[SLO]] +; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[SHI]] +; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}} define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)* @@ -99,7 +103,11 @@ } ; GCN-LABEL: {{^}}merge_global_store_4_constants_i32: -; GCN: buffer_store_dwordx4 +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x14d{{$}} +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x1c8{{$}} +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x7b{{$}} +; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x4d2{{$}} +; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI]]{{\]}} define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 Index: test/CodeGen/X86/2012-11-28-merge-store-alias.ll =================================================================== --- test/CodeGen/X86/2012-11-28-merge-store-alias.ll +++ test/CodeGen/X86/2012-11-28-merge-store-alias.ll @@ -3,6 +3,7 @@ ; CHECK: merge_stores_can ; CHECK: callq foo ; CHECK: xorps %xmm0, %xmm0 +; CHECK-NEXT: movl 36(%rsp), %ebp ; CHECK-NEXT: movups %xmm0 ; CHECK: callq foo ; CHECK: ret