Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -271,6 +271,15 @@
     return true;
   }
 
+  /// isStoreBitCastBeneficial() - Mirror of isLoadBitCastBeneficial(). Return
+  /// true if the following transform is beneficial.
+  ///
+  /// (store (y (conv x)), y*) -> (store x, (x*))
+  virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const {
+    // Default to the same logic as loads.
+    return isLoadBitCastBeneficial(StoreVT, BitcastVT);
+  }
+
   /// Return true if it is expected to be cheaper to do a store of a non-zero
   /// vector constant with the given size and type for the address space than to
   /// store the individual scalar element constants.
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11864,14 +11864,11 @@
   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
       ST->isUnindexed()) {
     EVT SVT = Value.getOperand(0).getValueType();
-
-    if ((!LegalOperations && !ST->isVolatile()) ||
-        TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) {
+    if (((!LegalOperations && !ST->isVolatile()) ||
+         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
+        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
       unsigned OrigAlign = ST->getAlignment();
-      unsigned Align = DAG.getDataLayout().getABITypeAlignment(
-          SVT.getTypeForEVT(*DAG.getContext()));
-
-      bool Fast;
+      bool Fast = false;
       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
           Fast) {
Index: test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
===================================================================
--- test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
+++ test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
@@ -3,7 +3,7 @@
 ; GCN-LABEL: {{^}}store_v2i32_as_v4i16_align_4:
 ; GCN: s_load_dwordx2
 ; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v2i32_as_v4i16_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) {
+define void @store_v2i32_as_v4i16_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
   %x.bc = bitcast <2 x i32> %x to <4 x i16>
   store <4 x i16> %x.bc, <4 x i16> addrspace(3)* %out, align 4
   ret void
@@ -13,7 +13,7 @@
 ; GCN: s_load_dwordx4
 ; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
 ; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v4i32_as_v8i16_align_4(<8 x i16> addrspace(3)* align 4 %out, <4 x i32> %x) {
+define void @store_v4i32_as_v8i16_align_4(<8 x i16> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
   %x.bc = bitcast <4 x i32> %x to <8 x i16>
   store <8 x i16> %x.bc, <8 x i16> addrspace(3)* %out, align 4
   ret void
@@ -22,18 +22,17 @@
 ; GCN-LABEL: {{^}}store_v2i32_as_i64_align_4:
 ; GCN: s_load_dwordx2
 ; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v2i32_as_i64_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) {
+define void @store_v2i32_as_i64_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
   %x.bc = bitcast <2 x i32> %x to <4 x i16>
   store <4 x i16> %x.bc, <4 x i16> addrspace(3)* %out, align 4
   ret void
 }
-
 ; GCN-LABEL: {{^}}store_v4i32_as_v2i64_align_4:
 ; GCN: s_load_dwordx4
 ; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
 ; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v4i32_as_v2i64_align_4(<2 x i64> addrspace(3)* align 4 %out, <4 x i32> %x) {
+define void @store_v4i32_as_v2i64_align_4(<2 x i64> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
   %x.bc = bitcast <4 x i32> %x to <2 x i64>
   store <2 x i64> %x.bc, <2 x i64> addrspace(3)* %out, align 4
   ret void
 }
@@ -45,8 +44,10 @@
 ; GCN: buffer_load_ushort
 ; GCN: buffer_load_ushort
 ; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v4i16_as_v2i32_align_4(<2 x i32> addrspace(3)* align 4 %out, <4 x i16> %x) {
+define void @store_v4i16_as_v2i32_align_4(<2 x i32> addrspace(3)* align 4 %out, <4 x i16> %x) #0 {
   %x.bc = bitcast <4 x i16> %x to <2 x i32>
   store <2 x i32> %x.bc, <2 x i32> addrspace(3)* %out, align 4
   ret void
 }
+
+attributes #0 = { nounwind }