Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2071,6 +2071,9 @@ // problems during legalization, the emitted instructions to pack and unpack // the bytes again are not eliminated in the case of an unaligned copy. if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) { + if (VT.isVector()) + return scalarizeVectorLoad(LN, DAG); + SDValue Ops[2]; std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG); return DAG.getMergeValues(Ops, SDLoc(N)); @@ -2119,8 +2122,12 @@ // order problems during legalization, the emitted instructions to pack and // unpack the bytes again are not eliminated in the case of an unaligned // copy. - if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) + if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) { + if (VT.isVector()) + return scalarizeVectorStore(SN, DAG); + return expandUnalignedStore(SN, DAG); + } if (!IsFast) return SDValue(); Index: test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll =================================================================== --- test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll +++ test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll @@ -1,15 +1,13 @@ -; XFAIL: * -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s -; XUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s ; ; EG-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount: -; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]] +; EG: MEM_{{.*}} MSKOR [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]] ; EG-NOT: BFE ; EG: ADD_INT ; EG: LSHL -; EG: ASHR [[RES]] +; EG: ASHR ; EG: LSHL -; EG: ASHR [[RES]] +; EG: ASHR ; EG: LSHR {{\*?}} [[ADDR]] ; Works with the align 2 removed Index: test/CodeGen/AMDGPU/unaligned-load-store.ll =================================================================== --- test/CodeGen/AMDGPU/unaligned-load-store.ll +++ test/CodeGen/AMDGPU/unaligned-load-store.ll @@ -552,4 +552,53 @@ ret void } +; SI-LABEL: {{^}}local_load_align1_v16i8: +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 + +; SI: ScratchSize: 0{{$}} +define void @local_load_align1_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(3)* %in) #0 { + %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in, align 1 + store <16 x i8> %ld, <16 x i8> addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}local_store_align1_v16i8: +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 + +; SI: ScratchSize: 0{{$}} +define void @local_store_align1_v16i8(<16 x i8> addrspace(3)* %out) #0 { + store <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* %out, align 1 + ret void +} + attributes #0 = { nounwind }