Summary of this patch (patch tools skip text that precedes the "Index:" header):
  * Removes -mcpu=SI from the first RUN line; the tonga RUN line is unchanged.
  * unaligned_load_store_i64_local: adds SI-NOT checks for v_or_b32 and v_lshl
    before the first ds_write_b8 check and between each of the eight
    ds_write_b8 checks.
  * Adds a new test, unaligned_load_store_v2i32_local, that loads and stores a
    <2 x i32> in addrspace(3) with align 1 and uses the same check pattern
    (eight ds_read_u8, then eight ds_write_b8 with the SI-NOT checks).
  * After the unaligned_load_store_i64_global label: adds SI-NOT checks for
    v_or_ and v_lshl between the buffer_load_ubyte and buffer_store_byte checks.
  NOTE(review): the SI-NOT checks presumably assert that the loaded bytes are
  stored directly rather than being recombined with shift/or first - confirm
  against the accompanying code change.

Index: test/CodeGen/AMDGPU/unaligned-load-store.ll
===================================================================
--- test/CodeGen/AMDGPU/unaligned-load-store.ll
+++ test/CodeGen/AMDGPU/unaligned-load-store.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
 
 ; SI-LABEL: {{^}}unaligned_load_store_i16_local:
@@ -65,13 +65,36 @@
 ; SI: ds_read_u8
 ; SI: ds_read_u8
 ; SI: ds_read_u8
+
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
 ; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
 ; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
 ; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
 ; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
 ; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
 ; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
 ; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
 ; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
@@ -80,6 +103,53 @@
   ret void
 }
 
+; SI-LABEL: {{^}}unaligned_load_store_v2i32_local:
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+; SI: ds_write_b8
+; SI: s_endpgm
+define void @unaligned_load_store_v2i32_local(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) {
+  %v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
+  store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1
+  ret void
+}
+
 ; SI-LABEL: {{^}}unaligned_load_store_i64_global:
 ; SI: buffer_load_ubyte
 ; SI: buffer_load_ubyte
@@ -89,6 +159,10 @@
 ; SI: buffer_load_ubyte
 ; SI: buffer_load_ubyte
 ; SI: buffer_load_ubyte
+
+; SI-NOT: v_or_
+; SI-NOT: v_lshl
+
 ; SI: buffer_store_byte
 ; SI: buffer_store_byte
 ; SI: buffer_store_byte