Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1069,7 +1069,11 @@ if (!Subtarget->hasUnalignedScratchAccess() && (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS || AddrSpace == AMDGPUAS::FLAT_ADDRESS)) { - return false; + bool AlignedBy4 = Align >= 4; + if (IsFast) + *IsFast = AlignedBy4; + + return AlignedBy4; } if (Subtarget->hasUnalignedBufferAccess()) { Index: test/CodeGen/AMDGPU/byval-frame-setup.ll =================================================================== --- test/CodeGen/AMDGPU/byval-frame-setup.ll +++ test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -78,6 +78,16 @@ ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24 +; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24 +; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28 +; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32 +; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36 + +; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20 +; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24 +; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28 +; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32 + ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16 @@ -90,15 +100,6 @@ ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16 -; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24 -; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28 -; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32 -; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36 - -; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20 -; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24 -; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28 -; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32 ; GCN: s_swappc_b64 ; GCN-NOT: v_readlane_b32 s32 @@ -271,6 +272,16 @@ ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24 +; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24 +; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28 +; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32 +; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36 + +; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24 +; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28 +; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:32 +; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:36 + ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16 @@ -283,15 +294,7 @@ ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:16 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:20 -; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24 -; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28 -; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32 -; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36 -; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24 -; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28 -; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:32 -; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:36 ; GCN: s_swappc_b64 ; GCN-NOT: v_readlane_b32 s32