Index: lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructions.td +++ lib/Target/AMDGPU/AMDGPUInstructions.td @@ -385,10 +385,6 @@ defm atomic_cmp_swap : AtomicCmpSwapLocal ; -def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return isFlatLoad(dyn_cast(N)); -}]>; - def flat_store : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ return isFlatStore(dyn_cast(N)); Index: lib/Target/AMDGPU/CIInstructions.td =================================================================== --- lib/Target/AMDGPU/CIInstructions.td +++ lib/Target/AMDGPU/CIInstructions.td @@ -260,9 +260,9 @@ def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; class FlatStorePat : Pat < (node vt:$data, i64:$addr), Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -142,6 +142,12 @@ isConstantLoad(cast(N), -1); }]>; +def flat_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ + return isGlobalLoad(cast(N)) || + isConstantLoad(cast(N), -1) || + isFlatLoad(dyn_cast(N)); +}]>; + def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isConstantLoad(cast(N), -1) && static_cast(getTargetLowering())->isMemOpUniform(N); Index: test/CodeGen/AMDGPU/salu-to-valu.ll =================================================================== --- test/CodeGen/AMDGPU/salu-to-valu.ll +++ test/CodeGen/AMDGPU/salu-to-valu.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=tahiti 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=CI %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI --check-prefix=GCN-HSA %s declare i32 @llvm.r600.read.tidig.x() #0 declare i32 @llvm.r600.read.tidig.y() #0 @@ -18,8 +19,9 @@ ; Make sure we aren't using VGPR's for the srsrc operand of BUFFER_LOAD_* ; instructions -; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 -; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 +; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 +; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 +; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}} define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { entry: @@ -50,8 +52,10 @@ ; Test moving an SMRD instruction to the VALU ; GCN-LABEL: {{^}}smrd_valu: +; FIXME: We should be using flat load for HSA. 
; GCN: buffer_load_dword [[OUT:v[0-9]+]] -; GCN: buffer_store_dword [[OUT]] +; GCN-NOHSA: buffer_store_dword [[OUT]] +; GCN-HSA: flat_store_dword [[OUT]] define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 { entry: %tmp = icmp ne i32 %a, 0 @@ -77,8 +81,9 @@ ; Test moving an SMRD with an immediate offset to the VALU ; GCN-LABEL: {{^}}smrd_valu2: -; GCN-NOT: v_add -; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}} +; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -91,12 +96,14 @@ ; Use a big offset that will use the SMRD literal offset on CI ; GCN-LABEL: {{^}}smrd_valu_ci_offset: -; GCN-NOT: v_add -; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4e20{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} -; GCN: v_add_i32_e32 -; GCN: buffer_store_dword +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4e20{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: buffer_store_dword +; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-HSA: flat_store_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -109,13 +116,14 @@ } ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x2: -; GCN-NOT: v_add -; GCN: s_mov_b32 [[OFFSET:s[0-9]+]], 0x9c40{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, 
v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: buffer_store_dwordx2 +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_mov_b32 [[OFFSET:s[0-9]+]], 0x9c40{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: buffer_store_dwordx2 +; GCN-HSA: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] define void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -128,15 +136,16 @@ } ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x4: -; GCN-NOT: v_add -; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4d20{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: buffer_store_dwordx4 +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4d20{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-HSA: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] define void 
@smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -152,25 +161,27 @@ ; CI. ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x8: -; GCN-NOT: v_add -; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} -; GCN-NOT: v_add -; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} - -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: buffer_store_dwordx4 -; GCN: buffer_store_dwordx4 +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} + +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 
{{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 define void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -184,35 +195,40 @@ ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16: -; GCN-NOT: v_add -; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} -; GCN-NOT: v_add -; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} -; GCN-NOT: v_add -; GCN: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}} -; GCN-NOT: v_add -; GCN: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}} - -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: buffer_store_dwordx4 -; GCN: buffer_store_dwordx4 -; GCN: buffer_store_dwordx4 -; GCN: buffer_store_dwordx4 +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: 
s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}} + +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 ; GCN: s_endpgm define void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 { @@ -227,9 +243,11 @@ } ; GCN-LABEL: {{^}}smrd_valu2_salu_user: -; GCN: buffer_load_dword [[MOVED:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; 
GCN-NOHSA: buffer_load_dword [[MOVED:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; GCN-HSA: flat_load_dword [[MOVED:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]] -; GCN: buffer_store_dword [[ADD]] +; GCN-NOHSA: buffer_store_dword [[ADD]] +; GCN-HSA: flat_store_dword [[ADD]] define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -242,7 +260,8 @@ } ; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset: -; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}} +; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}} +; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -254,8 +273,9 @@ } ; GCN-LABEL: {{^}}smrd_valu2_mubuf_offset: -; GCN-NOT: v_add -; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}} +; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -267,8 +287,10 @@ } ; GCN-LABEL: {{^}}s_load_imm_v8i32: -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.r600.read.tidig.x() @@ -280,16 
+302,18 @@ } ; GCN-LABEL: {{^}}s_load_imm_v8i32_salu_user: -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: buffer_store_dword +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: buffer_store_dword +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.r600.read.tidig.x() @@ -319,10 +343,14 @@ } ; GCN-LABEL: {{^}}s_load_imm_v16i32: -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -334,26 +362,30 @@ } ; GCN-LABEL: {{^}}s_load_imm_v16i32_salu_user: -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: buffer_store_dword +; GCN-NOHSA: buffer_load_dwordx4 +; 
GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: buffer_store_dword +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1