diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5119,6 +5119,9 @@ } else if (Elts.size() == 2) { Type = MVT::v2f32; NumElts = 2; + } else if (Elts.size() == 3) { + Type = MVT::v3f32; + NumElts = 3; } else if (Elts.size() <= 4) { Type = MVT::v4f32; NumElts = 4; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll @@ -35,8 +35,8 @@ } ; GCN-LABEL: {{^}}image_load_mip_v4f16: -; UNPACKED: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm d16{{$}} -; PACKED: image_load_mip v[0:1], v[0:3], s[0:7] dmask:0xf unorm d16{{$}} +; UNPACKED: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm d16{{$}} +; PACKED: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf unorm d16{{$}} ; GFX10: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}} define amdgpu_ps <2 x float> @image_load_mip_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) { main_body: @@ -46,8 +46,8 @@ } ; GCN-LABEL: {{^}}image_load_3d_v2f16: -; UNPACKED: image_load v[0:1], v[0:3], s[0:7] dmask:0x3 unorm d16{{$}} -; PACKED: image_load v0, v[0:3], s[0:7] dmask:0x3 unorm d16{{$}} +; UNPACKED: image_load v[0:1], v[0:2], s[0:7] dmask:0x3 unorm d16{{$}} +; PACKED: image_load v0, v[0:2], s[0:7] dmask:0x3 unorm d16{{$}} ; GFX10: image_load v0, v[0:2], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm d16{{$}} define amdgpu_ps float @image_load_3d_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) { main_body: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll @@ -1,34 +1,122 @@ -; RUN: llc 
-march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789,SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789,GFX8910,SIVI,PRT %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789,PRT %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-prt-strict-null -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900,NOPRT %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s - -; GCN-LABEL: {{^}}load_1d: -; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FIJI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-prt-strict-null -verify-machineinstrs < %s | FileCheck -check-prefixes=NOPRT %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10 %s + define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) { +; VERDE-LABEL: load_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1d: +; GFX6789: ; %bb.0: ; %main_body 
+; GFX6789-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_1d_tfe: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v4, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; NOPRT-NOT: v_mov_b32_e32 v1 -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT-NOT: v_mov_b32_e32 v3 -; GFX6789: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf unorm tfe{{$}} -; GFX10: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe ; -; SIVI: buffer_store_dword v4, off, s[8:11], 0 -; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4 define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) { +; VERDE-LABEL: load_1d_tfe: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v5, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf unorm tfe +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, -1 
+; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1d_tfe: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v5, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: v_mov_b32_e32 v3, v0 +; FIJI-NEXT: v_mov_b32_e32 v4, v0 +; FIJI-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf unorm tfe +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1d_tfe: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v5, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf unorm tfe +; GFX6789-NEXT: v_mov_b32_e32 v5, s8 +; GFX6789-NEXT: v_mov_b32_e32 v6, s9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[5:6], v4, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1d_tfe: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v4, 0 +; NOPRT-NEXT: image_load v[0:4], v0, s[0:7] dmask:0xf unorm tfe +; NOPRT-NEXT: v_mov_b32_e32 v5, s8 +; NOPRT-NEXT: v_mov_b32_e32 v6, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[5:6], v4, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d_tfe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v6, s9 ; 
encoding: [0x09,0x02,0x0c,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe ; encoding: [0x00,0x1f,0x01,0xf0,0x05,0x00,0x00,0x00] +; GFX10-NEXT: v_mov_b32_e32 v5, s8 ; encoding: [0x08,0x02,0x0a,0x7e] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[5:6], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x05,0x04,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -37,22 +125,82 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}load_1d_lwe: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v4, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; NOPRT-NOT: v_mov_b32_e32 v1 -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT-NOT: v_mov_b32_e32 v3 -; GFX6789: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf unorm lwe{{$}} -; GFX10: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ; -; SIVI: buffer_store_dword v4, off, s[8:11], 0 -; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4 define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) { +; VERDE-LABEL: load_1d_lwe: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v5, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; 
VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf unorm lwe +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, -1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1d_lwe: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v5, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: v_mov_b32_e32 v3, v0 +; FIJI-NEXT: v_mov_b32_e32 v4, v0 +; FIJI-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf unorm lwe +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1d_lwe: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v5, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf unorm lwe +; GFX6789-NEXT: v_mov_b32_e32 v5, s8 +; GFX6789-NEXT: v_mov_b32_e32 v6, s9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[5:6], v4, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1d_lwe: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v4, 0 +; NOPRT-NEXT: image_load v[0:4], v0, s[0:7] dmask:0xf unorm lwe +; NOPRT-NEXT: v_mov_b32_e32 v5, s8 +; NOPRT-NEXT: v_mov_b32_e32 v6, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[5:6], v4, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; 
+; GFX10-LABEL: load_1d_lwe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v6, s9 ; encoding: [0x09,0x02,0x0c,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: image_load v[0:4], v5, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ; encoding: [0x00,0x1f,0x02,0xf0,0x05,0x00,0x00,0x00] +; GFX10-NEXT: v_mov_b32_e32 v5, s8 ; encoding: [0x08,0x02,0x0a,0x7e] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[5:6], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x05,0x04,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 2, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -61,31 +209,122 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}load_2d: -; GFX6789: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { +; VERDE-LABEL: load_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_2d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_2d: 
+; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_2d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_2d_tfe: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v4, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; NOPRT-NOT: v_mov_b32_e32 v1 -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT-NOT: v_mov_b32_e32 v3 -; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe{{$}} -; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; -; SIVI: buffer_store_dword v4, off, s[8:11], 0 -; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4 define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) { +; VERDE-LABEL: load_2d_tfe: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v5, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v6, v1 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: 
image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, -1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_2d_tfe: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v5, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: v_mov_b32_e32 v6, v1 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: v_mov_b32_e32 v3, v0 +; FIJI-NEXT: v_mov_b32_e32 v4, v0 +; FIJI-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_2d_tfe: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v5, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v6, v1 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe +; GFX6789-NEXT: v_mov_b32_e32 v5, s8 +; GFX6789-NEXT: v_mov_b32_e32 v6, s9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[5:6], v4, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_2d_tfe: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v4, 0 +; NOPRT-NEXT: image_load v[0:4], v[0:1], s[0:7] dmask:0xf unorm tfe +; NOPRT-NEXT: v_mov_b32_e32 v5, s8 +; NOPRT-NEXT: v_mov_b32_e32 v6, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[5:6], v4, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_2d_tfe: +; 
GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v7, s9 ; encoding: [0x09,0x02,0x0e,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; encoding: [0x08,0x1f,0x01,0xf0,0x05,0x00,0x00,0x00] +; GFX10-NEXT: v_mov_b32_e32 v6, s8 ; encoding: [0x08,0x02,0x0c,0x7e] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[6:7], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x06,0x04,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -94,31 +333,126 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}load_3d: -; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) { +; VERDE-LABEL: load_3d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_3d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; 
return to shader part epilog +; +; GFX6789-LABEL: load_3d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_3d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_3d_tfe_lwe: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v4, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; NOPRT-NOT: v_mov_b32_e32 v1 -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT-NOT: v_mov_b32_e32 v3 -; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe lwe{{$}} -; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe ; -; SIVI: buffer_store_dword v4, off, s[8:11], 0 -; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4 define amdgpu_ps <4 x float> @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) { +; VERDE-LABEL: load_3d_tfe_lwe: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v5, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v7, v2 +; VERDE-NEXT: v_mov_b32_e32 v6, v1 +; VERDE-NEXT: v_mov_b32_e32 
v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe lwe +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, -1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_3d_tfe_lwe: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v5, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: v_mov_b32_e32 v7, v2 +; FIJI-NEXT: v_mov_b32_e32 v6, v1 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: v_mov_b32_e32 v3, v0 +; FIJI-NEXT: v_mov_b32_e32 v4, v0 +; FIJI-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe lwe +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_3d_tfe_lwe: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v5, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v7, v2 +; GFX6789-NEXT: v_mov_b32_e32 v6, v1 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe lwe +; GFX6789-NEXT: v_mov_b32_e32 v5, s8 +; GFX6789-NEXT: v_mov_b32_e32 v6, s9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[5:6], v4, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_3d_tfe_lwe: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v4, 0 +; NOPRT-NEXT: image_load v[0:4], v[0:2], s[0:7] dmask:0xf unorm tfe lwe +; NOPRT-NEXT: v_mov_b32_e32 v5, s8 +; 
NOPRT-NEXT: v_mov_b32_e32 v6, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[5:6], v4, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_3d_tfe_lwe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v7, v2 ; encoding: [0x02,0x03,0x0e,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v11, s9 ; encoding: [0x09,0x02,0x16,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v10, s8 ; encoding: [0x08,0x02,0x14,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe ; encoding: [0x10,0x1f,0x03,0xf0,0x05,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[10:11], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x0a,0x04,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -127,31 +461,126 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}load_cube: -; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) { +; VERDE-LABEL: load_cube: +; VERDE: ; 
%bb.0: ; %main_body +; VERDE-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_cube: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_cube: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_cube: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_cube: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x18,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_cube_lwe: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v4, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; NOPRT-NOT: v_mov_b32_e32 v1 -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT-NOT: v_mov_b32_e32 v3 -; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}} -; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm lwe ; -; SIVI: buffer_store_dword v4, off, s[8:11], 0 -; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4 
define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) { +; VERDE-LABEL: load_cube_lwe: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v5, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v7, v2 +; VERDE-NEXT: v_mov_b32_e32 v6, v1 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm lwe da +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, -1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_cube_lwe: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v5, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: v_mov_b32_e32 v7, v2 +; FIJI-NEXT: v_mov_b32_e32 v6, v1 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: v_mov_b32_e32 v3, v0 +; FIJI-NEXT: v_mov_b32_e32 v4, v0 +; FIJI-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm lwe da +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_cube_lwe: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v5, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v7, v2 +; GFX6789-NEXT: v_mov_b32_e32 v6, v1 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm lwe da +; GFX6789-NEXT: v_mov_b32_e32 v5, s8 +; GFX6789-NEXT: v_mov_b32_e32 v6, s9 +; GFX6789-NEXT: s_waitcnt 
vmcnt(0) +; GFX6789-NEXT: global_store_dword v[5:6], v4, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_cube_lwe: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v4, 0 +; NOPRT-NEXT: image_load v[0:4], v[0:2], s[0:7] dmask:0xf unorm lwe da +; NOPRT-NEXT: v_mov_b32_e32 v5, s8 +; NOPRT-NEXT: v_mov_b32_e32 v6, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[5:6], v4, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_cube_lwe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v7, v2 ; encoding: [0x02,0x03,0x0e,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v11, s9 ; encoding: [0x09,0x02,0x16,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v10, s8 ; encoding: [0x08,0x02,0x14,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm lwe ; encoding: [0x18,0x1f,0x02,0xf0,0x05,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[10:11], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x0a,0x04,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ 
-160,31 +589,122 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}load_1darray: -; GFX6789: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) { +; VERDE-LABEL: load_1darray: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1darray: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1darray: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1darray: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x20,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_1darray_tfe: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v4, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; 
NOPRT-NOT: v_mov_b32_e32 v1 -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT-NOT: v_mov_b32_e32 v3 -; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe da{{$}} -; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm tfe ; -; SIVI: buffer_store_dword v4, off, s[8:11], 0 -; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4 define amdgpu_ps <4 x float> @load_1darray_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %slice) { +; VERDE-LABEL: load_1darray_tfe: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v5, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v6, v1 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe da +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, -1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1darray_tfe: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v5, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: v_mov_b32_e32 v6, v1 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: v_mov_b32_e32 v3, v0 +; FIJI-NEXT: v_mov_b32_e32 v4, v0 +; FIJI-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe da +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1darray_tfe: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v5, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v6, v1 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: 
v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe da +; GFX6789-NEXT: v_mov_b32_e32 v5, s8 +; GFX6789-NEXT: v_mov_b32_e32 v6, s9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[5:6], v4, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1darray_tfe: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v4, 0 +; NOPRT-NEXT: image_load v[0:4], v[0:1], s[0:7] dmask:0xf unorm tfe da +; NOPRT-NEXT: v_mov_b32_e32 v5, s8 +; NOPRT-NEXT: v_mov_b32_e32 v6, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[5:6], v4, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1darray_tfe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v7, s9 ; encoding: [0x09,0x02,0x0e,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm tfe ; encoding: [0x20,0x1f,0x01,0xf0,0x05,0x00,0x00,0x00] +; GFX10-NEXT: v_mov_b32_e32 v6, s8 ; encoding: [0x08,0x02,0x0c,0x7e] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[6:7], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x06,0x04,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader 
part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -193,31 +713,126 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}load_2darray: -; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) { +; VERDE-LABEL: load_2darray: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_2darray: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_2darray: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_2darray: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_2darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_2darray_lwe: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: 
v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v4, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; NOPRT-NOT: v_mov_b32_e32 v1 -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT-NOT: v_mov_b32_e32 v3 -; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}} -; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm lwe ; -; SIVI: buffer_store_dword v4, off, s[8:11], 0 -; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4 define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) { +; VERDE-LABEL: load_2darray_lwe: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v5, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v7, v2 +; VERDE-NEXT: v_mov_b32_e32 v6, v1 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm lwe da +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, -1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_2darray_lwe: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v5, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: v_mov_b32_e32 v7, v2 +; FIJI-NEXT: v_mov_b32_e32 v6, v1 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: v_mov_b32_e32 v3, v0 +; FIJI-NEXT: v_mov_b32_e32 v4, v0 +; FIJI-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm lwe da +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; FIJI-NEXT: 
s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_2darray_lwe: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v5, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v7, v2 +; GFX6789-NEXT: v_mov_b32_e32 v6, v1 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm lwe da +; GFX6789-NEXT: v_mov_b32_e32 v5, s8 +; GFX6789-NEXT: v_mov_b32_e32 v6, s9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[5:6], v4, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_2darray_lwe: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v4, 0 +; NOPRT-NEXT: image_load v[0:4], v[0:2], s[0:7] dmask:0xf unorm lwe da +; NOPRT-NEXT: v_mov_b32_e32 v5, s8 +; NOPRT-NEXT: v_mov_b32_e32 v6, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[5:6], v4, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_2darray_lwe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v7, v2 ; encoding: [0x02,0x03,0x0e,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v11, s9 ; encoding: [0x09,0x02,0x16,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v10, s8 ; encoding: [0x08,0x02,0x14,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] 
dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm lwe ; encoding: [0x28,0x1f,0x02,0xf0,0x05,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[10:11], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x0a,0x04,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -226,31 +841,126 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}load_2dmsaa: -; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ; define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) { +; VERDE-LABEL: load_2dmsaa: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_2dmsaa: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_2dmsaa: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_2dmsaa: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_2dmsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: 
[0x30,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_2dmsaa_both: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v4, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; NOPRT-NOT: v_mov_b32_e32 v1 -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT-NOT: v_mov_b32_e32 v3 -; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe lwe{{$}} -; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ; -; SIVI: buffer_store_dword v4, off, s[8:11], 0 -; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4 define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) { +; VERDE-LABEL: load_2dmsaa_both: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v5, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v7, v2 +; VERDE-NEXT: v_mov_b32_e32 v6, v1 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe lwe +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, -1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_2dmsaa_both: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v5, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: 
v_mov_b32_e32 v7, v2 +; FIJI-NEXT: v_mov_b32_e32 v6, v1 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: v_mov_b32_e32 v3, v0 +; FIJI-NEXT: v_mov_b32_e32 v4, v0 +; FIJI-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe lwe +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_2dmsaa_both: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v5, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v7, v2 +; GFX6789-NEXT: v_mov_b32_e32 v6, v1 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe lwe +; GFX6789-NEXT: v_mov_b32_e32 v5, s8 +; GFX6789-NEXT: v_mov_b32_e32 v6, s9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[5:6], v4, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_2dmsaa_both: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v4, 0 +; NOPRT-NEXT: image_load v[0:4], v[0:2], s[0:7] dmask:0xf unorm tfe lwe +; NOPRT-NEXT: v_mov_b32_e32 v5, s8 +; NOPRT-NEXT: v_mov_b32_e32 v6, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[5:6], v4, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_2dmsaa_both: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v7, v2 ; encoding: [0x02,0x03,0x0e,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v11, s9 ; 
encoding: [0x09,0x02,0x16,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v10, s8 ; encoding: [0x08,0x02,0x14,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ; encoding: [0x30,0x1f,0x03,0xf0,0x05,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[10:11], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x0a,0x04,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -259,31 +969,130 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}load_2darraymsaa: -; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_load v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) { +; VERDE-LABEL: load_2darraymsaa: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_2darraymsaa: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_2darraymsaa: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da +; 
GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_2darraymsaa: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_2darraymsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x38,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_2darraymsaa_tfe: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v4, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; NOPRT-NOT: v_mov_b32_e32 v1 -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT-NOT: v_mov_b32_e32 v3 -; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe da{{$}} -; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; -; SIVI: buffer_store_dword v4, off, s[8:11], 0 -; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4 define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) { +; VERDE-LABEL: load_2darraymsaa_tfe: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v5, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v8, v3 +; VERDE-NEXT: v_mov_b32_e32 v7, v2 +; VERDE-NEXT: v_mov_b32_e32 v6, v1 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: 
v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe da +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, -1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_2darraymsaa_tfe: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v5, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: v_mov_b32_e32 v8, v3 +; FIJI-NEXT: v_mov_b32_e32 v7, v2 +; FIJI-NEXT: v_mov_b32_e32 v6, v1 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: v_mov_b32_e32 v3, v0 +; FIJI-NEXT: v_mov_b32_e32 v4, v0 +; FIJI-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe da +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_2darraymsaa_tfe: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v5, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v8, v3 +; GFX6789-NEXT: v_mov_b32_e32 v7, v2 +; GFX6789-NEXT: v_mov_b32_e32 v6, v1 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe da +; GFX6789-NEXT: v_mov_b32_e32 v5, s8 +; GFX6789-NEXT: v_mov_b32_e32 v6, s9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[5:6], v4, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_2darraymsaa_tfe: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v4, 0 +; NOPRT-NEXT: image_load v[0:4], v[0:3], s[0:7] 
dmask:0xf unorm tfe da +; NOPRT-NEXT: v_mov_b32_e32 v5, s8 +; NOPRT-NEXT: v_mov_b32_e32 v6, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[5:6], v4, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_2darraymsaa_tfe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v8, v3 ; encoding: [0x03,0x03,0x10,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v7, v2 ; encoding: [0x02,0x03,0x0e,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v11, s9 ; encoding: [0x09,0x02,0x16,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; encoding: [0x38,0x1f,0x01,0xf0,0x05,0x00,0x00,0x00] +; GFX10-NEXT: v_mov_b32_e32 v10, s8 ; encoding: [0x08,0x02,0x14,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[10:11], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x0a,0x04,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -292,31 +1101,122 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}load_mip_1d: -; GFX6789: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_load_mip v[0:3], v[0:1], 
s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) { +; VERDE-LABEL: load_mip_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_mip_1d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_mip_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_mip_1d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x04,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_mip_1d_lwe: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v4, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; NOPRT-NOT: v_mov_b32_e32 v1 -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT-NOT: v_mov_b32_e32 v3 -; GFX6789: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe{{$}} -; GFX10: image_load_mip 
v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ; -; SIVI: buffer_store_dword v4, off, s[8:11], 0 -; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4 define amdgpu_ps <4 x float> @load_mip_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %mip) { +; VERDE-LABEL: load_mip_1d_lwe: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v5, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v6, v1 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: image_load_mip v[0:4], v[5:6], s[0:7] dmask:0xf unorm lwe +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, -1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_mip_1d_lwe: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v5, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: v_mov_b32_e32 v6, v1 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: v_mov_b32_e32 v3, v0 +; FIJI-NEXT: v_mov_b32_e32 v4, v0 +; FIJI-NEXT: image_load_mip v[0:4], v[5:6], s[0:7] dmask:0xf unorm lwe +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_mip_1d_lwe: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v5, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v6, v1 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: image_load_mip v[0:4], v[5:6], s[0:7] dmask:0xf unorm lwe +; GFX6789-NEXT: v_mov_b32_e32 v5, 
s8 +; GFX6789-NEXT: v_mov_b32_e32 v6, s9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[5:6], v4, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_mip_1d_lwe: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v4, 0 +; NOPRT-NEXT: image_load_mip v[0:4], v[0:1], s[0:7] dmask:0xf unorm lwe +; NOPRT-NEXT: v_mov_b32_e32 v5, s8 +; NOPRT-NEXT: v_mov_b32_e32 v6, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[5:6], v4, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_1d_lwe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v7, s9 ; encoding: [0x09,0x02,0x0e,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: image_load_mip v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ; encoding: [0x00,0x1f,0x06,0xf0,0x05,0x00,0x00,0x00] +; GFX10-NEXT: v_mov_b32_e32 v6, s8 ; encoding: [0x08,0x02,0x0c,0x7e] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[6:7], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x06,0x04,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 2, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -325,31 
+1225,126 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}load_mip_2d: -; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) { +; VERDE-LABEL: load_mip_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_mip_2d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_mip_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_mip_2d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x1f,0x04,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_mip_2d_tfe: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v4, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; NOPRT-NOT: 
v_mov_b32_e32 v1 -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT-NOT: v_mov_b32_e32 v3 -; GFX6789: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe{{$}} -; GFX10: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; -; SIVI: buffer_store_dword v4, off, s[8:11], 0 -; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4 define amdgpu_ps <4 x float> @load_mip_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %mip) { +; VERDE-LABEL: load_mip_2d_tfe: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v5, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v7, v2 +; VERDE-NEXT: v_mov_b32_e32 v6, v1 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: image_load_mip v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, -1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_mip_2d_tfe: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v5, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: v_mov_b32_e32 v7, v2 +; FIJI-NEXT: v_mov_b32_e32 v6, v1 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: v_mov_b32_e32 v3, v0 +; FIJI-NEXT: v_mov_b32_e32 v4, v0 +; FIJI-NEXT: image_load_mip v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v4, off, s[8:11], 0 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_mip_2d_tfe: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v5, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: 
v_mov_b32_e32 v7, v2 +; GFX6789-NEXT: v_mov_b32_e32 v6, v1 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: image_load_mip v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe +; GFX6789-NEXT: v_mov_b32_e32 v5, s8 +; GFX6789-NEXT: v_mov_b32_e32 v6, s9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[5:6], v4, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_mip_2d_tfe: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v4, 0 +; NOPRT-NEXT: image_load_mip v[0:4], v[0:2], s[0:7] dmask:0xf unorm tfe +; NOPRT-NEXT: v_mov_b32_e32 v5, s8 +; NOPRT-NEXT: v_mov_b32_e32 v6, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[5:6], v4, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_2d_tfe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v7, v2 ; encoding: [0x02,0x03,0x0e,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v11, s9 ; encoding: [0x09,0x02,0x16,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v10, s8 ; encoding: [0x08,0x02,0x14,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: image_load_mip v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; encoding: [0x08,0x1f,0x05,0xf0,0x05,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: 
global_store_dword v[10:11], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x0a,0x04,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -358,14 +1353,51 @@ ret <4 x float> %v.vec } -; Make sure that error flag is returned even with dmask 0 -; GCN-LABEL: {{^}}load_1d_V2_tfe_dmask0: -; GCN: v_mov_b32_e32 v1, 0 -; PRT-DAG: v_mov_b32_e32 v2, v1 -; PRT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe{{$}} -; NOPRT-NOT: v_mov_b32_e32 v1 -; NOPRT: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe{{$}} define amdgpu_ps float @load_1d_V2_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) { +; VERDE-LABEL: load_1d_V2_tfe_dmask0: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v1, 0 +; VERDE-NEXT: v_mov_b32_e32 v2, v1 +; VERDE-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: v_mov_b32_e32 v0, v2 +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1d_V2_tfe_dmask0: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v1, 0 +; FIJI-NEXT: v_mov_b32_e32 v2, v1 +; FIJI-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v0, v2 +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1d_V2_tfe_dmask0: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v1, 0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v1 +; GFX6789-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: v_mov_b32_e32 v0, v2 +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1d_V2_tfe_dmask0: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v1, 0 +; NOPRT-NEXT: image_load v[0:1], v0, s[0:7] 
dmask:0x1 unorm tfe +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: v_mov_b32_e32 v0, v1 +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d_V2_tfe_dmask0: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; encoding: [0x80,0x02,0x02,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v2, v1 ; encoding: [0x01,0x03,0x04,0x7e] +; GFX10-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe ; encoding: [0x00,0x11,0x01,0xf0,0x00,0x01,0x00,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) %v.err = extractvalue {<2 x float>, i32} %v, 1 @@ -373,13 +1405,51 @@ ret float %vv } -; GCN-LABEL: {{^}}load_1d_V1_tfe_dmask0: -; GCN: v_mov_b32_e32 v1, 0 -; PRT-DAG: v_mov_b32_e32 v2, v1 -; PRT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe{{$}} -; NOPRT-NOT: v_mov_b32_e32 v1 -; NOPRT: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe{{$}} define amdgpu_ps float @load_1d_V1_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) { +; VERDE-LABEL: load_1d_V1_tfe_dmask0: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v1, 0 +; VERDE-NEXT: v_mov_b32_e32 v2, v1 +; VERDE-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: v_mov_b32_e32 v0, v2 +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1d_V1_tfe_dmask0: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v1, 0 +; FIJI-NEXT: v_mov_b32_e32 v2, v1 +; FIJI-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v0, v2 +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1d_V1_tfe_dmask0: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: 
v_mov_b32_e32 v1, 0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v1 +; GFX6789-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: v_mov_b32_e32 v0, v2 +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1d_V1_tfe_dmask0: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v1, 0 +; NOPRT-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: v_mov_b32_e32 v0, v1 +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d_V1_tfe_dmask0: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; encoding: [0x80,0x02,0x02,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v2, v1 ; encoding: [0x01,0x03,0x04,0x7e] +; GFX10-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe ; encoding: [0x00,0x11,0x01,0xf0,0x00,0x01,0x00,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) %v.err = extractvalue {float, i32} %v, 1 @@ -387,13 +1457,51 @@ ret float %vv } -; GCN-LABEL: {{^}}load_mip_2d_tfe_dmask0: -; GCN: v_mov_b32_e32 v3, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3 -; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}} -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}} define amdgpu_ps float @load_mip_2d_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) { +; VERDE-LABEL: load_mip_2d_tfe_dmask0: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v3, 0 +; VERDE-NEXT: v_mov_b32_e32 v4, v3 +; VERDE-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: 
v_mov_b32_e32 v0, v4 +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_mip_2d_tfe_dmask0: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v3, 0 +; FIJI-NEXT: v_mov_b32_e32 v4, v3 +; FIJI-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v0, v4 +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_mip_2d_tfe_dmask0: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v3, 0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v3 +; GFX6789-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: v_mov_b32_e32 v0, v4 +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_mip_2d_tfe_dmask0: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v3, 0 +; NOPRT-NEXT: image_load_mip v[2:3], v[0:2], s[0:7] dmask:0x1 unorm tfe +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: v_mov_b32_e32 v0, v3 +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_2d_tfe_dmask0: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v4, v3 ; encoding: [0x03,0x03,0x08,0x7e] +; GFX10-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm tfe ; encoding: [0x08,0x11,0x05,0xf0,0x00,0x03,0x00,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: v_mov_b32_e32 v0, v4 ; encoding: [0x04,0x03,0x00,0x7e] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 0, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0) %v.err = extractvalue {<4 x float>, i32} %v, 1 @@ -401,14 +1509,51 @@ ret float %vv } -; Do not make dmask 0 even if no result (other than tfe) is used. 
-; GCN-LABEL: {{^}}load_mip_2d_tfe_nouse: -; GCN: v_mov_b32_e32 v3, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3 -; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}} -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}} define amdgpu_ps float @load_mip_2d_tfe_nouse(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) { +; VERDE-LABEL: load_mip_2d_tfe_nouse: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v3, 0 +; VERDE-NEXT: v_mov_b32_e32 v4, v3 +; VERDE-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: v_mov_b32_e32 v0, v4 +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_mip_2d_tfe_nouse: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v3, 0 +; FIJI-NEXT: v_mov_b32_e32 v4, v3 +; FIJI-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v0, v4 +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_mip_2d_tfe_nouse: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v3, 0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v3 +; GFX6789-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: v_mov_b32_e32 v0, v4 +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_mip_2d_tfe_nouse: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v3, 0 +; NOPRT-NEXT: image_load_mip v[2:3], v[0:2], s[0:7] dmask:0x1 unorm tfe +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: v_mov_b32_e32 v0, v3 +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_2d_tfe_nouse: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v4, v3 ; encoding: [0x03,0x03,0x08,0x7e] +; GFX10-NEXT: image_load_mip 
v[3:4], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm tfe ; encoding: [0x08,0x11,0x05,0xf0,0x00,0x03,0x00,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: v_mov_b32_e32 v0, v4 ; encoding: [0x04,0x03,0x00,0x7e] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0) %v.err = extractvalue {<4 x float>, i32} %v, 1 @@ -416,13 +1561,51 @@ ret float %vv } -; GCN-LABEL: {{^}}load_mip_2d_tfe_nouse_V2: -; GCN: v_mov_b32_e32 v3, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3 -; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}} -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}} define amdgpu_ps float @load_mip_2d_tfe_nouse_V2(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) { +; VERDE-LABEL: load_mip_2d_tfe_nouse_V2: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v3, 0 +; VERDE-NEXT: v_mov_b32_e32 v4, v3 +; VERDE-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: v_mov_b32_e32 v0, v4 +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_mip_2d_tfe_nouse_V2: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v3, 0 +; FIJI-NEXT: v_mov_b32_e32 v4, v3 +; FIJI-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v0, v4 +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_mip_2d_tfe_nouse_V2: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v3, 0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v3 +; GFX6789-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: v_mov_b32_e32 v0, v4 +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: 
load_mip_2d_tfe_nouse_V2: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v3, 0 +; NOPRT-NEXT: image_load_mip v[2:3], v[0:2], s[0:7] dmask:0x1 unorm tfe +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: v_mov_b32_e32 v0, v3 +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_2d_tfe_nouse_V2: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v4, v3 ; encoding: [0x03,0x03,0x08,0x7e] +; GFX10-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm tfe ; encoding: [0x08,0x11,0x05,0xf0,0x00,0x03,0x00,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: v_mov_b32_e32 v0, v4 ; encoding: [0x04,0x03,0x00,0x7e] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32 6, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0) %v.err = extractvalue {<2 x float>, i32} %v, 1 @@ -430,13 +1613,51 @@ ret float %vv } -; GCN-LABEL: {{^}}load_mip_2d_tfe_nouse_V1: -; GCN: v_mov_b32_e32 v3, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3 -; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x2 unorm tfe{{$}} -; NOPRT-NOT: v_mov_b32_e32 v2 -; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x2 unorm tfe{{$}} define amdgpu_ps float @load_mip_2d_tfe_nouse_V1(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) { +; VERDE-LABEL: load_mip_2d_tfe_nouse_V1: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v3, 0 +; VERDE-NEXT: v_mov_b32_e32 v4, v3 +; VERDE-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x2 unorm tfe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: v_mov_b32_e32 v0, v4 +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_mip_2d_tfe_nouse_V1: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v3, 0 +; FIJI-NEXT: v_mov_b32_e32 v4, v3 +; 
FIJI-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x2 unorm tfe +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v0, v4 +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_mip_2d_tfe_nouse_V1: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v3, 0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v3 +; GFX6789-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x2 unorm tfe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: v_mov_b32_e32 v0, v4 +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_mip_2d_tfe_nouse_V1: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v3, 0 +; NOPRT-NEXT: image_load_mip v[2:3], v[0:2], s[0:7] dmask:0x2 unorm tfe +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: v_mov_b32_e32 v0, v3 +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_2d_tfe_nouse_V1: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v4, v3 ; encoding: [0x03,0x03,0x08,0x7e] +; GFX10-NEXT: image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_2D unorm tfe ; encoding: [0x08,0x12,0x05,0xf0,0x00,0x03,0x00,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: v_mov_b32_e32 v0, v4 ; encoding: [0x04,0x03,0x00,0x7e] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {float, i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32 2, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0) %v.err = extractvalue {float, i32} %v, 1 @@ -444,21 +1665,78 @@ ret float %vv } -; Check for dmask being materially smaller than return type -; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask3: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v3, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; NOPRT-NOT: v_mov_b32_e32 v1 -; 
NOPRT-NOT: v_mov_b32_e32 v2 -; GFX6789: image_load v[0:3], v{{[0-9]+}}, s[0:7] dmask:0x7 unorm tfe{{$}} -; GFX10: image_load v[0:3], v{{[0-9]+}}, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe ; -; SIVI: buffer_store_dword v3, off, s[8:11], 0 -; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v3 define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) { +; VERDE-LABEL: load_1d_tfe_V4_dmask3: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: image_load v[0:3], v4, s[0:7] dmask:0x7 unorm tfe +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, -1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v3, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1d_tfe_V4_dmask3: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v4, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: v_mov_b32_e32 v3, v0 +; FIJI-NEXT: image_load v[0:3], v4, s[0:7] dmask:0x7 unorm tfe +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v3, off, s[8:11], 0 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1d_tfe_V4_dmask3: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: image_load v[0:3], v4, s[0:7] dmask:0x7 unorm tfe +; GFX6789-NEXT: v_mov_b32_e32 v4, s8 +; GFX6789-NEXT: v_mov_b32_e32 v5, s9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword 
v[4:5], v3, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1d_tfe_V4_dmask3: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v3, 0 +; NOPRT-NEXT: image_load v[0:3], v0, s[0:7] dmask:0x7 unorm tfe +; NOPRT-NEXT: v_mov_b32_e32 v4, s8 +; NOPRT-NEXT: v_mov_b32_e32 v5, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[4:5], v3, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d_tfe_V4_dmask3: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v5, s9 ; encoding: [0x09,0x02,0x0a,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: image_load v[0:3], v4, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe ; encoding: [0x00,0x17,0x01,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: v_mov_b32_e32 v4, s8 ; encoding: [0x08,0x02,0x08,0x7e] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[4:5], v3, off ; encoding: [0x00,0x80,0x70,0xdc,0x04,0x03,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -467,18 +1745,74 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask2: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v2, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; NOPRT-NOT: 
v_mov_b32_e32 v1 -; GFX6789: image_load v[0:2], v{{[0-9]+}}, s[0:7] dmask:0x6 unorm tfe{{$}} -; GFX10: image_load v[0:2], v{{[0-9]+}}, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm tfe ; -; SIVI: buffer_store_dword v2, off, s[8:11], 0 -; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v2 define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) { +; VERDE-LABEL: load_1d_tfe_V4_dmask2: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: image_load v[0:2], v3, s[0:7] dmask:0x6 unorm tfe +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, -1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v2, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1d_tfe_V4_dmask2: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v3, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: image_load v[0:2], v3, s[0:7] dmask:0x6 unorm tfe +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v2, off, s[8:11], 0 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1d_tfe_V4_dmask2: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: image_load v[0:2], v3, s[0:7] dmask:0x6 unorm tfe +; GFX6789-NEXT: v_mov_b32_e32 v3, s8 +; GFX6789-NEXT: v_mov_b32_e32 v4, s9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[3:4], v2, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: 
load_1d_tfe_V4_dmask2: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v2, 0 +; NOPRT-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x6 unorm tfe +; NOPRT-NEXT: v_mov_b32_e32 v3, s8 +; NOPRT-NEXT: v_mov_b32_e32 v4, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[3:4], v2, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d_tfe_V4_dmask2: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v4, s9 ; encoding: [0x09,0x02,0x08,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: image_load v[0:2], v3, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm tfe ; encoding: [0x00,0x16,0x01,0xf0,0x03,0x00,0x00,0x00] +; GFX10-NEXT: v_mov_b32_e32 v3, s8 ; encoding: [0x08,0x02,0x06,0x7e] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[3:4], v2, off ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x02,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -487,16 +1821,70 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask1: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v1, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; GFX6789: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 unorm tfe{{$}} -; GFX10: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ; -; SIVI: buffer_store_dword v1, off, s[8:11], 0 -; GFX900: global_store_dword 
v[{{[0-9]+:[0-9]+}}], v1 define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) { +; VERDE-LABEL: load_1d_tfe_V4_dmask1: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 unorm tfe +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, -1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v1, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1d_tfe_V4_dmask1: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 unorm tfe +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v1, off, s[8:11], 0 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1d_tfe_V4_dmask1: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 unorm tfe +; GFX6789-NEXT: v_mov_b32_e32 v2, s8 +; GFX6789-NEXT: v_mov_b32_e32 v3, s9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[2:3], v1, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1d_tfe_V4_dmask1: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v1, 0 +; NOPRT-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x8 unorm tfe +; NOPRT-NEXT: v_mov_b32_e32 v2, s8 +; NOPRT-NEXT: v_mov_b32_e32 v3, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[2:3], v1, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to 
shader part epilog +; +; GFX10-LABEL: load_1d_tfe_V4_dmask1: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, s9 ; encoding: [0x09,0x02,0x06,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ; encoding: [0x00,0x18,0x01,0xf0,0x02,0x00,0x00,0x00] +; GFX10-NEXT: v_mov_b32_e32 v2, s8 ; encoding: [0x08,0x02,0x04,0x7e] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[2:3], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x02,0x01,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -505,16 +1893,70 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}load_1d_tfe_V2_dmask1: -; PRT: v_mov_b32_e32 v0, 0 -; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0 -; NOPRT: v_mov_b32_e32 v1, 0 -; NOPRT-NOT: v_mov_b32_e32 v0 -; GFX6789: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 unorm tfe{{$}} -; GFX10: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ; -; SIVI: buffer_store_dword v1, off, s[8:11], 0 -; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v1 define amdgpu_ps <2 x float> @load_1d_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) { +; VERDE-LABEL: load_1d_tfe_V2_dmask1: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 unorm tfe +; VERDE-NEXT: s_mov_b32 s11, 0xf000 +; VERDE-NEXT: s_mov_b32 s10, 
-1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v1, off, s[8:11], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1d_tfe_V2_dmask1: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: v_mov_b32_e32 v2, v0 +; FIJI-NEXT: v_mov_b32_e32 v0, 0 +; FIJI-NEXT: v_mov_b32_e32 v1, v0 +; FIJI-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 unorm tfe +; FIJI-NEXT: s_mov_b32 s11, 0xf000 +; FIJI-NEXT: s_mov_b32 s10, -1 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: buffer_store_dword v1, off, s[8:11], 0 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1d_tfe_V2_dmask1: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: image_load v[0:1], v2, s[0:7] dmask:0x8 unorm tfe +; GFX6789-NEXT: v_mov_b32_e32 v2, s8 +; GFX6789-NEXT: v_mov_b32_e32 v3, s9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[2:3], v1, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1d_tfe_V2_dmask1: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: v_mov_b32_e32 v1, 0 +; NOPRT-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x8 unorm tfe +; NOPRT-NEXT: v_mov_b32_e32 v2, s8 +; NOPRT-NEXT: v_mov_b32_e32 v3, s9 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: global_store_dword v[2:3], v1, off +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d_tfe_V2_dmask1: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, s9 ; encoding: [0x09,0x02,0x06,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: image_load v[0:1], v2, 
s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ; encoding: [0x00,0x18,0x01,0xf0,0x02,0x00,0x00,0x00] +; GFX10-NEXT: v_mov_b32_e32 v2, s8 ; encoding: [0x08,0x02,0x04,0x7e] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[2:3], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x02,0x01,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue {<2 x float>, i32} %v, 0 @@ -524,348 +1966,1276 @@ } -; GCN-LABEL: {{^}}load_mip_3d: -; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) { +; VERDE-LABEL: load_mip_3d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_mip_3d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_mip_3d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_mip_3d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: 
[0x10,0x1f,0x04,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_mip_cube: -; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) { +; VERDE-LABEL: load_mip_cube: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_mip_cube: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_mip_cube: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_mip_cube: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_cube: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x18,0x1f,0x04,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 
%t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_mip_1darray: -; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) { +; VERDE-LABEL: load_mip_1darray: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_mip_1darray: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_mip_1darray: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_mip_1darray: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_1darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x20,0x1f,0x04,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_mip_2darray: -; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf 
dim:SQ_RSRC_IMG_2D_ARRAY unorm ; define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) { +; VERDE-LABEL: load_mip_2darray: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_mip_2darray: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_mip_2darray: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_mip_2darray: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_2darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x04,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}store_1d: -; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) { +; VERDE-LABEL: store_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: 
store_1d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_1d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}store_2d: -; GFX6789: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { +; VERDE-LABEL: store_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_2d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_2d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; 
encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}store_3d: -; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r) { +; VERDE-LABEL: store_3d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_3d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_3d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_3d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}store_cube: -; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) { +; VERDE-LABEL: store_cube: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_cube: +; FIJI: ; 
%bb.0: ; %main_body +; FIJI-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_cube: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_cube: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_cube: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x18,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}store_1darray: -; GFX6789: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice) { +; VERDE-LABEL: store_1darray: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_1darray: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_1darray: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_1darray: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_1darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf 
dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x20,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}store_2darray: -; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) { +; VERDE-LABEL: store_2darray: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_2darray: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_2darray: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_2darray: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_2darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}store_2dmsaa: -; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ; define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 
%fragid) { +; VERDE-LABEL: store_2dmsaa: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_2dmsaa: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_2dmsaa: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_2dmsaa: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_2dmsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x30,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}store_2darraymsaa: -; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_store v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %fragid) { +; VERDE-LABEL: store_2darraymsaa: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_2darraymsaa: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_2darraymsaa: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_2darraymsaa: +; NOPRT: ; %bb.0: ; %main_body +; 
NOPRT-NEXT: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_2darraymsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x38,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}store_mip_1d: -; GFX6789: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %mip) { +; VERDE-LABEL: store_mip_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_mip_1d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_mip_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_mip_1d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_mip_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x24,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: 
{{^}}store_mip_2d: -; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %mip) { +; VERDE-LABEL: store_mip_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_mip_2d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_mip_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_mip_2d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_mip_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x1f,0x24,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}store_mip_3d: -; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r, i32 %mip) { +; VERDE-LABEL: store_mip_3d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_mip_3d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm +; FIJI-NEXT: 
s_endpgm +; +; GFX6789-LABEL: store_mip_3d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_mip_3d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_mip_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x24,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}store_mip_cube: -; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) { +; VERDE-LABEL: store_mip_cube: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_mip_cube: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_mip_cube: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_mip_cube: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_mip_cube: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: 
[0x18,0x1f,0x24,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}store_mip_1darray: -; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice, i32 %mip) { +; VERDE-LABEL: store_mip_1darray: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_mip_1darray: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_mip_1darray: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_mip_1darray: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_mip_1darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x20,0x1f,0x24,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}store_mip_2darray: -; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; define amdgpu_ps void @store_mip_2darray(<8 x 
i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) { +; VERDE-LABEL: store_mip_2darray: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_mip_2darray: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_mip_2darray: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_mip_2darray: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_mip_2darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x24,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}getresinfo_1d: -; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, i32 %mip) { +; VERDE-LABEL: getresinfo_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: getresinfo_1d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: getresinfo_1d: +; GFX6789: ; %bb.0: ; %main_body +; 
GFX6789-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: getresinfo_1d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}getresinfo_2d: -; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, i32 %mip) { +; VERDE-LABEL: getresinfo_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: getresinfo_2d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: getresinfo_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: getresinfo_2d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; 
+; GFX10-LABEL: getresinfo_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}getresinfo_3d: -; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, i32 %mip) { +; VERDE-LABEL: getresinfo_3d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: getresinfo_3d: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: getresinfo_3d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: getresinfo_3d: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: 
%v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}getresinfo_cube: -; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, i32 %mip) { +; VERDE-LABEL: getresinfo_cube: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: getresinfo_cube: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: getresinfo_cube: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: getresinfo_cube: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_cube: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x18,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}getresinfo_1darray: -; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY 
unorm ; define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, i32 %mip) { +; VERDE-LABEL: getresinfo_1darray: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: getresinfo_1darray: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: getresinfo_1darray: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: getresinfo_1darray: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_1darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x20,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}getresinfo_2darray: -; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, i32 %mip) { +; VERDE-LABEL: getresinfo_2darray: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader 
part epilog +; +; FIJI-LABEL: getresinfo_2darray: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: getresinfo_2darray: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: getresinfo_2darray: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_2darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}getresinfo_2dmsaa: -; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}} -; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ; define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, i32 %mip) { +; VERDE-LABEL: getresinfo_2dmsaa: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: getresinfo_2dmsaa: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: getresinfo_2dmsaa: +; GFX6789: ; %bb.0: ; %main_body +; 
GFX6789-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: getresinfo_2dmsaa: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_2dmsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x30,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}getresinfo_2darraymsaa: -; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}} -; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, i32 %mip) { +; VERDE-LABEL: getresinfo_2darraymsaa: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: getresinfo_2darraymsaa: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: getresinfo_2darraymsaa: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: getresinfo_2darraymsaa: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_get_resinfo v[0:3], v0, s[0:7] 
dmask:0xf unorm da +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_2darraymsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x38,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_1d_V1: -; GFX6789: image_load v0, v0, s[0:7] dmask:0x8 unorm{{$}} -; GFX10: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm ; define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) { +; VERDE-LABEL: load_1d_V1: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1d_V1: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1d_V1: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1d_V1: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d_V1: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x18,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to 
shader part epilog main_body: %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret float %v } -; GCN-LABEL: {{^}}load_1d_V2: -; GFX6789: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm{{$}} -; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm ; define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) { +; VERDE-LABEL: load_1d_V2: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1d_V2: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1d_V2: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1d_V2: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d_V2: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x19,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x float> %v } -; GCN-LABEL: {{^}}store_1d_V1: -; GFX6789: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}} -; GFX10: image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm ; define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) { +; VERDE-LABEL: store_1d_V1: +; VERDE: ; %bb.0: ; 
%main_body +; VERDE-NEXT: image_store v0, v1, s[0:7] dmask:0x2 unorm +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_1d_V1: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store v0, v1, s[0:7] dmask:0x2 unorm +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_1d_V1: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store v0, v1, s[0:7] dmask:0x2 unorm +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_1d_V1: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store v0, v1, s[0:7] dmask:0x2 unorm +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_1d_V1: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x12,0x20,0xf0,0x01,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.1d.f32.i32(float %vdata, i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}store_1d_V2: -; GFX6789: image_store v[0:1], v2, s[0:7] dmask:0xc unorm{{$}} -; GFX10: image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D unorm ; define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, i32 %s) { +; VERDE-LABEL: store_1d_V2: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc unorm +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_1d_V2: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc unorm +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_1d_V2: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc unorm +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_1d_V2: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc unorm +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_1d_V2: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D 
unorm ; encoding: [0x00,0x1c,0x20,0xf0,0x02,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float> %vdata, i32 12, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret void } -; GCN-LABEL: {{^}}load_1d_glc: -; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc{{$}} -; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) { +; VERDE-LABEL: load_1d_glc: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1d_glc: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1d_glc: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1d_glc: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d_glc: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x3f,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_1d_slc: -; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc{{$}} -; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf 
dim:SQ_RSRC_IMG_1D unorm slc ; define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) { +; VERDE-LABEL: load_1d_slc: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1d_slc: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1d_slc: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1d_slc: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d_slc: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc ; encoding: [0x00,0x1f,0x00,0xf2,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2) ret <4 x float> %v } -; GCN-LABEL: {{^}}load_1d_glc_slc: -; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc{{$}} -; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc ; define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) { +; VERDE-LABEL: load_1d_glc_slc: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: load_1d_glc_slc: +; FIJI: ; %bb.0: ; %main_body +; 
FIJI-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: load_1d_glc_slc: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: load_1d_glc_slc: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d_glc_slc: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc ; encoding: [0x00,0x3f,0x00,0xf2,0x00,0x00,0x00,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3) ret <4 x float> %v } -; GCN-LABEL: {{^}}store_1d_glc: -; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}} -; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) { +; VERDE-LABEL: store_1d_glc: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_1d_glc: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_1d_glc: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_1d_glc: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: 
store_1d_glc: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x3f,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1) ret void } -; GCN-LABEL: {{^}}store_1d_slc: -; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc{{$}} -; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc ; define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) { +; VERDE-LABEL: store_1d_slc: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_1d_slc: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_1d_slc: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_1d_slc: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_1d_slc: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc ; encoding: [0x00,0x1f,0x20,0xf2,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2) ret void } -; GCN-LABEL: {{^}}store_1d_glc_slc: -; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc{{$}} -; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc ; define amdgpu_ps void @store_1d_glc_slc(<8 x i32> 
inreg %rsrc, <4 x float> %vdata, i32 %s) { +; VERDE-LABEL: store_1d_glc_slc: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: store_1d_glc_slc: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: store_1d_glc_slc: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: store_1d_glc_slc: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: store_1d_glc_slc: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc ; encoding: [0x00,0x3f,0x20,0xf2,0x04,0x00,0x00,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3) ret void } -; GCN-LABEL: {{^}}getresinfo_dmask0: -; GCN-NOT: image -; GCN: ; return to shader part epilog define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) #0 { +; VERDE-LABEL: getresinfo_dmask0: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: getresinfo_dmask0: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: getresinfo_dmask0: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: getresinfo_dmask0: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_dmask0: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog main_body: %r = call <4 x float> 
@llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 0, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %r } -; Ideally, the register allocator would avoid the wait here ; -; GCN-LABEL: {{^}}image_store_wait: -; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf -; SI: s_waitcnt expcnt(0) -; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf -; GCN: s_waitcnt vmcnt(0) -; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 { +; VERDE-LABEL: image_store_wait: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm +; VERDE-NEXT: s_waitcnt expcnt(0) +; VERDE-NEXT: image_load v[0:3], v4, s[8:15] dmask:0xf unorm +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: image_store v[0:3], v4, s[16:23] dmask:0xf unorm +; VERDE-NEXT: s_endpgm +; +; FIJI-LABEL: image_store_wait: +; FIJI: ; %bb.0: ; %main_body +; FIJI-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm +; FIJI-NEXT: image_load v[0:3], v4, s[8:15] dmask:0xf unorm +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: image_store v[0:3], v4, s[16:23] dmask:0xf unorm +; FIJI-NEXT: s_endpgm +; +; GFX6789-LABEL: image_store_wait: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm +; GFX6789-NEXT: image_load v[0:3], v4, s[8:15] dmask:0xf unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: image_store v[0:3], v4, s[16:23] dmask:0xf unorm +; GFX6789-NEXT: s_endpgm +; +; NOPRT-LABEL: image_store_wait: +; NOPRT: ; %bb.0: ; %main_body +; NOPRT-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm +; NOPRT-NEXT: image_load v[0:3], v4, s[8:15] dmask:0xf unorm +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: image_store v[0:3], v4, s[16:23] dmask:0xf unorm +; NOPRT-NEXT: s_endpgm +; +; GFX10-LABEL: image_store_wait: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf 
dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX10-NEXT: image_load v[0:3], v4, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: image_store v[0:3], v4, s[16:23] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x20,0xf0,0x04,0x00,0x04,0x00] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] main_body: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0) %data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0) @@ -873,12 +3243,61 @@ ret void } -; SI won't merge ds memory operations, because of the signed offset bug, so -; we only have check lines for VI+. -; GFX8910-LABEL: image_load_mmo -; GFX8910: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 -; GFX8910: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4 define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 { +; VERDE-LABEL: image_load_mmo: +; VERDE: ; %bb.0: +; VERDE-NEXT: image_load v1, v[1:2], s[0:7] dmask:0x1 unorm +; VERDE-NEXT: v_mov_b32_e32 v2, 0 +; VERDE-NEXT: s_mov_b32 m0, -1 +; VERDE-NEXT: v_add_i32_e32 v3, vcc, 16, v0 +; VERDE-NEXT: ds_write_b32 v0, v2 +; VERDE-NEXT: ds_write_b32 v3, v2 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: v_mov_b32_e32 v0, v1 +; VERDE-NEXT: s_waitcnt lgkmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; FIJI-LABEL: image_load_mmo: +; FIJI: ; %bb.0: +; FIJI-NEXT: image_load v1, v[1:2], s[0:7] dmask:0x1 unorm +; FIJI-NEXT: v_mov_b32_e32 v3, 0 +; FIJI-NEXT: s_mov_b32 m0, -1 +; FIJI-NEXT: ds_write2_b32 v0, v3, v3 offset1:4 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v0, v1 +; FIJI-NEXT: s_waitcnt lgkmcnt(0) +; FIJI-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: 
image_load_mmo: +; GFX6789: ; %bb.0: +; GFX6789-NEXT: image_load v1, v[1:2], s[0:7] dmask:0x1 unorm +; GFX6789-NEXT: v_mov_b32_e32 v3, 0 +; GFX6789-NEXT: ds_write2_b32 v0, v3, v3 offset1:4 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: v_mov_b32_e32 v0, v1 +; GFX6789-NEXT: s_waitcnt lgkmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; NOPRT-LABEL: image_load_mmo: +; NOPRT: ; %bb.0: +; NOPRT-NEXT: image_load v1, v[1:2], s[0:7] dmask:0x1 unorm +; NOPRT-NEXT: v_mov_b32_e32 v3, 0 +; NOPRT-NEXT: ds_write2_b32 v0, v3, v3 offset1:4 +; NOPRT-NEXT: s_waitcnt vmcnt(0) +; NOPRT-NEXT: v_mov_b32_e32 v0, v1 +; NOPRT-NEXT: s_waitcnt lgkmcnt(0) +; NOPRT-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: image_load_mmo: +; GFX10: ; %bb.0: +; GFX10-NEXT: image_load v1, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x11,0x00,0xf0,0x01,0x01,0x00,0x00] +; GFX10-NEXT: v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ds_write2_b32 v0, v3, v3 offset1:4 ; encoding: [0x00,0x04,0x38,0xd8,0x00,0x03,0x03,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e] +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog store float 0.000000e+00, float addrspace(3)* %lds %c0 = extractelement <2 x i32> %c, i32 0 %c1 = extractelement <2 x i32> %c, i32 1 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll @@ -41,7 +41,7 @@ } ; GCN-LABEL: {{^}}gather4_c_cl_2d: -; GCN: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}} +; GCN: image_gather4_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16{{$}} define amdgpu_ps <4 x 
float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -57,7 +57,7 @@ } ; GCN-LABEL: {{^}}gather4_c_b_2d: -; GCN: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}} +; GCN: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16{{$}} define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -65,7 +65,7 @@ } ; GCN-LABEL: {{^}}gather4_b_cl_2d: -; GCN: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}} +; GCN: image_gather4_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16{{$}} define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -89,7 +89,7 @@ } ; GCN-LABEL: {{^}}gather4_c_l_2d: -; GCN: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}} +; GCN: image_gather4_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16{{$}} define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll 
b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll @@ -4,8 +4,8 @@ ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s ; GCN-LABEL: {{^}}image_gather4_b_2d_v4f16: -; UNPACKED: image_gather4_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x4 d16{{$}} -; PACKED: image_gather4_b v[0:1], v[0:3], s[0:7], s[8:11] dmask:0x4 d16{{$}} +; UNPACKED: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x4 d16{{$}} +; PACKED: image_gather4_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0x4 d16{{$}} ; GFX10: image_gather4_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D d16{{$}} define amdgpu_ps <2 x float> @image_gather4_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { main_body: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll @@ -12,7 +12,7 @@ } ; GCN-LABEL: {{^}}gather4_cube: -; GFX6789: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}} +; GFX6789: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da{{$}} ; GFX10: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE ; define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) { main_body: @@ -21,7 +21,7 @@ } ; GCN-LABEL: {{^}}gather4_2darray: -; GFX6789: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}} +; GFX6789: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da{{$}} ; GFX10: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg 
%samp, float %s, float %t, float %slice) { main_body: @@ -30,7 +30,7 @@ } ; GCN-LABEL: {{^}}gather4_c_2d: -; GFX6789: image_gather4_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1{{$}} ; GFX10: image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { main_body: @@ -39,7 +39,7 @@ } ; GCN-LABEL: {{^}}gather4_cl_2d: -; GFX6789: image_gather4_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1{{$}} ; GFX10: image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) { main_body: @@ -57,7 +57,7 @@ } ; GCN-LABEL: {{^}}gather4_b_2d: -; GFX6789: image_gather4_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1{{$}} ; GFX10: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { main_body: @@ -93,7 +93,7 @@ } ; GCN-LABEL: {{^}}gather4_l_2d: -; GFX6789: image_gather4_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1{{$}} ; GFX10: image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { main_body: @@ -120,7 +120,7 @@ } ; GCN-LABEL: {{^}}gather4_c_lz_2d: -; GFX6789: image_gather4_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1{{$}} ; GFX10: 
image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { main_body: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s ; GCN-LABEL: {{^}}gather4_o_2d: -; GCN: image_gather4_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GCN: image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1{{$}} define amdgpu_ps <4 x float> @gather4_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -82,7 +82,7 @@ } ; GCN-LABEL: {{^}}gather4_lz_o_2d: -; GCN: image_gather4_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GCN: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1{{$}} define amdgpu_ps <4 x float> @gather4_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll @@ -15,7 +15,7 @@ ; GCN-LABEL: {{^}}sample_3d: ; NONSA: v_mov_b32_e32 v3, v0 -; NONSA: image_sample v[0:3], v[1:4], +; NONSA: image_sample v[0:3], v[1:3], ; NSA: image_sample v[0:3], [v1, v2, v0], 
define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) { main_body: @@ -70,7 +70,7 @@ ; GCN-LABEL: {{^}}sample_contig_contig: ; GCN: image_sample_c_l v0, v[0:7], ; NSA: image_sample v1, v[5:7], -; NONSA: image_sample v1, v[5:8], +; NONSA: image_sample v1, v[5:7], define amdgpu_ps <2 x float> @sample_contig_contig(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %s2, float %t2, float %r2) { main_body: %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll @@ -1,374 +1,624 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s -; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { +; GCN-LABEL: sample_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_2d: -; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> 
inreg %samp, half %s, half %t) { +; GCN-LABEL: sample_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_3d: -; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) { +; GCN-LABEL: sample_3d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v1, v2 +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_cube: -; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}} define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) { +; GCN-LABEL: sample_cube: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v1, v2 +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> 
%samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_1darray: -; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da{{$}} define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) { +; GCN-LABEL: sample_1darray: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_2darray: -; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}} define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) { +; GCN-LABEL: sample_2darray: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v1, v2 +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_1d: -; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) { +; GCN-LABEL: sample_c_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample_c v[0:3], v[0:1], 
s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_2d: -; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) { +; GCN-LABEL: sample_c_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_cl_1d: -; GCN: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) { +; GCN-LABEL: sample_cl_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_cl_2d: -; GCN: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) { +; GCN-LABEL: 
sample_cl_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v1, v2 +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_cl_1d: -; GCN: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) { +; GCN-LABEL: sample_c_cl_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_cl_2d: -; GCN: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) { +; GCN-LABEL: sample_c_cl_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v2, v3 +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> 
@llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_b_1d: -; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) { +; GCN-LABEL: sample_b_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_b_2d: -; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) { +; GCN-LABEL: sample_b_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_b_1d: -; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) { +; GCN-LABEL: sample_c_b_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: 
s_wqm_b64 exec, exec +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_b_2d: -; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) { +; GCN-LABEL: sample_c_b_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_b_cl_1d: -; GCN: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) { +; GCN-LABEL: sample_b_cl_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: 
{{^}}sample_b_cl_2d: -; GCN: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) { +; GCN-LABEL: sample_b_cl_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v2, v3 +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_b_cl_1d: -; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) { +; GCN-LABEL: sample_c_b_cl_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_b_cl_2d: -; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) { +; GCN-LABEL: sample_c_b_cl_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b64 s[12:13], exec +; 
GCN-NEXT: s_wqm_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v3, v4 +; GCN-NEXT: s_and_b64 exec, exec, s[12:13] +; GCN-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_d_1d: -; GCN: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { +; GCN-LABEL: sample_d_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_d_2d: -; GCN: image_sample_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { +; GCN-LABEL: sample_d_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v3, v4 +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: image_sample_d v[0:3], v[1:3], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABAL: {{^}}sample_d_3d: -; GCN: image_sample_d v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}} define 
amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) { +; GCN-LABEL: sample_d_3d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v4, v3 +; GCN-NEXT: v_mov_b32_e32 v3, v2 +; GCN-NEXT: v_mov_b32_e32 v7, v8 +; GCN-NEXT: v_mov_b32_e32 v2, v0 +; GCN-NEXT: image_sample_d v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_d_1d: -; GCN: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { +; GCN-LABEL: sample_c_d_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_d_2d: -; GCN: image_sample_c_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { +; GCN-LABEL: sample_c_d_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v2, v1 +; GCN-NEXT: v_mov_b32_e32 v4, v5 +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: image_sample_c_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: 
s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_d_cl_1d: -; GCN: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { +; GCN-LABEL: sample_d_cl_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_d_cl_2d: -; GCN: image_sample_d_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { +; GCN-LABEL: sample_d_cl_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v3, v2 +; GCN-NEXT: v_mov_b32_e32 v5, v6 +; GCN-NEXT: v_mov_b32_e32 v2, v0 +; GCN-NEXT: image_sample_d_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_d_cl_1d: -; GCN: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> 
inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) { +; GCN-LABEL: sample_c_d_cl_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_d_cl_2d: -; GCN: image_sample_c_d_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { +; GCN-LABEL: sample_c_d_cl_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v4, v3 +; GCN-NEXT: v_mov_b32_e32 v6, v7 +; GCN-NEXT: v_mov_b32_e32 v3, v1 +; GCN-NEXT: v_mov_b32_e32 v2, v0 +; GCN-NEXT: image_sample_c_d_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_cd_1d: -; GCN: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { +; GCN-LABEL: sample_cd_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, 
half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_cd_2d: -; GCN: image_sample_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { +; GCN-LABEL: sample_cd_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v3, v4 +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: image_sample_cd v[0:3], v[1:3], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_cd_1d: -; GCN: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { +; GCN-LABEL: sample_c_cd_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_cd_2d: -; GCN: image_sample_c_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { +; GCN-LABEL: sample_c_cd_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v2, v1 +; GCN-NEXT: v_mov_b32_e32 v4, v5 +; GCN-NEXT: 
v_mov_b32_e32 v1, v0 +; GCN-NEXT: image_sample_c_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_cd_cl_1d: -; GCN: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { +; GCN-LABEL: sample_cd_cl_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_cd_cl_2d: -; GCN: image_sample_cd_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { +; GCN-LABEL: sample_cd_cl_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v3, v2 +; GCN-NEXT: v_mov_b32_e32 v5, v6 +; GCN-NEXT: v_mov_b32_e32 v2, v0 +; GCN-NEXT: image_sample_cd_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_cd_cl_1d: -; GCN: 
image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) { +; GCN-LABEL: sample_c_cd_cl_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_cd_cl_2d: -; GCN: image_sample_c_cd_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { +; GCN-LABEL: sample_c_cd_cl_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v4, v3 +; GCN-NEXT: v_mov_b32_e32 v6, v7 +; GCN-NEXT: v_mov_b32_e32 v3, v1 +; GCN-NEXT: v_mov_b32_e32 v2, v0 +; GCN-NEXT: image_sample_c_cd_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_l_1d: -; GCN: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) { +; GCN-LABEL: sample_l_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; 
return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_l_2d: -; GCN: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) { +; GCN-LABEL: sample_l_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v1, v2 +; GCN-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_l_1d: -; GCN: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) { +; GCN-LABEL: sample_c_l_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_l_2d: -; GCN: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) { +; GCN-LABEL: sample_c_l_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v2, v3 +; GCN-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog 
main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_lz_1d: -; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { +; GCN-LABEL: sample_lz_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_lz_2d: -; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { +; GCN-LABEL: sample_lz_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_lz_1d: -; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) { +; GCN-LABEL: sample_c_lz_1d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> 
%v } -; GCN-LABEL: {{^}}sample_c_lz_2d: -; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) { +; GCN-LABEL: sample_c_lz_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1: -; GCN: image_sample_c_d_o v0, v[2:9], s[0:7], s[8:11] dmask:0x4 a16 da{{$}} define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { +; GCN-LABEL: sample_c_d_o_2darray_V1: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v5, v4 +; GCN-NEXT: v_mov_b32_e32 v4, v2 +; GCN-NEXT: v_mov_b32_e32 v7, v8 +; GCN-NEXT: v_mov_b32_e32 v3, v1 +; GCN-NEXT: v_mov_b32_e32 v2, v0 +; GCN-NEXT: image_sample_c_d_o v0, v[2:9], s[0:7], s[8:11] dmask:0x4 a16 da +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret float %v } -; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2: -; GCN: image_sample_c_d_o v[0:1], v[2:9], s[0:7], s[8:11] dmask:0x6 a16 da{{$}} define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { +; GCN-LABEL: 
sample_c_d_o_2darray_V2: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v5, v4 +; GCN-NEXT: v_mov_b32_e32 v4, v2 +; GCN-NEXT: v_mov_b32_e32 v7, v8 +; GCN-NEXT: v_mov_b32_e32 v3, v1 +; GCN-NEXT: v_mov_b32_e32 v2, v0 +; GCN-NEXT: image_sample_c_d_o v[0:1], v[2:9], s[0:7], s[8:11] dmask:0x6 a16 da +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <2 x float> %v diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll @@ -1,22 +1,120 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED,GFX89 %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX81,GFX89 %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX9,GFX89 %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=TONGA %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GFX81 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GFX9 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GFX10 %s -; GCN-LABEL: {{^}}image_sample_2d_f16: -; GFX89: image_sample v0, v[0:1], s[0:7], s[8:11] 
dmask:0x1 d16{{$}} -; GFX10: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D d16{{$}} define amdgpu_ps half @image_sample_2d_f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +; TONGA-LABEL: image_sample_2d_f16: +; TONGA: ; %bb.0: ; %main_body +; TONGA-NEXT: s_mov_b64 s[12:13], exec +; TONGA-NEXT: s_wqm_b64 exec, exec +; TONGA-NEXT: s_and_b64 exec, exec, s[12:13] +; TONGA-NEXT: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 d16 +; TONGA-NEXT: s_waitcnt vmcnt(0) +; TONGA-NEXT: ; return to shader part epilog +; +; GFX81-LABEL: image_sample_2d_f16: +; GFX81: ; %bb.0: ; %main_body +; GFX81-NEXT: s_mov_b64 s[12:13], exec +; GFX81-NEXT: s_wqm_b64 exec, exec +; GFX81-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX81-NEXT: s_nop 0 +; GFX81-NEXT: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 d16 +; GFX81-NEXT: s_waitcnt vmcnt(0) +; GFX81-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: image_sample_2d_f16: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b64 s[12:13], exec +; GFX9-NEXT: s_wqm_b64 exec, exec +; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX9-NEXT: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 d16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: image_sample_2d_f16: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10-NEXT: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D d16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog main_body: %tex = call half @llvm.amdgcn.image.sample.2d.f16.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) ret half %tex } -; GCN-LABEL: {{^}}image_sample_2d_f16_tfe: -; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0 -; PACKED: image_sample v[2:3], v[0:1], s[0:7], s[8:11] dmask:0x1 tfe d16{{$}} -; 
UNPACKED: image_sample v[2:3], v[0:1], s[0:7], s[8:11] dmask:0x1 tfe d16{{$}} define amdgpu_ps half @image_sample_2d_f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, i32 addrspace(1)* inreg %out) { +; TONGA-LABEL: image_sample_2d_f16_tfe: +; TONGA: ; %bb.0: ; %main_body +; TONGA-NEXT: s_mov_b64 s[14:15], exec +; TONGA-NEXT: s_wqm_b64 exec, exec +; TONGA-NEXT: v_mov_b32_e32 v2, 0 +; TONGA-NEXT: v_mov_b32_e32 v4, s12 +; TONGA-NEXT: v_mov_b32_e32 v5, s13 +; TONGA-NEXT: v_mov_b32_e32 v3, v2 +; TONGA-NEXT: s_and_b64 exec, exec, s[14:15] +; TONGA-NEXT: image_sample v[2:3], v[0:1], s[0:7], s[8:11] dmask:0x1 tfe d16 +; TONGA-NEXT: s_waitcnt vmcnt(0) +; TONGA-NEXT: v_mov_b32_e32 v0, v2 +; TONGA-NEXT: flat_store_dword v[4:5], v3 +; TONGA-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; TONGA-NEXT: ; return to shader part epilog +; +; GFX81-LABEL: image_sample_2d_f16_tfe: +; GFX81: ; %bb.0: ; %main_body +; GFX81-NEXT: s_mov_b64 s[14:15], exec +; GFX81-NEXT: s_wqm_b64 exec, exec +; GFX81-NEXT: v_mov_b32_e32 v2, 0 +; GFX81-NEXT: v_mov_b32_e32 v4, s12 +; GFX81-NEXT: v_mov_b32_e32 v5, s13 +; GFX81-NEXT: v_mov_b32_e32 v3, v2 +; GFX81-NEXT: s_and_b64 exec, exec, s[14:15] +; GFX81-NEXT: s_nop 0 +; GFX81-NEXT: image_sample v[2:3], v[0:1], s[0:7], s[8:11] dmask:0x1 tfe d16 +; GFX81-NEXT: s_waitcnt vmcnt(0) +; GFX81-NEXT: v_mov_b32_e32 v0, v2 +; GFX81-NEXT: s_nop 0 +; GFX81-NEXT: s_nop 0 +; GFX81-NEXT: flat_store_dword v[4:5], v3 +; GFX81-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX81-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: image_sample_2d_f16_tfe: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b64 s[14:15], exec +; GFX9-NEXT: s_wqm_b64 exec, exec +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mov_b32_e32 v4, s12 +; GFX9-NEXT: v_mov_b32_e32 v5, s13 +; GFX9-NEXT: v_mov_b32_e32 v3, v2 +; GFX9-NEXT: s_and_b64 exec, exec, s[14:15] +; GFX9-NEXT: image_sample v[2:3], v[0:1], s[0:7], s[8:11] dmask:0x1 tfe d16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: 
v_mov_b32_e32 v0, v2 +; GFX9-NEXT: global_store_dword v[4:5], v3, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: image_sample_2d_f16_tfe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s14, exec_lo +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_mov_b32_e32 v4, s12 +; GFX10-NEXT: v_mov_b32_e32 v5, s13 +; GFX10-NEXT: v_mov_b32_e32 v3, v2 +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14 +; GFX10-NEXT: image_sample v[2:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe d16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, v2 +; GFX10-NEXT: global_store_dword v[4:5], v3, off +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: ; return to shader part epilog main_body: %tex = call {half,i32} @llvm.amdgcn.image.sample.2d.f16i32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0) %tex.vec = extractvalue {half, i32} %tex, 0 @@ -25,22 +123,84 @@ ret half %tex.vec } -; GCN-LABEL: {{^}}image_sample_c_d_1d_v2f16: -; UNPACKED: image_sample_c_d v[0:1], v[0:3], s[0:7], s[8:11] dmask:0x3 d16{{$}} -; PACKED: image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 d16{{$}} -; GFX10: image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D d16{{$}} define amdgpu_ps float @image_sample_c_d_1d_v2f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { +; TONGA-LABEL: image_sample_c_d_1d_v2f16: +; TONGA: ; %bb.0: ; %main_body +; TONGA-NEXT: image_sample_c_d v[0:1], v[0:3], s[0:7], s[8:11] dmask:0x3 d16 +; TONGA-NEXT: s_waitcnt vmcnt(0) +; TONGA-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; TONGA-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; TONGA-NEXT: ; return to shader part epilog +; +; GFX81-LABEL: image_sample_c_d_1d_v2f16: +; GFX81: ; %bb.0: ; %main_body +; 
GFX81-NEXT: image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 d16 +; GFX81-NEXT: s_waitcnt vmcnt(0) +; GFX81-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: image_sample_c_d_1d_v2f16: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 d16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: image_sample_c_d_1d_v2f16: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D d16 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog main_body: %tex = call <2 x half> @llvm.amdgcn.image.sample.c.d.1d.v2f16.f32.f32(i32 3, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) %r = bitcast <2 x half> %tex to float ret float %r } -; GCN-LABEL: {{^}}image_sample_c_d_1d_v2f16_tfe: -; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0 -; UNPACKED: image_sample_c_d v[{{[0-9]+:[0-9]+}}], v[0:3], s[0:7], s[8:11] dmask:0x3 tfe d16{{$}} -; PACKED: image_sample_c_d v[{{[0-9]+:[0-9]+}}], v[0:3], s[0:7], s[8:11] dmask:0x3 tfe d16{{$}} define amdgpu_ps <2 x float> @image_sample_c_d_1d_v2f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { +; TONGA-LABEL: image_sample_c_d_1d_v2f16_tfe: +; TONGA: ; %bb.0: ; %main_body +; TONGA-NEXT: v_mov_b32_e32 v4, 0 +; TONGA-NEXT: v_mov_b32_e32 v5, v4 +; TONGA-NEXT: v_mov_b32_e32 v6, v4 +; TONGA-NEXT: image_sample_c_d v[4:6], v[0:3], s[0:7], s[8:11] dmask:0x3 tfe d16 +; TONGA-NEXT: s_waitcnt vmcnt(0) +; TONGA-NEXT: v_lshlrev_b32_e32 v0, 16, v5 +; TONGA-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; TONGA-NEXT: v_mov_b32_e32 v1, v6 +; TONGA-NEXT: ; return to shader part epilog +; +; GFX81-LABEL: image_sample_c_d_1d_v2f16_tfe: +; GFX81: ; %bb.0: ; %main_body +; GFX81-NEXT: 
v_mov_b32_e32 v4, 0 +; GFX81-NEXT: v_mov_b32_e32 v5, v4 +; GFX81-NEXT: s_nop 0 +; GFX81-NEXT: s_nop 0 +; GFX81-NEXT: image_sample_c_d v[4:5], v[0:3], s[0:7], s[8:11] dmask:0x3 tfe d16 +; GFX81-NEXT: s_waitcnt vmcnt(0) +; GFX81-NEXT: v_mov_b32_e32 v0, v4 +; GFX81-NEXT: v_mov_b32_e32 v1, v5 +; GFX81-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: image_sample_c_d_1d_v2f16_tfe: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_mov_b32_e32 v5, v4 +; GFX9-NEXT: image_sample_c_d v[4:5], v[0:3], s[0:7], s[8:11] dmask:0x3 tfe d16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-NEXT: v_mov_b32_e32 v1, v5 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: image_sample_c_d_1d_v2f16_tfe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v5, v0 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: v_mov_b32_e32 v4, v1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v1, v0 +; GFX10-NEXT: image_sample_c_d v[0:1], [v5, v4, v2, v3], s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe d16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog main_body: %tex = call {<2 x half>,i32} @llvm.amdgcn.image.sample.c.d.1d.v2f16i32.f32.f32(i32 3, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0) %tex.vec = extractvalue {<2 x half>, i32} %tex, 0 @@ -52,22 +212,120 @@ ret <2 x float> %r } -; GCN-LABEL: {{^}}image_sample_b_2d_v4f16: -; UNPACKED: image_sample_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf d16{{$}} -; PACKED: image_sample_b v[0:1], v[0:3], s[0:7], s[8:11] dmask:0xf d16{{$}} -; GFX10: image_sample_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D d16{{$}} define amdgpu_ps <2 x float> @image_sample_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { +; TONGA-LABEL: image_sample_b_2d_v4f16: +; TONGA: ; %bb.0: ; %main_body +; TONGA-NEXT: s_mov_b64 
s[12:13], exec +; TONGA-NEXT: s_wqm_b64 exec, exec +; TONGA-NEXT: s_and_b64 exec, exec, s[12:13] +; TONGA-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf d16 +; TONGA-NEXT: s_waitcnt vmcnt(0) +; TONGA-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; TONGA-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; TONGA-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; TONGA-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; TONGA-NEXT: ; return to shader part epilog +; +; GFX81-LABEL: image_sample_b_2d_v4f16: +; GFX81: ; %bb.0: ; %main_body +; GFX81-NEXT: s_mov_b64 s[12:13], exec +; GFX81-NEXT: s_wqm_b64 exec, exec +; GFX81-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX81-NEXT: s_nop 0 +; GFX81-NEXT: image_sample_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0xf d16 +; GFX81-NEXT: s_waitcnt vmcnt(0) +; GFX81-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: image_sample_b_2d_v4f16: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b64 s[12:13], exec +; GFX9-NEXT: s_wqm_b64 exec, exec +; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX9-NEXT: image_sample_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0xf d16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: image_sample_b_2d_v4f16: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10-NEXT: image_sample_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D d16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog main_body: %tex = call <4 x half> @llvm.amdgcn.image.sample.b.2d.v4f16.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) %r = bitcast <4 x half> %tex to <2 x float> ret <2 x float> %r } -; GCN-LABEL: {{^}}image_sample_b_2d_v4f16_tfe: -; GCN: 
v_mov_b32_e32 v{{[0-9]+}}, 0 -; UNPACKED: image_sample_b v[{{[0-9]+:[0-9]+}}], v[0:3], s[0:7], s[8:11] dmask:0xf tfe d16{{$}} -; PACKED: image_sample_b v[{{[0-9]+:[0-9]+}}], v[0:3], s[0:7], s[8:11] dmask:0xf tfe d16{{$}} define amdgpu_ps <4 x float> @image_sample_b_2d_v4f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { +; TONGA-LABEL: image_sample_b_2d_v4f16_tfe: +; TONGA: ; %bb.0: ; %main_body +; TONGA-NEXT: s_mov_b64 s[12:13], exec +; TONGA-NEXT: s_wqm_b64 exec, exec +; TONGA-NEXT: v_mov_b32_e32 v3, 0 +; TONGA-NEXT: v_mov_b32_e32 v4, v3 +; TONGA-NEXT: v_mov_b32_e32 v5, v3 +; TONGA-NEXT: v_mov_b32_e32 v6, v3 +; TONGA-NEXT: v_mov_b32_e32 v7, v3 +; TONGA-NEXT: s_and_b64 exec, exec, s[12:13] +; TONGA-NEXT: image_sample_b v[3:7], v[0:2], s[0:7], s[8:11] dmask:0xf tfe d16 +; TONGA-NEXT: s_waitcnt vmcnt(0) +; TONGA-NEXT: v_lshlrev_b32_e32 v0, 16, v4 +; TONGA-NEXT: v_lshlrev_b32_e32 v1, 16, v6 +; TONGA-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; TONGA-NEXT: v_or_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; TONGA-NEXT: v_mov_b32_e32 v2, v7 +; TONGA-NEXT: ; return to shader part epilog +; +; GFX81-LABEL: image_sample_b_2d_v4f16_tfe: +; GFX81: ; %bb.0: ; %main_body +; GFX81-NEXT: s_mov_b64 s[12:13], exec +; GFX81-NEXT: s_wqm_b64 exec, exec +; GFX81-NEXT: v_mov_b32_e32 v3, 0 +; GFX81-NEXT: v_mov_b32_e32 v4, v3 +; GFX81-NEXT: v_mov_b32_e32 v5, v3 +; GFX81-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX81-NEXT: s_nop 0 +; GFX81-NEXT: image_sample_b v[3:5], v[0:2], s[0:7], s[8:11] dmask:0xf tfe d16 +; GFX81-NEXT: s_waitcnt vmcnt(0) +; GFX81-NEXT: v_mov_b32_e32 v0, v3 +; GFX81-NEXT: v_mov_b32_e32 v1, v4 +; GFX81-NEXT: v_mov_b32_e32 v2, v5 +; GFX81-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: image_sample_b_2d_v4f16_tfe: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: s_mov_b64 s[12:13], exec +; GFX9-NEXT: s_wqm_b64 exec, exec +; 
GFX9-NEXT: v_mov_b32_e32 v3, 0 +; GFX9-NEXT: v_mov_b32_e32 v4, v3 +; GFX9-NEXT: v_mov_b32_e32 v5, v3 +; GFX9-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX9-NEXT: image_sample_b v[3:5], v[0:2], s[0:7], s[8:11] dmask:0xf tfe d16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, v3 +; GFX9-NEXT: v_mov_b32_e32 v1, v4 +; GFX9-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: image_sample_b_2d_v4f16_tfe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10-NEXT: v_mov_b32_e32 v3, v0 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: v_mov_b32_e32 v5, v2 +; GFX10-NEXT: v_mov_b32_e32 v4, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, v0 +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10-NEXT: image_sample_b v[0:2], v[3:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D tfe d16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog main_body: %tex = call {<4 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v4f16i32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0) %tex.vec = extractvalue {<4 x half>, i32} %tex, 0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll @@ -1,25 +1,99 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc 
-march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10 %s -; GCN-LABEL: {{^}}sample_1d: -; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: sample_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret 
<4 x float> %v } -; GCN-LABEL: {{^}}sample_1d_tfe: -; GCN: v_mov_b32_e32 v0, 0 -; GCN: v_mov_b32_e32 v1, v0 -; GCN: v_mov_b32_e32 v2, v0 -; GCN: v_mov_b32_e32 v3, v0 -; GCN: v_mov_b32_e32 v4, v0 -; GFX6789: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe{{$}} -; GFX10: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) { +; VERDE-LABEL: sample_1d_tfe: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[16:17], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: v_mov_b32_e32 v5, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: s_mov_b32 s15, 0xf000 +; VERDE-NEXT: s_mov_b32 s14, -1 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: s_and_b64 exec, exec, s[16:17] +; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1d_tfe: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[14:15], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: v_mov_b32_e32 v5, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v6, s12 +; GFX6789-NEXT: v_mov_b32_e32 v7, s13 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15] +; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[6:7], v4, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_1d_tfe: +; GFX10: ; 
%bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s14, exec_lo ; encoding: [0x7e,0x03,0x8e,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v6, s12 ; encoding: [0x0c,0x02,0x0c,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v7, s13 ; encoding: [0x0d,0x02,0x0e,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14 ; encoding: [0x7e,0x0e,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x0f,0x81,0xf0,0x05,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[6:7], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x06,0x04,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -28,12 +102,43 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_1: -; GCN: v_mov_b32_e32 v0, 0 -; GCN: v_mov_b32_e32 v1, v0 -; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe{{$}} -; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) { +; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1: 
+; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_1: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_1d_tfe_adjust_writemask_1: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x01,0x81,0xf0,0x02,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) %res.vec = extractvalue {<4 x float>,i32} %v, 0 @@ -45,12 +150,43 @@ ret <2 x float> %res } -; GCN-LABEL: 
{{^}}sample_1d_tfe_adjust_writemask_2: -; GCN: v_mov_b32_e32 v0, 0 -; GCN: v_mov_b32_e32 v1, v0 -; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe{{$}} -; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: sample_1d_tfe_adjust_writemask_2: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_2: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_1d_tfe_adjust_writemask_2: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe ; encoding: 
[0x00,0x02,0x81,0xf0,0x02,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) %res.vec = extractvalue {<4 x float>,i32} %v, 0 @@ -62,12 +198,43 @@ ret <2 x float> %res } -; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_3: -; GCN: v_mov_b32_e32 v0, 0 -; GCN: v_mov_b32_e32 v1, v0 -; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe{{$}} -; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: sample_1d_tfe_adjust_writemask_3: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_3: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_1d_tfe_adjust_writemask_3: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: 
[0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x04,0x81,0xf0,0x02,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) %res.vec = extractvalue {<4 x float>,i32} %v, 0 @@ -79,12 +246,43 @@ ret <2 x float> %res } -; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_4: -; GCN: v_mov_b32_e32 v0, 0 -; GCN: v_mov_b32_e32 v1, v0 -; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe{{$}} -; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: sample_1d_tfe_adjust_writemask_4: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_4: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; 
GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_1d_tfe_adjust_writemask_4: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x08,0x81,0xf0,0x02,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) %res.vec = extractvalue {<4 x float>,i32} %v, 0 @@ -96,13 +294,46 @@ ret <2 x float> %res } -; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_12: -; GCN: v_mov_b32_e32 v0, 0 -; GCN: v_mov_b32_e32 v1, v0 -; GCN: v_mov_b32_e32 v2, v0 -; GFX6789: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe{{$}} -; GFX10: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: sample_1d_tfe_adjust_writemask_12: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: 
image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_12: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_1d_tfe_adjust_writemask_12: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x03,0x81,0xf0,0x03,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) %res.vec = extractvalue {<4 x float>,i32} %v, 0 @@ -116,13 +347,46 @@ ret <4 x float> %res } -; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_24: -; GCN: v_mov_b32_e32 v0, 0 -; GCN: v_mov_b32_e32 v1, v0 -; GCN: v_mov_b32_e32 v2, v0 -; GFX6789: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa 
tfe{{$}} -; GFX10: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: sample_1d_tfe_adjust_writemask_24: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_24: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_1d_tfe_adjust_writemask_24: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe ; encoding: 
[0x00,0x0a,0x81,0xf0,0x03,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) %res.vec = extractvalue {<4 x float>,i32} %v, 0 @@ -136,14 +400,49 @@ ret <4 x float> %res } -; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_134: -; GCN: v_mov_b32_e32 v0, 0 -; GCN: v_mov_b32_e32 v1, v0 -; GCN: v_mov_b32_e32 v2, v0 -; GCN: v_mov_b32_e32 v3, v0 -; GFX6789: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe{{$}} -; GFX10: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: sample_1d_tfe_adjust_writemask_134: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_134: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: 
sample_1d_tfe_adjust_writemask_134: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x0d,0x81,0xf0,0x04,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) %res.vec = extractvalue {<4 x float>,i32} %v, 0 @@ -159,15 +458,64 @@ ret <4 x float> %res } -; GCN-LABEL: {{^}}sample_1d_lwe: -; GCN: v_mov_b32_e32 v0, 0 -; GCN: v_mov_b32_e32 v1, v0 -; GCN: v_mov_b32_e32 v2, v0 -; GCN: v_mov_b32_e32 v3, v0 -; GCN: v_mov_b32_e32 v4, v0 -; GFX6789: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe{{$}} -; GFX10: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) { +; VERDE-LABEL: sample_1d_lwe: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[16:17], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: v_mov_b32_e32 v5, v0 +; VERDE-NEXT: v_mov_b32_e32 v0, 0 +; VERDE-NEXT: s_mov_b32 s15, 0xf000 +; VERDE-NEXT: s_mov_b32 s14, -1 +; VERDE-NEXT: v_mov_b32_e32 v1, v0 +; VERDE-NEXT: 
v_mov_b32_e32 v2, v0 +; VERDE-NEXT: v_mov_b32_e32 v3, v0 +; VERDE-NEXT: v_mov_b32_e32 v4, v0 +; VERDE-NEXT: s_and_b64 exec, exec, s[16:17] +; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1d_lwe: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[14:15], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: v_mov_b32_e32 v5, v0 +; GFX6789-NEXT: v_mov_b32_e32 v0, 0 +; GFX6789-NEXT: v_mov_b32_e32 v6, s12 +; GFX6789-NEXT: v_mov_b32_e32 v7, s13 +; GFX6789-NEXT: v_mov_b32_e32 v1, v0 +; GFX6789-NEXT: v_mov_b32_e32 v2, v0 +; GFX6789-NEXT: v_mov_b32_e32 v3, v0 +; GFX6789-NEXT: v_mov_b32_e32 v4, v0 +; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15] +; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[6:7], v4, off +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_1d_lwe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s14, exec_lo ; encoding: [0x7e,0x03,0x8e,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v6, s12 ; encoding: [0x0c,0x02,0x0c,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v7, s13 ; encoding: [0x0d,0x02,0x0e,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14 ; 
encoding: [0x7e,0x0e,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; encoding: [0x00,0x0f,0x82,0xf0,0x05,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[6:7], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x06,0x04,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -176,406 +524,1274 @@ ret <4 x float> %v.vec } -; GCN-LABEL: {{^}}sample_2d: -; GFX6789: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +; VERDE-LABEL: sample_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; 
encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_3d: -; GFX6789: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) { +; VERDE-LABEL: sample_3d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_3d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; 
GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_cube: -; GFX6789: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf da{{$}} -; GFX10: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) { +; VERDE-LABEL: sample_cube: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_cube: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_cube: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 
ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_1darray: -; GFX6789: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da{{$}} -; GFX10: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) { +; VERDE-LABEL: sample_1darray: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1darray: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_1darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x20,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_2darray: -; GFX6789: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf da{{$}} -; GFX10: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 
dim:SQ_RSRC_IMG_2D_ARRAY ; define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) { +; VERDE-LABEL: sample_2darray: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_2darray: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_2darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_1d: -; GFX6789: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { +; VERDE-LABEL: sample_c_1d: +; VERDE: ; 
%bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa0,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_2d: -; GFX6789: image_sample_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { +; VERDE-LABEL: sample_c_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 
+; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa0,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_cl_1d: -; GFX6789: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) { +; VERDE-LABEL: sample_cl_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_cl_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: 
s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x84,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_cl_2d: -; GFX6789: image_sample_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) { +; VERDE-LABEL: sample_cl_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_cl_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to 
shader part epilog +; +; GFX10-LABEL: sample_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x84,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_cl_1d: -; GFX6789: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) { +; VERDE-LABEL: sample_c_cl_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_cl_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; 
implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa4,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_cl_2d: -; GFX6789: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) { +; VERDE-LABEL: sample_c_cl_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_cl_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; 
encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa4,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_b_1d: -; GFX6789: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) { +; VERDE-LABEL: sample_b_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_b_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_b_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x94,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: 
s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_b_2d: -; GFX6789: image_sample_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { +; VERDE-LABEL: sample_b_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_b_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_b_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x94,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, 
<8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_b_1d: -; GFX6789: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) { +; VERDE-LABEL: sample_c_b_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_b_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_b_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xb4,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_b_2d: -; GFX6789: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] 
dmask:0xf{{$}} -; GFX10: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) { +; VERDE-LABEL: sample_c_b_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_b_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_b_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xb4,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_b_cl_1d: -; GFX6789: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> 
@sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) { +; VERDE-LABEL: sample_b_cl_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_b_cl_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_b_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x98,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_b_cl_2d: -; GFX6789: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) { +; VERDE-LABEL: sample_b_cl_2d: +; VERDE: ; %bb.0: ; 
%main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_b_cl_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_b_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x98,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_b_cl_1d: -; GFX6789: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) { +; VERDE-LABEL: sample_c_b_cl_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, 
s[12:13] +; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_b_cl_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_b_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xb8,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_b_cl_2d: -; GFX6789: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { +; VERDE-LABEL: sample_c_b_cl_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: 
s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_b_cl_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_b_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xb8,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_d_1d: -; GFX6789: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { +; VERDE-LABEL: sample_d_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_d_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to 
shader part epilog +; +; GFX10-LABEL: sample_d_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x88,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_d_2d: -; GFX6789: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +; VERDE-LABEL: sample_d_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_d_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_d_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x88,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: 
{{^}}sample_c_d_1d: -; GFX6789: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { +; VERDE-LABEL: sample_c_d_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_d_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_d_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_d_2d: -; GFX6789: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +; VERDE-LABEL: sample_c_d_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_d_2d: +; 
GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_d_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_d_cl_1d: -; GFX6789: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { +; VERDE-LABEL: sample_d_cl_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_d_cl_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_d_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x 
float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_d_cl_2d: -; GFX6789: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +; VERDE-LABEL: sample_d_cl_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_d_cl_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_d_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_d_cl_1d: -; GFX6789: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float 
%clamp) { +; VERDE-LABEL: sample_c_d_cl_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_d_cl_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_d_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xac,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_d_cl_2d: -; GFX6789: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +; VERDE-LABEL: sample_c_d_cl_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_d_cl_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_d_cl_2d: +; GFX10: ; %bb.0: ; 
%main_body +; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xac,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_cd_1d: -; GFX6789: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { +; VERDE-LABEL: sample_cd_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_cd_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_cd_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa0,0xf1,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_cd_2d: -; GFX6789: image_sample_cd v[0:3], v[0:7], s[0:7], 
s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +; VERDE-LABEL: sample_cd_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_cd_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_cd_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa0,0xf1,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_cd_1d: -; GFX6789: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { +; VERDE-LABEL: sample_c_cd_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_cd_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_c_cd v[0:3], 
v[0:3], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_cd_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_cd_2d: -; GFX6789: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +; VERDE-LABEL: sample_c_cd_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_cd_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_cd_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, 
float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_cd_cl_1d: -; GFX6789: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { +; VERDE-LABEL: sample_cd_cl_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_cd_cl_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_cd_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_cd_cl_2d: -; GFX6789: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +; VERDE-LABEL: sample_cd_cl_2d: +; VERDE: ; %bb.0: ; 
%main_body +; VERDE-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_cd_cl_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_cd_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_cd_cl_1d: -; GFX6789: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { +; VERDE-LABEL: sample_c_cd_cl_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_cd_cl_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_cd_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] 
dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xac,0xf1,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_cd_cl_2d: -; GFX6789: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +; VERDE-LABEL: sample_c_cd_cl_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_cd_cl_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_cd_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xac,0xf1,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; 
GCN-LABEL: {{^}}sample_l_1d: -; GFX6789: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { +; VERDE-LABEL: sample_l_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_l_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_l_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x90,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_l_2d: -; GFX6789: image_sample_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { +; VERDE-LABEL: sample_l_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_l_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; 
GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_l_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x90,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_l_1d: -; GFX6789: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { +; VERDE-LABEL: sample_c_l_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_l_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_l_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xb0,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_l_2d: -; GFX6789: 
image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { +; VERDE-LABEL: sample_c_l_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_l_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_l_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xb0,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_lz_1d: -; GFX6789: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: sample_lz_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_lz_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; 
GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_lz_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x9c,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_lz_2d: -; GFX6789: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +; VERDE-LABEL: sample_lz_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_lz_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_lz_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x9c,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_lz_1d: -; GFX6789: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: 
image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { +; VERDE-LABEL: sample_c_lz_1d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_lz_1d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_lz_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xbc,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_lz_2d: -; GFX6789: image_sample_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -; GFX10: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { +; VERDE-LABEL: sample_c_lz_2d: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_lz_2d: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; 
GFX10-LABEL: sample_c_lz_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xbc,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1: -; GFX6789: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da{{$}} -; GFX10: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ; define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { +; VERDE-LABEL: sample_c_d_o_2darray_V1: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_d_o_2darray_V1: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_d_o_2darray_V1: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x04,0xe8,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, 
float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret float %v } -; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1_tfe: -; GFX6789: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da{{$}} -; GFX10: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) { +; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v9, 0 +; VERDE-NEXT: v_mov_b32_e32 v10, v9 +; VERDE-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da +; VERDE-NEXT: s_mov_b32 s15, 0xf000 +; VERDE-NEXT: s_mov_b32 s14, -1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: v_mov_b32_e32 v0, v9 +; VERDE-NEXT: buffer_store_dword v10, off, s[12:15], 0 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v9, 0 +; GFX6789-NEXT: v_mov_b32_e32 v10, v9 +; GFX6789-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da +; GFX6789-NEXT: v_mov_b32_e32 v0, s12 +; GFX6789-NEXT: v_mov_b32_e32 v1, s13 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: global_store_dword v[0:1], v10, off +; GFX6789-NEXT: v_mov_b32_e32 v0, v9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v10, v0 ; encoding: [0x00,0x03,0x14,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v9, v1 ; encoding: [0x01,0x03,0x12,0x7e] +; GFX10-NEXT: 
v_mov_b32_e32 v11, s13 ; encoding: [0x0d,0x02,0x16,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x2c,0x04,0xe9,0xf0,0x0a,0x00,0x40,0x00,0x09,0x02,0x03,0x04,0x05,0x06,0x07,0x08] +; GFX10-NEXT: v_mov_b32_e32 v10, s12 ; encoding: [0x0c,0x02,0x14,0x7e] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: global_store_dword v[10:11], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x0a,0x01,0x7d,0x00] +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) %v.vec = extractvalue {float, i32} %v, 0 @@ -584,19 +1800,67 @@ ret float %v.vec } -; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2: -; GFX6789: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da{{$}} -; GFX10: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ; define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { +; VERDE-LABEL: sample_c_d_o_2darray_V2: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_d_o_2darray_V2: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; 
GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_d_o_2darray_V2: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x06,0xe8,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <2 x float> %v } -; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2_tfe: -; GFX6789: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da{{$}} -; GFX10: image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { +; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: v_mov_b32_e32 v9, 0 +; VERDE-NEXT: v_mov_b32_e32 v10, v9 +; VERDE-NEXT: v_mov_b32_e32 v11, v9 +; VERDE-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: v_mov_b32_e32 v0, v9 +; VERDE-NEXT: v_mov_b32_e32 v1, v10 +; VERDE-NEXT: v_mov_b32_e32 v2, v11 +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: v_mov_b32_e32 v9, 0 +; GFX6789-NEXT: v_mov_b32_e32 v10, v9 +; GFX6789-NEXT: v_mov_b32_e32 v11, v9 +; GFX6789-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; 
GFX6789-NEXT: v_mov_b32_e32 v0, v9 +; GFX6789-NEXT: v_mov_b32_e32 v1, v10 +; GFX6789-NEXT: v_mov_b32_e32 v2, v11 +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v11, v0 ; encoding: [0x00,0x03,0x16,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v9, v2 ; encoding: [0x02,0x03,0x12,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v10, v1 ; encoding: [0x01,0x03,0x14,0x7e] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e] +; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] +; GFX10-NEXT: image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x2c,0x06,0xe9,0xf0,0x0b,0x00,0x40,0x00,0x0a,0x09,0x03,0x04,0x05,0x06,0x07,0x08] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) %v.vec = extractvalue {<2 x float>, i32} %v, 0 @@ -610,125 +1874,456 @@ ret <4 x float> %res.2 } -; GCN-LABEL: {{^}}sample_1d_unorm: -; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm{{$}} -; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: sample_1d_unorm: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm +; VERDE-NEXT: 
s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1d_unorm: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_1d_unorm: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_1d_glc: -; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc{{$}} -; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: sample_1d_glc: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1d_glc: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, 
exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_1d_glc: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x2f,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_1d_slc: -; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc{{$}} -; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: sample_1d_slc: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1d_slc: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_1d_slc: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: 
s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; encoding: [0x00,0x0f,0x80,0xf2,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2) ret <4 x float> %v } -; GCN-LABEL: {{^}}sample_1d_glc_slc: -; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc{{$}} -; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc ; define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: sample_1d_glc_slc: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: sample_1d_glc_slc: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: sample_1d_glc_slc: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; 
encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc ; encoding: [0x00,0x2f,0x80,0xf2,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3) ret <4 x float> %v } -; GCN-LABEL: {{^}}adjust_writemask_sample_0: -; GCN: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: adjust_writemask_sample_0: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: adjust_writemask_sample_0: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: adjust_writemask_sample_0: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %r = call <4 x float> 
@llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) %elt0 = extractelement <4 x float> %r, i32 0 ret float %elt0 } -; GCN-LABEL: {{^}}adjust_writemask_sample_01 -; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: adjust_writemask_sample_01: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: adjust_writemask_sample_01: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: adjust_writemask_sample_01: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1> ret <2 x float> %out } -; GCN-LABEL: {{^}}adjust_writemask_sample_012 -; GCN: 
image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: adjust_writemask_sample_012: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: adjust_writemask_sample_012: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: adjust_writemask_sample_012: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x07,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x float> %out } -; GCN-LABEL: {{^}}adjust_writemask_sample_12 -; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: adjust_writemask_sample_12: +; VERDE: ; %bb.0: ; 
%main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: adjust_writemask_sample_12: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: adjust_writemask_sample_12: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x06,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2> ret <2 x float> %out } -; GCN-LABEL: {{^}}adjust_writemask_sample_03 -; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: adjust_writemask_sample_03: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; 
VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: adjust_writemask_sample_03: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: adjust_writemask_sample_03: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x09,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3> ret <2 x float> %out } -; GCN-LABEL: {{^}}adjust_writemask_sample_13 -; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: adjust_writemask_sample_13: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: adjust_writemask_sample_13: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, 
exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: adjust_writemask_sample_13: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0a,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3> ret <2 x float> %out } -; GCN-LABEL: {{^}}adjust_writemask_sample_123 -; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: adjust_writemask_sample_123: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: adjust_writemask_sample_123: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: adjust_writemask_sample_123: +; GFX10: ; %bb.0: ; 
%main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0e,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> ret <3 x float> %out } -; GCN-LABEL: {{^}}adjust_writemask_sample_none_enabled -; GCN-NOT: image define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: adjust_writemask_sample_none_enabled: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: adjust_writemask_sample_none_enabled: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: adjust_writemask_sample_none_enabled: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %r } -; GCN-LABEL: {{^}}adjust_writemask_sample_123_to_12 -; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: adjust_writemask_sample_123_to_12: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, 
exec, s[12:13] +; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: adjust_writemask_sample_123_to_12: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: adjust_writemask_sample_123_to_12: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x06,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1> ret <2 x float> %out } -; GCN-LABEL: {{^}}adjust_writemask_sample_013_to_13 -; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; VERDE-LABEL: adjust_writemask_sample_013_to_13: +; VERDE: ; %bb.0: ; %main_body +; VERDE-NEXT: s_mov_b64 s[12:13], exec +; VERDE-NEXT: s_wqm_b64 exec, exec +; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] +; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa +; VERDE-NEXT: s_waitcnt vmcnt(0) +; VERDE-NEXT: ; return to shader part epilog +; +; GFX6789-LABEL: 
adjust_writemask_sample_013_to_13: +; GFX6789: ; %bb.0: ; %main_body +; GFX6789-NEXT: s_mov_b64 s[12:13], exec +; GFX6789-NEXT: s_wqm_b64 exec, exec +; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] +; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa +; GFX6789-NEXT: s_waitcnt vmcnt(0) +; GFX6789-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: adjust_writemask_sample_013_to_13: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe] +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87] +; GFX10-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0a,0x80,0xf0,0x00,0x00,0x40,0x00] +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2> diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll @@ -27,7 +27,7 @@ } ; GCN-LABEL: {{^}}sample_c_l_2d: -; GCN: image_sample_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GCN: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -43,7 +43,7 @@ } ; GCN-LABEL: 
{{^}}sample_l_o_2d: -; GCN: image_sample_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GCN: image_sample_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -51,7 +51,7 @@ } ; GCN-LABEL: {{^}}sample_c_l_o_1d: -; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GCN: image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -75,7 +75,7 @@ } ; GCN-LABEL: {{^}}gather4_c_l_2d: -; GCN: image_gather4_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GCN: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -83,7 +83,7 @@ } ; GCN-LABEL: {{^}}gather4_l_o_2d: -; GCN: image_gather4_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GCN: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, 
float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll @@ -10,7 +10,7 @@ } ; GCN-LABEL: {{^}}sample_o_2d: -; GCN: image_sample_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GCN: image_sample_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} define amdgpu_ps <4 x float> @sample_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -18,7 +18,7 @@ } ; GCN-LABEL: {{^}}sample_c_o_1d: -; GCN: image_sample_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GCN: image_sample_c_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} define amdgpu_ps <4 x float> @sample_c_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -34,7 +34,7 @@ } ; GCN-LABEL: {{^}}sample_cl_o_1d: -; GCN: image_sample_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GCN: image_sample_cl_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} define amdgpu_ps <4 x float> @sample_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -66,7 +66,7 @@ } ; GCN-LABEL: {{^}}sample_b_o_1d: -; GCN: image_sample_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GCN: image_sample_b_o 
v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} define amdgpu_ps <4 x float> @sample_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -258,7 +258,7 @@ } ; GCN-LABEL: {{^}}sample_l_o_1d: -; GCN: image_sample_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GCN: image_sample_l_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -298,7 +298,7 @@ } ; GCN-LABEL: {{^}}sample_lz_o_2d: -; GCN: image_sample_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GCN: image_sample_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} define amdgpu_ps <4 x float> @sample_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -306,7 +306,7 @@ } ; GCN-LABEL: {{^}}sample_c_lz_o_1d: -; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GCN: image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} define amdgpu_ps <4 x float> @sample_c_lz_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)