diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
@@ -504,11 +504,95 @@
   ret <2 x half> %v
 }
 
-; FIXME:
-; define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
-;   %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
-;   ret <3 x half> %v
-; }
+define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX8-UNPACKED-LABEL: load_1d_v3f16_xyz:
+; GFX8-UNPACKED: ; %bb.0:
+; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
+; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
+; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
+; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
+; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
+; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
+; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
+; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
+; GFX8-UNPACKED-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm d16
+; GFX8-UNPACKED-NEXT: s_mov_b32 s0, 0xffff
+; GFX8-UNPACKED-NEXT: s_and_b32 s1, s0, s0
+; GFX8-UNPACKED-NEXT: s_lshl_b32 s1, s1, 16
+; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, s1
+; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
+; GFX8-UNPACKED-NEXT: v_and_b32_e32 v4, s0, v1
+; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v4
+; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-UNPACKED-NEXT: ; return to shader part epilog
+;
+; GFX8-PACKED-LABEL: load_1d_v3f16_xyz:
+; GFX8-PACKED: ; %bb.0:
+; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
+; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
+; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
+; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
+; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
+; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
+; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
+; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
+; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
+; GFX8-PACKED-NEXT: s_mov_b32 s0, 0xffff
+; GFX8-PACKED-NEXT: s_and_b32 s0, s0, s0
+; GFX8-PACKED-NEXT: s_lshl_b32 s0, s0, 16
+; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, s0
+; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
+; GFX8-PACKED-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX8-PACKED-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-PACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3
+; GFX8-PACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-PACKED-NEXT: ; return to shader part epilog
+;
+; GFX9-LABEL: load_1d_v3f16_xyz:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
+; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
+; GFX9-NEXT: s_lshl_b32 s0, s0, 16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX9-NEXT: v_and_or_b32 v1, v1, v2, s0
+; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v3
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_v3f16_xyz:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: v_mov_b32_e32 v3, 0xffff
+; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_lshl_b32 s0, s0, 16
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX10-NEXT: v_and_or_b32 v1, v1, v3, s0
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX10-NEXT: v_and_or_b32 v0, v0, v3, v2
+; GFX10-NEXT: ; return to shader part epilog
+  %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <3 x half> %v
+}
 
 define amdgpu_ps <4 x half> @load_1d_v4f16_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
 ; GFX8-UNPACKED-LABEL: load_1d_v4f16_xyzw:
@@ -712,13 +796,72 @@
   ret float %vv
 }
 
-; FIXME:
-; define amdgpu_ps float @load_1d_v3f16_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
-;   %v = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
-;   %v.err = extractvalue { <3 x half>, i32 } %v, 1
-;   %vv = bitcast i32 %v.err to float
-;   ret float %vv
-; }
+define amdgpu_ps float @load_1d_v3f16_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX8-UNPACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
+; GFX8-UNPACKED: ; %bb.0:
+; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2
+; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3
+; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4
+; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5
+; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6
+; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7
+; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8
+; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
+; GFX8-UNPACKED-NEXT: image_load v[0:3], v0, s[0:7] dmask:0x7 unorm tfe d16
+; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
+; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v0, v3
+; GFX8-UNPACKED-NEXT: ; return to shader part epilog
+;
+; GFX8-PACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
+; GFX8-PACKED: ; %bb.0:
+; GFX8-PACKED-NEXT: s_mov_b32 s0, s2
+; GFX8-PACKED-NEXT: s_mov_b32 s1, s3
+; GFX8-PACKED-NEXT: s_mov_b32 s2, s4
+; GFX8-PACKED-NEXT: s_mov_b32 s3, s5
+; GFX8-PACKED-NEXT: s_mov_b32 s4, s6
+; GFX8-PACKED-NEXT: s_mov_b32 s5, s7
+; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
+; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
+; GFX8-PACKED-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm tfe d16
+; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
+; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v2
+; GFX8-PACKED-NEXT: ; return to shader part epilog
+;
+; GFX9-LABEL: load_1d_v3f16_tfe_dmask_xyz:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_mov_b32 s0, s2
+; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
+; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
+; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
+; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm tfe d16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_v3f16_tfe_dmask_xyz:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
+; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
+; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe d16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_mov_b32_e32 v0, v2
+; GFX10-NEXT: ; return to shader part epilog
+  %v = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+  %v.err = extractvalue { <3 x half>, i32 } %v, 1
+  %vv = bitcast i32 %v.err to float
+  ret float %vv
+}
 
 define amdgpu_ps float @load_1d_v4f16_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
 ; GFX8-UNPACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll
@@ -16,19 +16,6 @@
 ; UNPACKED-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
 ; UNPACKED-NEXT: s_endpgm
 ;
-; PACKED-LABEL: image_store_f16:
-; PACKED: ; %bb.0:
-; PACKED-NEXT: s_mov_b32 s0, s2
-; PACKED-NEXT: s_mov_b32 s1, s3
-; PACKED-NEXT: s_mov_b32 s2, s4
-; PACKED-NEXT: s_mov_b32 s3, s5
-; PACKED-NEXT: s_mov_b32 s4, s6
-; PACKED-NEXT: s_mov_b32 s5, s7
-; PACKED-NEXT: s_mov_b32 s6, s8
-; PACKED-NEXT: s_mov_b32 s7, s9
-; PACKED-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
-; PACKED-NEXT: s_endpgm
-;
 ; GFX81-LABEL: image_store_f16:
 ; GFX81: ; %bb.0:
 ; GFX81-NEXT: s_mov_b32 s0, s2
@@ -41,6 +28,18 @@
 ; GFX81-NEXT: s_mov_b32 s7, s9
 ; GFX81-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
 ; GFX81-NEXT: s_endpgm
+; PACKED-LABEL: image_store_f16:
+; PACKED: ; %bb.0:
+; PACKED-NEXT: s_mov_b32 s0, s2
+; PACKED-NEXT: s_mov_b32 s1, s3
+; PACKED-NEXT: s_mov_b32 s2, s4
+; PACKED-NEXT: s_mov_b32 s3, s5
+; PACKED-NEXT: s_mov_b32 s4, s6
+; PACKED-NEXT: s_mov_b32 s5, s7
+; PACKED-NEXT: s_mov_b32 s6, s8
+; PACKED-NEXT: s_mov_b32 s7, s9
+; PACKED-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
+; PACKED-NEXT: s_endpgm
   call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
@@ -60,6 +59,18 @@
 ; UNPACKED-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16
 ; UNPACKED-NEXT: s_endpgm
 ;
+; GFX81-LABEL: image_store_v2f16:
+; GFX81: ; %bb.0:
+; GFX81-NEXT: s_mov_b32 s0, s2
+; GFX81-NEXT: s_mov_b32 s1, s3
+; GFX81-NEXT: s_mov_b32 s2, s4
+; GFX81-NEXT: s_mov_b32 s3, s5
+; GFX81-NEXT: s_mov_b32 s4, s6
+; GFX81-NEXT: s_mov_b32 s5, s7
+; GFX81-NEXT: s_mov_b32 s6, s8
+; GFX81-NEXT: s_mov_b32 s7, s9
+; GFX81-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16
+; GFX81-NEXT: s_endpgm
 ; PACKED-LABEL: image_store_v2f16:
 ; PACKED: ; %bb.0:
 ; PACKED-NEXT: s_mov_b32 s0, s2
@@ -72,9 +83,36 @@
 ; PACKED-NEXT: s_mov_b32 s7, s9
 ; PACKED-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x3 unorm d16
 ; PACKED-NEXT: s_endpgm
+  call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <3 x half> %in) {
+; UNPACKED-LABEL: image_store_v3f16:
+; UNPACKED: ; %bb.0:
+; UNPACKED-NEXT: v_mov_b32_e32 v5, v1
+; UNPACKED-NEXT: v_mov_b32_e32 v1, v2
+; UNPACKED-NEXT: s_mov_b32 s0, s2
+; UNPACKED-NEXT: s_mov_b32 s1, s3
+; UNPACKED-NEXT: s_mov_b32 s2, s4
+; UNPACKED-NEXT: s_mov_b32 s3, s5
+; UNPACKED-NEXT: s_mov_b32 s4, s6
+; UNPACKED-NEXT: s_mov_b32 s5, s7
+; UNPACKED-NEXT: s_mov_b32 s6, s8
+; UNPACKED-NEXT: s_mov_b32 s7, s9
+; UNPACKED-NEXT: v_mov_b32_e32 v4, v0
+; UNPACKED-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; UNPACKED-NEXT: image_store v[1:3], v[4:5], s[0:7] dmask:0x7 unorm d16
+; UNPACKED-NEXT: s_endpgm
 ;
-; GFX81-LABEL: image_store_v2f16:
+; GFX81-LABEL: image_store_v3f16:
 ; GFX81: ; %bb.0:
+; GFX81-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX81-NEXT: v_lshlrev_b32_e32 v4, 16, v4
+; GFX81-NEXT: v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX81-NEXT: v_mov_b32_e32 v4, 0
+; GFX81-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX81-NEXT: v_mov_b32_e32 v4, 0
 ; GFX81-NEXT: s_mov_b32 s0, s2
 ; GFX81-NEXT: s_mov_b32 s1, s3
 ; GFX81-NEXT: s_mov_b32 s2, s4
@@ -83,18 +121,13 @@
 ; GFX81-NEXT: s_mov_b32 s5, s7
 ; GFX81-NEXT: s_mov_b32 s6, s8
 ; GFX81-NEXT: s_mov_b32 s7, s9
-; GFX81-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16
+; GFX81-NEXT: v_mov_b32_e32 v5, v4
+; GFX81-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0x7 unorm d16
 ; GFX81-NEXT: s_endpgm
-  call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; FIXME: Broken
-; define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <3 x half> %in) {
-;   call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
-;   ret void
-; }
-
 define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <4 x half> %in) {
 ; UNPACKED-LABEL: image_store_v4f16:
 ; UNPACKED: ; %bb.0:
@@ -114,19 +147,6 @@
 ; UNPACKED-NEXT: image_store v[1:4], v[5:6], s[0:7] dmask:0xf unorm d16
 ; UNPACKED-NEXT: s_endpgm
 ;
-; PACKED-LABEL: image_store_v4f16:
-; PACKED: ; %bb.0:
-; PACKED-NEXT: s_mov_b32 s0, s2
-; PACKED-NEXT: s_mov_b32 s1, s3
-; PACKED-NEXT: s_mov_b32 s2, s4
-; PACKED-NEXT: s_mov_b32 s3, s5
-; PACKED-NEXT: s_mov_b32 s4, s6
-; PACKED-NEXT: s_mov_b32 s5, s7
-; PACKED-NEXT: s_mov_b32 s6, s8
-; PACKED-NEXT: s_mov_b32 s7, s9
-; PACKED-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16
-; PACKED-NEXT: s_endpgm
-;
 ; GFX81-LABEL: image_store_v4f16:
 ; GFX81: ; %bb.0:
 ; GFX81-NEXT: s_mov_b32 s0, s2
@@ -139,6 +159,18 @@
 ; GFX81-NEXT: s_mov_b32 s7, s9
 ; GFX81-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0xf unorm d16
 ; GFX81-NEXT: s_endpgm
+; PACKED-LABEL: image_store_v4f16:
+; PACKED: ; %bb.0:
+; PACKED-NEXT: s_mov_b32 s0, s2
+; PACKED-NEXT: s_mov_b32 s1, s3
+; PACKED-NEXT: s_mov_b32 s2, s4
+; PACKED-NEXT: s_mov_b32 s3, s5
+; PACKED-NEXT: s_mov_b32 s4, s6
+; PACKED-NEXT: s_mov_b32 s5, s7
+; PACKED-NEXT: s_mov_b32 s6, s8
+; PACKED-NEXT: s_mov_b32 s7, s9
+; PACKED-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16
+; PACKED-NEXT: s_endpgm
   call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
@@ -72,6 +72,12 @@
   ret float %x
 }
 
+
+; GCN-LABEL: {{^}}image_load_3d_v3f16:
+; UNPACKED: image_load v[0:2], v[0:2], s[0:7] dmask:0x7 unorm d16
+; PACKED: image_load v[0:1], v[0:2], s[0:7] dmask:0x7 unorm d16
+; GFX81: image_load v[0:1], v[0:2], s[0:7] dmask:0x7 unorm d16
+; GFX10: image_load v[0:1], v[0:2], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm d16{{$}}
 define amdgpu_ps <2 x float> @image_load_3d_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
 main_body:
   %tex = call <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32 7, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
@@ -103,6 +109,11 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}image_store_v3f16:
+; UNPACKED: image_store v[2:4], v[0:1], s[0:7] dmask:0x7 unorm d16
+; PACKED: image_store v[2:3], v[0:1], s[0:7] dmask:0x7 unorm d16
+; GFX81: image_store v[2:3], v[0:1], s[0:7] dmask:0x7 unorm d16
+; GFX10: image_store v[2:3], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
 define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
 main_body:
   %r = bitcast <2 x float> %in to <4 x half>