Diff 242534

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,401 Lines • ▼ Show 20 Lines	SDValue SITargetLowering::lowerImage(SDValue Op,
if (((VAddrScalarVT == MVT::f16) \|\| (VAddrScalarVT == MVT::i16))) {		if (((VAddrScalarVT == MVT::f16) \|\| (VAddrScalarVT == MVT::i16))) {
// Illegal to use a16 images		// Illegal to use a16 images
if (!ST->hasFeature(AMDGPU::FeatureR128A16))		if (!ST->hasFeature(AMDGPU::FeatureR128A16))
return Op;		return Op;

IsA16 = true;		IsA16 = true;
const MVT VectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;		const MVT VectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {		for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
SDValue AddrLo, AddrHi;		SDValue AddrLo;
// Push back extra arguments.		// Push back extra arguments.
if (i < DimIdx) {		if (i < DimIdx) {
AddrLo = Op.getOperand(i);		AddrLo = Op.getOperand(i);
} else {		} else {
AddrLo = Op.getOperand(i);
// Dz/dh, dz/dv and the last odd coord are packed with undef. Also,		// Dz/dh, dz/dv and the last odd coord are packed with undef. Also,
// in 1D, derivatives dx/dh and dx/dv are packed with undef.		// in 1D, derivatives dx/dh and dx/dv are packed with undef.
if (((i + 1) >= (AddrIdx + NumMIVAddrs)) \|\|		if (((i + 1) >= (AddrIdx + NumMIVAddrs)) \|\|
((NumGradients / 2) % 2 == 1 &&		((NumGradients / 2) % 2 == 1 &&
(i == DimIdx + (NumGradients / 2) - 1 \|\|		(i == DimIdx + (NumGradients / 2) - 1 \|\|
i == DimIdx + NumGradients - 1))) {		i == DimIdx + NumGradients - 1))) {
AddrHi = DAG.getUNDEF(MVT::f16);		AddrLo = Op.getOperand(i);
		if (AddrLo.getValueType() != MVT::i16)
		AddrLo = DAG.getBitcast(MVT::i16, Op.getOperand(i));
		AddrLo = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, AddrLo);
} else {		} else {
AddrHi = Op.getOperand(i + 1);		AddrLo = DAG.getBuildVector(VectorVT, DL,
		{Op.getOperand(i), Op.getOperand(i + 1)});
i++;		i++;
}		}
AddrLo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VectorVT,		AddrLo = DAG.getBitcast(MVT::f32, AddrLo);
{AddrLo, AddrHi});
AddrLo = DAG.getBitcast(MVT::i32, AddrLo);
}		}
		rtaylorUnsubmitted Not Done Reply Inline Actions Couldn't this just be a SCALAR_TO_VECTOR to get the 0th element and create the vector? I think Nicolai mentioned this before? rtaylor: Couldn't this just be a SCALAR_TO_VECTOR to get the 0th element and create the vector? I think…
		nhaehnleUnsubmitted Not Done Reply Inline Actions Yes, please change this. nhaehnle: Yes, please change this.
VAddrs.push_back(AddrLo);		VAddrs.push_back(AddrLo);
}		}
} else {		} else {
for (unsigned i = 0; i < NumMIVAddrs; ++i)		for (unsigned i = 0; i < NumMIVAddrs; ++i)
VAddrs.push_back(Op.getOperand(AddrIdx + i));		VAddrs.push_back(Op.getOperand(AddrIdx + i));
}		}

// If the register allocator cannot place the address registers contiguously		// If the register allocator cannot place the address registers contiguously
▲ Show 20 Lines • Show All 5,524 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN %s			; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN %s

	; GCN-LABEL: {{^}}gather4_2d:
	; GCN: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {			define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
				; GCN-LABEL: gather4_2d:
				; GCN: ; %bb.0: ; %main_body
				; GCN-NEXT: s_mov_b64 s[12:13], exec
				; GCN-NEXT: s_wqm_b64 exec, exec
				; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
				; GCN-NEXT: v_lshl_or_b32 v0, v1, 16, v0
				; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
				; GCN-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
				; GCN-NEXT: s_waitcnt vmcnt(0)
				; GCN-NEXT: ; return to shader part epilog
				rtaylorUnsubmitted Not Done Reply Inline Actions What is the point of combining the halves into vectors? I think this makes the test code unnecessarily complex and less readable. rtaylor: What is the point of combining the halves into vectors? I think this makes the test code…
				nhaehnleUnsubmitted Not Done Reply Inline Actions Ideally, this test should be changed into one that uses the update_llc_test_checks script; with that, having the inputs packed like this will highlight some inefficiencies in the codegen that we should fix. nhaehnle: Ideally, this test should be changed into one that uses the update_llc_test_checks script; with…
				rtaylorUnsubmitted Not Done Reply Inline Actions Nicolai, can you be more specific? What inefficiencies? rtaylor: Nicolai, can you be more specific? What inefficiencies?
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_cube:
	; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da{{$}}
	define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {			define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
				; GCN-LABEL: gather4_cube:
				; GCN: ; %bb.0: ; %main_body
				; GCN-NEXT: s_mov_b64 s[12:13], exec
				; GCN-NEXT: s_wqm_b64 exec, exec
				; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
				; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v0
				; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
				; GCN-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
				; GCN-NEXT: s_waitcnt vmcnt(0)
				; GCN-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_2darray:
	; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da{{$}}
	define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {			define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
	arsenmUnsubmitted Not Done Reply Inline Actions These test changes seem independent, but I think having the separate scalars has more value for testing that the packing code actually works. arsenm: These test changes seem independent, but I think having the separate scalars has more value…
	sebastian-neAuthorUnsubmitted Not Done Reply Inline Actions Well, before this patch the packing did not work but the tests passed ;) To test the packing, I added the new test. nhaehnle said the amdgpu_ps calling convention is not built to handle f16 arguments, so it is not clear whether they are packed or not. The llvm.amdgcn.image.a16.dim.ll test, which takes i16 instead of halves, also uses packed arguments. It groups all arguments into <2 x i16>s. sebastian-ne: Well, before this patch the packing did not work but the tests passed ;) To test the packing, I…
	arsenmUnsubmitted Not Done Reply Inline Actions f16 arguments work. They are not packed arsenm: f16 arguments work. They are not packed
	sebastian-neAuthorUnsubmitted Not Done Reply Inline Actions I forgot to add: some test cases further down used e.g. v[2:9] as arguments; using the packed arguments ensures that v[0:…] is always used, so the test should not be influenced by other optimizations or changes to the compiler. sebastian-ne: I forgot to add: some test cases further down used e.g. v[2:9] as arguments; using the packed…
				; GCN-LABEL: gather4_2darray:
				; GCN: ; %bb.0: ; %main_body
				; GCN-NEXT: s_mov_b64 s[12:13], exec
				; GCN-NEXT: s_wqm_b64 exec, exec
				; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
				; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v0
				; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
				; GCN-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
				; GCN-NEXT: s_waitcnt vmcnt(0)
				; GCN-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_c_2d:
	; GCN: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {			define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
				; GCN-LABEL: gather4_c_2d:
				; GCN: ; %bb.0: ; %main_body
				; GCN-NEXT: s_mov_b64 s[12:13], exec
				; GCN-NEXT: s_wqm_b64 exec, exec
				; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1
				; GCN-NEXT: v_lshl_or_b32 v1, v2, 16, v1
				; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
				; GCN-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
				; GCN-NEXT: s_waitcnt vmcnt(0)
				; GCN-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_cl_2d:
	; GCN: image_gather4_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
				; GCN-LABEL: gather4_cl_2d:
				; GCN: ; %bb.0: ; %main_body
				; GCN-NEXT: s_mov_b64 s[12:13], exec
				; GCN-NEXT: s_wqm_b64 exec, exec
				; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
				; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v0
				; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
				; GCN-NEXT: image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
				; GCN-NEXT: s_waitcnt vmcnt(0)
				; GCN-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_c_cl_2d:
	; GCN: image_gather4_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
				; GCN-LABEL: gather4_c_cl_2d:
				; GCN: ; %bb.0: ; %main_body
				; GCN-NEXT: s_mov_b64 s[12:13], exec
				; GCN-NEXT: s_wqm_b64 exec, exec
				; GCN-NEXT: v_mov_b32_e32 v5, v3
				; GCN-NEXT: v_mov_b32_e32 v3, v0
				; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v1
				; GCN-NEXT: v_lshl_or_b32 v4, v2, 16, v0
				; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
				; GCN-NEXT: image_gather4_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
				; GCN-NEXT: s_waitcnt vmcnt(0)
				; GCN-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_b_2d:
	; GCN: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {			define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
				; GCN-LABEL: gather4_b_2d:
				; GCN: ; %bb.0: ; %main_body
				; GCN-NEXT: s_mov_b64 s[12:13], exec
				; GCN-NEXT: s_wqm_b64 exec, exec
				; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1
				; GCN-NEXT: v_lshl_or_b32 v1, v2, 16, v1
				; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
				; GCN-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
				; GCN-NEXT: s_waitcnt vmcnt(0)
				; GCN-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_c_b_2d:
	; GCN: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {			define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
				; GCN-LABEL: gather4_c_b_2d:
				; GCN: ; %bb.0: ; %main_body
				; GCN-NEXT: s_mov_b64 s[12:13], exec
				; GCN-NEXT: s_wqm_b64 exec, exec
				; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v2
				; GCN-NEXT: v_lshl_or_b32 v2, v3, 16, v2
				; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
				; GCN-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
				; GCN-NEXT: s_waitcnt vmcnt(0)
				; GCN-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_b_cl_2d:
	; GCN: image_gather4_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
				; GCN-LABEL: gather4_b_cl_2d:
				; GCN: ; %bb.0: ; %main_body
				; GCN-NEXT: s_mov_b64 s[12:13], exec
				; GCN-NEXT: s_wqm_b64 exec, exec
				; GCN-NEXT: v_mov_b32_e32 v5, v3
				; GCN-NEXT: v_mov_b32_e32 v3, v0
				; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v1
				; GCN-NEXT: v_lshl_or_b32 v4, v2, 16, v0
				; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
				; GCN-NEXT: image_gather4_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
				; GCN-NEXT: s_waitcnt vmcnt(0)
				; GCN-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_c_b_cl_2d:
	; GCN: image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
				; GCN-LABEL: gather4_c_b_cl_2d:
				; GCN: ; %bb.0: ; %main_body
				; GCN-NEXT: s_mov_b64 s[12:13], exec
				; GCN-NEXT: s_wqm_b64 exec, exec
				; GCN-NEXT: v_mov_b32_e32 v7, v4
				; GCN-NEXT: v_mov_b32_e32 v4, v0
				; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v2
				; GCN-NEXT: v_mov_b32_e32 v5, v1
				; GCN-NEXT: v_lshl_or_b32 v6, v3, 16, v0
				; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
				; GCN-NEXT: image_gather4_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0x1 a16
				; GCN-NEXT: s_waitcnt vmcnt(0)
				; GCN-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_l_2d:
	; GCN: image_gather4_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {			define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
				; GCN-LABEL: gather4_l_2d:
				; GCN: ; %bb.0: ; %main_body
				; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
				; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v0
				; GCN-NEXT: image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
				; GCN-NEXT: s_waitcnt vmcnt(0)
				; GCN-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_c_l_2d:
	; GCN: image_gather4_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {			define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
				; GCN-LABEL: gather4_c_l_2d:
				; GCN: ; %bb.0: ; %main_body
				; GCN-NEXT: v_mov_b32_e32 v5, v3
				; GCN-NEXT: v_mov_b32_e32 v3, v0
				; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v1
				; GCN-NEXT: v_lshl_or_b32 v4, v2, 16, v0
				; GCN-NEXT: image_gather4_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
				; GCN-NEXT: s_waitcnt vmcnt(0)
				; GCN-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_lz_2d:
	; GCN: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {			define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
				; GCN-LABEL: gather4_lz_2d:
				; GCN: ; %bb.0: ; %main_body
				; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
				; GCN-NEXT: v_lshl_or_b32 v0, v1, 16, v0
				; GCN-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
				; GCN-NEXT: s_waitcnt vmcnt(0)
				; GCN-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_c_lz_2d:
	; GCN: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {			define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
				; GCN-LABEL: gather4_c_lz_2d:
				; GCN: ; %bb.0: ; %main_body
				; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1
				; GCN-NEXT: v_lshl_or_b32 v1, v2, 16, v1
				; GCN-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
				; GCN-NEXT: s_waitcnt vmcnt(0)
				; GCN-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	Show All 19 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll

Show All 13 Lines	main_body:
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {		define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GCN-LABEL: sample_2d:		; GCN-LABEL: sample_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
		; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
		; GCN-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {		define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
; GCN-LABEL: sample_3d:		; GCN-LABEL: sample_3d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v1, v2		; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
		; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {		define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
; GCN-LABEL: sample_cube:		; GCN-LABEL: sample_cube:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v1, v2		; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
		; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da		; GCN-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {		define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
; GCN-LABEL: sample_1darray:		; GCN-LABEL: sample_1darray:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
		; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
		; GCN-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da		; GCN-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {		define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
; GCN-LABEL: sample_2darray:		; GCN-LABEL: sample_2darray:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v1, v2		; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
		; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da		; GCN-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {		define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
Show All 10 Lines	main_body:
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {		define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GCN-LABEL: sample_c_2d:		; GCN-LABEL: sample_c_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
		; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1
		; GCN-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {		define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
; GCN-LABEL: sample_cl_1d:		; GCN-LABEL: sample_cl_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
		; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
		; GCN-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {		define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
; GCN-LABEL: sample_cl_2d:		; GCN-LABEL: sample_cl_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v1, v2		; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
		; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {		define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
; GCN-LABEL: sample_c_cl_1d:		; GCN-LABEL: sample_c_cl_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
		; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1
		; GCN-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {		define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
; GCN-LABEL: sample_c_cl_2d:		; GCN-LABEL: sample_c_cl_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v2, v3		; GCN-NEXT: v_mov_b32_e32 v5, v3
		; GCN-NEXT: v_mov_b32_e32 v3, v0
		; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v1
		; GCN-NEXT: v_lshl_or_b32 v4, v2, 16, v0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {		define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
Show All 10 Lines	main_body:
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {		define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
; GCN-LABEL: sample_b_2d:		; GCN-LABEL: sample_b_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
		; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1
		; GCN-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}
Show All 12 Lines	main_body:
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {		define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
; GCN-LABEL: sample_c_b_2d:		; GCN-LABEL: sample_c_b_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
		; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v2
		; GCN-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {		define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
; GCN-LABEL: sample_b_cl_1d:		; GCN-LABEL: sample_b_cl_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
		; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1
		; GCN-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {		define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
; GCN-LABEL: sample_b_cl_2d:		; GCN-LABEL: sample_b_cl_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v2, v3		; GCN-NEXT: v_mov_b32_e32 v5, v3
		; GCN-NEXT: v_mov_b32_e32 v3, v0
		; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v1
		; GCN-NEXT: v_lshl_or_b32 v4, v2, 16, v0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {		define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
; GCN-LABEL: sample_c_b_cl_1d:		; GCN-LABEL: sample_c_b_cl_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
		; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v2
		; GCN-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {		define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
; GCN-LABEL: sample_c_b_cl_2d:		; GCN-LABEL: sample_c_b_cl_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v3, v4		; GCN-NEXT: v_mov_b32_e32 v7, v4
		; GCN-NEXT: v_mov_b32_e32 v4, v0
		; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v2
		; GCN-NEXT: v_mov_b32_e32 v5, v1
		; GCN-NEXT: v_lshl_or_b32 v6, v3, 16, v0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {		define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
; GCN-LABEL: sample_d_1d:		; GCN-LABEL: sample_d_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {		define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GCN-LABEL: sample_d_2d:		; GCN-LABEL: sample_d_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v3, v4		; GCN-NEXT: v_mov_b32_e32 v6, 0xffff
; GCN-NEXT: v_mov_b32_e32 v1, v0		; GCN-NEXT: v_and_b32_e32 v4, v6, v4
; GCN-NEXT: image_sample_d v[0:3], v[1:3], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: v_and_b32_e32 v2, v6, v2
		; GCN-NEXT: v_and_b32_e32 v0, v6, v0
		; GCN-NEXT: v_lshl_or_b32 v3, v3, 16, v2
		; GCN-NEXT: v_lshl_or_b32 v4, v5, 16, v4
		; GCN-NEXT: v_lshl_or_b32 v2, v1, 16, v0
		; GCN-NEXT: image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {		define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
; GCN-LABEL: sample_d_3d:		; GCN-LABEL: sample_d_3d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v4, v3		; GCN-NEXT: v_mov_b32_e32 v12, v8
; GCN-NEXT: v_mov_b32_e32 v3, v2		; GCN-NEXT: v_mov_b32_e32 v8, v2
; GCN-NEXT: v_mov_b32_e32 v7, v8		; GCN-NEXT: v_mov_b32_e32 v2, 0xffff
; GCN-NEXT: v_mov_b32_e32 v2, v0		; GCN-NEXT: v_mov_b32_e32 v10, v5
; GCN-NEXT: image_sample_d v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: v_and_b32_e32 v5, v2, v6
		; GCN-NEXT: v_and_b32_e32 v3, v2, v3
		; GCN-NEXT: v_and_b32_e32 v0, v2, v0
		; GCN-NEXT: v_lshl_or_b32 v11, v7, 16, v5
		; GCN-NEXT: v_lshl_or_b32 v9, v4, 16, v3
		; GCN-NEXT: v_lshl_or_b32 v7, v1, 16, v0
		; GCN-NEXT: image_sample_d v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {		define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
; GCN-LABEL: sample_c_d_1d:		; GCN-LABEL: sample_c_d_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {		define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GCN-LABEL: sample_c_d_2d:		; GCN-LABEL: sample_c_d_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v2, v1		; GCN-NEXT: v_mov_b32_e32 v9, 0xffff
; GCN-NEXT: v_mov_b32_e32 v4, v5		; GCN-NEXT: v_mov_b32_e32 v8, v2
; GCN-NEXT: v_mov_b32_e32 v1, v0		; GCN-NEXT: v_mov_b32_e32 v7, v3
; GCN-NEXT: image_sample_c_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: v_and_b32_e32 v2, v9, v5
		; GCN-NEXT: v_and_b32_e32 v1, v9, v1
		; GCN-NEXT: v_lshl_or_b32 v3, v6, 16, v2
		; GCN-NEXT: v_and_b32_e32 v2, v9, v7
		; GCN-NEXT: v_lshl_or_b32 v2, v4, 16, v2
		; GCN-NEXT: v_lshl_or_b32 v1, v8, 16, v1
		; GCN-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {		define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
; GCN-LABEL: sample_d_cl_1d:		; GCN-LABEL: sample_d_cl_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
		; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v2
		; GCN-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GCN-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {		define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GCN-LABEL: sample_d_cl_2d:		; GCN-LABEL: sample_d_cl_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v3, v2		; GCN-NEXT: v_mov_b32_e32 v7, 0xffff
; GCN-NEXT: v_mov_b32_e32 v5, v6		; GCN-NEXT: v_and_b32_e32 v4, v7, v4
; GCN-NEXT: v_mov_b32_e32 v2, v0		; GCN-NEXT: v_and_b32_e32 v2, v7, v2
; GCN-NEXT: image_sample_d_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: v_and_b32_e32 v0, v7, v0
		; GCN-NEXT: v_lshl_or_b32 v5, v5, 16, v4
		; GCN-NEXT: v_lshl_or_b32 v4, v3, 16, v2
		; GCN-NEXT: v_lshl_or_b32 v3, v1, 16, v0
		; GCN-NEXT: image_sample_d_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {		define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
; GCN-LABEL: sample_c_d_cl_1d:		; GCN-LABEL: sample_c_d_cl_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
		; GCN-NEXT: v_and_b32_e32 v3, 0xffff, v3
		; GCN-NEXT: v_lshl_or_b32 v3, v4, 16, v3
; GCN-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {		define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GCN-LABEL: sample_c_d_cl_2d:		; GCN-LABEL: sample_c_d_cl_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v4, v3		; GCN-NEXT: v_mov_b32_e32 v11, v7
; GCN-NEXT: v_mov_b32_e32 v6, v7		; GCN-NEXT: v_mov_b32_e32 v7, v0
; GCN-NEXT: v_mov_b32_e32 v3, v1		; GCN-NEXT: v_mov_b32_e32 v0, 0xffff
; GCN-NEXT: v_mov_b32_e32 v2, v0		; GCN-NEXT: v_and_b32_e32 v5, v0, v5
; GCN-NEXT: image_sample_c_d_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: v_and_b32_e32 v3, v0, v3
		; GCN-NEXT: v_and_b32_e32 v0, v0, v1
		; GCN-NEXT: v_lshl_or_b32 v10, v6, 16, v5
		; GCN-NEXT: v_lshl_or_b32 v9, v4, 16, v3
		; GCN-NEXT: v_lshl_or_b32 v8, v2, 16, v0
		; GCN-NEXT: image_sample_c_d_cl v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {		define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
; GCN-LABEL: sample_cd_1d:		; GCN-LABEL: sample_cd_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {		define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GCN-LABEL: sample_cd_2d:		; GCN-LABEL: sample_cd_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v3, v4		; GCN-NEXT: v_mov_b32_e32 v6, 0xffff
; GCN-NEXT: v_mov_b32_e32 v1, v0		; GCN-NEXT: v_and_b32_e32 v4, v6, v4
; GCN-NEXT: image_sample_cd v[0:3], v[1:3], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: v_and_b32_e32 v2, v6, v2
		; GCN-NEXT: v_and_b32_e32 v0, v6, v0
		; GCN-NEXT: v_lshl_or_b32 v3, v3, 16, v2
		; GCN-NEXT: v_lshl_or_b32 v4, v5, 16, v4
		; GCN-NEXT: v_lshl_or_b32 v2, v1, 16, v0
		; GCN-NEXT: image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {		define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
; GCN-LABEL: sample_c_cd_1d:		; GCN-LABEL: sample_c_cd_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {		define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GCN-LABEL: sample_c_cd_2d:		; GCN-LABEL: sample_c_cd_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v2, v1		; GCN-NEXT: v_mov_b32_e32 v9, 0xffff
; GCN-NEXT: v_mov_b32_e32 v4, v5		; GCN-NEXT: v_mov_b32_e32 v8, v2
; GCN-NEXT: v_mov_b32_e32 v1, v0		; GCN-NEXT: v_mov_b32_e32 v7, v3
; GCN-NEXT: image_sample_c_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: v_and_b32_e32 v2, v9, v5
		; GCN-NEXT: v_and_b32_e32 v1, v9, v1
		; GCN-NEXT: v_lshl_or_b32 v3, v6, 16, v2
		; GCN-NEXT: v_and_b32_e32 v2, v9, v7
		; GCN-NEXT: v_lshl_or_b32 v2, v4, 16, v2
		; GCN-NEXT: v_lshl_or_b32 v1, v8, 16, v1
		; GCN-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {		define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
; GCN-LABEL: sample_cd_cl_1d:		; GCN-LABEL: sample_cd_cl_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
		; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v2
		; GCN-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GCN-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {		define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GCN-LABEL: sample_cd_cl_2d:		; GCN-LABEL: sample_cd_cl_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v3, v2		; GCN-NEXT: v_mov_b32_e32 v7, 0xffff
; GCN-NEXT: v_mov_b32_e32 v5, v6		; GCN-NEXT: v_and_b32_e32 v4, v7, v4
; GCN-NEXT: v_mov_b32_e32 v2, v0		; GCN-NEXT: v_and_b32_e32 v2, v7, v2
; GCN-NEXT: image_sample_cd_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: v_and_b32_e32 v0, v7, v0
		; GCN-NEXT: v_lshl_or_b32 v5, v5, 16, v4
		; GCN-NEXT: v_lshl_or_b32 v4, v3, 16, v2
		; GCN-NEXT: v_lshl_or_b32 v3, v1, 16, v0
		; GCN-NEXT: image_sample_cd_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {		define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
; GCN-LABEL: sample_c_cd_cl_1d:		; GCN-LABEL: sample_c_cd_cl_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
		; GCN-NEXT: v_and_b32_e32 v3, 0xffff, v3
		; GCN-NEXT: v_lshl_or_b32 v3, v4, 16, v3
; GCN-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {		define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GCN-LABEL: sample_c_cd_cl_2d:		; GCN-LABEL: sample_c_cd_cl_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v4, v3		; GCN-NEXT: v_mov_b32_e32 v11, v7
; GCN-NEXT: v_mov_b32_e32 v6, v7		; GCN-NEXT: v_mov_b32_e32 v7, v0
; GCN-NEXT: v_mov_b32_e32 v3, v1		; GCN-NEXT: v_mov_b32_e32 v0, 0xffff
; GCN-NEXT: v_mov_b32_e32 v2, v0		; GCN-NEXT: v_and_b32_e32 v5, v0, v5
; GCN-NEXT: image_sample_c_cd_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: v_and_b32_e32 v3, v0, v3
		; GCN-NEXT: v_and_b32_e32 v0, v0, v1
		; GCN-NEXT: v_lshl_or_b32 v10, v6, 16, v5
		; GCN-NEXT: v_lshl_or_b32 v9, v4, 16, v3
		; GCN-NEXT: v_lshl_or_b32 v8, v2, 16, v0
		; GCN-NEXT: image_sample_c_cd_cl v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {		define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
; GCN-LABEL: sample_l_1d:		; GCN-LABEL: sample_l_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
		; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
		; GCN-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GCN-NEXT: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {		define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
; GCN-LABEL: sample_l_2d:		; GCN-LABEL: sample_l_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v1, v2		; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GCN-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v0
		; GCN-NEXT: image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {		define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
; GCN-LABEL: sample_c_l_1d:		; GCN-LABEL: sample_c_l_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
		; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1
		; GCN-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GCN-NEXT: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {		define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
; GCN-LABEL: sample_c_l_2d:		; GCN-LABEL: sample_c_l_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v2, v3		; GCN-NEXT: v_mov_b32_e32 v5, v3
; GCN-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: v_mov_b32_e32 v3, v0
		; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v1
		; GCN-NEXT: v_lshl_or_b32 v4, v2, 16, v0
		; GCN-NEXT: image_sample_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {		define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
; GCN-LABEL: sample_lz_1d:		; GCN-LABEL: sample_lz_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {		define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GCN-LABEL: sample_lz_2d:		; GCN-LABEL: sample_lz_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
		; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
		; GCN-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GCN-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {		define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
; GCN-LABEL: sample_c_lz_1d:		; GCN-LABEL: sample_c_lz_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {		define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GCN-LABEL: sample_c_lz_2d:		; GCN-LABEL: sample_c_lz_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
		; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1
		; GCN-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GCN-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {		define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
; GCN-LABEL: sample_c_d_o_2darray_V1:		; GCN-LABEL: sample_c_d_o_2darray_V1:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v5, v4		; GCN-NEXT: v_mov_b32_e32 v13, v8
; GCN-NEXT: v_mov_b32_e32 v4, v2		; GCN-NEXT: v_mov_b32_e32 v8, v0
; GCN-NEXT: v_mov_b32_e32 v7, v8		; GCN-NEXT: v_mov_b32_e32 v0, 0xffff
; GCN-NEXT: v_mov_b32_e32 v3, v1		; GCN-NEXT: v_mov_b32_e32 v9, v1
; GCN-NEXT: v_mov_b32_e32 v2, v0		; GCN-NEXT: v_and_b32_e32 v1, v0, v6
; GCN-NEXT: image_sample_c_d_o v0, v[2:9], s[0:7], s[8:11] dmask:0x4 a16 da		; GCN-NEXT: v_lshl_or_b32 v12, v7, 16, v1
		; GCN-NEXT: v_and_b32_e32 v1, v0, v4
		; GCN-NEXT: v_and_b32_e32 v0, v0, v2
		; GCN-NEXT: v_lshl_or_b32 v11, v5, 16, v1
		; GCN-NEXT: v_lshl_or_b32 v10, v3, 16, v0
		; GCN-NEXT: image_sample_c_d_o v0, v[8:15], s[0:7], s[8:11] dmask:0x4 a16 da
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret float %v		ret float %v
}		}

define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {		define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
; GCN-LABEL: sample_c_d_o_2darray_V2:		; GCN-LABEL: sample_c_d_o_2darray_V2:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v5, v4		; GCN-NEXT: v_mov_b32_e32 v13, v8
; GCN-NEXT: v_mov_b32_e32 v4, v2		; GCN-NEXT: v_mov_b32_e32 v8, v0
; GCN-NEXT: v_mov_b32_e32 v7, v8		; GCN-NEXT: v_mov_b32_e32 v0, 0xffff
; GCN-NEXT: v_mov_b32_e32 v3, v1		; GCN-NEXT: v_mov_b32_e32 v9, v1
; GCN-NEXT: v_mov_b32_e32 v2, v0		; GCN-NEXT: v_and_b32_e32 v1, v0, v6
; GCN-NEXT: image_sample_c_d_o v[0:1], v[2:9], s[0:7], s[8:11] dmask:0x6 a16 da		; GCN-NEXT: v_lshl_or_b32 v12, v7, 16, v1
		; GCN-NEXT: v_and_b32_e32 v1, v0, v4
		; GCN-NEXT: v_and_b32_e32 v0, v0, v2
		; GCN-NEXT: v_lshl_or_b32 v11, v5, 16, v1
		; GCN-NEXT: v_lshl_or_b32 v10, v3, 16, v0
		; GCN-NEXT: image_sample_c_d_o v[0:1], v[8:15], s[0:7], s[8:11] dmask:0x6 a16 da
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <2 x float> %v		ret <2 x float> %v
}		}

declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1		declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
▲ Show 20 Lines • Show All 58 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Fix lowering a16 image intrinsics
Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 242534

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Fix lowering a16 image intrinsics - Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 242534

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll

[AMDGPU] Fix lowering a16 image intrinsics
Closed, Public