Diff 240589

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,437 Lines • ▼ Show 20 Lines	for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
((NumGradients / 2) % 2 == 1 &&		((NumGradients / 2) % 2 == 1 &&
(i == DimIdx + (NumGradients / 2) - 1 \|\|		(i == DimIdx + (NumGradients / 2) - 1 \|\|
i == DimIdx + NumGradients - 1))) {		i == DimIdx + NumGradients - 1))) {
AddrHi = DAG.getUNDEF(MVT::f16);		AddrHi = DAG.getUNDEF(MVT::f16);
} else {		} else {
AddrHi = Op.getOperand(i + 1);		AddrHi = Op.getOperand(i + 1);
i++;		i++;
}		}
AddrLo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VectorVT,		SDValue Vec = DAG.getNode(ISD::UNDEF, DL, VectorVT);
{AddrLo, AddrHi});		Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VectorVT, Vec, AddrLo,
AddrLo = DAG.getBitcast(MVT::i32, AddrLo);		DAG.getConstant(0, DL, MVT::i32));
		rtaylorUnsubmitted Not Done Reply Inline Actions Couldn't this just be a SCALAR_TO_VECTOR to get the 0th element and create the vector? I think Nicolai mentioned this before? rtaylor: Couldn't this just be a SCALAR_TO_VECTOR to get the 0th element and create the vector? I think…
		nhaehnleUnsubmitted Not Done Reply Inline Actions Yes, please change this. nhaehnle: Yes, please change this.
		Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VectorVT, Vec, AddrHi,
		DAG.getConstant(1, DL, MVT::i32));
		AddrLo = DAG.getBitcast(MVT::i32, Vec);
}		}
VAddrs.push_back(AddrLo);		VAddrs.push_back(AddrLo);
}		}
} else {		} else {
for (unsigned i = 0; i < NumMIVAddrs; ++i)		for (unsigned i = 0; i < NumMIVAddrs; ++i)
VAddrs.push_back(Op.getOperand(AddrIdx + i));		VAddrs.push_back(Op.getOperand(AddrIdx + i));
}		}

▲ Show 20 Lines • Show All 5,557 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -check-prefixes=GFX9 %s

				arsenmUnsubmitted Not Done Reply Inline Actions if this is an encoding test, it should use -show-mc-encoding? arsenm: if this is an encoding test, it should use -show-mc-encoding?
				sebastian-neAuthorUnsubmitted Not Done Reply Inline Actions Maybe the name was chosen uncarefully, this test should ensure that all components are actually converted and packed into one register. Before this patch, a16 is not working because the y component just gets ignored, the compiler only ensures that the x component is present. Still, all other a16 tests are passing. So I tried to introduce this test as a regression test. It should make sure that actually all components are "encoded" into the packed coordinates. I guess renaming it to llvm.amdgcn.image.a16.pack.ll would make more sense? sebastian-ne: Maybe the name was chosen uncarefully, this test should ensure that all components are actually…
				rtaylorUnsubmitted Not Done Reply Inline Actions Should we be checking the encoding for all existing tests instead? rtaylor: Should we be checking the encoding for all existing tests instead?
				nhaehnleUnsubmitted Not Done Reply Inline Actions Yes, rename the file to avoid confusion. Yes, we should add encoding tests, but that should happen in a separate patch that actually fixes the encoding for gfx1010 :) nhaehnle: Yes, rename the file to avoid confusion. Yes, we should add encoding tests, but that should…
				rtaylorUnsubmitted Not Done Reply Inline Actions I'm not sure what this file is doing that the current tests don't do? ; GCN-LABEL: {{^}}sample_2d: ; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}} define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } rtaylor: I'm not sure what this file is doing that the current tests don't do? ; GCN-LABEL…
				define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %smpl, float %sf, float %tf) {
				; GFX9-LABEL: load_3d:
				; GFX9: ; %bb.0: ; %main_body
				; GFX9-NEXT: s_mov_b64 s[12:13], exec
				; GFX9-NEXT: s_wqm_b64 exec, exec
				; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
				; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
				; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
				; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
				; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
				; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
				; GFX9-NEXT: s_waitcnt vmcnt(0)
				; GFX9-NEXT: ; return to shader part epilog
				rtaylorUnsubmitted Not Done Reply Inline Actions Do you need all this checking or can you just check the image_sample instruction? If so, it would be cleaner and easier to read and easier to tell what you are checking with just the GFX9: image_sample... rtaylor: Do you need all this checking or can you just check the image_sample instruction? If so, it…
				sebastian-neAuthorUnsubmitted Not Done Reply Inline Actions Testing the image_sample instruction is not enough. In the previous tests, only the image_sample was tested and the tests passed. However, a16 was not working because packing the f16 arguments into the registers did work only partially. So the previous tests were incomplete. This is exactly the part that this test fills. The significant part of this test is not the image_sample, but the conversion and packing of all arguments into the registers. It tests the part that went wrong before this patch. Is the intent of this test clear? I can try to explain it more detailed. With my updated patch, the other tests check more details, so we could also remove this small test. Any opinions on this? sebastian-ne: Testing the image_sample instruction is not enough. In the previous tests, only the…
				main_body:
				%s = fptrunc float %sf to half
				%t = fptrunc float %tf to half
				%v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %smpl, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0

				attributes #0 = { nounwind readonly }

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll

	; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN %s			; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN %s

	; GCN-LABEL: {{^}}gather4_2d:			; GCN-LABEL: {{^}}gather4_2d:
	; GCN: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16{{$}}			; GCN: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {			define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <2 x half> %args) {
				rtaylorUnsubmitted Not Done Reply Inline Actions What is the point of combining the halves into vectors? I think this makes the test code unnecessarily complex and less readable. rtaylor: What is the point of combining the halves into vectors? I think this makes the test code…
				nhaehnleUnsubmitted Not Done Reply Inline Actions Ideally, this test should be changed into one that uses the update_llc_test_checks script; with that, having the inputs packed like this will highlight some inefficiencies in the codegen that we should fix. nhaehnle: Ideally, this test should be changed into one that uses the update_llc_test_checks script; with…
				rtaylorUnsubmitted Not Done Reply Inline Actions Nicolai, can you be more specific, what inefficiencies? rtaylor: Nicolai, can you be more specific, what inefficiencies?
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%t = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_cube:			; GCN-LABEL: {{^}}gather4_cube:
	; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da{{$}}			; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da{{$}}
	define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {			define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%face = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_2darray:			; GCN-LABEL: {{^}}gather4_2darray:
	; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da{{$}}			; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da{{$}}
	define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {			define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <3 x half> %args) {
	arsenmUnsubmitted Not Done Reply Inline Actions These tests changes seem independent, but I think having the separate scalars has more value for testing the packing code actually works arsenm: These tests changes seem independent, but I think having the separate scalars has more value…
	sebastian-neAuthorUnsubmitted Not Done Reply Inline Actions Well, before this patch the packing did not work but the tests passed ;) To test the packing, I added the new test. nhaehnle said the amdgpu_ps calling convention is not build to handle f16 arguments, so it is not clear if they are packed or not? The llvm.amdgcn.image.a16.dim.ll test, which takes i16 instead of halfs, also uses packed arguments. It groups all arguments into <2 x i16>s. sebastian-ne: Well, before this patch the packing did not work but the tests passed ;) To test the packing, I…
	arsenmUnsubmitted Not Done Reply Inline Actions f16 arguments work. They are not packed arsenm: f16 arguments work. They are not packed
	sebastian-neAuthorUnsubmitted Not Done Reply Inline Actions I forgot to add, some testcases later down used e.g. v[2:9] as arguments, using the packed arguments ensures that always v[0:…] is used so the test should not be influenced by other optimizations or changes of the compiler. sebastian-ne: I forgot to add, some testcases later down used e.g. v[2:9] as arguments, using the packed…
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%slice = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_c_2d:			; GCN-LABEL: {{^}}gather4_c_2d:
	; GCN: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}			; GCN: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {			define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%t = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_cl_2d:			; GCN-LABEL: {{^}}gather4_cl_2d:
	; GCN: image_gather4_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}			; GCN: image_gather4_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%clamp = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_c_cl_2d:			; GCN-LABEL: {{^}}gather4_c_cl_2d:
	; GCN: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}			; GCN: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%clamp = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_b_2d:			; GCN-LABEL: {{^}}gather4_b_2d:
	; GCN: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}			; GCN: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {			define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%t = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_c_b_2d:			; GCN-LABEL: {{^}}gather4_c_b_2d:
	; GCN: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}			; GCN: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {			define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%t = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_b_cl_2d:			; GCN-LABEL: {{^}}gather4_b_cl_2d:
	; GCN: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}			; GCN: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%clamp = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_c_b_cl_2d:			; GCN-LABEL: {{^}}gather4_c_b_cl_2d:
	; GCN: image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}			; GCN: image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%clamp = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_l_2d:			; GCN-LABEL: {{^}}gather4_l_2d:
	; GCN: image_gather4_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}			; GCN: image_gather4_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {			define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%lod = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_c_l_2d:			; GCN-LABEL: {{^}}gather4_c_l_2d:
	; GCN: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}			; GCN: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {			define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%lod = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_lz_2d:			; GCN-LABEL: {{^}}gather4_lz_2d:
	; GCN: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16{{$}}			; GCN: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {			define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%t = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}gather4_c_lz_2d:			; GCN-LABEL: {{^}}gather4_c_lz_2d:
	; GCN: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}			; GCN: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
	define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {			define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%t = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	Show All 18 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll

	; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN %s			; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN %s
	; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {			define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_2d:			; GCN-LABEL: {{^}}sample_2d:
	; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {			define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%t = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_3d:			; GCN-LABEL: {{^}}sample_3d:
	; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {			define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%r = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_cube:			; GCN-LABEL: {{^}}sample_cube:
	; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}}			; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}}
	define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {			define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%face = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_1darray:			; GCN-LABEL: {{^}}sample_1darray:
	; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da{{$}}			; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da{{$}}
	define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {			define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%slice = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_2darray:			; GCN-LABEL: {{^}}sample_2darray:
	; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}}			; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}}
	define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {			define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%slice = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_1d:			; GCN-LABEL: {{^}}sample_c_1d:
	; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {			define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_2d:			; GCN-LABEL: {{^}}sample_c_2d:
	; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {			define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%t = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_cl_1d:			; GCN-LABEL: {{^}}sample_cl_1d:
	; GCN: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {			define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%clamp = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_cl_2d:			; GCN-LABEL: {{^}}sample_cl_2d:
	; GCN: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%clamp = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_cl_1d:			; GCN-LABEL: {{^}}sample_c_cl_1d:
	; GCN: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {			define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%clamp = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_cl_2d:			; GCN-LABEL: {{^}}sample_c_cl_2d:
	; GCN: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%clamp = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_b_1d:			; GCN-LABEL: {{^}}sample_b_1d:
	; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {			define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_b_2d:			; GCN-LABEL: {{^}}sample_b_2d:
	; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {			define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%t = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_b_1d:			; GCN-LABEL: {{^}}sample_c_b_1d:
	; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {			define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_b_2d:			; GCN-LABEL: {{^}}sample_c_b_2d:
	; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {			define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%t = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_b_cl_1d:			; GCN-LABEL: {{^}}sample_b_cl_1d:
	; GCN: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {			define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%clamp = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_b_cl_2d:			; GCN-LABEL: {{^}}sample_b_cl_2d:
	; GCN: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%clamp = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_b_cl_1d:			; GCN-LABEL: {{^}}sample_c_b_cl_1d:
	; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {			define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%clamp = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_b_cl_2d:			; GCN-LABEL: {{^}}sample_c_b_cl_2d:
	; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%clamp = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_d_1d:			; GCN-LABEL: {{^}}sample_d_1d:
	; GCN: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {			define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <3 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <3 x half> %args, i32 0
				%dsdv = extractelement <3 x half> %args, i32 1
				%s = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_d_2d:			; GCN-LABEL: {{^}}sample_d_2d:
	; GCN: image_sample_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {			define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <6 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <6 x half> %args, i32 0
				%dtdh = extractelement <6 x half> %args, i32 1
				%dsdv = extractelement <6 x half> %args, i32 2
				%dtdv = extractelement <6 x half> %args, i32 3
				%s = extractelement <6 x half> %args, i32 4
				%t = extractelement <6 x half> %args, i32 5
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABAL: {{^}}sample_d_3d:			; GCN-LABAL: {{^}}sample_d_3d:
	; GCN: image_sample_d v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {			define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <9 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <9 x half> %args, i32 0
				%dtdh = extractelement <9 x half> %args, i32 1
				%drdh = extractelement <9 x half> %args, i32 2
				%dsdv = extractelement <9 x half> %args, i32 3
				%dtdv = extractelement <9 x half> %args, i32 4
				%drdv = extractelement <9 x half> %args, i32 5
				%s = extractelement <9 x half> %args, i32 6
				%t = extractelement <9 x half> %args, i32 7
				%r = extractelement <9 x half> %args, i32 8
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_d_1d:			; GCN-LABEL: {{^}}sample_c_d_1d:
	; GCN: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {			define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <3 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <3 x half> %args, i32 0
				%dsdv = extractelement <3 x half> %args, i32 1
				%s = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_d_2d:			; GCN-LABEL: {{^}}sample_c_d_2d:
	; GCN: image_sample_c_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {			define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <6 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <6 x half> %args, i32 0
				%dtdh = extractelement <6 x half> %args, i32 1
				%dsdv = extractelement <6 x half> %args, i32 2
				%dtdv = extractelement <6 x half> %args, i32 3
				%s = extractelement <6 x half> %args, i32 4
				%t = extractelement <6 x half> %args, i32 5
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_d_cl_1d:			; GCN-LABEL: {{^}}sample_d_cl_1d:
	; GCN: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {			define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <4 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <4 x half> %args, i32 0
				%dsdv = extractelement <4 x half> %args, i32 1
				%s = extractelement <4 x half> %args, i32 2
				%clamp = extractelement <4 x half> %args, i32 3
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_d_cl_2d:			; GCN-LABEL: {{^}}sample_d_cl_2d:
	; GCN: image_sample_d_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <7 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <7 x half> %args, i32 0
				%dtdh = extractelement <7 x half> %args, i32 1
				%dsdv = extractelement <7 x half> %args, i32 2
				%dtdv = extractelement <7 x half> %args, i32 3
				%s = extractelement <7 x half> %args, i32 4
				%t = extractelement <7 x half> %args, i32 5
				%clamp = extractelement <7 x half> %args, i32 6
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_d_cl_1d:			; GCN-LABEL: {{^}}sample_c_d_cl_1d:
	; GCN: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {			define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <4 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <4 x half> %args, i32 0
				%dsdv = extractelement <4 x half> %args, i32 1
				%s = extractelement <4 x half> %args, i32 2
				%clamp = extractelement <4 x half> %args, i32 3
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_d_cl_2d:			; GCN-LABEL: {{^}}sample_c_d_cl_2d:
	; GCN: image_sample_c_d_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <7 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <7 x half> %args, i32 0
				%dtdh = extractelement <7 x half> %args, i32 1
				%dsdv = extractelement <7 x half> %args, i32 2
				%dtdv = extractelement <7 x half> %args, i32 3
				%s = extractelement <7 x half> %args, i32 4
				%t = extractelement <7 x half> %args, i32 5
				%clamp = extractelement <7 x half> %args, i32 6
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_cd_1d:			; GCN-LABEL: {{^}}sample_cd_1d:
	; GCN: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {			define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <3 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <3 x half> %args, i32 0
				%dsdv = extractelement <3 x half> %args, i32 1
				%s = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_cd_2d:			; GCN-LABEL: {{^}}sample_cd_2d:
	; GCN: image_sample_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {			define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <6 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <6 x half> %args, i32 0
				%dtdh = extractelement <6 x half> %args, i32 1
				%dsdv = extractelement <6 x half> %args, i32 2
				%dtdv = extractelement <6 x half> %args, i32 3
				%s = extractelement <6 x half> %args, i32 4
				%t = extractelement <6 x half> %args, i32 5
	%v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_cd_1d:			; GCN-LABEL: {{^}}sample_c_cd_1d:
	; GCN: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {			define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <3 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <3 x half> %args, i32 0
				%dsdv = extractelement <3 x half> %args, i32 1
				%s = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_cd_2d:			; GCN-LABEL: {{^}}sample_c_cd_2d:
	; GCN: image_sample_c_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {			define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <6 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <6 x half> %args, i32 0
				%dtdh = extractelement <6 x half> %args, i32 1
				%dsdv = extractelement <6 x half> %args, i32 2
				%dtdv = extractelement <6 x half> %args, i32 3
				%s = extractelement <6 x half> %args, i32 4
				%t = extractelement <6 x half> %args, i32 5
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_cd_cl_1d:			; GCN-LABEL: {{^}}sample_cd_cl_1d:
	; GCN: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {			define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <4 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <4 x half> %args, i32 0
				%dsdv = extractelement <4 x half> %args, i32 1
				%s = extractelement <4 x half> %args, i32 2
				%clamp = extractelement <4 x half> %args, i32 3
	%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_cd_cl_2d:			; GCN-LABEL: {{^}}sample_cd_cl_2d:
	; GCN: image_sample_cd_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <7 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <7 x half> %args, i32 0
				%dtdh = extractelement <7 x half> %args, i32 1
				%dsdv = extractelement <7 x half> %args, i32 2
				%dtdv = extractelement <7 x half> %args, i32 3
				%s = extractelement <7 x half> %args, i32 4
				%t = extractelement <7 x half> %args, i32 5
				%clamp = extractelement <7 x half> %args, i32 6
	%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_cd_cl_1d:			; GCN-LABEL: {{^}}sample_c_cd_cl_1d:
	; GCN: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {			define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <4 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <4 x half> %args, i32 0
				%dsdv = extractelement <4 x half> %args, i32 1
				%s = extractelement <4 x half> %args, i32 2
				%clamp = extractelement <4 x half> %args, i32 3
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_cd_cl_2d:			; GCN-LABEL: {{^}}sample_c_cd_cl_2d:
	; GCN: image_sample_c_cd_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <7 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <7 x half> %args, i32 0
				%dtdh = extractelement <7 x half> %args, i32 1
				%dsdv = extractelement <7 x half> %args, i32 2
				%dtdv = extractelement <7 x half> %args, i32 3
				%s = extractelement <7 x half> %args, i32 4
				%t = extractelement <7 x half> %args, i32 5
				%clamp = extractelement <7 x half> %args, i32 6
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_l_1d:			; GCN-LABEL: {{^}}sample_l_1d:
	; GCN: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {			define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%lod = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_l_2d:			; GCN-LABEL: {{^}}sample_l_2d:
	; GCN: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {			define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%lod = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_l_1d:			; GCN-LABEL: {{^}}sample_c_l_1d:
	; GCN: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {			define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%lod = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_l_2d:			; GCN-LABEL: {{^}}sample_c_l_2d:
	; GCN: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {			define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <3 x half> %args) {
	main_body:			main_body:
				%s = extractelement <3 x half> %args, i32 0
				%t = extractelement <3 x half> %args, i32 1
				%lod = extractelement <3 x half> %args, i32 2
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_lz_1d:			; GCN-LABEL: {{^}}sample_lz_1d:
	; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {			define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_lz_2d:			; GCN-LABEL: {{^}}sample_lz_2d:
	; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {			define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%t = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_lz_1d:			; GCN-LABEL: {{^}}sample_c_lz_1d:
	; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {			define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_lz_2d:			; GCN-LABEL: {{^}}sample_c_lz_2d:
	; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}			; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
	define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {			define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, <2 x half> %args) {
	main_body:			main_body:
				%s = extractelement <2 x half> %args, i32 0
				%t = extractelement <2 x half> %args, i32 1
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1:			; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1:
	; GCN: image_sample_c_d_o v0, v[2:9], s[0:7], s[8:11] dmask:0x4 a16 da{{$}}			; GCN: image_sample_c_d_o v0, v[0:7], s[0:7], s[8:11] dmask:0x4 a16 da{{$}}
	define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {			define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, <7 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <7 x half> %args, i32 0
				%dtdh = extractelement <7 x half> %args, i32 1
				%dsdv = extractelement <7 x half> %args, i32 2
				%dtdv = extractelement <7 x half> %args, i32 3
				%s = extractelement <7 x half> %args, i32 4
				%t = extractelement <7 x half> %args, i32 5
				%slice = extractelement <7 x half> %args, i32 6
	%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret float %v			ret float %v
	}			}

	; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2:			; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2:
	; GCN: image_sample_c_d_o v[0:1], v[2:9], s[0:7], s[8:11] dmask:0x6 a16 da{{$}}			; GCN: image_sample_c_d_o v[0:1], v[0:7], s[0:7], s[8:11] dmask:0x6 a16 da{{$}}
	define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {			define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, <7 x half> %args) {
	main_body:			main_body:
				%dsdh = extractelement <7 x half> %args, i32 0
				%dtdh = extractelement <7 x half> %args, i32 1
				%dsdv = extractelement <7 x half> %args, i32 2
				%dtdv = extractelement <7 x half> %args, i32 3
				%s = extractelement <7 x half> %args, i32 4
				%t = extractelement <7 x half> %args, i32 5
				%slice = extractelement <7 x half> %args, i32 6
	%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <2 x float> %v			ret <2 x float> %v
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	▲ Show 20 Lines • Show All 55 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Fix lowering a16 image intrinsics
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 240589

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Fix lowering a16 image intrinsicsClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 240589

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll

[AMDGPU] Fix lowering a16 image intrinsics
ClosedPublic