This is an archive of the discontinued LLVM Phabricator instance.

InstCombine/AMDGPU: Add dimension-aware image intrinsics to SimplifyDemanded
ClosedPublic

Authored by nhaehnle on Jun 14 2018, 6:18 AM.

Download Raw Diff

Details

Reviewers

arsenm
rampitec
majnemer

Commits

rGb29ee701229b: InstCombine/AMDGPU: Add dimension-aware image intrinsics to SimplifyDemanded
rL335230: InstCombine/AMDGPU: Add dimension-aware image intrinsics to SimplifyDemanded

Summary

Use the expanded features of the TableGen generic tables to avoid manually
adding the combinatorially exploded set of intrinsics. The
getAMDGPUImageDimIntrinsic lookup function is early-out,
i.e. non-AMDGPU intrinsics will never look at the underlying table.

Use a generic approach for getting the new intrinsic overload to keep the
code simple, and make the image dmask handling more generic:

handle non-sampler image loads
handle the case where the set of demanded elements is not a prefix

There is some overlap between this code and an optimization that happens
in the backend during code generation. They currently complement each other:

only the codegen optimization can generate vec3 loads
only the InstCombine optimization can handle D16

The InstCombine optimization also likely covers more cases since the
codegen optimization is fairly ad-hoc. Ideally, we'll remove the optimization
in codegen once the infrastructure for vec3 is in place (which will probably
take a long time).

Modify the test cases to use dimension-aware intrinsics. This makes it
easier to see that the test coverage for the new intrinsics is equivalent,
and the old style intrinsics will be removed in a follow-up commit anyway.

Change-Id: I4b91ea661413d13004956fe4ef7d13d41b8ce3ad

Diff Detail

Repository: rL LLVM

Event Timeline

nhaehnle created this revision.Jun 14 2018, 6:18 AM

Herald added subscribers: t-tye, tpr, dstuttard and 4 others. · View Herald TranscriptJun 14 2018, 6:18 AM

Harbormaster completed remote builds in B19338: Diff 151337.Jun 14 2018, 6:18 AM

nhaehnle added parent revisions: D48017: AMDGPU: Select MIMG instructions manually in SITargetLowering, D48013: TableGen/SearchableTables: Support more generic enums and tables.Jun 14 2018, 6:19 AM

nhaehnle added a child revision: D48167: AMDGPU: Remove old-style image intrinsics.Jun 14 2018, 6:23 AM

LGTM

This revision is now accepted and ready to land.Jun 14 2018, 9:38 AM

Rebased.

Harbormaster completed remote builds in B19463: Diff 151873.Jun 19 2018, 2:22 AM

Closed by commit rL335230: InstCombine/AMDGPU: Add dimension-aware image intrinsics to SimplifyDemanded (authored by nha). · Explain WhyJun 21 2018, 6:42 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

include/

llvm/

IR/

IntrinsicsAMDGPU.td

30 lines

lib/

Transforms/

InstCombine/

CMakeLists.txt

4 lines

InstCombineInternal.h

4 lines

InstCombineSimplifyDemanded.cpp

193 lines

InstCombineTables.td

11 lines

test/

Transforms/

InstCombine/

AMDGPU/

amdgcn-demanded-vector-elts.ll

979 lines

Diff 152280

llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td

Show First 20 Lines • Show All 797 Lines • ▼ Show 20 Lines	!listconcat(
llvm_i1_ty], []), // unorm(imm)		llvm_i1_ty], []), // unorm(imm)
[llvm_i32_ty, // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)		[llvm_i32_ty, // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc)		llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
props, "", sdnodeprops>,		props, "", sdnodeprops>,
AMDGPURsrcIntrinsic<!add(!size(P_.DataArgs), !size(P_.AddrTypes),		AMDGPURsrcIntrinsic<!add(!size(P_.DataArgs), !size(P_.AddrTypes),
!if(P_.IsAtomic, 0, 1)), 1> {		!if(P_.IsAtomic, 0, 1)), 1> {
AMDGPUDimProfile P = P_;		AMDGPUDimProfile P = P_;

		AMDGPUImageDimIntrinsic Intr = !cast<AMDGPUImageDimIntrinsic>(NAME);

let TargetPrefix = "amdgcn";		let TargetPrefix = "amdgcn";
}		}

		// Marker class for intrinsics with a DMask that determines the returned
		// channels.
		class AMDGPUImageDMaskIntrinsic;

defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {		defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {

//////////////////////////////////////////////////////////////////////////		//////////////////////////////////////////////////////////////////////////
// Load and store intrinsics		// Load and store intrinsics
//////////////////////////////////////////////////////////////////////////		//////////////////////////////////////////////////////////////////////////
multiclass AMDGPUImageDimIntrinsicsNoMsaa<string opmod,		multiclass AMDGPUImageDimIntrinsicsNoMsaa<string opmod,
list<LLVMType> retty,		list<LLVMType> retty,
list<AMDGPUArg> dataargs,		list<AMDGPUArg> dataargs,
Show All 17 Lines	multiclass AMDGPUImageDimIntrinsicsAll<string opmod,
foreach dim = AMDGPUDims.All in {		foreach dim = AMDGPUDims.All in {
def !strconcat(NAME, "_", dim.Name)		def !strconcat(NAME, "_", dim.Name)
: AMDGPUImageDimIntrinsic<		: AMDGPUImageDimIntrinsic<
AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>,		AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>,
props, sdnodeprops>;		props, sdnodeprops>;
}		}
}		}

defm int_amdgcn_image_load : AMDGPUImageDimIntrinsicsAll<		defm int_amdgcn_image_load
"LOAD", [llvm_anyfloat_ty], [], [IntrReadMem], [SDNPMemOperand]>;		: AMDGPUImageDimIntrinsicsAll<"LOAD", [llvm_anyfloat_ty], [], [IntrReadMem],
defm int_amdgcn_image_load_mip : AMDGPUImageDimIntrinsicsNoMsaa<		[SDNPMemOperand]>,
"LOAD_MIP", [llvm_anyfloat_ty], [], [IntrReadMem], [SDNPMemOperand], 1>;		AMDGPUImageDMaskIntrinsic;
		defm int_amdgcn_image_load_mip
		: AMDGPUImageDimIntrinsicsNoMsaa<"LOAD_MIP", [llvm_anyfloat_ty], [],
		[IntrReadMem], [SDNPMemOperand], 1>,
		AMDGPUImageDMaskIntrinsic;

defm int_amdgcn_image_store : AMDGPUImageDimIntrinsicsAll<		defm int_amdgcn_image_store : AMDGPUImageDimIntrinsicsAll<
"STORE", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],		"STORE", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
[IntrWriteMem], [SDNPMemOperand]>;		[IntrWriteMem], [SDNPMemOperand]>;
defm int_amdgcn_image_store_mip : AMDGPUImageDimIntrinsicsNoMsaa<		defm int_amdgcn_image_store_mip : AMDGPUImageDimIntrinsicsNoMsaa<
"STORE_MIP", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],		"STORE_MIP", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
[IntrWriteMem], [SDNPMemOperand], 1>;		[IntrWriteMem], [SDNPMemOperand], 1>;

//////////////////////////////////////////////////////////////////////////		//////////////////////////////////////////////////////////////////////////
// sample and getlod intrinsics		// sample and getlod intrinsics
//////////////////////////////////////////////////////////////////////////		//////////////////////////////////////////////////////////////////////////
multiclass AMDGPUImageDimSampleDims<string opmod,		multiclass AMDGPUImageDimSampleDims<string opmod,
AMDGPUSampleVariant sample,		AMDGPUSampleVariant sample,
bit NoMem = 0> {		bit NoMem = 0> {
foreach dim = AMDGPUDims.NoMsaa in {		foreach dim = AMDGPUDims.NoMsaa in {
def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<		def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
AMDGPUDimSampleProfile<opmod, dim, sample>,		AMDGPUDimSampleProfile<opmod, dim, sample>,
!if(NoMem, [IntrNoMem], [IntrReadMem]),		!if(NoMem, [IntrNoMem], [IntrReadMem]),
!if(NoMem, [], [SDNPMemOperand])>;		!if(NoMem, [], [SDNPMemOperand])>;
}		}
}		}

foreach sample = AMDGPUSampleVariants in {		foreach sample = AMDGPUSampleVariants in {
defm int_amdgcn_image_sample # sample.LowerCaseMod :		defm int_amdgcn_image_sample # sample.LowerCaseMod
AMDGPUImageDimSampleDims<"SAMPLE" # sample.UpperCaseMod, sample>;		: AMDGPUImageDimSampleDims<"SAMPLE" # sample.UpperCaseMod, sample>,
		AMDGPUImageDMaskIntrinsic;
}		}

defm int_amdgcn_image_getlod : AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>;		defm int_amdgcn_image_getlod
		: AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>,
		AMDGPUImageDMaskIntrinsic;

//////////////////////////////////////////////////////////////////////////		//////////////////////////////////////////////////////////////////////////
// getresinfo intrinsics		// getresinfo intrinsics
//////////////////////////////////////////////////////////////////////////		//////////////////////////////////////////////////////////////////////////
foreach dim = AMDGPUDims.All in {		foreach dim = AMDGPUDims.All in {
def !strconcat("int_amdgcn_image_getresinfo_", dim.Name)		def !strconcat("int_amdgcn_image_getresinfo_", dim.Name)
: AMDGPUImageDimIntrinsic<AMDGPUDimGetResInfoProfile<dim>, [IntrNoMem], []>;		: AMDGPUImageDimIntrinsic<AMDGPUDimGetResInfoProfile<dim>, [IntrNoMem], []>,
		AMDGPUImageDMaskIntrinsic;
}		}

//////////////////////////////////////////////////////////////////////////		//////////////////////////////////////////////////////////////////////////
// gather4 intrinsics		// gather4 intrinsics
//////////////////////////////////////////////////////////////////////////		//////////////////////////////////////////////////////////////////////////
foreach sample = AMDGPUSampleVariantsNoGradients in {		foreach sample = AMDGPUSampleVariantsNoGradients in {
foreach dim = [AMDGPUDim2D, AMDGPUDimCube, AMDGPUDim2DArray] in {		foreach dim = [AMDGPUDim2D, AMDGPUDimCube, AMDGPUDim2DArray] in {
def int_amdgcn_image_gather4 # sample.LowerCaseMod # _ # dim.Name:		def int_amdgcn_image_gather4 # sample.LowerCaseMod # _ # dim.Name:
▲ Show 20 Lines • Show All 594 Lines • Show Last 20 Lines

llvm/trunk/lib/Transforms/InstCombine/CMakeLists.txt

				set(LLVM_TARGET_DEFINITIONS InstCombineTables.td)
				tablegen(LLVM InstCombineTables.inc -gen-searchable-tables)
				add_public_tablegen_target(InstCombineTableGen)

	add_llvm_library(LLVMInstCombine			add_llvm_library(LLVMInstCombine
	InstructionCombining.cpp			InstructionCombining.cpp
	InstCombineAddSub.cpp			InstCombineAddSub.cpp
	InstCombineAndOrXor.cpp			InstCombineAndOrXor.cpp
	InstCombineCalls.cpp			InstCombineCalls.cpp
	InstCombineCasts.cpp			InstCombineCasts.cpp
	InstCombineCompares.cpp			InstCombineCompares.cpp
	InstCombineLoadStoreAlloca.cpp			InstCombineLoadStoreAlloca.cpp
	Show All 14 Lines

llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h

Show First 20 Lines • Show All 700 Lines • ▼ Show 20 Lines	private:
Value *simplifyShrShlDemandedBits(		Value *simplifyShrShlDemandedBits(
Instruction Shr, const APInt &ShrOp1, Instruction Shl,		Instruction Shr, const APInt &ShrOp1, Instruction Shl,
const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known);		const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known);

/// Tries to simplify operands to an integer instruction based on its		/// Tries to simplify operands to an integer instruction based on its
/// demanded bits.		/// demanded bits.
bool SimplifyDemandedInstructionBits(Instruction &Inst);		bool SimplifyDemandedInstructionBits(Instruction &Inst);

		Value simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst II,
		APInt DemandedElts,
		int DmaskIdx = -1);

Value SimplifyDemandedVectorElts(Value V, APInt DemandedElts,		Value SimplifyDemandedVectorElts(Value V, APInt DemandedElts,
APInt &UndefElts, unsigned Depth = 0);		APInt &UndefElts, unsigned Depth = 0);

/// Canonicalize the position of binops relative to shufflevector.		/// Canonicalize the position of binops relative to shufflevector.
Instruction *foldShuffledBinop(BinaryOperator &Inst);		Instruction *foldShuffledBinop(BinaryOperator &Inst);

/// Given a binary operator, cast instruction, or select which has a PHI node		/// Given a binary operator, cast instruction, or select which has a PHI node
/// as operand #0, see if we can fold the instruction into the PHI (which is		/// as operand #0, see if we can fold the instruction into the PHI (which is
▲ Show 20 Lines • Show All 126 Lines • Show Last 20 Lines

llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Show All 17 Lines
#include "llvm/IR/PatternMatch.h"		#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"		#include "llvm/Support/KnownBits.h"

using namespace llvm;		using namespace llvm;
using namespace llvm::PatternMatch;		using namespace llvm::PatternMatch;

#define DEBUG_TYPE "instcombine"		#define DEBUG_TYPE "instcombine"

		namespace {

		struct AMDGPUImageDMaskIntrinsic {
		unsigned Intr;
		};

		#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
		#include "InstCombineTables.inc"

		} // end anonymous namespace

/// Check to see if the specified operand of the specified instruction is a		/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the		/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return true.		/// constant that are not demanded. If so, shrink the constant and return true.
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,		static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
const APInt &Demanded) {		const APInt &Demanded) {
assert(I && "No instruction?");		assert(I && "No instruction?");
assert(OpNo < I->getNumOperands() && "Operand index too large");		assert(OpNo < I->getNumOperands() && "Operand index too large");

▲ Show 20 Lines • Show All 870 Lines • ▼ Show 20 Lines	if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
}		}

return InsertNewInstWith(New, *Shl);		return InsertNewInstWith(New, *Shl);
}		}

return nullptr;		return nullptr;
}		}

		/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
		Value InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst II,
		APInt DemandedElts,
		int DMaskIdx) {
		unsigned VWidth = II->getType()->getVectorNumElements();
		if (VWidth == 1)
		return nullptr;

		ConstantInt *NewDMask = nullptr;

		if (DMaskIdx < 0) {
		// Pretend that a prefix of elements is demanded to simplify the code
		// below.
		DemandedElts = (1 << DemandedElts.getActiveBits()) - 1;
		} else {
		ConstantInt *DMask = dyn_cast<ConstantInt>(II->getArgOperand(DMaskIdx));
		if (!DMask)
		return nullptr; // non-constant dmask is not supported by codegen

		unsigned DMaskVal = DMask->getZExtValue() & 0xf;

		// Mask off values that are undefined because the dmask doesn't cover them
		DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;

		unsigned NewDMaskVal = 0;
		unsigned OrigLoadIdx = 0;
		for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
		const unsigned Bit = 1 << SrcIdx;
		if (!!(DMaskVal & Bit)) {
		if (!!(DemandedElts & (1 << OrigLoadIdx)))
		NewDMaskVal \|= Bit;
		OrigLoadIdx++;
		}
		}

		if (DMaskVal != NewDMaskVal)
		NewDMask = ConstantInt::get(DMask->getType(), NewDMaskVal);
		}

		// TODO: Handle 3 vectors when supported in code gen.
		unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countPopulation());
		if (!NewNumElts)
		return UndefValue::get(II->getType());

		if (NewNumElts >= VWidth && DemandedElts.isMask()) {
		if (NewDMask)
		II->setArgOperand(DMaskIdx, NewDMask);
		return nullptr;
		}

		// Determine the overload types of the original intrinsic.
		auto IID = II->getIntrinsicID();
		SmallVector<Intrinsic::IITDescriptor, 16> Table;
		getIntrinsicInfoTableEntries(IID, Table);
		ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;

		FunctionType *FTy = II->getCalledFunction()->getFunctionType();
		SmallVector<Type *, 6> OverloadTys;
		Intrinsic::matchIntrinsicType(FTy->getReturnType(), TableRef, OverloadTys);
		for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
		Intrinsic::matchIntrinsicType(FTy->getParamType(i), TableRef, OverloadTys);

		// Get the new return type overload of the intrinsic.
		Module *M = II->getParent()->getParent()->getParent();
		Type *EltTy = II->getType()->getVectorElementType();
		Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts);

		OverloadTys[0] = NewTy;
		Function *NewIntrin = Intrinsic::getDeclaration(M, IID, OverloadTys);

		SmallVector<Value *, 16> Args;
		for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
		Args.push_back(II->getArgOperand(I));

		if (NewDMask)
		Args[DMaskIdx] = NewDMask;

		IRBuilderBase::InsertPointGuard Guard(Builder);
		Builder.SetInsertPoint(II);

		CallInst *NewCall = Builder.CreateCall(NewIntrin, Args);
		NewCall->takeName(II);
		NewCall->copyMetadata(*II);

		if (NewNumElts == 1) {
		return Builder.CreateInsertElement(UndefValue::get(II->getType()), NewCall,
		DemandedElts.countTrailingZeros());
		}

		SmallVector<uint32_t, 8> EltMask;
		unsigned NewLoadIdx = 0;
		for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
		if (!!(DemandedElts & (1 << OrigLoadIdx)))
		EltMask.push_back(NewLoadIdx++);
		else
		EltMask.push_back(NewNumElts);
		}

		Value *Shuffle =
		Builder.CreateShuffleVector(NewCall, UndefValue::get(NewTy), EltMask);

		return Shuffle;
		}

/// The specified value produces a vector with any number of elements.		/// The specified value produces a vector with any number of elements.
/// DemandedElts contains the set of elements that are actually used by the		/// DemandedElts contains the set of elements that are actually used by the
/// caller. This method analyzes which elements of the operand are undef and		/// caller. This method analyzes which elements of the operand are undef and
/// returns that information in UndefElts.		/// returns that information in UndefElts.
///		///
/// If the information about demanded elements can be used to simplify the		/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified, then the resultant value is		/// operation, the operation is simplified, then the resultant value is
/// returned. This returns null if no change was made.		/// returned. This returns null if no change was made.
▲ Show 20 Lines • Show All 342 Lines • ▼ Show 20 Lines	TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
Depth + 1);		Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }		if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
break;		break;

case Instruction::Call: {		case Instruction::Call: {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);		IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
if (!II) break;		if (!II) break;
switch (II->getIntrinsicID()) {		switch (II->getIntrinsicID()) {
default: break;

case Intrinsic::x86_xop_vfrcz_ss:		case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:		case Intrinsic::x86_xop_vfrcz_sd:
// The instructions for these intrinsics are speced to zero upper bits not		// The instructions for these intrinsics are speced to zero upper bits not
// pass them through like other scalar intrinsics. So we shouldn't just		// pass them through like other scalar intrinsics. So we shouldn't just
// use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.		// use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.
// Instead we should return a zero vector.		// Instead we should return a zero vector.
if (!DemandedElts[0]) {		if (!DemandedElts[0]) {
Worklist.Add(II);		Worklist.Add(II);
▲ Show 20 Lines • Show All 297 Lines • ▼ Show 20 Lines	case Instruction::Call: {
case Intrinsic::amdgcn_image_sample_c_l_o:		case Intrinsic::amdgcn_image_sample_c_l_o:
case Intrinsic::amdgcn_image_sample_c_b_o:		case Intrinsic::amdgcn_image_sample_c_b_o:
case Intrinsic::amdgcn_image_sample_c_b_cl_o:		case Intrinsic::amdgcn_image_sample_c_b_cl_o:
case Intrinsic::amdgcn_image_sample_c_lz_o:		case Intrinsic::amdgcn_image_sample_c_lz_o:
case Intrinsic::amdgcn_image_sample_c_cd_o:		case Intrinsic::amdgcn_image_sample_c_cd_o:
case Intrinsic::amdgcn_image_sample_c_cd_cl_o:		case Intrinsic::amdgcn_image_sample_c_cd_cl_o:

case Intrinsic::amdgcn_image_getlod: {		case Intrinsic::amdgcn_image_getlod: {
if (VWidth == 1 \|\| !DemandedElts.isMask())
return nullptr;

// TODO: Handle 3 vectors when supported in code gen.
unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countTrailingOnes());
if (NewNumElts == VWidth)
return nullptr;

Module *M = II->getParent()->getParent()->getParent();
Type *EltTy = V->getType()->getVectorElementType();

Type *NewTy = (NewNumElts == 1) ? EltTy :
VectorType::get(EltTy, NewNumElts);

auto IID = II->getIntrinsicID();		auto IID = II->getIntrinsicID();

bool IsBuffer = IID == Intrinsic::amdgcn_buffer_load \|\|		bool IsBuffer = IID == Intrinsic::amdgcn_buffer_load \|\|
IID == Intrinsic::amdgcn_buffer_load_format;		IID == Intrinsic::amdgcn_buffer_load_format;
		return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts,
Function *NewIntrin = IsBuffer ?		IsBuffer ? -1 : 3);
Intrinsic::getDeclaration(M, IID, NewTy) :
// Samplers have 3 mangled types.
Intrinsic::getDeclaration(M, IID,
{ NewTy, II->getArgOperand(0)->getType(),
II->getArgOperand(1)->getType()});

SmallVector<Value *, 5> Args;
for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
Args.push_back(II->getArgOperand(I));

IRBuilderBase::InsertPointGuard Guard(Builder);
Builder.SetInsertPoint(II);

CallInst *NewCall = Builder.CreateCall(NewIntrin, Args);
NewCall->takeName(II);
NewCall->copyMetadata(*II);

if (!IsBuffer) {
ConstantInt *DMask = dyn_cast<ConstantInt>(NewCall->getArgOperand(3));
if (DMask) {
unsigned DMaskVal = DMask->getZExtValue() & 0xf;

unsigned PopCnt = 0;
unsigned NewDMask = 0;
for (unsigned I = 0; I < 4; ++I) {
const unsigned Bit = 1 << I;
if (!!(DMaskVal & Bit)) {
if (++PopCnt > NewNumElts)
break;

NewDMask \|= Bit;
}
}

NewCall->setArgOperand(3, ConstantInt::get(DMask->getType(), NewDMask));
}
}


if (NewNumElts == 1) {
return Builder.CreateInsertElement(UndefValue::get(V->getType()),
NewCall, static_cast<uint64_t>(0));
}		}
		default: {
		if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
		return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);

SmallVector<uint32_t, 8> EltMask;		break;
for (unsigned I = 0; I < VWidth; ++I)
EltMask.push_back(I);

Value *Shuffle = Builder.CreateShuffleVector(
NewCall, UndefValue::get(NewTy), EltMask);

MadeChange = true;
return Shuffle;
}		}
}		}
break;		break;
}		}
}		}
return MadeChange ? I : nullptr;		return MadeChange ? I : nullptr;
}		}

llvm/trunk/lib/Transforms/InstCombine/InstCombineTables.td

				include "llvm/TableGen/SearchableTable.td"
				include "llvm/IR/Intrinsics.td"

				def AMDGPUImageDMaskIntrinsicTable : GenericTable {
				let FilterClass = "AMDGPUImageDMaskIntrinsic";
				let Fields = ["Intr"];

				let PrimaryKey = ["Intr"];
				let PrimaryKeyName = "getAMDGPUImageDMaskIntrinsic";
				let PrimaryKeyEarlyOut = 1;
				}

llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll

	Show First 20 Lines • Show All 59 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {			define amdgpu_ps float @extract_elt0_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
	%data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt1_buffer_load_v4f32(			; CHECK-LABEL: @extract_elt1_buffer_load_v4f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)			; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
	; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 1			; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
	; CHECK-NEXT: ret float %elt1			; CHECK-NEXT: ret float %elt1
	define amdgpu_ps float @extract_elt1_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {			define amdgpu_ps float @extract_elt1_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
	%data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
	%elt1 = extractelement <4 x float> %data, i32 1			%elt1 = extractelement <4 x float> %data, i32 1
	ret float %elt1			ret float %elt1
	}			}

	; CHECK-LABEL: @extract_elt2_buffer_load_v4f32(			; CHECK-LABEL: @extract_elt2_buffer_load_v4f32(
	▲ Show 20 Lines • Show All 97 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {			define amdgpu_ps float @extract_elt0_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
	%data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)			%data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
	%elt0 = extractelement <3 x float> %data, i32 0			%elt0 = extractelement <3 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt1_buffer_load_v3f32(			; CHECK-LABEL: @extract_elt1_buffer_load_v3f32(
	; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)			; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
	; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 1			; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
	; CHECK-NEXT: ret float %elt1			; CHECK-NEXT: ret float %elt1
	define amdgpu_ps float @extract_elt1_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {			define amdgpu_ps float @extract_elt1_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
	%data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)			%data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
	%elt1 = extractelement <3 x float> %data, i32 1			%elt1 = extractelement <3 x float> %data, i32 1
	ret float %elt1			ret float %elt1
	}			}

	; CHECK-LABEL: @extract_elt2_buffer_load_v3f32(			; CHECK-LABEL: @extract_elt2_buffer_load_v3f32(
	▲ Show 20 Lines • Show All 122 Lines • ▼ Show 20 Lines
	declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #1			declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #1
	declare <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32>, i32, i32, i1, i1) #1			declare <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32>, i32, i32, i1, i1) #1
	declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1			declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample			; llvm.amdgcn.image.sample
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt0_image_sample_v4f32_v4f32_v4i32(			; CHECK-LABEL: @extract_elt0_image_sample_2d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt0_image_sample_v4f32_v2f32_v4i32(			; CHECK-LABEL: @extract_elt0_invalid_dmask_image_sample_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: %elt0 = extractelement <4 x float> %data, i32 0
	define amdgpu_ps float @extract_elt0_image_sample_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			; CHECK-NEXT: ret float %elt0
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			define amdgpu_ps float @extract_elt0_invalid_dmask_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask) #0 {
	%elt0 = extractelement <4 x float> %data, i32 0			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	ret float %elt0
	}

	; CHECK-LABEL: @extract_elt0_invalid_dmask_image_sample_v4f32_v4f32_v8i32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 %dmask, i1 false, i1 false, i1 false, i1 false, i1 false)
	; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_invalid_dmask_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 %dmask, i1 false, i1 false, i1 false, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; FIXME: Should really fold to undef			; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(
	; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_v4f32_v4f32_v8i32(			; CHECK-NEXT: ret float undef
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false)			define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	; CHECK-NEXT: ret float %data			%data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1darray.f32.f32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(float %s, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; FIXME: Should really fold to undef			; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(
	; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_v4f32_v4f32_v8i32(			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 2, i1 false, i1 false, i1 false, i1 false, i1 false)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 2, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; FIXME: Should really fold to undef			; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(
	; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_v4f32_v4f32_v8i32(			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 4, i1 false, i1 false, i1 false, i1 false, i1 false)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 4, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; FIXME: Should really fold to undef			; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(
	; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_v4f32_v4f32_v8i32(			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 8, i1 false, i1 false, i1 false, i1 false, i1 false)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 8, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 9, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 9, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(
	; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret <2 x float> %data			; CHECK-NEXT: %1 = insertelement <2 x float> undef, float %data, i32 0
	define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			; CHECK-NEXT: ret <2 x float> %1
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
				%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>			%shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
	ret <2 x float> %shuf			ret <2 x float> %shuf
	}			}

	; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(
	; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret <2 x float> %data			; CHECK-NEXT: ret <2 x float> %data
	define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>			%shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
	ret <2 x float> %shuf			ret <2 x float> %shuf
	}			}

	; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(
	; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret <2 x float> %data			; CHECK-NEXT: ret <2 x float> %data
	define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>			%shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
	ret <2 x float> %shuf			ret <2 x float> %shuf
	}			}

	; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(
	; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret <2 x float> %data			; CHECK-NEXT: ret <2 x float> %data
	define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>			%shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
	ret <2 x float> %shuf			ret <2 x float> %shuf
	}			}

	; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>			; CHECK-NEXT: %1 = insertelement <3 x float> undef, float %data, i32 0
				; CHECK-NEXT: ret <3 x float> %1
				define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
				%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
				%shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
				ret <3 x float> %shuf
				}

				; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(
				; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
				; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
	; CHECK-NEXT: ret <3 x float> %shuf			; CHECK-NEXT: ret <3 x float> %shuf
	define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>			%shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
	ret <3 x float> %shuf			ret <3 x float> %shuf
	}			}

	; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>			; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
	; CHECK-NEXT: ret <3 x float> %shuf			; CHECK-NEXT: ret <3 x float> %shuf
	define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>			%shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
	ret <3 x float> %shuf			ret <3 x float> %shuf
	}			}

	; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>			; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
	; CHECK-NEXT: ret <3 x float> %shuf			; CHECK-NEXT: ret <3 x float> %shuf
	define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>			%shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
	ret <3 x float> %shuf			ret <3 x float> %shuf
	}			}

	; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>			; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
	; CHECK-NEXT: ret <3 x float> %shuf			; CHECK-NEXT: ret <3 x float> %shuf
	define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>			%shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
	ret <3 x float> %shuf			ret <3 x float> %shuf
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.cl			; llvm.amdgcn.image.sample.cl
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_cl_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt1_image_sample_cl_2darray_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.2darray.f32.f32(i32 2, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt1_image_sample_cl_2darray_v4f32_f32(float %s, float %t, float %slice, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 1
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.d			; llvm.amdgcn.image.sample.d
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_d_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt2_image_sample_d_cube_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32(i32 4, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_d_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt2_image_sample_d_cube_v4f32_f32_f32(float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 2
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.d.cl			; llvm.amdgcn.image.sample.d.cl
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_d_cl_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.1d.f32.f32.f32(i32 8, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_d_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 3
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.d.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.l			; llvm.amdgcn.image.sample.l
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_l_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.1d.f32.f32(i32 4, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_l_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32 6, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <2 x float> %data, i32 1
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.b			; llvm.amdgcn.image.sample.b
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_b_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.1d.f32.f32.f32(i32 8, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_b_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 9, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 1
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.b.cl			; llvm.amdgcn.image.sample.b.cl
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_b_cl_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.b.cl.1d.v2f32.f32.f32(i32 12, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret <2 x float> %data
	define amdgpu_ps float @extract_elt0_image_sample_b_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps <2 x float> @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
	ret float %elt0			ret <2 x float> %shuf
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.lz			; llvm.amdgcn.image.sample.lz
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_lz_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.lz.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.lz.1d.v2f32.f32(i32 10, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret <2 x float> %data
	define amdgpu_ps float @extract_elt0_image_sample_lz_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps <2 x float> @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 3>
	ret float %elt0			ret <2 x float> %shuf
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.cd			; llvm.amdgcn.image.sample.cd
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_cd_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 14, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_cd_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			; TODO: This should be combined with the following shufflevector
	%data = call <4 x float> @llvm.amdgcn.image.sample.cd.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %1 = shufflevector <4 x float> %data, <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
	%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			; CHECK-NEXT: %shuf = shufflevector <4 x float> %1, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
				; CHECK-NEXT: ret <3 x float> %shuf
				define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
				%data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
				%shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
				ret <3 x float> %shuf
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.cd.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.cd.cl			; llvm.amdgcn.image.sample.cd.cl
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 1, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret half %data
	define amdgpu_ps float @extract_elt0_image_sample_cd_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps half @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x half> %data, i32 0
	ret float %elt0			ret half %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c			; llvm.amdgcn.image.sample.c
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.1d.f32.f32(i32 1, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt0_image_sample_c_v4f32_v4f32_v4i32(			declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
	; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0
	}

	; CHECK-LABEL: @extract_elt0_image_sample_c_v4f32_v2f32_v4i32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
	; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0
	}

	declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
	declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
	declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.cl			; llvm.amdgcn.image.sample.c.cl
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_cl_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_cl_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.1d.f32.f32(i32 1, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_cl_1d_v4f32_f32(float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.d			; llvm.amdgcn.image.sample.c.d
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_d_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_d_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.d.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.d.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.d.cl			; llvm.amdgcn.image.sample.c.d.cl
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.l			; llvm.amdgcn.image.sample.c.l
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_l_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_l_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.1d.f32.f32(i32 1, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_l_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_l_1d_v4f32_f32(float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.b			; llvm.amdgcn.image.sample.c.b
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_b_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.1d.f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_b_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.b.cl			; llvm.amdgcn.image.sample.c.b.cl
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.1d.f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.lz			; llvm.amdgcn.image.sample.c.lz
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_lz_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_lz_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.1d.f32.f32(i32 1, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_lz_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_lz_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.cd			; llvm.amdgcn.image.sample.c.cd
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_cd_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_cd_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.cd.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.cd.cl			; llvm.amdgcn.image.sample.c.cd.cl
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.o			; llvm.amdgcn.image.sample.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_o_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.1d.f32.f32(i32 1, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0
	}

	; CHECK-LABEL: @extract_elt0_image_sample_o_v4f32_v4f32_v4i32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt0_image_sample_o_v4f32_v2f32_v4i32(			declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
	; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0
	}

	declare <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
	declare <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
	declare <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.cl.o			; llvm.amdgcn.image.sample.cl.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_cl_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_cl_o_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.o.1d.f32.f32(i32 1, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_cl_o_1d_v4f32_f32(i32 %offset, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.d.o			; llvm.amdgcn.image.sample.d.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_d_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_d_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.d.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.d.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.d.cl.o			; llvm.amdgcn.image.sample.d.cl.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.l.o			; llvm.amdgcn.image.sample.l.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_l_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_l_o_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.o.1d.f32.f32(i32 1, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_l_o_1d_v4f32_f32(i32 %offset, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.b.o			; llvm.amdgcn.image.sample.b.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_b_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.b.cl.o			; llvm.amdgcn.image.sample.b.cl.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.lz.o			; llvm.amdgcn.image.sample.lz.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_lz_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_lz_o_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.lz.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.lz.o.1d.f32.f32(i32 1, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_lz_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.cd.o			; llvm.amdgcn.image.sample.cd.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_cd_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_cd_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.cd.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.cd.cl.o			; llvm.amdgcn.image.sample.cd.cl.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.o			; llvm.amdgcn.image.sample.c.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_o_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0
	}

	; CHECK-LABEL: @extract_elt0_image_sample_c_o_v4f32_v4f32_v4i32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
	; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0
	}

	; CHECK-LABEL: @extract_elt0_image_sample_c_o_v4f32_v2f32_v4i32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
	declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.cl.o			; llvm.amdgcn.image.sample.c.cl.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.d.o			; llvm.amdgcn.image.sample.c.d.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_d_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.d.cl.o			; llvm.amdgcn.image.sample.c.d.cl.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.l.o			; llvm.amdgcn.image.sample.c.l.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.b.o			; llvm.amdgcn.image.sample.c.b.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.b.cl.o			; llvm.amdgcn.image.sample.c.b.cl.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.lz.o			; llvm.amdgcn.image.sample.c.lz.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.cd.o			; llvm.amdgcn.image.sample.c.cd.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.sample.c.cd.cl.o			; llvm.amdgcn.image.sample.c.cd.cl.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4			; llvm.amdgcn.image.gather4
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; Don't handle gather4*			; Don't handle gather4*

	; CHECK-LABEL: @extract_elt0_image_gather4_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_2d_v4f32_f32(
	; CHECK: %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i3			; CHECK: %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt0_image_gather4_v4f32_v4f32_v4i32(			declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	define amdgpu_ps float @extract_elt0_image_gather4_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0
	}

	; CHECK-LABEL: @extract_elt0_image_gather4_v4f32_v2f32_v4i32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	define amdgpu_ps float @extract_elt0_image_gather4_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0
	}

	declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
	declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
	declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.cl			; llvm.amdgcn.image.gather4.cl
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_cl_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_cl_2d_v4f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_cl_2d_v4f32_f32(float %s, float %t, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.l			; llvm.amdgcn.image.gather4.l
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_l_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_l_2d_v4f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_l_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_l_2d_v4f32_f32(float %s, float %t, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.b			; llvm.amdgcn.image.gather4.b
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_b_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_b_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(float %bias, float %s, float %t, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.b.cl			; llvm.amdgcn.image.gather4.b.cl
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_b_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.lz			; llvm.amdgcn.image.gather4.lz
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_lz_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_lz_2d_v4f32_f16(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_lz_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_lz_2d_v4f32_f16(half %s, half %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.o			; llvm.amdgcn.image.gather4.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_o_2d_v4f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0
	}

	; CHECK-LABEL: @extract_elt0_image_gather4_o_v4f32_v4f32_v4i32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	define amdgpu_ps float @extract_elt0_image_gather4_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0
	}

	; CHECK-LABEL: @extract_elt0_image_gather4_o_v4f32_v2f32_v4i32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	define amdgpu_ps float @extract_elt0_image_gather4_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	declare <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
	declare <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.cl.o			; llvm.amdgcn.image.gather4.cl.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.l.o			; llvm.amdgcn.image.gather4.l.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_l_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_l_o_2d_v4f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_l_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.b.o			; llvm.amdgcn.image.gather4.b.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_b_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.b.cl.o			; llvm.amdgcn.image.gather4.b.cl.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.lz.o			; llvm.amdgcn.image.gather4.lz.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.c.o			; llvm.amdgcn.image.gather4.c.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_c_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_c_o_2d_v4f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_c_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_c_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt0_image_gather4_c_o_v4f32_v4f32_v4i32(			declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	define amdgpu_ps float @extract_elt0_image_gather4_c_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0
	}

	; CHECK-LABEL: @extract_elt0_image_gather4_c_o_v4f32_v2f32_v4i32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	define amdgpu_ps float @extract_elt0_image_gather4_c_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
	%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0
	}

	declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
	declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
	declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.c.cl.o			; llvm.amdgcn.image.gather4.c.cl.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.c.l.o			; llvm.amdgcn.image.gather4.c.l.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.c.b.o			; llvm.amdgcn.image.gather4.c.b.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.c.b.cl.o			; llvm.amdgcn.image.gather4.c.b.cl.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.gather4.c.lz.o			; llvm.amdgcn.image.gather4.c.lz.o
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(
	; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

	; --------------------------------------------------------------------			; --------------------------------------------------------------------
	; llvm.amdgcn.image.getlod			; llvm.amdgcn.image.getlod
	; --------------------------------------------------------------------			; --------------------------------------------------------------------

	; CHECK-LABEL: @extract_elt0_image_getlod_v4f32_v4f32_v8i32(			; CHECK-LABEL: @extract_elt0_image_getlod_1d_v4f32_f32(
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)			; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
				; CHECK-NEXT: ret float %data
				define amdgpu_ps float @extract_elt0_image_getlod_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
				%data = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
				%elt0 = extractelement <4 x float> %data, i32 0
				ret float %elt0
				}

				declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

				; --------------------------------------------------------------------
				; llvm.amdgcn.image.load
				; --------------------------------------------------------------------

				; CHECK-LABEL: @extract_elt0_image_load_2dmsaa_v4f32_i32(
				; CHECK-NEXT: %data = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_getlod_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_load_2dmsaa_v4f32_i32(i32 %s, i32 %t, i32 %sample, <8 x i32> inreg %sampler) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt0_image_getlod_v4f32_v4f32_v4i32(			declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
				; --------------------------------------------------------------------
				; llvm.amdgcn.image.load.mip
				; --------------------------------------------------------------------

				; CHECK-LABEL: @extract_elt0_image_load_mip_1d_v4f32_i32(
				; CHECK-NEXT: %data = call float @llvm.amdgcn.image.load.mip.1d.f32.i32(i32 1, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_getlod_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_load_mip_1d_v4f32_i32(i32 %s, i32 %mip, <8 x i32> inreg %sampler) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	; CHECK-LABEL: @extract_elt0_image_getlod_v4f32_v2f32_v4i32(			declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
	; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
				; --------------------------------------------------------------------
				; llvm.amdgcn.image.getresinfo
				; --------------------------------------------------------------------

				; CHECK-LABEL: @extract_elt0_image_getresinfo_1d_v4f32_i32(
				; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getresinfo.1d.f32.i32(i32 1, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
	; CHECK-NEXT: ret float %data			; CHECK-NEXT: ret float %data
	define amdgpu_ps float @extract_elt0_image_getlod_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {			define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8 x i32> inreg %sampler) #0 {
	%data = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)			%data = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
	%elt0 = extractelement <4 x float> %data, i32 0			%elt0 = extractelement <4 x float> %data, i32 0
	ret float %elt0			ret float %elt0
	}			}

	declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1			declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
	declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
	declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1

	attributes #0 = { nounwind }			attributes #0 = { nounwind }
	attributes #1 = { nounwind readonly }			attributes #1 = { nounwind readonly }

	!0 = !{float 2.500000e+00}			!0 = !{float 2.500000e+00}