Diff 401916

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Show First 20 Lines • Show All 91 Lines • ▼ Show 20 Lines	static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
if (VTy->isIntegerTy())		if (VTy->isIntegerTy())
return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);		return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
if (VTy->isFloatingPointTy())		if (VTy->isFloatingPointTy())
return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));		return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));

llvm_unreachable("Should never be called!");		llvm_unreachable("Should never be called!");
}		}

		/// Applies Function(II.Args, II.ArgTys) and replaces the intrinsic call with
		/// the modified arguments.
		static Optional<Instruction *> modifyIntrinsicCall(
		IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC,
		[Inline review comment — foad (Done): I don't understand why this needs to be a template, if FuncT is always the same as you have documented.]
		std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
		Func) {
		SmallVector<Type *, 4> ArgTys;
		if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
		return None;

		SmallVector<Value *, 8> Args(II.args());

		// Modify arguments and types
		Func(Args, ArgTys);

		Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys);

		CallInst *NewCall = IC.Builder.CreateCall(I, Args);
		NewCall->takeName(&II);
		NewCall->copyMetadata(II);
		if (isa<FPMathOperator>(NewCall))
		NewCall->copyFastMathFlags(&II);

		// Erase and replace uses
		if (!II.getType()->isVoidTy())
		IC.replaceInstUsesWith(II, NewCall);
		return IC.eraseInstFromFunction(II);
		}

static Optional<Instruction *>		static Optional<Instruction *>
simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,		simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,		const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
IntrinsicInst &II, InstCombiner &IC) {		IntrinsicInst &II, InstCombiner &IC) {
		// Optimize _L to _LZ when _L is zero
		if (const auto *LZMappingInfo =
		AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
		if (auto *ConstantLod =
		dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
		if (ConstantLod->isZero() || ConstantLod->isNegative()) {
		const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
		AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
		ImageDimIntr->Dim);
		return modifyIntrinsicCall(
		[Inline review comment — foad (Not Done): Is it possible that more than one of these optimizations applies to a single intrinsic call, and if so do you handle that case correctly? (E.g. when you apply one optimization, does simplifyAMDGCNImageIntrinsic get called again to apply other optimizations to the new intrinsic?)]
		[Inline review reply — sebastian-ne, author (Done): Yes, it gets called again. (E.g. the combines for `Intrinsic::amdgcn_fma_legacy` also return early when one of them matches.) The `@sample_b_1d_a16` test also checks that both the no-bias and the a16 combines are applied.]
		II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
		Args.erase(Args.begin() + ImageDimIntr->LodIndex);
		});
		}
		}
		}

		// Optimize _mip away, when 'lod' is zero
		if (const auto *MIPMappingInfo =
		AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
		if (auto *ConstantMip =
		dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
		if (ConstantMip->isZero()) {
		const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
		AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
		ImageDimIntr->Dim);
		return modifyIntrinsicCall(
		II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
		Args.erase(Args.begin() + ImageDimIntr->MipIndex);
		});
		}
		}
		}

		// Optimize _bias away when 'bias' is zero
		if (const auto *BiasMappingInfo =
		AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
		if (auto *ConstantBias =
		dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
		if (ConstantBias->isZero()) {
		const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
		AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
		ImageDimIntr->Dim);
		return modifyIntrinsicCall(
		II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
		Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
		ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
		});
		}
		}
		}

		// Try to use A16 or G16
if (!ST->hasA16() && !ST->hasG16())		if (!ST->hasA16() && !ST->hasG16())
return None;		return None;

bool FloatCoord = false;		bool FloatCoord = false;
// true means derivatives can be converted to 16 bit, coordinates not		// true means derivatives can be converted to 16 bit, coordinates not
bool OnlyDerivatives = false;		bool OnlyDerivatives = false;

for (unsigned OperandIndex = ImageDimIntr->GradientStart;		for (unsigned OperandIndex = ImageDimIntr->GradientStart;
Show All 27 Lines	simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,

if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==		if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
ImageDimIntr->CoordStart))		ImageDimIntr->CoordStart))
return None;		return None;

Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())		Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
: Type::getInt16Ty(II.getContext());		: Type::getInt16Ty(II.getContext());

SmallVector<Type *, 4> ArgTys;		return modifyIntrinsicCall(
if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))		II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
return None;

ArgTys[ImageDimIntr->GradientTyArg] = CoordType;		ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
if (!OnlyDerivatives) {		if (!OnlyDerivatives) {
ArgTys[ImageDimIntr->CoordTyArg] = CoordType;		ArgTys[ImageDimIntr->CoordTyArg] = CoordType;

// Change the bias type		// Change the bias type
if (ImageDimIntr->NumBiasArgs != 0)		if (ImageDimIntr->NumBiasArgs != 0)
ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());		ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
}		}
Function *I =
Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys);

SmallVector<Value *, 8> Args(II.args());

unsigned EndIndex =		unsigned EndIndex =
OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;		OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
for (unsigned OperandIndex = ImageDimIntr->GradientStart;		for (unsigned OperandIndex = ImageDimIntr->GradientStart;
OperandIndex < EndIndex; OperandIndex++) {		OperandIndex < EndIndex; OperandIndex++) {
Args[OperandIndex] =		Args[OperandIndex] =
convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);		convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
}		}

// Convert the bias		// Convert the bias
if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {		if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);		Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);		Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
}		}
		});
CallInst *NewCall = IC.Builder.CreateCall(I, Args);
NewCall->takeName(&II);
NewCall->copyMetadata(II);
if (isa<FPMathOperator>(NewCall))
NewCall->copyFastMathFlags(&II);
return IC.replaceInstUsesWith(II, NewCall);
}		}

bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,		bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
InstCombiner &IC) const {		InstCombiner &IC) const {
// The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or		// The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
// infinity, gives +0.0. If we can prove we don't have one of the special		// infinity, gives +0.0. If we can prove we don't have one of the special
// cases then we can use a normal multiply instead.		// cases then we can use a normal multiply instead.
// TODO: Create and use isKnownFiniteNonZero instead of just matching		// TODO: Create and use isKnownFiniteNonZero instead of just matching
▲ Show 20 Lines • Show All 907 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/MIMGInstructions.td

Show First 20 Lines • Show All 125 Lines • ▼ Show 20 Lines	def MIMGMIPMappingTable : GenericTable {
let Fields = ["MIP", "NONMIP"];		let Fields = ["MIP", "NONMIP"];
string TypeOf_MIP = "MIMGBaseOpcode";		string TypeOf_MIP = "MIMGBaseOpcode";
string TypeOf_NONMIP = "MIMGBaseOpcode";		string TypeOf_NONMIP = "MIMGBaseOpcode";

let PrimaryKey = ["MIP"];		let PrimaryKey = ["MIP"];
let PrimaryKeyName = "getMIMGMIPMappingInfo";		let PrimaryKeyName = "getMIMGMIPMappingInfo";
}		}

		class MIMGBiasMapping<MIMGBaseOpcode bias, MIMGBaseOpcode nobias> {
		MIMGBaseOpcode Bias = bias;
		MIMGBaseOpcode NoBias = nobias;
		}

		def MIMGBiasMappingTable : GenericTable {
		let FilterClass = "MIMGBiasMapping";
		let CppTypeName = "MIMGBiasMappingInfo";
		let Fields = ["Bias", "NoBias"];
		string TypeOf_Bias = "MIMGBaseOpcode";
		string TypeOf_NoBias = "MIMGBaseOpcode";

		let PrimaryKey = ["Bias"];
		let PrimaryKeyName = "getMIMGBiasMappingInfo";
		}

class MIMGG16Mapping<MIMGBaseOpcode g, MIMGBaseOpcode g16> {		class MIMGG16Mapping<MIMGBaseOpcode g, MIMGBaseOpcode g16> {
MIMGBaseOpcode G = g;		MIMGBaseOpcode G = g;
MIMGBaseOpcode G16 = g16;		MIMGBaseOpcode G16 = g16;
}		}

def MIMGG16MappingTable : GenericTable {		def MIMGG16MappingTable : GenericTable {
let FilterClass = "MIMGG16Mapping";		let FilterClass = "MIMGG16Mapping";
let CppTypeName = "MIMGG16MappingInfo";		let CppTypeName = "MIMGG16MappingInfo";
▲ Show 20 Lines • Show All 993 Lines • ▼ Show 20 Lines
def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>;		def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>;
def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>;		def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>;
def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;		def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;

// MIP to NONMIP Optimization Mapping		// MIP to NONMIP Optimization Mapping
def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;		def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;		def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;

		// Bias to NoBias Optimization Mapping
		def : MIMGBiasMapping<IMAGE_SAMPLE_B, IMAGE_SAMPLE>;
		def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL, IMAGE_SAMPLE_CL>;
		def : MIMGBiasMapping<IMAGE_SAMPLE_C_B, IMAGE_SAMPLE_C>;
		def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL, IMAGE_SAMPLE_C_CL>;
		def : MIMGBiasMapping<IMAGE_SAMPLE_B_O, IMAGE_SAMPLE_O>;
		def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL_O, IMAGE_SAMPLE_CL_O>;
		def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_O, IMAGE_SAMPLE_C_O>;
		def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL_O, IMAGE_SAMPLE_C_CL_O>;
		def : MIMGBiasMapping<IMAGE_GATHER4_B, IMAGE_GATHER4>;
		def : MIMGBiasMapping<IMAGE_GATHER4_B_CL, IMAGE_GATHER4_CL>;
		def : MIMGBiasMapping<IMAGE_GATHER4_C_B, IMAGE_GATHER4_C>;
		def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL, IMAGE_GATHER4_C_CL>;
		def : MIMGBiasMapping<IMAGE_GATHER4_B_O, IMAGE_GATHER4_O>;
		def : MIMGBiasMapping<IMAGE_GATHER4_B_CL_O, IMAGE_GATHER4_CL_O>;
		def : MIMGBiasMapping<IMAGE_GATHER4_C_B_O, IMAGE_GATHER4_C_O>;
		def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL_O, IMAGE_GATHER4_C_CL_O>;

// G to G16 Optimization Mapping		// G to G16 Optimization Mapping
def : MIMGG16Mapping<IMAGE_SAMPLE_D, IMAGE_SAMPLE_D_G16>;		def : MIMGG16Mapping<IMAGE_SAMPLE_D, IMAGE_SAMPLE_D_G16>;
def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL, IMAGE_SAMPLE_D_CL_G16>;		def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL, IMAGE_SAMPLE_D_CL_G16>;
def : MIMGG16Mapping<IMAGE_SAMPLE_C_D, IMAGE_SAMPLE_C_D_G16>;		def : MIMGG16Mapping<IMAGE_SAMPLE_C_D, IMAGE_SAMPLE_C_D_G16>;
def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_CL, IMAGE_SAMPLE_C_D_CL_G16>;		def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_CL, IMAGE_SAMPLE_C_D_CL_G16>;
def : MIMGG16Mapping<IMAGE_SAMPLE_D_O, IMAGE_SAMPLE_D_O_G16>;		def : MIMGG16Mapping<IMAGE_SAMPLE_D_O, IMAGE_SAMPLE_D_O_G16>;
def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL_O, IMAGE_SAMPLE_D_CL_O_G16>;		def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL_O, IMAGE_SAMPLE_D_CL_O_G16>;
def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_O, IMAGE_SAMPLE_C_D_O_G16>;		def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_O, IMAGE_SAMPLE_C_D_O_G16>;
Show All 9 Lines

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Show First 20 Lines • Show All 58 Lines • ▼ Show 20 Lines	struct GcnBufferFormatInfo {
unsigned DataFormat;		unsigned DataFormat;
};		};

#define GET_MIMGBaseOpcode_DECL		#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL		#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL		#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL		#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL		#define GET_MIMGMIPMapping_DECL
		#define GET_MIMGBiASMapping_DECL
#include "AMDGPUGenSearchableTables.inc"		#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {		namespace IsaInfo {

enum {		enum {
// The closed Vulkan driver sets 96, which limits the wave count to 8 but		// The closed Vulkan driver sets 96, which limits the wave count to 8 but
// doesn't spill SGPRs as much as when 80 is set.		// doesn't spill SGPRs as much as when 80 is set.
FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,		FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
▲ Show 20 Lines • Show All 250 Lines • ▼ Show 20 Lines	struct MIMGLZMappingInfo {
MIMGBaseOpcode LZ;		MIMGBaseOpcode LZ;
};		};

struct MIMGMIPMappingInfo {		struct MIMGMIPMappingInfo {
MIMGBaseOpcode MIP;		MIMGBaseOpcode MIP;
MIMGBaseOpcode NONMIP;		MIMGBaseOpcode NONMIP;
};		};

		struct MIMGBiasMappingInfo {
		MIMGBaseOpcode Bias;
		MIMGBaseOpcode NoBias;
		};

struct MIMGG16MappingInfo {		struct MIMGG16MappingInfo {
MIMGBaseOpcode G;		MIMGBaseOpcode G;
MIMGBaseOpcode G16;		MIMGBaseOpcode G16;
};		};

LLVM_READONLY		LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);		const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

LLVM_READONLY		LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);		const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY		LLVM_READONLY
		const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);

		LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);		const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY		LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,		int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
unsigned VDataDwords, unsigned VAddrDwords);		unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY		LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);		int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
▲ Show 20 Lines • Show All 669 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Show First 20 Lines • Show All 126 Lines • ▼ Show 20 Lines	bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) {
return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI);		return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI);
}		}

#define GET_MIMGBaseOpcodesTable_IMPL		#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL		#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL		#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL		#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL		#define GET_MIMGMIPMappingTable_IMPL
		#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL		#define GET_MIMGG16MappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"		#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,		int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
unsigned VDataDwords, unsigned VAddrDwords) {		unsigned VDataDwords, unsigned VAddrDwords) {
const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,		const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
VDataDwords, VAddrDwords);		VDataDwords, VAddrDwords);
return Info ? Info->Opcode : -1;		return Info ? Info->Opcode : -1;
▲ Show 20 Lines • Show All 1,924 Lines • Show Last 20 Lines

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

Show First 20 Lines • Show All 60 Lines • ▼ Show 20 Lines
; CHECK-NEXT: ret double 0x3F97D05F417D05F4		; CHECK-NEXT: ret double 0x3F97D05F417D05F4
;		;
%val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone		%val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone
ret double %val		ret double %val
}		}

define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp {		define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp {
; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp(		; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp(
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR12:[0-9]+]]		; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR14:[0-9]+]]
; CHECK-NEXT: ret float [[VAL]]		; CHECK-NEXT: ret float [[VAL]]
;		;
%val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone		%val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone
ret float %val		ret float %val
}		}

; --------------------------------------------------------------------		; --------------------------------------------------------------------
; llvm.amdgcn.rsq		; llvm.amdgcn.rsq
▲ Show 20 Lines • Show All 1,579 Lines • ▼ Show 20 Lines
; CHECK-NEXT: ret i64 0		; CHECK-NEXT: ret i64 0
;		;
%result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 32)		%result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 32)
ret i64 %result		ret i64 %result
}		}

define i64 @icmp_constant_inputs_true() {		define i64 @icmp_constant_inputs_true() {
; CHECK-LABEL: @icmp_constant_inputs_true(		; CHECK-LABEL: @icmp_constant_inputs_true(
; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR13:[0-9]+]]		; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR15:[0-9]+]]
; CHECK-NEXT: ret i64 [[RESULT]]		; CHECK-NEXT: ret i64 [[RESULT]]
;		;
%result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)		%result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)
ret i64 %result		ret i64 %result
}		}

define i64 @icmp_constant_to_rhs_slt(i32 %x) {		define i64 @icmp_constant_to_rhs_slt(i32 %x) {
; CHECK-LABEL: @icmp_constant_to_rhs_slt(		; CHECK-LABEL: @icmp_constant_to_rhs_slt(
▲ Show 20 Lines • Show All 690 Lines • ▼ Show 20 Lines
; CHECK-NEXT: ret i64 0		; CHECK-NEXT: ret i64 0
;		;
%result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 1)		%result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 1)
ret i64 %result		ret i64 %result
}		}

define i64 @fcmp_constant_inputs_true() {		define i64 @fcmp_constant_inputs_true() {
; CHECK-LABEL: @fcmp_constant_inputs_true(		; CHECK-LABEL: @fcmp_constant_inputs_true(
; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR13]]		; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR15]]
; CHECK-NEXT: ret i64 [[RESULT]]		; CHECK-NEXT: ret i64 [[RESULT]]
;		;
%result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)		%result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
ret i64 %result		ret i64 %result
}		}

define i64 @fcmp_constant_to_rhs_olt(float %x) {		define i64 @fcmp_constant_to_rhs_olt(float %x) {
; CHECK-LABEL: @fcmp_constant_to_rhs_olt(		; CHECK-LABEL: @fcmp_constant_to_rhs_olt(
Show All 25 Lines
; CHECK-NEXT: ret i64 0		; CHECK-NEXT: ret i64 0
;		;
%b = call i64 @llvm.amdgcn.ballot.i64(i1 0)		%b = call i64 @llvm.amdgcn.ballot.i64(i1 0)
ret i64 %b		ret i64 %b
}		}

define i64 @ballot_one_64() {		define i64 @ballot_one_64() {
; CHECK-LABEL: @ballot_one_64(		; CHECK-LABEL: @ballot_one_64(
; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR13]]		; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR15]]
; CHECK-NEXT: ret i64 [[B]]		; CHECK-NEXT: ret i64 [[B]]
;		;
%b = call i64 @llvm.amdgcn.ballot.i64(i1 1)		%b = call i64 @llvm.amdgcn.ballot.i64(i1 1)
ret i64 %b		ret i64 %b
}		}

define i32 @ballot_nocombine_32(i1 %i) {		define i32 @ballot_nocombine_32(i1 %i) {
; CHECK-LABEL: @ballot_nocombine_32(		; CHECK-LABEL: @ballot_nocombine_32(
Show All 9 Lines
; CHECK-NEXT: ret i32 0		; CHECK-NEXT: ret i32 0
;		;
%b = call i32 @llvm.amdgcn.ballot.i32(i1 0)		%b = call i32 @llvm.amdgcn.ballot.i32(i1 0)
ret i32 %b		ret i32 %b
}		}

define i32 @ballot_one_32() {		define i32 @ballot_one_32() {
; CHECK-LABEL: @ballot_one_32(		; CHECK-LABEL: @ballot_one_32(
; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR13]]		; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR15]]
; CHECK-NEXT: ret i32 [[B]]		; CHECK-NEXT: ret i32 [[B]]
;		;
%b = call i32 @llvm.amdgcn.ballot.i32(i1 1)		%b = call i32 @llvm.amdgcn.ballot.i32(i1 1)
ret i32 %b		ret i32 %b
}		}

; --------------------------------------------------------------------		; --------------------------------------------------------------------
; llvm.amdgcn.wqm.vote		; llvm.amdgcn.wqm.vote
▲ Show 20 Lines • Show All 1,598 Lines • ▼ Show 20 Lines	;
%t32 = fpext half %t to float		%t32 = fpext half %t to float
%slice32 = fpext half %slice to float		%slice32 = fpext half %slice to float
%res = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%res = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
store <4 x float> %res, <4 x float> addrspace(1)* %out		store <4 x float> %res, <4 x float> addrspace(1)* %out
ret void		ret void
}		}

; --------------------------------------------------------------------		; --------------------------------------------------------------------
		; llvm.amdgcn.image.sample l to lz
		; --------------------------------------------------------------------

		declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

		declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

		define amdgpu_kernel void @sample_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
		; CHECK-LABEL: @sample_l_1d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		define amdgpu_kernel void @sample_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
		; CHECK-LABEL: @sample_l_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		define amdgpu_kernel void @sample_c_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
		; CHECK-LABEL: @sample_c_l_1d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		define amdgpu_kernel void @sample_c_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
		; CHECK-LABEL: @sample_c_l_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		define amdgpu_kernel void @sample_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) {
		; CHECK-LABEL: @sample_l_o_1d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		define amdgpu_kernel void @sample_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
		; CHECK-LABEL: @sample_l_o_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		define amdgpu_kernel void @sample_c_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) {
		; CHECK-LABEL: @sample_c_l_o_1d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		define amdgpu_kernel void @sample_c_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
		; CHECK-LABEL: @sample_c_l_o_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		define amdgpu_kernel void @gather4_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
		; CHECK-LABEL: @gather4_l_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		define amdgpu_kernel void @gather4_c_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
		; CHECK-LABEL: @gather4_c_l_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; gather4.l.o.2d called with a constant 0.0 lod: the expected output calls
		; gather4.lz.o.2d and no longer passes the lod operand.
		define amdgpu_kernel void @gather4_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
		; CHECK-LABEL: @gather4_l_o_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; gather4.c.l.o.2d called with a constant 0.0 lod: the expected output calls
		; gather4.c.lz.o.2d and no longer passes the lod operand.
		define amdgpu_kernel void @gather4_c_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
		; CHECK-LABEL: @gather4_c_l_o_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; gather4.c.l.o.2darray called with a constant 0.0 lod: the expected output
		; calls gather4.c.lz.o.2darray, keeps the slice operand and drops the lod.
		define amdgpu_kernel void @gather4_c_l_o_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %slice, float %lod) {
		; CHECK-LABEL: @gather4_c_l_o_2darray(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2darray.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %slice, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; --------------------------------------------------------------------
		; llvm.amdgcn.image.load.mip / llvm.amdgcn.image.store.mip with mip level zero
		; --------------------------------------------------------------------

		; load.mip.1d called with a constant mip level of 0: the expected output
		; calls load.1d and no longer passes the mip operand.
		define amdgpu_kernel void @load_mip_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s) {
		; CHECK-LABEL: @load_mip_1d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; load.mip.2d called with a constant mip level of 0: the expected output
		; calls load.2d and no longer passes the mip operand.
		define amdgpu_kernel void @load_mip_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) {
		; CHECK-LABEL: @load_mip_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; load.mip.3d called with a constant mip level of 0: the expected output
		; calls load.3d and no longer passes the mip operand.
		define amdgpu_kernel void @load_mip_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
		; CHECK-LABEL: @load_mip_3d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; load.mip.1darray called with a constant mip level of 0: the expected
		; output calls load.1darray and no longer passes the mip operand.
		define amdgpu_kernel void @load_mip_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) {
		; CHECK-LABEL: @load_mip_1darray(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; load.mip.2darray called with a constant mip level of 0: the expected
		; output calls load.2darray and no longer passes the mip operand.
		define amdgpu_kernel void @load_mip_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
		; CHECK-LABEL: @load_mip_2darray(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; load.mip.cube called with a constant mip level of 0: the expected output
		; calls load.cube and no longer passes the mip operand.
		define amdgpu_kernel void @load_mip_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
		; CHECK-LABEL: @load_mip_cube(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}


		; store.mip.1d called with a constant mip level of 0: the expected output
		; calls store.1d and no longer passes the mip operand.
		define amdgpu_kernel void @store_mip_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
		; CHECK-LABEL: @store_mip_1d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
		; CHECK-NEXT: ret void
		;
		main_body:
		call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
		ret void
		}

		; store.mip.2d called with a constant mip level of 0: the expected output
		; calls store.2d and no longer passes the mip operand.
		define amdgpu_kernel void @store_mip_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
		; CHECK-LABEL: @store_mip_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
		; CHECK-NEXT: ret void
		;
		main_body:
		call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
		ret void
		}

		; store.mip.3d called with a constant mip level of 0: the expected output
		; calls store.3d and no longer passes the mip operand.
		define amdgpu_kernel void @store_mip_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
		; CHECK-LABEL: @store_mip_3d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
		; CHECK-NEXT: ret void
		;
		main_body:
		call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
		ret void
		}

		; store.mip.1darray called with a constant mip level of 0: the expected
		; output calls store.1darray and no longer passes the mip operand.
		define amdgpu_kernel void @store_mip_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
		; CHECK-LABEL: @store_mip_1darray(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
		; CHECK-NEXT: ret void
		;
		main_body:
		call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
		ret void
		}

		; store.mip.2darray called with a constant mip level of 0: the expected
		; output calls store.2darray and no longer passes the mip operand.
		define amdgpu_kernel void @store_mip_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
		; CHECK-LABEL: @store_mip_2darray(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
		; CHECK-NEXT: ret void
		;
		main_body:
		call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
		ret void
		}

		; store.mip.cube called with a constant mip level of 0: the expected output
		; calls store.cube and no longer passes the mip operand.
		define amdgpu_kernel void @store_mip_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
		; CHECK-LABEL: @store_mip_cube(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
		; CHECK-NEXT: ret void
		;
		main_body:
		call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
		ret void
		}

		; Declarations of the image.load.mip intrinsics called by the load_mip_* tests.
		declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1

		; Declarations of the image.store.mip intrinsics called by the store_mip_*
		; tests, listed in the same dimension order as the loads above.
		declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
		declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
		declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
		declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
		declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
		declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0

		; --------------------------------------------------------------------
		; llvm.amdgcn.image.sample bias zero
		; --------------------------------------------------------------------

		; image.sample with bias, 1D variants.
		declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

		; image.sample with bias, 2D variants (the last one takes half operands).
		declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32, i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

		; image.gather4 with bias, 2D variants.
		declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
		declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

		; sample.b.1d called with a constant 0.0 bias: the expected output calls
		; sample.1d and no longer passes the bias operand.
		define amdgpu_kernel void @sample_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
		; CHECK-LABEL: @sample_b_1d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; sample.b.2d called with a constant -0.0 bias: the expected output calls
		; sample.2d and no longer passes the bias operand.
		define amdgpu_kernel void @sample_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
		; CHECK-LABEL: @sample_b_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32 15, float -0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; sample.c.b.1d called with a constant -0.0 bias: the expected output calls
		; sample.c.1d and no longer passes the bias operand.
		define amdgpu_kernel void @sample_c_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
		; CHECK-LABEL: @sample_c_b_1d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32 15, float -0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; sample.c.b.2d called with a constant 0.0 bias: the expected output calls
		; sample.c.2d and no longer passes the bias operand.
		define amdgpu_kernel void @sample_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
		; CHECK-LABEL: @sample_c_b_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32 15, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; sample.b.o.1d called with a constant 0.0 bias: the expected output calls
		; sample.o.1d, keeps the offset operand and drops the bias.
		define amdgpu_kernel void @sample_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) {
		; CHECK-LABEL: @sample_b_o_1d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; sample.b.o.2d called with a constant 0.0 bias: the expected output calls
		; sample.o.2d, keeps the offset operand and drops the bias.
		define amdgpu_kernel void @sample_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
		; CHECK-LABEL: @sample_b_o_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; sample.c.b.o.1d called with a constant 0.0 bias: the expected output calls
		; sample.c.o.1d, keeps the offset and zcompare operands and drops the bias.
		define amdgpu_kernel void @sample_c_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) {
		; CHECK-LABEL: @sample_c_b_o_1d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; sample.c.b.o.2d called with a constant 0.0 bias: the expected output calls
		; sample.c.o.2d, keeps the offset and zcompare operands and drops the bias.
		define amdgpu_kernel void @sample_c_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
		; CHECK-LABEL: @sample_c_b_o_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; gather4.b.2d called with a constant 0.0 bias: the expected output calls
		; gather4.2d and no longer passes the bias operand.
		define amdgpu_kernel void @gather4_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
		; CHECK-LABEL: @gather4_b_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32(i32 15, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; gather4.c.b.2d called with a constant 0.0 bias: the expected output calls
		; gather4.c.2d and no longer passes the bias operand.
		define amdgpu_kernel void @gather4_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
		; CHECK-LABEL: @gather4_c_b_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32(i32 15, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; gather4.b.o.2d called with a constant 0.0 bias: the expected output calls
		; gather4.o.2d, keeps the offset operand and drops the bias.
		define amdgpu_kernel void @gather4_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
		; CHECK-LABEL: @gather4_b_o_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; gather4.c.b.o.2d called with a constant 0.0 bias: the expected output calls
		; gather4.c.o.2d, keeps the offset and zcompare operands and drops the bias.
		define amdgpu_kernel void @gather4_c_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
		; CHECK-LABEL: @gather4_c_b_o_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; sample.c.b.o.2d with half coordinates and a constant half 0.0 bias: the
		; expected output calls sample.c.o.2d (f16 variant) without the bias operand;
		; zcompare stays a float.
		define amdgpu_kernel void @sample_c_b_o_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t) {
		; CHECK-LABEL: @sample_c_b_o_a16_2d(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f16(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32 15, i32 %offset, half 0.0, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; Check that bias is not optimized away if > 0
		; (the expected output still calls sample.b.1d with the 1.0 bias operand).
		define amdgpu_kernel void @sample_b_1d_pos(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
		; CHECK-LABEL: @sample_b_1d_pos(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float 1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; Check that bias is not optimized away if < 0
		; (the expected output still calls sample.b.1d with the -1.0 bias operand).
		define amdgpu_kernel void @sample_b_1d_neg(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
		; CHECK-LABEL: @sample_b_1d_neg(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float -1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float -1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; Zero bias + A16
		; The coordinate is a half extended to float and the bias is a constant -0.0.
		; The expected output has neither the bias operand nor the fpext: the half
		; value is passed directly to the f16 variant of sample.1d.
		define amdgpu_kernel void @sample_b_1d_a16(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
		; CHECK-LABEL: @sample_b_1d_a16(
		; CHECK-NEXT: main_body:
		; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
		; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
		; CHECK-NEXT: ret void
		;
		main_body:
		%s32 = fpext half %s to float
		%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float -0.0, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
		store <4 x float> %v, <4 x float> addrspace(1)* %out
		ret void
		}

		; --------------------------------------------------------------------
; llvm.amdgcn.is.shared		; llvm.amdgcn.is.shared
; --------------------------------------------------------------------		; --------------------------------------------------------------------

declare i1 @llvm.amdgcn.is.shared(i8*) nounwind readnone		declare i1 @llvm.amdgcn.is.shared(i8*) nounwind readnone

define i1 @test_is_shared_null() nounwind {		define i1 @test_is_shared_null() nounwind {
; CHECK-LABEL: @test_is_shared_null(		; CHECK-LABEL: @test_is_shared_null(
; CHECK-NEXT: ret i1 false		; CHECK-NEXT: ret i1 false
Show All 34 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU][InstCombine] Remove zero LOD bias
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 401916

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

llvm/lib/Target/AMDGPU/MIMGInstructions.td

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU][InstCombine] Remove zero LOD bias (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 401916

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

llvm/lib/Target/AMDGPU/MIMGInstructions.td

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

[AMDGPU][InstCombine] Remove zero LOD bias
ClosedPublic