Diff 395458

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Show First 20 Lines • Show All 4,439 Lines • ▼ Show 20 Lines	if (mi_match(MI.getOperand(ArgOffset + Intr->MipIndex).getReg(), *MRI,
// TODO: Change intrinsic opcode and remove operand instead of replacing		// TODO: Change intrinsic opcode and remove operand instead of replacing
// it with 0, as the _L to _LZ handling is done above.		// it with 0, as the _L to _LZ handling is done above.
MI.getOperand(ArgOffset + Intr->MipIndex).ChangeToImmediate(0);		MI.getOperand(ArgOffset + Intr->MipIndex).ChangeToImmediate(0);
--CorrectedNumVAddrs;		--CorrectedNumVAddrs;
}		}
}		}
}		}

		// Optimize _bias away when 'bias' is zero
		if (const AMDGPU::MIMGBiasMappingInfo *BiasMappingInfo =
		AMDGPU::getMIMGBiasMappingInfo(Intr->BaseOpcode)) {
		const ConstantFP *ConstantBias;

		if (mi_match(MI.getOperand(ArgOffset + Intr->BiasIndex).getReg(), *MRI,
		m_GFCst(ConstantBias))) {
		if (ConstantBias->isZero()) {
		arsenmUnsubmitted Done Reply Inline Actions The DAG version doesn't have the isNegative check? arsenm: The DAG version doesn't have the isNegative check?
		// Set new opcode to the variant without bias, and change the intrinsic ID.
		const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
		AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
		Intr->Dim);

		// The starting indexes should remain in the same place.
		--CorrectedNumVAddrs;

		MI.getOperand(MI.getNumExplicitDefs())
		.setIntrinsicID(static_cast<Intrinsic::ID>(NewImageDimIntr->Intr));
		MI.RemoveOperand(ArgOffset + Intr->BiasIndex);
		Intr = NewImageDimIntr;
		}
		}
		}

// Rewrite the addressing register layout before doing anything else.		// Rewrite the addressing register layout before doing anything else.
if (BaseOpcode->Gradients && !ST.hasG16() && (IsA16 != IsG16)) {		if (BaseOpcode->Gradients && !ST.hasG16() && (IsA16 != IsG16)) {
// 16 bit gradients are supported, but are tied to the A16 control		// 16 bit gradients are supported, but are tied to the A16 control
// so both gradients and addresses must be 16 bit		// so both gradients and addresses must be 16 bit
return false;		return false;
}		}

if (IsA16 && !ST.hasA16()) {		if (IsA16 && !ST.hasA16()) {
▲ Show 20 Lines • Show All 700 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/MIMGInstructions.td

Show First 20 Lines • Show All 125 Lines • ▼ Show 20 Lines	def MIMGMIPMappingTable : GenericTable {
let Fields = ["MIP", "NONMIP"];		let Fields = ["MIP", "NONMIP"];
string TypeOf_MIP = "MIMGBaseOpcode";		string TypeOf_MIP = "MIMGBaseOpcode";
string TypeOf_NONMIP = "MIMGBaseOpcode";		string TypeOf_NONMIP = "MIMGBaseOpcode";

let PrimaryKey = ["MIP"];		let PrimaryKey = ["MIP"];
let PrimaryKeyName = "getMIMGMIPMappingInfo";		let PrimaryKeyName = "getMIMGMIPMappingInfo";
}		}

		// Associates one MIMG base opcode (`bias`) with a second base opcode
		// (`nobias`). Records of this class are gathered by the GenericTable whose
		// FilterClass is "MIMGBiasMapping"; the stored `Bias` value is that table's
		// lookup key and `NoBias` is the opcode substituted for it.
		class MIMGBiasMapping<MIMGBaseOpcode bias, MIMGBaseOpcode nobias> {
		MIMGBaseOpcode Bias = bias;
		MIMGBaseOpcode NoBias = nobias;
		}

		// Searchable table built from every MIMGBiasMapping record. Rows are
		// emitted as the C++ type MIMGBiasMappingInfo and are looked up by their
		// `Bias` opcode through getMIMGBiasMappingInfo.
		def MIMGBiasMappingTable : GenericTable {
		let FilterClass = "MIMGBiasMapping";
		let CppTypeName = "MIMGBiasMappingInfo";
		// Column order here is the member order of the generated row.
		let Fields = ["Bias", "NoBias"];
		string TypeOf_Bias = "MIMGBaseOpcode";
		string TypeOf_NoBias = "MIMGBaseOpcode";

		// `Bias` is the sole key; the generated accessor takes it as its argument.
		let PrimaryKey = ["Bias"];
		let PrimaryKeyName = "getMIMGBiasMappingInfo";
		}

class MIMGG16Mapping<MIMGBaseOpcode g, MIMGBaseOpcode g16> {		class MIMGG16Mapping<MIMGBaseOpcode g, MIMGBaseOpcode g16> {
MIMGBaseOpcode G = g;		MIMGBaseOpcode G = g;
MIMGBaseOpcode G16 = g16;		MIMGBaseOpcode G16 = g16;
}		}

def MIMGG16MappingTable : GenericTable {		def MIMGG16MappingTable : GenericTable {
let FilterClass = "MIMGG16Mapping";		let FilterClass = "MIMGG16Mapping";
let CppTypeName = "MIMGG16MappingInfo";		let CppTypeName = "MIMGG16MappingInfo";
▲ Show 20 Lines • Show All 993 Lines • ▼ Show 20 Lines
def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>;		def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>;
def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>;		def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>;
def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;		def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;

// MIP to NONMIP Optimization Mapping		// MIP to NONMIP Optimization Mapping
def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;		def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;		def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;

		// Bias to NoBias Optimization Mapping
		def : MIMGBiasMapping<IMAGE_SAMPLE_B, IMAGE_SAMPLE>;
		def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL, IMAGE_SAMPLE_CL>;
		def : MIMGBiasMapping<IMAGE_SAMPLE_C_B, IMAGE_SAMPLE_C>;
		def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL, IMAGE_SAMPLE_C_CL>;
		def : MIMGBiasMapping<IMAGE_SAMPLE_B_O, IMAGE_SAMPLE_O>;
		def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL_O, IMAGE_SAMPLE_CL_O>;
		def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_O, IMAGE_SAMPLE_C_O>;
		def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL_O, IMAGE_SAMPLE_C_CL_O>;

// G to G16 Optimization Mapping		// G to G16 Optimization Mapping
def : MIMGG16Mapping<IMAGE_SAMPLE_D, IMAGE_SAMPLE_D_G16>;		def : MIMGG16Mapping<IMAGE_SAMPLE_D, IMAGE_SAMPLE_D_G16>;
def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL, IMAGE_SAMPLE_D_CL_G16>;		def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL, IMAGE_SAMPLE_D_CL_G16>;
def : MIMGG16Mapping<IMAGE_SAMPLE_C_D, IMAGE_SAMPLE_C_D_G16>;		def : MIMGG16Mapping<IMAGE_SAMPLE_C_D, IMAGE_SAMPLE_C_D_G16>;
def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_CL, IMAGE_SAMPLE_C_D_CL_G16>;		def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_CL, IMAGE_SAMPLE_C_D_CL_G16>;
def : MIMGG16Mapping<IMAGE_SAMPLE_D_O, IMAGE_SAMPLE_D_O_G16>;		def : MIMGG16Mapping<IMAGE_SAMPLE_D_O, IMAGE_SAMPLE_D_O_G16>;
def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL_O, IMAGE_SAMPLE_D_CL_O_G16>;		def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL_O, IMAGE_SAMPLE_D_CL_O_G16>;
def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_O, IMAGE_SAMPLE_C_D_O_G16>;		def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_O, IMAGE_SAMPLE_C_D_O_G16>;
Show All 9 Lines

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 6,153 Lines • ▼ Show 20 Lines	SDValue SITargetLowering::lowerImage(SDValue Op,
const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();		const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =		const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);		AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);		const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =		const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);		AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =		const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);		AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
		const AMDGPU::MIMGBiasMappingInfo *BiasMappingInfo =
		AMDGPU::getMIMGBiasMappingInfo(Intr->BaseOpcode);
unsigned IntrOpcode = Intr->BaseOpcode;		unsigned IntrOpcode = Intr->BaseOpcode;
bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);		bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);

SmallVector<EVT, 3> ResultTypes(Op->values());		SmallVector<EVT, 3> ResultTypes(Op->values());
SmallVector<EVT, 3> OrigResultTypes(Op->values());		SmallVector<EVT, 3> OrigResultTypes(Op->values());
bool IsD16 = false;		bool IsD16 = false;
bool IsG16 = false;		bool IsG16 = false;
bool IsA16 = false;		bool IsA16 = false;
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines	if (BaseOpcode->Store) {

AdjustRetType = true;		AdjustRetType = true;
}		}
}		}

unsigned VAddrEnd = ArgOffset + Intr->VAddrEnd;		unsigned VAddrEnd = ArgOffset + Intr->VAddrEnd;
SmallVector<SDValue, 4> VAddrs;		SmallVector<SDValue, 4> VAddrs;

// Optimize _L to _LZ when _L is zero		// Optimize _L to _LZ when _L is zero
arsenmUnsubmitted Done Reply Inline Actions I would split the codegen change into a separate patch arsenm: I would split the codegen change into a separate patch
if (LZMappingInfo) {		if (LZMappingInfo) {
if (auto *ConstantLod = dyn_cast<ConstantFPSDNode>(		if (auto *ConstantLod = dyn_cast<ConstantFPSDNode>(
Op.getOperand(ArgOffset + Intr->LodIndex))) {		Op.getOperand(ArgOffset + Intr->LodIndex))) {
if (ConstantLod->isZero() \|\| ConstantLod->isNegative()) {		if (ConstantLod->isZero() \|\| ConstantLod->isNegative()) {
IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l		IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
VAddrEnd--; // remove 'lod'		VAddrEnd--; // remove 'lod'
}		}
}		}
}		}

// Optimize _mip away, when 'lod' is zero		// Optimize _mip away, when 'lod' is zero
if (MIPMappingInfo) {		if (MIPMappingInfo) {
if (auto *ConstantLod = dyn_cast<ConstantSDNode>(		if (auto *ConstantLod = dyn_cast<ConstantSDNode>(
Op.getOperand(ArgOffset + Intr->MipIndex))) {		Op.getOperand(ArgOffset + Intr->MipIndex))) {
if (ConstantLod->isZero()) {		if (ConstantLod->isZero()) {
IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip		IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip
VAddrEnd--; // remove 'mip'		VAddrEnd--; // remove 'mip'
}		}
}		}
}		}

		// Optimize _B away, when the 'bias' is zero
		bool RemoveBias = false;
		if (BiasMappingInfo) {
		if (auto *ConstantBias = dyn_cast<ConstantFPSDNode>(
		Op.getOperand(ArgOffset + Intr->BiasIndex))) {
		if (ConstantBias->isZero()) {
		// set new opcode to variant without _b
		IntrOpcode = BiasMappingInfo->NoBias;
		RemoveBias = true;
		}
		}
		}

// Check for 16 bit addresses or derivatives and pack if true.		// Check for 16 bit addresses or derivatives and pack if true.
MVT VAddrVT =		MVT VAddrVT =
Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType();		Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType();
MVT VAddrScalarVT = VAddrVT.getScalarType();		MVT VAddrScalarVT = VAddrVT.getScalarType();
MVT GradPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;		MVT GradPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
IsG16 = VAddrScalarVT == MVT::f16 \|\| VAddrScalarVT == MVT::i16;		IsG16 = VAddrScalarVT == MVT::f16 \|\| VAddrScalarVT == MVT::i16;

VAddrVT = Op.getOperand(ArgOffset + Intr->CoordStart).getSimpleValueType();		VAddrVT = Op.getOperand(ArgOffset + Intr->CoordStart).getSimpleValueType();
VAddrScalarVT = VAddrVT.getScalarType();		VAddrScalarVT = VAddrVT.getScalarType();
MVT AddrPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;		MVT AddrPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
IsA16 = VAddrScalarVT == MVT::f16 \|\| VAddrScalarVT == MVT::i16;		IsA16 = VAddrScalarVT == MVT::f16 \|\| VAddrScalarVT == MVT::i16;

// Push back extra arguments.		// Push back extra arguments.
for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++) {		for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++) {
		if (RemoveBias && I == Intr->BiasIndex)
		continue;

if (IsA16 && (Op.getOperand(ArgOffset + I).getValueType() == MVT::f16)) {		if (IsA16 && (Op.getOperand(ArgOffset + I).getValueType() == MVT::f16)) {
assert(I == Intr->BiasIndex && "Got unexpected 16-bit extra argument");		assert(I == Intr->BiasIndex && "Got unexpected 16-bit extra argument");
// Special handling of bias when A16 is on. Bias is of type half but		// Special handling of bias when A16 is on. Bias is of type half but
// occupies full 32-bit.		// occupies full 32-bit.
SDValue Bias = DAG.getBuildVector(		SDValue Bias = DAG.getBuildVector(
MVT::v2f16, DL,		MVT::v2f16, DL,
{Op.getOperand(ArgOffset + I), DAG.getUNDEF(MVT::f16)});		{Op.getOperand(ArgOffset + I), DAG.getUNDEF(MVT::f16)});
VAddrs.push_back(Bias);		VAddrs.push_back(Bias);
▲ Show 20 Lines • Show All 6,191 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Show First 20 Lines • Show All 58 Lines • ▼ Show 20 Lines	struct GcnBufferFormatInfo {
unsigned DataFormat;		unsigned DataFormat;
};		};

#define GET_MIMGBaseOpcode_DECL		#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL		#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL		#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL		#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL		#define GET_MIMGMIPMapping_DECL
		// Guard for the bias mapping; spelled "Bias" to match the MIMGBiasMapping
		// class name and the casing pattern of the neighbouring GET_MIMG*_DECL guards.
		#define GET_MIMGBiasMapping_DECL
#include "AMDGPUGenSearchableTables.inc"		#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {		namespace IsaInfo {

enum {		enum {
// The closed Vulkan driver sets 96, which limits the wave count to 8 but		// The closed Vulkan driver sets 96, which limits the wave count to 8 but
// doesn't spill SGPRs as much as when 80 is set.		// doesn't spill SGPRs as much as when 80 is set.
FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,		FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
▲ Show 20 Lines • Show All 250 Lines • ▼ Show 20 Lines	struct MIMGLZMappingInfo {
MIMGBaseOpcode LZ;		MIMGBaseOpcode LZ;
};		};

struct MIMGMIPMappingInfo {		struct MIMGMIPMappingInfo {
MIMGBaseOpcode MIP;		MIMGBaseOpcode MIP;
MIMGBaseOpcode NONMIP;		MIMGBaseOpcode NONMIP;
};		};

		/// One row of the bias-to-no-bias mapping, returned by
		/// getMIMGBiasMappingInfo().
		/// NOTE(review): member order presumably has to match the `Fields` list of
		/// the corresponding TableGen table — confirm before reordering.
		struct MIMGBiasMappingInfo {
		MIMGBaseOpcode Bias;   ///< Base opcode used as the lookup key.
		MIMGBaseOpcode NoBias; ///< Base opcode to use in place of `Bias`.
		};

struct MIMGG16MappingInfo {		struct MIMGG16MappingInfo {
MIMGBaseOpcode G;		MIMGBaseOpcode G;
MIMGBaseOpcode G16;		MIMGBaseOpcode G16;
};		};

LLVM_READONLY		LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);		const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

LLVM_READONLY		LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);		const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY		LLVM_READONLY
		const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);

		LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);		const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY		LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,		int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
unsigned VDataDwords, unsigned VAddrDwords);		unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY		LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);		int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
▲ Show 20 Lines • Show All 669 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Show First 20 Lines • Show All 126 Lines • ▼ Show 20 Lines	bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) {
return isHsaAbiVersion3(STI) \|\| isHsaAbiVersion4(STI);		return isHsaAbiVersion3(STI) \|\| isHsaAbiVersion4(STI);
}		}

#define GET_MIMGBaseOpcodesTable_IMPL		#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL		#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL		#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL		#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL		#define GET_MIMGMIPMappingTable_IMPL
		#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL		#define GET_MIMGG16MappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"		#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,		int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
unsigned VDataDwords, unsigned VAddrDwords) {		unsigned VDataDwords, unsigned VAddrDwords) {
const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,		const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
VDataDwords, VAddrDwords);		VDataDwords, VAddrDwords);
return Info ? Info->Opcode : -1;		return Info ? Info->Opcode : -1;
▲ Show 20 Lines • Show All 1,915 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.bias_zero.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -global-isel -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN %s		; RUN: llc -march=amdgcn -global-isel -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN %s

define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {		define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GCN-LABEL: sample_b_1d:		; GCN-LABEL: sample_b_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v1, v0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {		define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GCN-LABEL: sample_b_2d:		; GCN-LABEL: sample_b_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v2, v0
; GCN-NEXT: v_mov_b32_e32 v3, v1
; GCN-NEXT: v_bfrev_b32_e32 v1, 1
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_b v[0:3], v[1:3], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32 15, float -0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32 15, float -0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {		define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; GCN-LABEL: sample_c_b_1d:		; GCN-LABEL: sample_c_b_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v2, v0
; GCN-NEXT: v_mov_b32_e32 v3, v1
; GCN-NEXT: v_bfrev_b32_e32 v1, 1
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_b v[0:3], v[1:3], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32 15, float -0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32 15, float -0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {		define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; GCN-LABEL: sample_c_b_2d:		; GCN-LABEL: sample_c_b_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v3, v0
; GCN-NEXT: v_mov_b32_e32 v4, v1
; GCN-NEXT: v_mov_b32_e32 v5, v2
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_b v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32 15, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32 15, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) {		define amdgpu_ps <4 x float> @sample_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) {
; GCN-LABEL: sample_b_o_1d:		; GCN-LABEL: sample_b_o_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_b_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {		define amdgpu_ps <4 x float> @sample_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
; GCN-LABEL: sample_b_o_2d:		; GCN-LABEL: sample_b_o_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v3, v0
; GCN-NEXT: v_mov_b32_e32 v5, v1
; GCN-NEXT: v_mov_b32_e32 v6, v2
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_b_o v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) {		define amdgpu_ps <4 x float> @sample_c_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) {
; GCN-LABEL: sample_c_b_o_1d:		; GCN-LABEL: sample_c_b_o_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v3, v0
; GCN-NEXT: v_mov_b32_e32 v5, v1
; GCN-NEXT: v_mov_b32_e32 v6, v2
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_b_o v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample_c_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {		define amdgpu_ps <4 x float> @sample_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
; GCN-LABEL: sample_c_b_o_2d:		; GCN-LABEL: sample_c_b_o_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v4, v0
; GCN-NEXT: v_mov_b32_e32 v6, v1
; GCN-NEXT: v_mov_b32_e32 v7, v2
; GCN-NEXT: v_mov_b32_e32 v8, v3
; GCN-NEXT: v_mov_b32_e32 v5, 0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_b_o v[0:3], v[4:8], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {		define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines	main_body:
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_b_o_a16_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t) {		define amdgpu_ps <4 x float> @sample_c_b_o_a16_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t) {
; GCN-LABEL: sample_c_b_o_a16_2d:		; GCN-LABEL: sample_c_b_o_a16_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v4, v2		; GCN-NEXT: v_mov_b32_e32 v4, 0xffff
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_mov_b32_e32 v1, 0xffff
; GCN-NEXT: v_lshlrev_b32_e32 v3, 16, v3		; GCN-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GCN-NEXT: v_and_or_b32 v3, v4, v1, v3		; GCN-NEXT: v_and_or_b32 v2, v2, v4, v3
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32 15, i32 %offset, half 0.0, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32 15, i32 %offset, half 0.0, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_b_1d_pos(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {		define amdgpu_ps <4 x float> @sample_b_1d_pos(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
▲ Show 20 Lines • Show All 45 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.bias_zero.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN %s		; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN %s

define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {		define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GCN-LABEL: sample_b_1d:		; GCN-LABEL: sample_b_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v1, v0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {		define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GCN-LABEL: sample_b_2d:		; GCN-LABEL: sample_b_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_mov_b32_e32 v1, v0
; GCN-NEXT: v_bfrev_b32_e32 v0, 1
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32 15, float -0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32 15, float -0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {		define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; GCN-LABEL: sample_c_b_1d:		; GCN-LABEL: sample_c_b_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_mov_b32_e32 v1, v0
; GCN-NEXT: v_bfrev_b32_e32 v0, 1
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32 15, float -0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32 15, float -0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {		define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; GCN-LABEL: sample_c_b_2d:		; GCN-LABEL: sample_c_b_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v3, v2
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_mov_b32_e32 v1, v0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32 15, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32 15, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) {		define amdgpu_ps <4 x float> @sample_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) {
; GCN-LABEL: sample_b_o_1d:		; GCN-LABEL: sample_b_o_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_b_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {		define amdgpu_ps <4 x float> @sample_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
; GCN-LABEL: sample_b_o_2d:		; GCN-LABEL: sample_b_o_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v3, v2
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) {		define amdgpu_ps <4 x float> @sample_c_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) {
; GCN-LABEL: sample_c_b_o_1d:		; GCN-LABEL: sample_c_b_o_1d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v3, v2
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample_c_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {		define amdgpu_ps <4 x float> @sample_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
; GCN-LABEL: sample_c_b_o_2d:		; GCN-LABEL: sample_c_b_o_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v4, v3
; GCN-NEXT: v_mov_b32_e32 v3, v2
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_b_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf		; GCN-NEXT: image_sample_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {		define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines	main_body:
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_c_b_o_a16_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t) {		define amdgpu_ps <4 x float> @sample_c_b_o_a16_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t) {
; GCN-LABEL: sample_c_b_o_a16_2d:		; GCN-LABEL: sample_c_b_o_a16_2d:
; GCN: ; %bb.0: ; %main_body		; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b64 s[12:13], exec		; GCN-NEXT: s_mov_b64 s[12:13], exec
; GCN-NEXT: s_wqm_b64 exec, exec		; GCN-NEXT: s_wqm_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v4, v2		; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GCN-NEXT: v_mov_b32_e32 v2, v1		; GCN-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v4
; GCN-NEXT: v_lshl_or_b32 v3, v3, 16, v1
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_and_b64 exec, exec, s[12:13]		; GCN-NEXT: s_and_b64 exec, exec, s[12:13]
; GCN-NEXT: image_sample_c_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16		; GCN-NEXT: image_sample_c_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GCN-NEXT: s_waitcnt vmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog		; GCN-NEXT: ; return to shader part epilog
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32 15, i32 %offset, half 0.0, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32 15, i32 %offset, half 0.0, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v		ret <4 x float> %v
}		}

define amdgpu_ps <4 x float> @sample_b_1d_pos(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {		define amdgpu_ps <4 x float> @sample_b_1d_pos(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
▲ Show 20 Lines • Show All 45 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU][InstCombine] Remove zero LOD bias
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 395458

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

llvm/lib/Target/AMDGPU/MIMGInstructions.td

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.bias_zero.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.bias_zero.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU][InstCombine] Remove zero LOD biasClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 395458

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

llvm/lib/Target/AMDGPU/MIMGInstructions.td

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.bias_zero.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.bias_zero.ll

[AMDGPU][InstCombine] Remove zero LOD bias
ClosedPublic