Diff 361108

llvm/lib/Target/AMDGPU/AMDGPU.td

	Show First 20 Lines • Show All 623 Lines • ▼ Show 20 Lines
	>;			>;

	def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",			def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
	"HasNoDataDepHazard",			"HasNoDataDepHazard",
	"true",			"true",
	"Does not need SW waitstates"			"Does not need SW waitstates"
	>;			>;

				// Parameterized subtarget feature: for a given integer Value this defines
				// a feature named "nsa-max-size-<Value>" that sets the subtarget field
				// NSAMaxSize to Value. The integer is converted with !cast<string> because
				// the feature name and field value are supplied as strings here.
				// NOTE(review): per the review discussion the smaller limit exists to work
				// around hardware stability issues rather than a designed per-target
				// limit -- confirm before relying on it.
				class SubtargetFeatureNSAMaxSize <int Value> : SubtargetFeature <
				"nsa-max-size-"#Value,
				"NSAMaxSize",
				!cast<string>(Value),
				"The maximum non-sequential address size in VGPRs."
				>;
				foadUnsubmitted Not Done Reply Inline Actions It seems a bit odd to define it like this; it gives the impression that the limit varies on different subtargets by design, which it doesn't really. The only reason to limit it to 5 is to "avoid stability issues" (i.e. work around hardware bugs). But I'm not sure what else to suggest. foad: It seems a bit odd to define it like this; it gives the impression that the limit varies on…
				critsonAuthorUnsubmitted Done Reply Inline Actions I agree it is a bit odd, but it does give us flexibility if we need to change the limit again for other hardware issues. critson: I agree it is a bit odd, but it does give us flexibility if we need to change the limit again…
				foadUnsubmitted Not Done Reply Inline Actions Fair enough foad: Fair enough

				def FeatureNSAMaxSize5 : SubtargetFeatureNSAMaxSize<5>;
				def FeatureNSAMaxSize13 : SubtargetFeatureNSAMaxSize<13>;

	//===------------------------------------------------------------===//			//===------------------------------------------------------------===//
	// Subtarget Features (options and debugging)			// Subtarget Features (options and debugging)
	//===------------------------------------------------------------===//			//===------------------------------------------------------------===//

	class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<			class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
	"max-private-element-size-"#size,			"max-private-element-size-"#size,
	"MaxPrivateElementSize",			"MaxPrivateElementSize",
	!cast<string>(size),			!cast<string>(size),
	▲ Show 20 Lines • Show All 386 Lines • ▼ Show 20 Lines
	}			}

	def FeatureISAVersion10_1_0 : FeatureSet<			def FeatureISAVersion10_1_0 : FeatureSet<
	!listconcat(FeatureGroup.GFX10_1_Bugs,			!listconcat(FeatureGroup.GFX10_1_Bugs,
	[FeatureGFX10,			[FeatureGFX10,
	FeatureLDSBankCount32,			FeatureLDSBankCount32,
	FeatureDLInsts,			FeatureDLInsts,
	FeatureNSAEncoding,			FeatureNSAEncoding,
				FeatureNSAMaxSize5,
	FeatureWavefrontSize32,			FeatureWavefrontSize32,
	FeatureScalarStores,			FeatureScalarStores,
	FeatureScalarAtomics,			FeatureScalarAtomics,
	FeatureScalarFlatScratchInsts,			FeatureScalarFlatScratchInsts,
	FeatureGetWaveIdInst,			FeatureGetWaveIdInst,
	FeatureMadMacF32Insts,			FeatureMadMacF32Insts,
	FeatureDsSrc2Insts,			FeatureDsSrc2Insts,
	FeatureLdsMisalignedBug,			FeatureLdsMisalignedBug,
	FeatureSupportsXNACK])>;			FeatureSupportsXNACK])>;

	def FeatureISAVersion10_1_1 : FeatureSet<			def FeatureISAVersion10_1_1 : FeatureSet<
	!listconcat(FeatureGroup.GFX10_1_Bugs,			!listconcat(FeatureGroup.GFX10_1_Bugs,
	[FeatureGFX10,			[FeatureGFX10,
	FeatureLDSBankCount32,			FeatureLDSBankCount32,
	FeatureDLInsts,			FeatureDLInsts,
	FeatureDot1Insts,			FeatureDot1Insts,
	FeatureDot2Insts,			FeatureDot2Insts,
	FeatureDot5Insts,			FeatureDot5Insts,
	FeatureDot6Insts,			FeatureDot6Insts,
	FeatureDot7Insts,			FeatureDot7Insts,
	FeatureNSAEncoding,			FeatureNSAEncoding,
				FeatureNSAMaxSize5,
	FeatureWavefrontSize32,			FeatureWavefrontSize32,
	FeatureScalarStores,			FeatureScalarStores,
	FeatureScalarAtomics,			FeatureScalarAtomics,
	FeatureScalarFlatScratchInsts,			FeatureScalarFlatScratchInsts,
	FeatureGetWaveIdInst,			FeatureGetWaveIdInst,
	FeatureMadMacF32Insts,			FeatureMadMacF32Insts,
	FeatureDsSrc2Insts,			FeatureDsSrc2Insts,
	FeatureLdsMisalignedBug,			FeatureLdsMisalignedBug,
	FeatureSupportsXNACK])>;			FeatureSupportsXNACK])>;

	def FeatureISAVersion10_1_2 : FeatureSet<			def FeatureISAVersion10_1_2 : FeatureSet<
	!listconcat(FeatureGroup.GFX10_1_Bugs,			!listconcat(FeatureGroup.GFX10_1_Bugs,
	[FeatureGFX10,			[FeatureGFX10,
	FeatureLDSBankCount32,			FeatureLDSBankCount32,
	FeatureDLInsts,			FeatureDLInsts,
	FeatureDot1Insts,			FeatureDot1Insts,
	FeatureDot2Insts,			FeatureDot2Insts,
	FeatureDot5Insts,			FeatureDot5Insts,
	FeatureDot6Insts,			FeatureDot6Insts,
	FeatureDot7Insts,			FeatureDot7Insts,
	FeatureNSAEncoding,			FeatureNSAEncoding,
				FeatureNSAMaxSize5,
	FeatureWavefrontSize32,			FeatureWavefrontSize32,
	FeatureScalarStores,			FeatureScalarStores,
	FeatureScalarAtomics,			FeatureScalarAtomics,
	FeatureScalarFlatScratchInsts,			FeatureScalarFlatScratchInsts,
	FeatureGetWaveIdInst,			FeatureGetWaveIdInst,
	FeatureMadMacF32Insts,			FeatureMadMacF32Insts,
	FeatureDsSrc2Insts,			FeatureDsSrc2Insts,
	FeatureLdsMisalignedBug,			FeatureLdsMisalignedBug,
	FeatureSupportsXNACK])>;			FeatureSupportsXNACK])>;

	def FeatureISAVersion10_1_3 : FeatureSet<			def FeatureISAVersion10_1_3 : FeatureSet<
	!listconcat(FeatureGroup.GFX10_1_Bugs,			!listconcat(FeatureGroup.GFX10_1_Bugs,
	[FeatureGFX10,			[FeatureGFX10,
	FeatureGFX10_AEncoding,			FeatureGFX10_AEncoding,
	FeatureLDSBankCount32,			FeatureLDSBankCount32,
	FeatureDLInsts,			FeatureDLInsts,
	FeatureNSAEncoding,			FeatureNSAEncoding,
				FeatureNSAMaxSize5,
	FeatureWavefrontSize32,			FeatureWavefrontSize32,
	FeatureScalarStores,			FeatureScalarStores,
	FeatureScalarAtomics,			FeatureScalarAtomics,
	FeatureScalarFlatScratchInsts,			FeatureScalarFlatScratchInsts,
	FeatureGetWaveIdInst,			FeatureGetWaveIdInst,
	FeatureMadMacF32Insts,			FeatureMadMacF32Insts,
	FeatureDsSrc2Insts,			FeatureDsSrc2Insts,
	FeatureLdsMisalignedBug,			FeatureLdsMisalignedBug,
	FeatureSupportsXNACK])>;			FeatureSupportsXNACK])>;

	def FeatureISAVersion10_3_0 : FeatureSet<			def FeatureISAVersion10_3_0 : FeatureSet<
	[FeatureGFX10,			[FeatureGFX10,
	FeatureGFX10_AEncoding,			FeatureGFX10_AEncoding,
	FeatureGFX10_BEncoding,			FeatureGFX10_BEncoding,
	FeatureGFX10_3Insts,			FeatureGFX10_3Insts,
	FeatureLDSBankCount32,			FeatureLDSBankCount32,
	FeatureDLInsts,			FeatureDLInsts,
	FeatureDot1Insts,			FeatureDot1Insts,
	FeatureDot2Insts,			FeatureDot2Insts,
	FeatureDot5Insts,			FeatureDot5Insts,
	FeatureDot6Insts,			FeatureDot6Insts,
	FeatureDot7Insts,			FeatureDot7Insts,
	FeatureNSAEncoding,			FeatureNSAEncoding,
				FeatureNSAMaxSize13,
	FeatureWavefrontSize32,			FeatureWavefrontSize32,
	FeatureShaderCyclesRegister]>;			FeatureShaderCyclesRegister]>;

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	def AMDGPUInstrInfo : InstrInfo {			def AMDGPUInstrInfo : InstrInfo {
	let guessInstructionProperties = 1;			let guessInstructionProperties = 1;
	let noNamedPositionallyEncodedOperands = 1;			let noNamedPositionallyEncodedOperands = 1;
	▲ Show 20 Lines • Show All 375 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Show First 20 Lines • Show All 4,147 Lines • ▼ Show 20 Lines	static void packImage16bitOpsToDwords(MachineIRBuilder &B, MachineInstr &MI,

for (unsigned I = Intr->VAddrStart; I < EndIdx; I++) {		for (unsigned I = Intr->VAddrStart; I < EndIdx; I++) {
MachineOperand &SrcOp = MI.getOperand(ArgOffset + I);		MachineOperand &SrcOp = MI.getOperand(ArgOffset + I);
if (!SrcOp.isReg())		if (!SrcOp.isReg())
continue; // _L to _LZ may have eliminated this.		continue; // _L to _LZ may have eliminated this.

Register AddrReg = SrcOp.getReg();		Register AddrReg = SrcOp.getReg();

if (I < Intr->GradientStart) {		if ((I < Intr->GradientStart) \|\|
AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0);		(I >= Intr->GradientStart && I < Intr->CoordStart && !IsG16) \|\|
PackedAddrs.push_back(AddrReg);
} else if ((I >= Intr->GradientStart && I < Intr->CoordStart && !IsG16) \|\|
(I >= Intr->CoordStart && !IsA16)) {		(I >= Intr->CoordStart && !IsA16)) {
// Handle any gradient or coordinate operands that should not be packed		// Handle any gradient or coordinate operands that should not be packed
		AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0);
PackedAddrs.push_back(AddrReg);		PackedAddrs.push_back(AddrReg);
		foadUnsubmitted Not Done Reply Inline Actions This part looks like a separate clean-up or latent bug fix? foad: This part looks like a separate clean-up or latent bug fix?
		critsonAuthorUnsubmitted Done Reply Inline Actions It is a fix for a bug triggered by changes in this diff. I can move it to another review, but I do not think I'll be able to build a test case for it. critson: It is a fix for a bug triggered by changes in this diff. I can move it to another review, but I…
		foadUnsubmitted Not Done Reply Inline Actions Fair enough foad: Fair enough
} else {		} else {
// Dz/dh, dz/dv and the last odd coord are packed with undef. Also, in 1D,		// Dz/dh, dz/dv and the last odd coord are packed with undef. Also, in 1D,
// derivatives dx/dh and dx/dv are packed with undef.		// derivatives dx/dh and dx/dv are packed with undef.
if (((I + 1) >= EndIdx) \|\|		if (((I + 1) >= EndIdx) \|\|
((Intr->NumGradients / 2) % 2 == 1 &&		((Intr->NumGradients / 2) % 2 == 1 &&
(I == static_cast<unsigned>(Intr->GradientStart +		(I == static_cast<unsigned>(Intr->GradientStart +
(Intr->NumGradients / 2) - 1) \|\|		(Intr->NumGradients / 2) - 1) \|\|
I == static_cast<unsigned>(Intr->GradientStart +		I == static_cast<unsigned>(Intr->GradientStart +
Show All 10 Lines	if ((I < Intr->GradientStart) \|\|
.getReg(0));		.getReg(0));
++I;		++I;
}		}
}		}
}		}
}		}

/// Convert from separate vaddr components to a single vector address register,		/// Convert from separate vaddr components to a single vector address register,
/// and replace the remaining operands with $noreg.		/// and replace the remaining operands with $noreg.
		arsenmUnsubmitted Done Reply Inline Actions Why does this need to round up? We should be able to directly handle non powers of 2 arsenm: Why does this need to round up? We should be able to directly handle non powers of 2
		critsonAuthorUnsubmitted Done Reply Inline Actions I think the point after which this needs to round up should be higher, i.e. 6 instead of 4. The problem I am hitting is that it seems instruction definitions for the appropriate sizes are missing at the moment in a lot of cases. Perhaps the MIMG definitions predate when we added VReg_160 / VReg_192? I guess I can try and fix those first. However I think we will have to round up for larger vector sizes as we do not have arbitrary register sizes beyond VReg_192? There are plenty of instructions which take arbitrary sizes beyond 6 VGPRs, so can only be defined with a single VReg_256/VReg_512 argument. critson: I think the point after which this needs to round up should be higher, i.e. 6 instead of 4. The…
		arsenmUnsubmitted Done Reply Inline Actions We should add the register classes eventually. If we are going to workaround the missing classes, this isn't the place for it. The selector can fixup the classes when we actually need to pick a class arsenm: We should add the register classes eventually. If we are going to workaround the missing…
		critsonAuthorUnsubmitted Done Reply Inline Actions I have moved this to the selector, but it seems kind of messy -- which makes me suspect I am doing it wrong. critson: I have moved this to the selector, but it seems kind of messy -- which makes me suspect I am…
static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI,		static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI,
int DimIdx, int NumVAddrs) {		int DimIdx, int NumVAddrs) {
const LLT S32 = LLT::scalar(32);		const LLT S32 = LLT::scalar(32);

SmallVector<Register, 8> AddrRegs;		SmallVector<Register, 8> AddrRegs;
for (int I = 0; I != NumVAddrs; ++I) {		for (int I = 0; I != NumVAddrs; ++I) {
MachineOperand &SrcOp = MI.getOperand(DimIdx + I);		MachineOperand &SrcOp = MI.getOperand(DimIdx + I);
if (SrcOp.isReg()) {		if (SrcOp.isReg()) {
AddrRegs.push_back(SrcOp.getReg());		AddrRegs.push_back(SrcOp.getReg());
assert(B.getMRI()->getType(SrcOp.getReg()) == S32);		assert(B.getMRI()->getType(SrcOp.getReg()) == S32);
}		}
}		}

int NumAddrRegs = AddrRegs.size();		int NumAddrRegs = AddrRegs.size();
if (NumAddrRegs != 1) {		if (NumAddrRegs != 1) {
// Round up to 8 elements for v5-v7		// Above 8 elements round up to next power of 2 (i.e. 16).
// FIXME: Missing intermediate sized register classes and instructions.		if (NumAddrRegs > 8 && !isPowerOf2_32(NumAddrRegs)) {
if (NumAddrRegs > 4 && !isPowerOf2_32(NumAddrRegs)) {
const int RoundedNumRegs = NextPowerOf2(NumAddrRegs);		const int RoundedNumRegs = NextPowerOf2(NumAddrRegs);
auto Undef = B.buildUndef(S32);		auto Undef = B.buildUndef(S32);
AddrRegs.append(RoundedNumRegs - NumAddrRegs, Undef.getReg(0));		AddrRegs.append(RoundedNumRegs - NumAddrRegs, Undef.getReg(0));
NumAddrRegs = RoundedNumRegs;		NumAddrRegs = RoundedNumRegs;
}		}

auto VAddr =		auto VAddr =
B.buildBuildVector(LLT::fixed_vector(NumAddrRegs, 32), AddrRegs);		B.buildBuildVector(LLT::fixed_vector(NumAddrRegs, 32), AddrRegs);
▲ Show 20 Lines • Show All 154 Lines • ▼ Show 20 Lines	bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
if (IsA16 \|\| IsG16) {		if (IsA16 \|\| IsG16) {
if (Intr->NumVAddrs > 1) {		if (Intr->NumVAddrs > 1) {
SmallVector<Register, 4> PackedRegs;		SmallVector<Register, 4> PackedRegs;

packImage16bitOpsToDwords(B, MI, PackedRegs, ArgOffset, Intr, IsA16,		packImage16bitOpsToDwords(B, MI, PackedRegs, ArgOffset, Intr, IsA16,
IsG16);		IsG16);

// See also below in the non-a16 branch		// See also below in the non-a16 branch
const bool UseNSA = PackedRegs.size() >= 3 && ST.hasNSAEncoding();		const bool UseNSA = ST.hasNSAEncoding() && PackedRegs.size() >= 3 &&
		PackedRegs.size() <= ST.getNSAMaxSize();
		foadUnsubmitted Done Reply Inline Actions Don't need the cast foad: Don't need the cast

if (!UseNSA && PackedRegs.size() > 1) {		if (!UseNSA && PackedRegs.size() > 1) {
LLT PackedAddrTy = LLT::fixed_vector(2 * PackedRegs.size(), 16);		LLT PackedAddrTy = LLT::fixed_vector(2 * PackedRegs.size(), 16);
auto Concat = B.buildConcatVectors(PackedAddrTy, PackedRegs);		auto Concat = B.buildConcatVectors(PackedAddrTy, PackedRegs);
PackedRegs[0] = Concat.getReg(0);		PackedRegs[0] = Concat.getReg(0);
PackedRegs.resize(1);		PackedRegs.resize(1);
}		}

Show All 20 Lines	if (IsA16 \|\| IsG16) {
// wash in terms of code size or even better.		// wash in terms of code size or even better.
//		//
// However, we currently have no way of hinting to the register allocator		// However, we currently have no way of hinting to the register allocator
// that MIMG addresses should be placed contiguously when it is possible to		// that MIMG addresses should be placed contiguously when it is possible to
// do so, so force non-NSA for the common 2-address case as a heuristic.		// do so, so force non-NSA for the common 2-address case as a heuristic.
//		//
// SIShrinkInstructions will convert NSA encodings to non-NSA after register		// SIShrinkInstructions will convert NSA encodings to non-NSA after register
// allocation when possible.		// allocation when possible.
const bool UseNSA = CorrectedNumVAddrs >= 3 && ST.hasNSAEncoding();		const bool UseNSA = ST.hasNSAEncoding() && CorrectedNumVAddrs >= 3 &&
		CorrectedNumVAddrs <= ST.getNSAMaxSize();
		foadUnsubmitted Done Reply Inline Actions Don't need the cast (that was the whole point of making it unsigned). foad: Don't need the cast (that was the whole point of making it unsigned).

if (!UseNSA && Intr->NumVAddrs > 1)		if (!UseNSA && Intr->NumVAddrs > 1)
convertImageAddrToPacked(B, MI, ArgOffset + Intr->VAddrStart,		convertImageAddrToPacked(B, MI, ArgOffset + Intr->VAddrStart,
Intr->NumVAddrs);		Intr->NumVAddrs);
}		}

int Flags = 0;		int Flags = 0;
if (IsA16)		if (IsA16)
▲ Show 20 Lines • Show All 617 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Show First 20 Lines • Show All 258 Lines • ▼ Show 20 Lines	GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasDPP8(false),		HasDPP8(false),
Has64BitDPP(false),		Has64BitDPP(false),
HasPackedFP32Ops(false),		HasPackedFP32Ops(false),
HasExtendedImageInsts(false),		HasExtendedImageInsts(false),
HasR128A16(false),		HasR128A16(false),
HasGFX10A16(false),		HasGFX10A16(false),
HasG16(false),		HasG16(false),
HasNSAEncoding(false),		HasNSAEncoding(false),
		NSAMaxSize(0),
GFX10_AEncoding(false),		GFX10_AEncoding(false),
GFX10_BEncoding(false),		GFX10_BEncoding(false),
HasDLInsts(false),		HasDLInsts(false),
HasDot1Insts(false),		HasDot1Insts(false),
HasDot2Insts(false),		HasDot2Insts(false),
HasDot3Insts(false),		HasDot3Insts(false),
HasDot4Insts(false),		HasDot4Insts(false),
HasDot5Insts(false),		HasDot5Insts(false),
▲ Show 20 Lines • Show All 876 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Show First 20 Lines • Show All 130 Lines • ▼ Show 20 Lines	protected:
bool HasDPP8;		bool HasDPP8;
bool Has64BitDPP;		bool Has64BitDPP;
bool HasPackedFP32Ops;		bool HasPackedFP32Ops;
bool HasExtendedImageInsts;		bool HasExtendedImageInsts;
bool HasR128A16;		bool HasR128A16;
bool HasGFX10A16;		bool HasGFX10A16;
bool HasG16;		bool HasG16;
bool HasNSAEncoding;		bool HasNSAEncoding;
		unsigned NSAMaxSize;
		foadUnsubmitted Done Reply Inline Actions Is there a reason this can't be unsigned? foad: Is there a reason this can't be unsigned?
		critsonAuthorUnsubmitted Done Reply Inline Actions The tablegen part needs to be int, but this can be unsigned. critson: The tablegen part needs to be int, but this can be unsigned.
bool GFX10_AEncoding;		bool GFX10_AEncoding;
bool GFX10_BEncoding;		bool GFX10_BEncoding;
bool HasDLInsts;		bool HasDLInsts;
bool HasDot1Insts;		bool HasDot1Insts;
bool HasDot2Insts;		bool HasDot2Insts;
bool HasDot3Insts;		bool HasDot3Insts;
bool HasDot4Insts;		bool HasDot4Insts;
bool HasDot5Insts;		bool HasDot5Insts;
▲ Show 20 Lines • Show All 726 Lines • ▼ Show 20 Lines	public:
}		}

bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }		bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }

bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; }		bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; }

bool hasNSAEncoding() const { return HasNSAEncoding; }		bool hasNSAEncoding() const { return HasNSAEncoding; }

		unsigned getNSAMaxSize() const { return NSAMaxSize; }

bool hasGFX10_AEncoding() const {		bool hasGFX10_AEncoding() const {
return GFX10_AEncoding;		return GFX10_AEncoding;
}		}

bool hasGFX10_BEncoding() const {		bool hasGFX10_BEncoding() const {
return GFX10_BEncoding;		return GFX10_BEncoding;
}		}

▲ Show 20 Lines • Show All 298 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 6,205 Lines • ▼ Show 20 Lines	SDValue SITargetLowering::lowerImage(SDValue Op,
// wash in terms of code size or even better.		// wash in terms of code size or even better.
//		//
// However, we currently have no way of hinting to the register allocator that		// However, we currently have no way of hinting to the register allocator that
// MIMG addresses should be placed contiguously when it is possible to do so,		// MIMG addresses should be placed contiguously when it is possible to do so,
// so force non-NSA for the common 2-address case as a heuristic.		// so force non-NSA for the common 2-address case as a heuristic.
//		//
// SIShrinkInstructions will convert NSA encodings to non-NSA after register		// SIShrinkInstructions will convert NSA encodings to non-NSA after register
// allocation when possible.		// allocation when possible.
bool UseNSA =		bool UseNSA = ST->hasFeature(AMDGPU::FeatureNSAEncoding) &&
ST->hasFeature(AMDGPU::FeatureNSAEncoding) && VAddrs.size() >= 3;		VAddrs.size() >= 3 &&
		VAddrs.size() <= (unsigned)ST->getNSAMaxSize();
		foadUnsubmitted Not Done Reply Inline Actions Don't need the cast. foad: Don't need the cast.
SDValue VAddr;		SDValue VAddr;
if (!UseNSA)		if (!UseNSA)
VAddr = getBuildDwordsVector(DAG, DL, VAddrs);		VAddr = getBuildDwordsVector(DAG, DL, VAddrs);

SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);		SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);		SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);
SDValue Unorm;		SDValue Unorm;
if (!BaseOpcode->Sampler) {		if (!BaseOpcode->Sampler) {
▲ Show 20 Lines • Show All 6,043 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,663 Lines • ▼ Show 20 Lines	define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)
; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)		; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32)
; GFX10: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)		; GFX10: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
; GFX10: [[COPY28:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)		; GFX10: [[COPY28:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32)
; GFX10: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)		; GFX10: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[DEF]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
▲ Show 20 Lines • Show All 1,825 Lines • ▼ Show 20 Lines	define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)		; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)		; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)		; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32)
; GFX10: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)		; GFX10: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF		; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[DEF]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource")		; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource")
; GFX10: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)		; GFX10: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:		main_body:
%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret float %v		ret float %v
}		}

define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {		define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
▲ Show 20 Lines • Show All 78 Lines • ▼ Show 20 Lines	define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)		; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)		; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)		; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32)
; GFX10: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)		; GFX10: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF		; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[DEF]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource")		; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
main_body:		main_body:
%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <2 x float> %v		ret <2 x float> %v
}		}
▲ Show 20 Lines • Show All 60 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll

Show All 21 Lines	define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0		; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1		; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2		; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)		; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF		; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 25 Lines	define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4		; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5		; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)		; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)		; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32)
		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 33 Lines	define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF		; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[DEF]](s32)
; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)		; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)		; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)
; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)		; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32)
		; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32)
		; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 23 Lines	define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2		; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3		; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF		; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 27 Lines	define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6		; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)		; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)		; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
		; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 22 Lines	define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1		; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2		; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3		; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)		; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF		; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32)
		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 26 Lines	define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5		; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6		; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)		; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)		; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32)
		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
		; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 24 Lines	define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3		; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4		; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF		; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[COPY16]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32)
		; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 28 Lines	define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7		; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)		; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)		; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
		; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
		; GFX10: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32)
		; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 21 Lines	define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0		; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1		; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2		; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)		; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF		; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 25 Lines	define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4		; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5		; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)		; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)		; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32)
		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 23 Lines	define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2		; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3		; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF		; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 27 Lines	define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6		; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)		; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)		; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
		; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 22 Lines	define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1		; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2		; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3		; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)		; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF		; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32)
		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 26 Lines	define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5		; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6		; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)		; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)		; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32)
		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
		; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 24 Lines	define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3		; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4		; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF		; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[COPY16]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32)
		; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 28 Lines	define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7		; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)		; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)		; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)		; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32)
		; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
		; GFX10: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32)
		; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)		; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)		; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:		main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Show All 30 Lines	define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)		; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)		; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)		; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32) from custom "ImageResource")		; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
		; GFX10: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32)
		; GFX10: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32)
		; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32) from custom "ImageResource")
; GFX10: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)		; GFX10: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:		main_body:
%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret float %v		ret float %v
}		}

define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {		define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
Show All 26 Lines	define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)		; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)		; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)		; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)		; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)		; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)		; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)		; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>) from custom "ImageResource")		; GFX10: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32)
		; GFX10: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32)
		; GFX10: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32)
		; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
		; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>) from custom "ImageResource")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)		; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)		; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)		; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1		; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
main_body:		main_body:
%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)		%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <2 x float> %v		ret <2 x float> %v
}		}
Show All 26 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.dim.ll

	Show First 20 Lines • Show All 499 Lines • ▼ Show 20 Lines
	; GFX6-NEXT: s_mov_b32 s7, s9			; GFX6-NEXT: s_mov_b32 s7, s9
	; GFX6-NEXT: s_mov_b32 s8, s10			; GFX6-NEXT: s_mov_b32 s8, s10
	; GFX6-NEXT: s_mov_b32 s9, s11			; GFX6-NEXT: s_mov_b32 s9, s11
	; GFX6-NEXT: s_mov_b64 s[14:15], exec			; GFX6-NEXT: s_mov_b64 s[14:15], exec
	; GFX6-NEXT: s_mov_b32 s10, s12			; GFX6-NEXT: s_mov_b32 s10, s12
	; GFX6-NEXT: s_mov_b32 s11, s13			; GFX6-NEXT: s_mov_b32 s11, s13
	; GFX6-NEXT: s_wqm_b64 exec, exec			; GFX6-NEXT: s_wqm_b64 exec, exec
	; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]			; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
	; GFX6-NEXT: image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1			; GFX6-NEXT: image_gather4_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
	; GFX6-NEXT: s_waitcnt vmcnt(0)			; GFX6-NEXT: s_waitcnt vmcnt(0)
	; GFX6-NEXT: ; return to shader part epilog			; GFX6-NEXT: ; return to shader part epilog
	;			;
	; GFX10NSA-LABEL: gather4_c_b_cl_2d:			; GFX10NSA-LABEL: gather4_c_b_cl_2d:
	; GFX10NSA: ; %bb.0: ; %main_body			; GFX10NSA: ; %bb.0: ; %main_body
	; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo			; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo
	; GFX10NSA-NEXT: s_mov_b32 s0, s2			; GFX10NSA-NEXT: s_mov_b32 s0, s2
	; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo			; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
	▲ Show 20 Lines • Show All 342 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.o.dim.ll

	Show First 20 Lines • Show All 155 Lines • ▼ Show 20 Lines
	; GFX6-NEXT: s_mov_b32 s7, s9			; GFX6-NEXT: s_mov_b32 s7, s9
	; GFX6-NEXT: s_mov_b32 s8, s10			; GFX6-NEXT: s_mov_b32 s8, s10
	; GFX6-NEXT: s_mov_b32 s9, s11			; GFX6-NEXT: s_mov_b32 s9, s11
	; GFX6-NEXT: s_mov_b64 s[14:15], exec			; GFX6-NEXT: s_mov_b64 s[14:15], exec
	; GFX6-NEXT: s_mov_b32 s10, s12			; GFX6-NEXT: s_mov_b32 s10, s12
	; GFX6-NEXT: s_mov_b32 s11, s13			; GFX6-NEXT: s_mov_b32 s11, s13
	; GFX6-NEXT: s_wqm_b64 exec, exec			; GFX6-NEXT: s_wqm_b64 exec, exec
	; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]			; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
	; GFX6-NEXT: image_gather4_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1			; GFX6-NEXT: image_gather4_c_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
	; GFX6-NEXT: s_waitcnt vmcnt(0)			; GFX6-NEXT: s_waitcnt vmcnt(0)
	; GFX6-NEXT: ; return to shader part epilog			; GFX6-NEXT: ; return to shader part epilog
	;			;
	; GFX10-LABEL: gather4_c_cl_o_2d:			; GFX10-LABEL: gather4_c_cl_o_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: s_mov_b32 s1, exec_lo			; GFX10-NEXT: s_mov_b32 s1, exec_lo
	; GFX10-NEXT: s_mov_b32 s0, s2			; GFX10-NEXT: s_mov_b32 s0, s2
	; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo			; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
	▲ Show 20 Lines • Show All 77 Lines • ▼ Show 20 Lines
	; GFX6-NEXT: s_mov_b32 s7, s9			; GFX6-NEXT: s_mov_b32 s7, s9
	; GFX6-NEXT: s_mov_b32 s8, s10			; GFX6-NEXT: s_mov_b32 s8, s10
	; GFX6-NEXT: s_mov_b32 s9, s11			; GFX6-NEXT: s_mov_b32 s9, s11
	; GFX6-NEXT: s_mov_b64 s[14:15], exec			; GFX6-NEXT: s_mov_b64 s[14:15], exec
	; GFX6-NEXT: s_mov_b32 s10, s12			; GFX6-NEXT: s_mov_b32 s10, s12
	; GFX6-NEXT: s_mov_b32 s11, s13			; GFX6-NEXT: s_mov_b32 s11, s13
	; GFX6-NEXT: s_wqm_b64 exec, exec			; GFX6-NEXT: s_wqm_b64 exec, exec
	; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]			; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
	; GFX6-NEXT: image_gather4_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1			; GFX6-NEXT: image_gather4_c_b_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
	; GFX6-NEXT: s_waitcnt vmcnt(0)			; GFX6-NEXT: s_waitcnt vmcnt(0)
	; GFX6-NEXT: ; return to shader part epilog			; GFX6-NEXT: ; return to shader part epilog
	;			;
	; GFX10-LABEL: gather4_c_b_o_2d:			; GFX10-LABEL: gather4_c_b_o_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: s_mov_b32 s1, exec_lo			; GFX10-NEXT: s_mov_b32 s1, exec_lo
	; GFX10-NEXT: s_mov_b32 s0, s2			; GFX10-NEXT: s_mov_b32 s0, s2
	; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo			; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
	Show All 27 Lines
	; GFX6-NEXT: s_mov_b32 s4, s6			; GFX6-NEXT: s_mov_b32 s4, s6
	; GFX6-NEXT: s_mov_b32 s5, s7			; GFX6-NEXT: s_mov_b32 s5, s7
	; GFX6-NEXT: s_mov_b32 s6, s8			; GFX6-NEXT: s_mov_b32 s6, s8
	; GFX6-NEXT: s_mov_b32 s7, s9			; GFX6-NEXT: s_mov_b32 s7, s9
	; GFX6-NEXT: s_mov_b32 s8, s10			; GFX6-NEXT: s_mov_b32 s8, s10
	; GFX6-NEXT: s_mov_b32 s9, s11			; GFX6-NEXT: s_mov_b32 s9, s11
	; GFX6-NEXT: s_mov_b32 s10, s12			; GFX6-NEXT: s_mov_b32 s10, s12
	; GFX6-NEXT: s_mov_b32 s11, s13			; GFX6-NEXT: s_mov_b32 s11, s13
	; GFX6-NEXT: image_gather4_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1			; GFX6-NEXT: image_gather4_b_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
	; GFX6-NEXT: s_waitcnt vmcnt(0)			; GFX6-NEXT: s_waitcnt vmcnt(0)
	; GFX6-NEXT: ; return to shader part epilog			; GFX6-NEXT: ; return to shader part epilog
	;			;
	; GFX10-LABEL: gather4_b_cl_o_2d:			; GFX10-LABEL: gather4_b_cl_o_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: s_mov_b32 s0, s2			; GFX10-NEXT: s_mov_b32 s0, s2
	; GFX10-NEXT: s_mov_b32 s1, s3			; GFX10-NEXT: s_mov_b32 s1, s3
	; GFX10-NEXT: s_mov_b32 s2, s4			; GFX10-NEXT: s_mov_b32 s2, s4
	Show All 27 Lines
	; GFX6-NEXT: s_mov_b32 s7, s9			; GFX6-NEXT: s_mov_b32 s7, s9
	; GFX6-NEXT: s_mov_b32 s8, s10			; GFX6-NEXT: s_mov_b32 s8, s10
	; GFX6-NEXT: s_mov_b32 s9, s11			; GFX6-NEXT: s_mov_b32 s9, s11
	; GFX6-NEXT: s_mov_b64 s[14:15], exec			; GFX6-NEXT: s_mov_b64 s[14:15], exec
	; GFX6-NEXT: s_mov_b32 s10, s12			; GFX6-NEXT: s_mov_b32 s10, s12
	; GFX6-NEXT: s_mov_b32 s11, s13			; GFX6-NEXT: s_mov_b32 s11, s13
	; GFX6-NEXT: s_wqm_b64 exec, exec			; GFX6-NEXT: s_wqm_b64 exec, exec
	; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]			; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
	; GFX6-NEXT: image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1			; GFX6-NEXT: image_gather4_c_b_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0x1
	; GFX6-NEXT: s_waitcnt vmcnt(0)			; GFX6-NEXT: s_waitcnt vmcnt(0)
	; GFX6-NEXT: ; return to shader part epilog			; GFX6-NEXT: ; return to shader part epilog
	;			;
	; GFX10-LABEL: gather4_c_b_cl_o_2d:			; GFX10-LABEL: gather4_c_b_cl_o_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: s_mov_b32 s1, exec_lo			; GFX10-NEXT: s_mov_b32 s1, exec_lo
	; GFX10-NEXT: s_mov_b32 s0, s2			; GFX10-NEXT: s_mov_b32 s0, s2
	; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo			; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
	▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines
	; GFX6-NEXT: s_mov_b32 s4, s6			; GFX6-NEXT: s_mov_b32 s4, s6
	; GFX6-NEXT: s_mov_b32 s5, s7			; GFX6-NEXT: s_mov_b32 s5, s7
	; GFX6-NEXT: s_mov_b32 s6, s8			; GFX6-NEXT: s_mov_b32 s6, s8
	; GFX6-NEXT: s_mov_b32 s7, s9			; GFX6-NEXT: s_mov_b32 s7, s9
	; GFX6-NEXT: s_mov_b32 s8, s10			; GFX6-NEXT: s_mov_b32 s8, s10
	; GFX6-NEXT: s_mov_b32 s9, s11			; GFX6-NEXT: s_mov_b32 s9, s11
	; GFX6-NEXT: s_mov_b32 s10, s12			; GFX6-NEXT: s_mov_b32 s10, s12
	; GFX6-NEXT: s_mov_b32 s11, s13			; GFX6-NEXT: s_mov_b32 s11, s13
	; GFX6-NEXT: image_gather4_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1			; GFX6-NEXT: image_gather4_c_l_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
	; GFX6-NEXT: s_waitcnt vmcnt(0)			; GFX6-NEXT: s_waitcnt vmcnt(0)
	; GFX6-NEXT: ; return to shader part epilog			; GFX6-NEXT: ; return to shader part epilog
	;			;
	; GFX10-LABEL: gather4_c_l_o_2d:			; GFX10-LABEL: gather4_c_l_o_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: s_mov_b32 s0, s2			; GFX10-NEXT: s_mov_b32 s0, s2
	; GFX10-NEXT: s_mov_b32 s1, s3			; GFX10-NEXT: s_mov_b32 s1, s3
	; GFX10-NEXT: s_mov_b32 s2, s4			; GFX10-NEXT: s_mov_b32 s2, s4
	▲ Show 20 Lines • Show All 113 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll

	Show All 29 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {			define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
	; GFX10-LABEL: sample_d_3d:			; GFX10-LABEL: sample_d_3d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v9, v2
	; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1			; GFX10-NEXT: v_mov_b32_e32 v10, v3
				; GFX10-NEXT: v_mov_b32_e32 v11, 0xffff
	; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4			; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4
				; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
	; GFX10-NEXT: s_lshl_b32 s12, s0, 16			; GFX10-NEXT: s_lshl_b32 s12, s0, 16
	; GFX10-NEXT: v_and_or_b32 v0, v0, v9, v1			; GFX10-NEXT: v_and_or_b32 v3, v9, v11, s12
	; GFX10-NEXT: v_and_or_b32 v1, v2, v9, s12			; GFX10-NEXT: v_and_or_b32 v4, v10, v11, v4
	; GFX10-NEXT: v_and_or_b32 v2, v3, v9, v4			; GFX10-NEXT: v_and_or_b32 v2, v0, v11, v1
	; GFX10-NEXT: v_and_or_b32 v3, v5, v9, s12			; GFX10-NEXT: v_and_or_b32 v5, v5, v11, s12
	; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D			; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {			define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
	▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {			define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
	; GFX10-LABEL: sample_c_d_cl_2d:			; GFX10-LABEL: sample_c_d_cl_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v8, v2
	; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2			; GFX10-NEXT: v_mov_b32_e32 v9, v3
				; GFX10-NEXT: v_mov_b32_e32 v2, v0
				; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4			; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4
	; GFX10-NEXT: v_and_or_b32 v1, v1, v8, v2			; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v8
	; GFX10-NEXT: v_and_or_b32 v2, v3, v8, v4			; GFX10-NEXT: v_and_or_b32 v4, v9, v0, v4
	; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D			; GFX10-NEXT: v_and_or_b32 v3, v1, v0, v3
				; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {			define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
	▲ Show 20 Lines • Show All 102 Lines • ▼ Show 20 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {			define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
	; GFX10-LABEL: sample_c_cd_cl_2d:			; GFX10-LABEL: sample_c_cd_cl_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v8, v2
	; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2			; GFX10-NEXT: v_mov_b32_e32 v9, v3
				; GFX10-NEXT: v_mov_b32_e32 v2, v0
				; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4			; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4
	; GFX10-NEXT: v_and_or_b32 v1, v1, v8, v2			; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v8
	; GFX10-NEXT: v_and_or_b32 v2, v3, v8, v4			; GFX10-NEXT: v_and_or_b32 v4, v9, v0, v4
	; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D			; GFX10-NEXT: v_and_or_b32 v3, v1, v0, v3
				; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {			define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
	; GFX10-LABEL: sample_c_d_o_2darray_V1:			; GFX10-LABEL: sample_c_d_o_2darray_V1:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v9, v3
	; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3			; GFX10-NEXT: v_mov_b32_e32 v10, v2
				; GFX10-NEXT: v_mov_b32_e32 v2, v0
				; GFX10-NEXT: v_mov_b32_e32 v3, v1
				; GFX10-NEXT: v_mov_b32_e32 v11, v4
				; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5			; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5
	; GFX10-NEXT: v_and_or_b32 v2, v2, v9, v3			; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v9
	; GFX10-NEXT: v_and_or_b32 v3, v4, v9, v5			; GFX10-NEXT: v_and_or_b32 v5, v11, v0, v5
	; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY			; GFX10-NEXT: v_and_or_b32 v4, v10, v0, v1
				; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret float %v			ret float %v
	}			}

	define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {			define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
	; GFX10-LABEL: sample_c_d_o_2darray_V2:			; GFX10-LABEL: sample_c_d_o_2darray_V2:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v9, v3
	; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3			; GFX10-NEXT: v_mov_b32_e32 v10, v2
				; GFX10-NEXT: v_mov_b32_e32 v2, v0
				; GFX10-NEXT: v_mov_b32_e32 v3, v1
				; GFX10-NEXT: v_mov_b32_e32 v11, v4
				; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5			; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5
	; GFX10-NEXT: v_and_or_b32 v2, v2, v9, v3			; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v9
	; GFX10-NEXT: v_and_or_b32 v3, v4, v9, v5			; GFX10-NEXT: v_and_or_b32 v5, v11, v0, v5
	; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY			; GFX10-NEXT: v_and_or_b32 v4, v10, v0, v1
				; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <2 x float> %v			ret <2 x float> %v
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	Show All 24 Lines

llvm/test/CodeGen/AMDGPU/cluster_stores.ll

	Show First 20 Lines • Show All 345 Lines • ▼ Show 20 Lines
	; GFX9-NEXT: v_add_f32_e32 v4, v4, v8			; GFX9-NEXT: v_add_f32_e32 v4, v4, v8
	; GFX9-NEXT: v_add_f32_e32 v3, v3, v7			; GFX9-NEXT: v_add_f32_e32 v3, v3, v7
	; GFX9-NEXT: v_add_f32_e32 v2, v2, v6			; GFX9-NEXT: v_add_f32_e32 v2, v2, v6
	; GFX9-NEXT: image_store v[2:5], v[0:1], s[12:19] dmask:0xf unorm			; GFX9-NEXT: image_store v[2:5], v[0:1], s[12:19] dmask:0xf unorm
	; GFX9-NEXT: s_endpgm			; GFX9-NEXT: s_endpgm
	;			;
	; GFX10-LABEL: cluster_image_sample:			; GFX10-LABEL: cluster_image_sample:
	; GFX10: ; %bb.0: ; %entry			; GFX10: ; %bb.0: ; %entry
	; GFX10-NEXT: v_cvt_f32_i32_e32 v2, v0			; GFX10-NEXT: v_mov_b32_e32 v4, 0
	; GFX10-NEXT: v_cvt_f32_i32_e32 v3, v1			; GFX10-NEXT: v_cvt_f32_i32_e32 v8, v0
	; GFX10-NEXT: v_mov_b32_e32 v13, 0			; GFX10-NEXT: v_cvt_f32_i32_e32 v9, v1
	; GFX10-NEXT: v_mov_b32_e32 v10, 1.0			; GFX10-NEXT: v_mov_b32_e32 v10, 1.0
	; GFX10-NEXT: v_add_f32_e32 v11, 1.0, v2			; GFX10-NEXT: v_mov_b32_e32 v5, v4
	; GFX10-NEXT: v_add_f32_e32 v12, 1.0, v3			; GFX10-NEXT: v_add_f32_e32 v2, 1.0, v8
	; GFX10-NEXT: v_add_f32_e32 v14, 2.0, v2			; GFX10-NEXT: v_add_f32_e32 v3, 1.0, v9
	; GFX10-NEXT: v_add_f32_e32 v15, 2.0, v3			; GFX10-NEXT: v_mov_b32_e32 v6, v4
	; GFX10-NEXT: image_sample_d v[2:5], [v11, v12, v13, v13, v13, v13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D			; GFX10-NEXT: v_mov_b32_e32 v7, v4
	; GFX10-NEXT: image_sample_d v[6:9], [v14, v15, v10, v10, v10, v10], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D			; GFX10-NEXT: v_add_f32_e32 v8, 2.0, v8
				; GFX10-NEXT: v_add_f32_e32 v9, 2.0, v9
				; GFX10-NEXT: v_mov_b32_e32 v11, v10
				; GFX10-NEXT: v_mov_b32_e32 v12, v10
				; GFX10-NEXT: v_mov_b32_e32 v13, v10
				; GFX10-NEXT: s_clause 0x1
				; GFX10-NEXT: image_sample_d v[14:17], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
				; GFX10-NEXT: image_sample_d v[18:21], v[8:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: v_add_f32_e32 v5, v5, v9			; GFX10-NEXT: v_add_f32_e32 v5, v17, v21
	; GFX10-NEXT: v_add_f32_e32 v4, v4, v8			; GFX10-NEXT: v_add_f32_e32 v4, v16, v20
	; GFX10-NEXT: v_add_f32_e32 v3, v3, v7			; GFX10-NEXT: v_add_f32_e32 v3, v15, v19
	; GFX10-NEXT: v_add_f32_e32 v2, v2, v6			; GFX10-NEXT: v_add_f32_e32 v2, v14, v18
	; GFX10-NEXT: image_store v[2:5], v[0:1], s[12:19] dmask:0xf dim:SQ_RSRC_IMG_2D unorm			; GFX10-NEXT: image_store v[2:5], v[0:1], s[12:19] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
	; GFX10-NEXT: s_endpgm			; GFX10-NEXT: s_endpgm
	entry:			entry:
	%s = sitofp i32 %x to float			%s = sitofp i32 %x to float
	%t = sitofp i32 %y to float			%t = sitofp i32 %y to float
	%s1 = fadd float %s, 1.0			%s1 = fadd float %s, 1.0
	%t1 = fadd float %t, 1.0			%t1 = fadd float %t, 1.0
	%val1 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32(i32 15, float %s1, float %t1, float 0.0, float 0.0, float 0.0, float 0.0, <8 x i32> %src, <4 x i32> %smp, i1 false, i32 0, i32 0)			%val1 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32(i32 15, float %s1, float %t1, float 0.0, float 0.0, float 0.0, float 0.0, <8 x i32> %src, <4 x i32> %smp, i1 false, i32 0, i32 0)
	Show All 11 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll

	; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-nsa-encoding -verify-machineinstrs -show-mc-encoding < %s \| FileCheck -check-prefixes=GCN,NONSA %s			; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-nsa-encoding -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN,NONSA %s
	; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s \| FileCheck -check-prefixes=GCN,NSA %s			; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN,GFX1010,NSA %s
				; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN,GFX1030,NSA %s

	; GCN-LABEL: {{^}}sample_2d:			; GCN-LABEL: {{^}}sample_2d:
	;			;
	; TODO: use NSA here			; TODO: use NSA here
	; GCN: v_mov_b32_e32 v2, v0			; GCN: v_mov_b32_e32 v2, v0
	;			;
	; GCN: image_sample v[0:3], v[1:2],			; GCN: image_sample v[0:3], v[1:2],
	define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %t, float %s) {			define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %t, float %s) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_3d:			; GCN-LABEL: {{^}}sample_3d:
	; NONSA: v_mov_b32_e32 v3, v0			; NONSA: v_mov_b32_e32 v3, v0
	; NONSA: image_sample v[0:3], v[1:3],			; NONSA: image_sample v[0:3], v[1:3],
	; NSA: image_sample v[0:3], [v1, v2, v0],			; NSA: image_sample v[0:3], [v1, v2, v0],
	define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) {			define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_d_3d:			; GCN-LABEL: {{^}}sample_d_3d:
	; NSA: image_sample_d v[0:3], [v3, v8, v7, v5, v4, v6, v0, v2, v1],			; GFX1010: image_sample_d v[0:3], v[7:22],
				; GFX1030: image_sample_d v[0:3], [v3, v8, v7, v5, v4, v6, v0, v2, v1],
	define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %r, float %t, float %dsdh, float %dtdv, float %dsdv, float %drdv, float %drdh, float %dtdh) {			define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %r, float %t, float %dsdh, float %dtdv, float %dsdv, float %drdv, float %drdh, float %dtdh) {
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	; GCN-LABEL: {{^}}sample_contig_nsa:			; GCN-LABEL: {{^}}sample_contig_nsa:
	; NONSA: image_sample_c_l v5, v[0:4],			; NONSA: image_sample_c_l v5, v[0:4],
	; NSA: image_sample_c_l v8, v[0:4],			; NSA: image_sample_c_l v{{[0-9]+}}, v[0:4],
	; NSA: image_sample v9, [v6, v7, v5],			; NSA: image_sample v{{[0-9]+}}, [v6, v7, v5],
	define amdgpu_ps <2 x float> @sample_contig_nsa(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) {			define amdgpu_ps <2 x float> @sample_contig_nsa(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) {
	main_body:			main_body:
	%v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	%v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	%r.0 = insertelement <2 x float> undef, float %v1, i32 0			%r.0 = insertelement <2 x float> undef, float %v1, i32 0
	%r = insertelement <2 x float> %r.0, float %v2, i32 1			%r = insertelement <2 x float> %r.0, float %v2, i32 1
	ret <2 x float> %r			ret <2 x float> %r
	}			}

	; GCN-LABEL: {{^}}sample_nsa_nsa:			; GCN-LABEL: {{^}}sample_nsa_nsa:
	; NSA: image_sample_c_l v8, [v1, v2, v3, v4, v0],			; NSA: image_sample_c_l v{{[0-9]+}}, [v1, v2, v3, v4, v0],
	; NSA: image_sample v9, [v6, v7, v5],			; NSA: image_sample v{{[0-9]+}}, [v6, v7, v5],
	define amdgpu_ps <2 x float> @sample_nsa_nsa(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %lod, float %zcompare, float %s1, float %t1, float %r1, float %r2, float %s2, float %t2) {			define amdgpu_ps <2 x float> @sample_nsa_nsa(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %lod, float %zcompare, float %s1, float %t1, float %r1, float %r2, float %s2, float %t2) {
	main_body:			main_body:
	%v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	%v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	%r.0 = insertelement <2 x float> undef, float %v1, i32 0			%r.0 = insertelement <2 x float> undef, float %v1, i32 0
	%r = insertelement <2 x float> %r.0, float %v2, i32 1			%r = insertelement <2 x float> %r.0, float %v2, i32 1
	ret <2 x float> %r			ret <2 x float> %r
	}			}

	; GCN-LABEL: {{^}}sample_nsa_contig:			; GCN-LABEL: {{^}}sample_nsa_contig:
	; NSA: image_sample_c_l v8, [v1, v2, v3, v4, v0],			; NSA: image_sample_c_l v{{[0-9]+}}, [v1, v2, v3, v4, v0],
	; NSA: image_sample v9, v[5:7],			; NSA: image_sample v{{[0-9]+}}, v[5:7],
	define amdgpu_ps <2 x float> @sample_nsa_contig(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %lod, float %zcompare, float %s1, float %t1, float %r1, float %s2, float %t2, float %r2) {			define amdgpu_ps <2 x float> @sample_nsa_contig(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %lod, float %zcompare, float %s1, float %t1, float %r1, float %s2, float %t2, float %r2) {
	main_body:			main_body:
	%v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	%v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	%r.0 = insertelement <2 x float> undef, float %v1, i32 0			%r.0 = insertelement <2 x float> undef, float %v1, i32 0
	%r = insertelement <2 x float> %r.0, float %v2, i32 1			%r = insertelement <2 x float> %r.0, float %v2, i32 1
	ret <2 x float> %r			ret <2 x float> %r
	}			}

	; GCN-LABEL: {{^}}sample_contig_contig:			; GCN-LABEL: {{^}}sample_contig_contig:
	; NSA: image_sample_c_l v8, v[0:4],			; NSA: image_sample_c_l v{{[0-9]+}}, v[0:4],
	; NSA: image_sample v9, v[5:7],			; NSA: image_sample v{{[0-9]+}}, v[5:7],
	; NONSA: image_sample_c_l v8, v[0:4],			; NONSA: image_sample_c_l v{{[0-9]+}}, v[0:4],
	; NONSA: image_sample v9, v[5:7],			; NONSA: image_sample v{{[0-9]+}}, v[5:7],
	define amdgpu_ps <2 x float> @sample_contig_contig(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %s2, float %t2, float %r2) {			define amdgpu_ps <2 x float> @sample_contig_contig(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %s2, float %t2, float %r2) {
	main_body:			main_body:
	%v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	%v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	%r.0 = insertelement <2 x float> undef, float %v1, i32 0			%r.0 = insertelement <2 x float> undef, float %v1, i32 0
	%r = insertelement <2 x float> %r.0, float %v2, i32 1			%r = insertelement <2 x float> %r.0, float %v2, i32 1
	ret <2 x float> %r			ret <2 x float> %r
	}			}
	▲ Show 20 Lines • Show All 53 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll

	Show First 20 Lines • Show All 594 Lines • ▼ Show 20 Lines
	; GFX9-NEXT: v_lshl_or_b32 v9, v4, 16, v3			; GFX9-NEXT: v_lshl_or_b32 v9, v4, 16, v3
	; GFX9-NEXT: v_lshl_or_b32 v7, v1, 16, v0			; GFX9-NEXT: v_lshl_or_b32 v7, v1, 16, v0
	; GFX9-NEXT: image_sample_d v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf a16			; GFX9-NEXT: image_sample_d v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf a16
	; GFX9-NEXT: s_waitcnt vmcnt(0)			; GFX9-NEXT: s_waitcnt vmcnt(0)
	; GFX9-NEXT: ; return to shader part epilog			; GFX9-NEXT: ; return to shader part epilog
	;			;
	; GFX10-LABEL: sample_d_3d:			; GFX10-LABEL: sample_d_3d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v12, v8
	; GFX10-NEXT: v_and_b32_e32 v6, v9, v6			; GFX10-NEXT: v_mov_b32_e32 v8, v2
	; GFX10-NEXT: v_and_b32_e32 v3, v9, v3			; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff
	; GFX10-NEXT: v_and_b32_e32 v0, v9, v0			; GFX10-NEXT: v_mov_b32_e32 v10, v5
	; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6			; GFX10-NEXT: v_and_b32_e32 v5, v2, v6
	; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3			; GFX10-NEXT: v_and_b32_e32 v3, v2, v3
	; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0			; GFX10-NEXT: v_and_b32_e32 v0, v2, v0
	; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16			; GFX10-NEXT: v_lshl_or_b32 v11, v7, 16, v5
				; GFX10-NEXT: v_lshl_or_b32 v9, v4, 16, v3
				; GFX10-NEXT: v_lshl_or_b32 v7, v1, 16, v0
				; GFX10-NEXT: image_sample_d_g16 v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {			define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
	▲ Show 20 Lines • Show All 532 Lines • ▼ Show 20 Lines
	; GFX9-NEXT: v_lshl_or_b32 v11, v5, 16, v1			; GFX9-NEXT: v_lshl_or_b32 v11, v5, 16, v1
	; GFX9-NEXT: v_lshl_or_b32 v10, v3, 16, v0			; GFX9-NEXT: v_lshl_or_b32 v10, v3, 16, v0
	; GFX9-NEXT: image_sample_c_d_o v0, v[8:13], s[0:7], s[8:11] dmask:0x4 a16 da			; GFX9-NEXT: image_sample_c_d_o v0, v[8:13], s[0:7], s[8:11] dmask:0x4 a16 da
	; GFX9-NEXT: s_waitcnt vmcnt(0)			; GFX9-NEXT: s_waitcnt vmcnt(0)
	; GFX9-NEXT: ; return to shader part epilog			; GFX9-NEXT: ; return to shader part epilog
	;			;
	; GFX10-LABEL: sample_c_d_o_2darray_V1:			; GFX10-LABEL: sample_c_d_o_2darray_V1:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v13, v8
	; GFX10-NEXT: v_and_b32_e32 v6, v9, v6			; GFX10-NEXT: v_mov_b32_e32 v8, v0
	; GFX10-NEXT: v_and_b32_e32 v4, v9, v4			; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10-NEXT: v_and_b32_e32 v2, v9, v2			; GFX10-NEXT: v_mov_b32_e32 v9, v1
	; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6			; GFX10-NEXT: v_and_b32_e32 v1, v0, v6
	; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4			; GFX10-NEXT: v_and_b32_e32 v4, v0, v4
	; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2			; GFX10-NEXT: v_and_b32_e32 v0, v0, v2
	; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16			; GFX10-NEXT: v_lshl_or_b32 v12, v7, 16, v1
				; GFX10-NEXT: v_lshl_or_b32 v11, v5, 16, v4
				; GFX10-NEXT: v_lshl_or_b32 v10, v3, 16, v0
				; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[8:13], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret float %v			ret float %v
	}			}

	define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {			define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
	Show All 10 Lines
	; GFX9-NEXT: v_lshl_or_b32 v11, v5, 16, v1			; GFX9-NEXT: v_lshl_or_b32 v11, v5, 16, v1
	; GFX9-NEXT: v_lshl_or_b32 v10, v3, 16, v0			; GFX9-NEXT: v_lshl_or_b32 v10, v3, 16, v0
	; GFX9-NEXT: image_sample_c_d_o v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 a16 da			; GFX9-NEXT: image_sample_c_d_o v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 a16 da
	; GFX9-NEXT: s_waitcnt vmcnt(0)			; GFX9-NEXT: s_waitcnt vmcnt(0)
	; GFX9-NEXT: ; return to shader part epilog			; GFX9-NEXT: ; return to shader part epilog
	;			;
	; GFX10-LABEL: sample_c_d_o_2darray_V2:			; GFX10-LABEL: sample_c_d_o_2darray_V2:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v13, v8
	; GFX10-NEXT: v_and_b32_e32 v6, v9, v6			; GFX10-NEXT: v_mov_b32_e32 v8, v0
	; GFX10-NEXT: v_and_b32_e32 v4, v9, v4			; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10-NEXT: v_and_b32_e32 v2, v9, v2			; GFX10-NEXT: v_mov_b32_e32 v9, v1
	; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6			; GFX10-NEXT: v_and_b32_e32 v1, v0, v6
	; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4			; GFX10-NEXT: v_and_b32_e32 v4, v0, v4
	; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2			; GFX10-NEXT: v_and_b32_e32 v0, v0, v2
	; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16			; GFX10-NEXT: v_lshl_or_b32 v12, v7, 16, v1
				; GFX10-NEXT: v_lshl_or_b32 v11, v5, 16, v4
				; GFX10-NEXT: v_lshl_or_b32 v10, v3, 16, v0
				; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <2 x float> %v			ret <2 x float> %v
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	▲ Show 20 Lines • Show All 58 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll

	Show First 20 Lines • Show All 1,733 Lines • ▼ Show 20 Lines
	; GFX6789-NEXT: v_mov_b32_e32 v0, v9			; GFX6789-NEXT: v_mov_b32_e32 v0, v9
	; GFX6789-NEXT: global_store_dword v11, v10, s[12:13]			; GFX6789-NEXT: global_store_dword v11, v10, s[12:13]
	; GFX6789-NEXT: s_waitcnt vmcnt(0)			; GFX6789-NEXT: s_waitcnt vmcnt(0)
	; GFX6789-NEXT: ; return to shader part epilog			; GFX6789-NEXT: ; return to shader part epilog
	;			;
	; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe:			; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v11, 0 ; encoding: [0x80,0x02,0x16,0x7e]			; GFX10-NEXT: v_mov_b32_e32 v11, 0 ; encoding: [0x80,0x02,0x16,0x7e]
	; GFX10-NEXT: v_mov_b32_e32 v9, v1 ; encoding: [0x01,0x03,0x12,0x7e]
	; GFX10-NEXT: v_mov_b32_e32 v10, v0 ; encoding: [0x00,0x03,0x14,0x7e]
	; GFX10-NEXT: v_mov_b32_e32 v12, v11 ; encoding: [0x0b,0x03,0x18,0x7e]			; GFX10-NEXT: v_mov_b32_e32 v12, v11 ; encoding: [0x0b,0x03,0x18,0x7e]
	; GFX10-NEXT: v_mov_b32_e32 v0, v11 ; encoding: [0x0b,0x03,0x00,0x7e]			; GFX10-NEXT: v_mov_b32_e32 v9, v11 ; encoding: [0x0b,0x03,0x12,0x7e]
	; GFX10-NEXT: v_mov_b32_e32 v1, v12 ; encoding: [0x0c,0x03,0x02,0x7e]			; GFX10-NEXT: v_mov_b32_e32 v10, v12 ; encoding: [0x0c,0x03,0x14,0x7e]
	; GFX10-NEXT: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x2c,0x04,0xe9,0xf0,0x0a,0x00,0x40,0x00,0x09,0x02,0x03,0x04,0x05,0x06,0x07,0x08]			; GFX10-NEXT: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x28,0x04,0xe9,0xf0,0x00,0x09,0x40,0x00]
	; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]			; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
	; GFX10-NEXT: global_store_dword v11, v1, s[12:13] ; encoding: [0x00,0x80,0x70,0xdc,0x0b,0x01,0x0c,0x00]			; GFX10-NEXT: v_mov_b32_e32 v0, v9 ; encoding: [0x09,0x03,0x00,0x7e]
				; GFX10-NEXT: global_store_dword v11, v10, s[12:13] ; encoding: [0x00,0x80,0x70,0xdc,0x0b,0x0a,0x0c,0x00]
	; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]			; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)			%v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
	%v.vec = extractvalue {float, i32} %v, 0			%v.vec = extractvalue {float, i32} %v, 0
	%v.err = extractvalue {float, i32} %v, 1			%v.err = extractvalue {float, i32} %v, 1
	store i32 %v.err, i32 addrspace(1)* %out, align 4			store i32 %v.err, i32 addrspace(1)* %out, align 4
	ret float %v.vec			ret float %v.vec
	▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines
	; GFX6789-NEXT: s_waitcnt vmcnt(0)			; GFX6789-NEXT: s_waitcnt vmcnt(0)
	; GFX6789-NEXT: v_mov_b32_e32 v0, v9			; GFX6789-NEXT: v_mov_b32_e32 v0, v9
	; GFX6789-NEXT: v_mov_b32_e32 v1, v10			; GFX6789-NEXT: v_mov_b32_e32 v1, v10
	; GFX6789-NEXT: v_mov_b32_e32 v2, v11			; GFX6789-NEXT: v_mov_b32_e32 v2, v11
	; GFX6789-NEXT: ; return to shader part epilog			; GFX6789-NEXT: ; return to shader part epilog
	;			;
	; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe:			; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v11, v0 ; encoding: [0x00,0x03,0x16,0x7e]			; GFX10-NEXT: v_mov_b32_e32 v9, 0 ; encoding: [0x80,0x02,0x12,0x7e]
	; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]			; GFX10-NEXT: v_mov_b32_e32 v10, v9 ; encoding: [0x09,0x03,0x14,0x7e]
	; GFX10-NEXT: v_mov_b32_e32 v9, v2 ; encoding: [0x02,0x03,0x12,0x7e]			; GFX10-NEXT: v_mov_b32_e32 v11, v9 ; encoding: [0x09,0x03,0x16,0x7e]
	; GFX10-NEXT: v_mov_b32_e32 v10, v1 ; encoding: [0x01,0x03,0x14,0x7e]			; GFX10-NEXT: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x28,0x06,0xe9,0xf0,0x00,0x09,0x40,0x00]
	; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]			; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
	; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]			; GFX10-NEXT: v_mov_b32_e32 v0, v9 ; encoding: [0x09,0x03,0x00,0x7e]
	; GFX10-NEXT: image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x2c,0x06,0xe9,0xf0,0x0b,0x00,0x40,0x00,0x0a,0x09,0x03,0x04,0x05,0x06,0x07,0x08]			; GFX10-NEXT: v_mov_b32_e32 v1, v10 ; encoding: [0x0a,0x03,0x02,0x7e]
	; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]			; GFX10-NEXT: v_mov_b32_e32 v2, v11 ; encoding: [0x0b,0x03,0x04,0x7e]
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)			%v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
	%v.vec = extractvalue {<2 x float>, i32} %v, 0			%v.vec = extractvalue {<2 x float>, i32} %v, 0
	%v.f1 = extractelement <2 x float> %v.vec, i32 0			%v.f1 = extractelement <2 x float> %v.vec, i32 0
	%v.f2 = extractelement <2 x float> %v.vec, i32 1			%v.f2 = extractelement <2 x float> %v.vec, i32 1
	%v.err = extractvalue {<2 x float>, i32} %v, 1			%v.err = extractvalue {<2 x float>, i32} %v, 1
	%v.errf = bitcast i32 %v.err to float			%v.errf = bitcast i32 %v.err to float
	▲ Show 20 Lines • Show All 508 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll

	Show First 20 Lines • Show All 41 Lines • ▼ Show 20 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) {			define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) {
	; GFX10-LABEL: sample_d_3d:			; GFX10-LABEL: sample_d_3d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6			; GFX10-NEXT: v_mov_b32_e32 v15, v8
	; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6			; GFX10-NEXT: v_mov_b32_e32 v8, v0
	; GFX10-NEXT: image_sample_d v[0:3], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16			; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v6
				; GFX10-NEXT: v_mov_b32_e32 v13, v5
				; GFX10-NEXT: v_mov_b32_e32 v12, v4
				; GFX10-NEXT: v_mov_b32_e32 v11, v3
				; GFX10-NEXT: v_mov_b32_e32 v10, v2
				; GFX10-NEXT: v_mov_b32_e32 v9, v1
				; GFX10-NEXT: v_lshl_or_b32 v14, v7, 16, v0
				; GFX10-NEXT: image_sample_d v[0:3], v[8:15], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	;			;
	; GFX10GISEL-LABEL: sample_d_3d:			; GFX10GISEL-LABEL: sample_d_3d:
	; GFX10GISEL: ; %bb.0: ; %main_body			; GFX10GISEL: ; %bb.0: ; %main_body
	; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7
	; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16			; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
	▲ Show 20 Lines • Show All 66 Lines • ▼ Show 20 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
	; GFX10-LABEL: sample_d_cl_2d:			; GFX10-LABEL: sample_d_cl_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4			; GFX10-NEXT: v_mov_b32_e32 v11, v6
	; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4			; GFX10-NEXT: v_mov_b32_e32 v6, v0
	; GFX10-NEXT: image_sample_d_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16			; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v4
				; GFX10-NEXT: v_mov_b32_e32 v9, v3
				; GFX10-NEXT: v_mov_b32_e32 v8, v2
				; GFX10-NEXT: v_mov_b32_e32 v7, v1
				; GFX10-NEXT: v_lshl_or_b32 v10, v5, 16, v0
				; GFX10-NEXT: image_sample_d_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	;			;
	; GFX10GISEL-LABEL: sample_d_cl_2d:			; GFX10GISEL-LABEL: sample_d_cl_2d:
	; GFX10GISEL: ; %bb.0: ; %main_body			; GFX10GISEL: ; %bb.0: ; %main_body
	; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff			; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
	; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16			; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
	Show All 26 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
	; GFX10-LABEL: sample_c_d_cl_2d:			; GFX10-LABEL: sample_c_d_cl_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5			; GFX10-NEXT: v_mov_b32_e32 v13, v7
	; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5			; GFX10-NEXT: v_mov_b32_e32 v7, v0
	; GFX10-NEXT: image_sample_c_d_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16			; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v5
				; GFX10-NEXT: v_mov_b32_e32 v11, v4
				; GFX10-NEXT: v_mov_b32_e32 v10, v3
				; GFX10-NEXT: v_mov_b32_e32 v9, v2
				; GFX10-NEXT: v_mov_b32_e32 v8, v1
				; GFX10-NEXT: v_lshl_or_b32 v12, v6, 16, v0
				; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	;			;
	; GFX10GISEL-LABEL: sample_c_d_cl_2d:			; GFX10GISEL-LABEL: sample_c_d_cl_2d:
	; GFX10GISEL: ; %bb.0: ; %main_body			; GFX10GISEL: ; %bb.0: ; %main_body
	; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff			; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
	; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16			; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
	▲ Show 20 Lines • Show All 106 Lines • ▼ Show 20 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
	; GFX10-LABEL: sample_cd_cl_2d:			; GFX10-LABEL: sample_cd_cl_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4			; GFX10-NEXT: v_mov_b32_e32 v11, v6
	; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4			; GFX10-NEXT: v_mov_b32_e32 v6, v0
	; GFX10-NEXT: image_sample_cd_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16			; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v4
				; GFX10-NEXT: v_mov_b32_e32 v9, v3
				; GFX10-NEXT: v_mov_b32_e32 v8, v2
				; GFX10-NEXT: v_mov_b32_e32 v7, v1
				; GFX10-NEXT: v_lshl_or_b32 v10, v5, 16, v0
				; GFX10-NEXT: image_sample_cd_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	;			;
	; GFX10GISEL-LABEL: sample_cd_cl_2d:			; GFX10GISEL-LABEL: sample_cd_cl_2d:
	; GFX10GISEL: ; %bb.0: ; %main_body			; GFX10GISEL: ; %bb.0: ; %main_body
	; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff			; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
	; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16			; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
	Show All 26 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {			define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
	; GFX10-LABEL: sample_c_cd_cl_2d:			; GFX10-LABEL: sample_c_cd_cl_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5			; GFX10-NEXT: v_mov_b32_e32 v13, v7
	; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5			; GFX10-NEXT: v_mov_b32_e32 v7, v0
	; GFX10-NEXT: image_sample_c_cd_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16			; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v5
				; GFX10-NEXT: v_mov_b32_e32 v11, v4
				; GFX10-NEXT: v_mov_b32_e32 v10, v3
				; GFX10-NEXT: v_mov_b32_e32 v9, v2
				; GFX10-NEXT: v_mov_b32_e32 v8, v1
				; GFX10-NEXT: v_lshl_or_b32 v12, v6, 16, v0
				; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	;			;
	; GFX10GISEL-LABEL: sample_c_cd_cl_2d:			; GFX10GISEL-LABEL: sample_c_cd_cl_2d:
	; GFX10GISEL: ; %bb.0: ; %main_body			; GFX10GISEL: ; %bb.0: ; %main_body
	; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff			; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
	; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16			; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
	; GFX10GISEL-NEXT: v_and_or_b32 v5, v5, v8, v6			; GFX10GISEL-NEXT: v_and_or_b32 v5, v5, v8, v6
	; GFX10GISEL-NEXT: v_and_or_b32 v6, v7, v8, s12			; GFX10GISEL-NEXT: v_and_or_b32 v6, v7, v8, s12
	; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16			; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
	; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)			; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
	; GFX10GISEL-NEXT: ; return to shader part epilog			; GFX10GISEL-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {			define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
	; GFX10-LABEL: sample_c_d_o_2darray_V1:			; GFX10-LABEL: sample_c_d_o_2darray_V1:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6			; GFX10-NEXT: v_mov_b32_e32 v15, v8
	; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6			; GFX10-NEXT: v_mov_b32_e32 v8, v0
	; GFX10-NEXT: image_sample_c_d_o v0, [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16			; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v6
				; GFX10-NEXT: v_mov_b32_e32 v13, v5
				; GFX10-NEXT: v_mov_b32_e32 v12, v4
				; GFX10-NEXT: v_mov_b32_e32 v11, v3
				; GFX10-NEXT: v_mov_b32_e32 v10, v2
				; GFX10-NEXT: v_mov_b32_e32 v9, v1
				; GFX10-NEXT: v_lshl_or_b32 v14, v7, 16, v0
				; GFX10-NEXT: image_sample_c_d_o v0, v[8:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	;			;
	; GFX10GISEL-LABEL: sample_c_d_o_2darray_V1:			; GFX10GISEL-LABEL: sample_c_d_o_2darray_V1:
	; GFX10GISEL: ; %bb.0: ; %main_body			; GFX10GISEL: ; %bb.0: ; %main_body
	; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7
	; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16			; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
	; GFX10GISEL-NEXT: v_and_or_b32 v6, v6, v9, v7			; GFX10GISEL-NEXT: v_and_or_b32 v6, v6, v9, v7
	; GFX10GISEL-NEXT: v_and_or_b32 v7, v8, v9, s12			; GFX10GISEL-NEXT: v_and_or_b32 v7, v8, v9, s12
	; GFX10GISEL-NEXT: image_sample_c_d_o v0, v[0:7], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16			; GFX10GISEL-NEXT: image_sample_c_d_o v0, v[0:7], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
	; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)			; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
	; GFX10GISEL-NEXT: ; return to shader part epilog			; GFX10GISEL-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret float %v			ret float %v
	}			}

	define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {			define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
	; GFX10-LABEL: sample_c_d_o_2darray_V2:			; GFX10-LABEL: sample_c_d_o_2darray_V2:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6			; GFX10-NEXT: v_mov_b32_e32 v15, v8
	; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6			; GFX10-NEXT: v_mov_b32_e32 v8, v0
	; GFX10-NEXT: image_sample_c_d_o v[0:1], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16			; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v6
				; GFX10-NEXT: v_mov_b32_e32 v13, v5
				; GFX10-NEXT: v_mov_b32_e32 v12, v4
				; GFX10-NEXT: v_mov_b32_e32 v11, v3
				; GFX10-NEXT: v_mov_b32_e32 v10, v2
				; GFX10-NEXT: v_mov_b32_e32 v9, v1
				; GFX10-NEXT: v_lshl_or_b32 v14, v7, 16, v0
				; GFX10-NEXT: image_sample_c_d_o v[0:1], v[8:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	;			;
	; GFX10GISEL-LABEL: sample_c_d_o_2darray_V2:			; GFX10GISEL-LABEL: sample_c_d_o_2darray_V2:
	; GFX10GISEL: ; %bb.0: ; %main_body			; GFX10GISEL: ; %bb.0: ; %main_body
	; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7
	; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16			; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
	▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {			define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
	; GFX10-LABEL: sample_g16_noa16_d_3d:			; GFX10-LABEL: sample_g16_noa16_d_3d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v9, v3
	; GFX10-NEXT: v_and_b32_e32 v3, v9, v3			; GFX10-NEXT: v_mov_b32_e32 v3, v2
	; GFX10-NEXT: v_and_b32_e32 v0, v9, v0			; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff
	; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3			; GFX10-NEXT: v_and_b32_e32 v9, v2, v9
	; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0			; GFX10-NEXT: v_and_b32_e32 v0, v2, v0
	; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D			; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v9
				; GFX10-NEXT: v_lshl_or_b32 v2, v1, 16, v0
				; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	;			;
	; GFX10GISEL-LABEL: sample_g16_noa16_d_3d:			; GFX10GISEL-LABEL: sample_g16_noa16_d_3d:
	; GFX10GISEL: ; %bb.0: ; %main_body			; GFX10GISEL: ; %bb.0: ; %main_body
	; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v2
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1			; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v3
				; GFX10GISEL-NEXT: v_mov_b32_e32 v11, 0xffff
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
				; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
	; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16			; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
	; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v9, v1			; GFX10GISEL-NEXT: v_and_or_b32 v3, v9, v11, s12
	; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v9, s12			; GFX10GISEL-NEXT: v_and_or_b32 v4, v10, v11, v4
	; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v9, v4			; GFX10GISEL-NEXT: v_and_or_b32 v2, v0, v11, v1
	; GFX10GISEL-NEXT: v_and_or_b32 v3, v5, v9, s12			; GFX10GISEL-NEXT: v_and_or_b32 v5, v5, v11, s12
	; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D			; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
	; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)			; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
	; GFX10GISEL-NEXT: ; return to shader part epilog			; GFX10GISEL-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {			define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
	▲ Show 20 Lines • Show All 111 Lines • ▼ Show 20 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {			define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
	; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d:			; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v8, v2
	; GFX10-NEXT: v_and_b32_e32 v3, v8, v3			; GFX10-NEXT: v_mov_b32_e32 v2, v0
	; GFX10-NEXT: v_and_b32_e32 v1, v8, v1			; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3			; GFX10-NEXT: v_and_b32_e32 v3, v0, v3
	; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1			; GFX10-NEXT: v_and_b32_e32 v0, v0, v1
	; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D			; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v3
				; GFX10-NEXT: v_lshl_or_b32 v3, v8, 16, v0
				; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	;			;
	; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_2d:			; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_2d:
	; GFX10GISEL: ; %bb.0: ; %main_body			; GFX10GISEL: ; %bb.0: ; %main_body
	; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff			; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2			; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
				; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
				; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
	; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v8, v2			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v8
	; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v8, v4			; GFX10GISEL-NEXT: v_and_or_b32 v4, v9, v0, v4
	; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D			; GFX10GISEL-NEXT: v_and_or_b32 v3, v1, v0, v3
				; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
	; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)			; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
	; GFX10GISEL-NEXT: ; return to shader part epilog			; GFX10GISEL-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_g16_noa16_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {			define amdgpu_ps <4 x float> @sample_g16_noa16_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
	▲ Show 20 Lines • Show All 159 Lines • ▼ Show 20 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {			define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
	; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d:			; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v8, v2
	; GFX10-NEXT: v_and_b32_e32 v3, v8, v3			; GFX10-NEXT: v_mov_b32_e32 v2, v0
	; GFX10-NEXT: v_and_b32_e32 v1, v8, v1			; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3			; GFX10-NEXT: v_and_b32_e32 v3, v0, v3
	; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1			; GFX10-NEXT: v_and_b32_e32 v0, v0, v1
	; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D			; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v3
				; GFX10-NEXT: v_lshl_or_b32 v3, v8, 16, v0
				; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	;			;
	; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_2d:			; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_2d:
	; GFX10GISEL: ; %bb.0: ; %main_body			; GFX10GISEL: ; %bb.0: ; %main_body
	; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff			; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2			; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
				; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
				; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
	; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v8, v2			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v8
	; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v8, v4			; GFX10GISEL-NEXT: v_and_or_b32 v4, v9, v0, v4
	; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D			; GFX10GISEL-NEXT: v_and_or_b32 v3, v1, v0, v3
				; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
	; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)			; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
	; GFX10GISEL-NEXT: ; return to shader part epilog			; GFX10GISEL-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {			define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
	; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1:			; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v9, v2
	; GFX10-NEXT: v_and_b32_e32 v4, v9, v4			; GFX10-NEXT: v_mov_b32_e32 v2, v0
	; GFX10-NEXT: v_and_b32_e32 v2, v9, v2			; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4			; GFX10-NEXT: v_mov_b32_e32 v10, v3
	; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2			; GFX10-NEXT: v_mov_b32_e32 v3, v1
	; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY			; GFX10-NEXT: v_and_b32_e32 v1, v0, v4
				; GFX10-NEXT: v_and_b32_e32 v0, v0, v9
				; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v1
				; GFX10-NEXT: v_lshl_or_b32 v4, v10, 16, v0
				; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	;			;
	; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V1:			; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
	; GFX10GISEL: ; %bb.0: ; %main_body			; GFX10GISEL: ; %bb.0: ; %main_body
	; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3			; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v2
				; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
				; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1
				; GFX10GISEL-NEXT: v_mov_b32_e32 v11, v4
				; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
	; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v9, v3			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v9
	; GFX10GISEL-NEXT: v_and_or_b32 v3, v4, v9, v5			; GFX10GISEL-NEXT: v_and_or_b32 v5, v11, v0, v5
	; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY			; GFX10GISEL-NEXT: v_and_or_b32 v4, v10, v0, v1
				; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
	; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)			; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
	; GFX10GISEL-NEXT: ; return to shader part epilog			; GFX10GISEL-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret float %v			ret float %v
	}			}

	define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {			define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
	; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2:			; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v9, v2
	; GFX10-NEXT: v_and_b32_e32 v4, v9, v4			; GFX10-NEXT: v_mov_b32_e32 v2, v0
	; GFX10-NEXT: v_and_b32_e32 v2, v9, v2			; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4			; GFX10-NEXT: v_mov_b32_e32 v10, v3
	; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2			; GFX10-NEXT: v_mov_b32_e32 v3, v1
	; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY			; GFX10-NEXT: v_and_b32_e32 v1, v0, v4
				; GFX10-NEXT: v_and_b32_e32 v0, v0, v9
				; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v1
				; GFX10-NEXT: v_lshl_or_b32 v4, v10, 16, v0
				; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	;			;
	; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V2:			; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
	; GFX10GISEL: ; %bb.0: ; %main_body			; GFX10GISEL: ; %bb.0: ; %main_body
	; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3			; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v2
				; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
				; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1
				; GFX10GISEL-NEXT: v_mov_b32_e32 v11, v4
				; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
	; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v9, v3			; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v9
	; GFX10GISEL-NEXT: v_and_or_b32 v3, v4, v9, v5			; GFX10GISEL-NEXT: v_and_or_b32 v5, v11, v0, v5
	; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY			; GFX10GISEL-NEXT: v_and_or_b32 v4, v10, v0, v1
				; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
	; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)			; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
	; GFX10GISEL-NEXT: ; return to shader part epilog			; GFX10GISEL-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <2 x float> %v			ret <2 x float> %v
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	Show All 24 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll

	Show All 25 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {			define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
	; GFX10-LABEL: sample_d_3d:			; GFX10-LABEL: sample_d_3d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff ; encoding: [0xff,0x02,0x12,0x7e,0xff,0xff,0x00,0x00]			; GFX10-NEXT: v_mov_b32_e32 v9, v3 ; encoding: [0x03,0x03,0x12,0x7e]
	; GFX10-NEXT: v_and_b32_e32 v3, v9, v3 ; encoding: [0x09,0x07,0x06,0x36]			; GFX10-NEXT: v_mov_b32_e32 v3, v2 ; encoding: [0x02,0x03,0x06,0x7e]
	; GFX10-NEXT: v_and_b32_e32 v0, v9, v0 ; encoding: [0x09,0x01,0x00,0x36]			; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff ; encoding: [0xff,0x02,0x04,0x7e,0xff,0xff,0x00,0x00]
	; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; encoding: [0x03,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]			; GFX10-NEXT: v_and_b32_e32 v9, v2, v9 ; encoding: [0x02,0x13,0x12,0x36]
	; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; encoding: [0x00,0x00,0x6f,0xd7,0x01,0x21,0x01,0x04]			; GFX10-NEXT: v_and_b32_e32 v0, v2, v0 ; encoding: [0x02,0x01,0x00,0x36]
	; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x15,0x0f,0x88,0xf0,0x00,0x00,0x40,0x00,0x02,0x03,0x05,0x06,0x07,0x08,0x00,0x00]			; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v9 ; encoding: [0x04,0x00,0x6f,0xd7,0x04,0x21,0x25,0x04]
				; GFX10-NEXT: v_lshl_or_b32 v2, v1, 16, v0 ; encoding: [0x02,0x00,0x6f,0xd7,0x01,0x21,0x01,0x04]
				; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x11,0x0f,0x88,0xf0,0x02,0x00,0x40,0x00]
	; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]			; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {			define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
	▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {			define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
	; GFX10-LABEL: sample_c_d_cl_2d:			; GFX10-LABEL: sample_c_d_cl_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff ; encoding: [0xff,0x02,0x10,0x7e,0xff,0xff,0x00,0x00]			; GFX10-NEXT: v_mov_b32_e32 v8, v2 ; encoding: [0x02,0x03,0x10,0x7e]
	; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 ; encoding: [0x08,0x07,0x06,0x36]			; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
	; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 ; encoding: [0x08,0x03,0x02,0x36]			; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff ; encoding: [0xff,0x02,0x00,0x7e,0xff,0xff,0x00,0x00]
	; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; encoding: [0x03,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]			; GFX10-NEXT: v_and_b32_e32 v3, v0, v3 ; encoding: [0x00,0x07,0x06,0x36]
	; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 ; encoding: [0x01,0x00,0x6f,0xd7,0x02,0x21,0x05,0x04]			; GFX10-NEXT: v_and_b32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x36]
	; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0d,0x0f,0xac,0xf0,0x00,0x00,0x40,0x00,0x01,0x03,0x05,0x06,0x07,0x00,0x00,0x00]			; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v3 ; encoding: [0x04,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]
				; GFX10-NEXT: v_lshl_or_b32 v3, v8, 16, v0 ; encoding: [0x03,0x00,0x6f,0xd7,0x08,0x21,0x01,0x04]
				; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x09,0x0f,0xac,0xf0,0x02,0x00,0x40,0x00]
	; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]			; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {			define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
	▲ Show 20 Lines • Show All 86 Lines • ▼ Show 20 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {			define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
	; GFX10-LABEL: sample_c_cd_cl_2d:			; GFX10-LABEL: sample_c_cd_cl_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff ; encoding: [0xff,0x02,0x10,0x7e,0xff,0xff,0x00,0x00]			; GFX10-NEXT: v_mov_b32_e32 v8, v2 ; encoding: [0x02,0x03,0x10,0x7e]
	; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 ; encoding: [0x08,0x07,0x06,0x36]			; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
	; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 ; encoding: [0x08,0x03,0x02,0x36]			; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff ; encoding: [0xff,0x02,0x00,0x7e,0xff,0xff,0x00,0x00]
	; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; encoding: [0x03,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]			; GFX10-NEXT: v_and_b32_e32 v3, v0, v3 ; encoding: [0x00,0x07,0x06,0x36]
	; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 ; encoding: [0x01,0x00,0x6f,0xd7,0x02,0x21,0x05,0x04]			; GFX10-NEXT: v_and_b32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x36]
	; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0d,0x0f,0xac,0xf1,0x00,0x00,0x40,0x00,0x01,0x03,0x05,0x06,0x07,0x00,0x00,0x00]			; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v3 ; encoding: [0x04,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]
				; GFX10-NEXT: v_lshl_or_b32 v3, v8, 16, v0 ; encoding: [0x03,0x00,0x6f,0xd7,0x08,0x21,0x01,0x04]
				; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x09,0x0f,0xac,0xf1,0x02,0x00,0x40,0x00]
	; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]			; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {			define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
	; GFX10-LABEL: sample_c_d_o_2darray_V1:			; GFX10-LABEL: sample_c_d_o_2darray_V1:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff ; encoding: [0xff,0x02,0x12,0x7e,0xff,0xff,0x00,0x00]			; GFX10-NEXT: v_mov_b32_e32 v9, v2 ; encoding: [0x02,0x03,0x12,0x7e]
	; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 ; encoding: [0x09,0x09,0x08,0x36]			; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
	; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 ; encoding: [0x09,0x05,0x04,0x36]			; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff ; encoding: [0xff,0x02,0x00,0x7e,0xff,0xff,0x00,0x00]
	; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 ; encoding: [0x04,0x00,0x6f,0xd7,0x05,0x21,0x11,0x04]			; GFX10-NEXT: v_mov_b32_e32 v10, v3 ; encoding: [0x03,0x03,0x14,0x7e]
	; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 ; encoding: [0x02,0x00,0x6f,0xd7,0x03,0x21,0x09,0x04]			; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; encoding: [0x01,0x03,0x06,0x7e]
	; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2d,0x04,0xe8,0xf0,0x00,0x00,0x40,0x00,0x01,0x02,0x04,0x06,0x07,0x08,0x00,0x00]			; GFX10-NEXT: v_and_b32_e32 v1, v0, v4 ; encoding: [0x00,0x09,0x02,0x36]
				; GFX10-NEXT: v_and_b32_e32 v0, v0, v9 ; encoding: [0x00,0x13,0x00,0x36]
				; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v1 ; encoding: [0x05,0x00,0x6f,0xd7,0x05,0x21,0x05,0x04]
				; GFX10-NEXT: v_lshl_or_b32 v4, v10, 16, v0 ; encoding: [0x04,0x00,0x6f,0xd7,0x0a,0x21,0x01,0x04]
				; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x29,0x04,0xe8,0xf0,0x02,0x00,0x40,0x00]
	; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]			; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret float %v			ret float %v
	}			}

	define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {			define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
	; GFX10-LABEL: sample_c_d_o_2darray_V2:			; GFX10-LABEL: sample_c_d_o_2darray_V2:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff ; encoding: [0xff,0x02,0x12,0x7e,0xff,0xff,0x00,0x00]			; GFX10-NEXT: v_mov_b32_e32 v9, v2 ; encoding: [0x02,0x03,0x12,0x7e]
	; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 ; encoding: [0x09,0x09,0x08,0x36]			; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
	; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 ; encoding: [0x09,0x05,0x04,0x36]			; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff ; encoding: [0xff,0x02,0x00,0x7e,0xff,0xff,0x00,0x00]
	; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 ; encoding: [0x04,0x00,0x6f,0xd7,0x05,0x21,0x11,0x04]			; GFX10-NEXT: v_mov_b32_e32 v10, v3 ; encoding: [0x03,0x03,0x14,0x7e]
	; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 ; encoding: [0x02,0x00,0x6f,0xd7,0x03,0x21,0x09,0x04]			; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; encoding: [0x01,0x03,0x06,0x7e]
	; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2d,0x06,0xe8,0xf0,0x00,0x00,0x40,0x00,0x01,0x02,0x04,0x06,0x07,0x08,0x00,0x00]			; GFX10-NEXT: v_and_b32_e32 v1, v0, v4 ; encoding: [0x00,0x09,0x02,0x36]
				; GFX10-NEXT: v_and_b32_e32 v0, v0, v9 ; encoding: [0x00,0x13,0x00,0x36]
				; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v1 ; encoding: [0x05,0x00,0x6f,0xd7,0x05,0x21,0x05,0x04]
				; GFX10-NEXT: v_lshl_or_b32 v4, v10, 16, v0 ; encoding: [0x04,0x00,0x6f,0xd7,0x0a,0x21,0x01,0x04]
				; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x29,0x06,0xe8,0xf0,0x02,0x00,0x40,0x00]
	; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]			; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <2 x float> %v			ret <2 x float> %v
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	Show All 24 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll

	Show All 25 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {			define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
	; GFX10-LABEL: sample_d_3d:			; GFX10-LABEL: sample_d_3d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v9, v3
	; GFX10-NEXT: v_and_b32_e32 v3, v9, v3			; GFX10-NEXT: v_mov_b32_e32 v3, v2
	; GFX10-NEXT: v_and_b32_e32 v0, v9, v0			; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff
	; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3			; GFX10-NEXT: v_and_b32_e32 v9, v2, v9
	; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0			; GFX10-NEXT: v_and_b32_e32 v0, v2, v0
	; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D			; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v9
				; GFX10-NEXT: v_lshl_or_b32 v2, v1, 16, v0
				; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {			define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
	▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {			define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
	; GFX10-LABEL: sample_c_d_cl_2d:			; GFX10-LABEL: sample_c_d_cl_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v8, v2
	; GFX10-NEXT: v_and_b32_e32 v3, v8, v3			; GFX10-NEXT: v_mov_b32_e32 v2, v0
	; GFX10-NEXT: v_and_b32_e32 v1, v8, v1			; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3			; GFX10-NEXT: v_and_b32_e32 v3, v0, v3
	; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1			; GFX10-NEXT: v_and_b32_e32 v0, v0, v1
	; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D			; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v3
				; GFX10-NEXT: v_lshl_or_b32 v3, v8, 16, v0
				; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {			define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
	▲ Show 20 Lines • Show All 86 Lines • ▼ Show 20 Lines
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {			define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
	; GFX10-LABEL: sample_c_cd_cl_2d:			; GFX10-LABEL: sample_c_cd_cl_2d:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v8, v2
	; GFX10-NEXT: v_and_b32_e32 v3, v8, v3			; GFX10-NEXT: v_mov_b32_e32 v2, v0
	; GFX10-NEXT: v_and_b32_e32 v1, v8, v1			; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3			; GFX10-NEXT: v_and_b32_e32 v3, v0, v3
	; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1			; GFX10-NEXT: v_and_b32_e32 v0, v0, v1
	; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D			; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v3
				; GFX10-NEXT: v_lshl_or_b32 v3, v8, 16, v0
				; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <4 x float> %v			ret <4 x float> %v
	}			}

	define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {			define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
	; GFX10-LABEL: sample_c_d_o_2darray_V1:			; GFX10-LABEL: sample_c_d_o_2darray_V1:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v9, v2
	; GFX10-NEXT: v_and_b32_e32 v4, v9, v4			; GFX10-NEXT: v_mov_b32_e32 v2, v0
	; GFX10-NEXT: v_and_b32_e32 v2, v9, v2			; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4			; GFX10-NEXT: v_mov_b32_e32 v10, v3
	; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2			; GFX10-NEXT: v_mov_b32_e32 v3, v1
	; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY			; GFX10-NEXT: v_and_b32_e32 v1, v0, v4
				; GFX10-NEXT: v_and_b32_e32 v0, v0, v9
				; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v1
				; GFX10-NEXT: v_lshl_or_b32 v4, v10, 16, v0
				; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret float %v			ret float %v
	}			}

	define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {			define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
	; GFX10-LABEL: sample_c_d_o_2darray_V2:			; GFX10-LABEL: sample_c_d_o_2darray_V2:
	; GFX10: ; %bb.0: ; %main_body			; GFX10: ; %bb.0: ; %main_body
	; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff			; GFX10-NEXT: v_mov_b32_e32 v9, v2
	; GFX10-NEXT: v_and_b32_e32 v4, v9, v4			; GFX10-NEXT: v_mov_b32_e32 v2, v0
	; GFX10-NEXT: v_and_b32_e32 v2, v9, v2			; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff
	; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4			; GFX10-NEXT: v_mov_b32_e32 v10, v3
	; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2			; GFX10-NEXT: v_mov_b32_e32 v3, v1
	; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY			; GFX10-NEXT: v_and_b32_e32 v1, v0, v4
				; GFX10-NEXT: v_and_b32_e32 v0, v0, v9
				; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v1
				; GFX10-NEXT: v_lshl_or_b32 v4, v10, 16, v0
				; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: ; return to shader part epilog			; GFX10-NEXT: ; return to shader part epilog
	main_body:			main_body:
	%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)			%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
	ret <2 x float> %v			ret <2 x float> %v
	}			}

	declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1			declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
	Show All 24 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Add maximum NSA size limit ISA feature
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 361108

llvm/lib/Target/AMDGPU/AMDGPU.td

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

llvm/lib/Target/AMDGPU/GCNSubtarget.h

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.dim.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.o.dim.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll

llvm/test/CodeGen/AMDGPU/cluster_stores.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Add maximum NSA size limit ISA feature
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 361108

llvm/lib/Target/AMDGPU/AMDGPU.td

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

llvm/lib/Target/AMDGPU/GCNSubtarget.h

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.dim.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.o.dim.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll

llvm/test/CodeGen/AMDGPU/cluster_stores.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll

[AMDGPU] Add maximum NSA size limit ISA feature
ClosedPublic